From 4b36bb7138e5f7e62b855746f27c920972f05601 Mon Sep 17 00:00:00 2001 From: Uku Taht Date: Thu, 17 Sep 2020 16:36:01 +0300 Subject: [PATCH] Use clickhouse_ecto for db connection (#317) * Use clickhouse-ecto for stats * Use clickhouse ecto instead of low-level clickhousex * Remove defaults from event schema * Remove all references to Clickhousex * Document configuration change * Ensure createdb and migrations can be run in a release * Remove config added for debug * Update plausible_variables.sample.env --- .gitlab-ci.yml | 125 ------ CHANGELOG.md | 7 + HOSTING.md | 8 +- config/config.exs | 17 +- config/releases.exs | 29 +- config/test.exs | 15 +- lib/mix/tasks/clean_clickhouse.ex | 10 + lib/plausible/application.ex | 6 +- lib/plausible/clickhouse.ex | 117 ----- lib/plausible/clickhouse_repo.ex | 20 + lib/plausible/event/write_buffer.ex | 4 +- lib/plausible/session/store.ex | 6 +- lib/plausible/session/write_buffer.ex | 3 +- lib/plausible/stats/clickhouse.ex | 419 +++++++++--------- lib/plausible_release.ex | 101 +---- .../controllers/api/external_controller.ex | 16 +- .../controllers/site_controller.ex | 4 +- .../templates/email/weekly_report.html.eex | 8 +- lib/workers/fetch_tweets.ex | 4 +- mix.exs | 6 +- mix.lock | 9 +- plausible-variables.sample.env | 5 +- ...00915070607_create_events_and_sessions.exs | 52 +++ .../api/external_controller_test.exs | 74 ++-- .../api/stats_controller/referrers_test.exs | 4 +- test/support/clickhouse_setup.ex | 70 +-- test/support/factory.ex | 16 +- test/support/test_utils.ex | 21 +- 28 files changed, 425 insertions(+), 751 deletions(-) delete mode 100644 .gitlab-ci.yml create mode 100644 CHANGELOG.md create mode 100644 lib/mix/tasks/clean_clickhouse.ex delete mode 100644 lib/plausible/clickhouse.ex create mode 100644 lib/plausible/clickhouse_repo.ex create mode 100644 priv/clickhouse_repo/migrations/20200915070607_create_events_and_sessions.exs diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml deleted file mode 100644 index f7e6a0e8d..000000000 --- a/.gitlab-ci.yml +++ /dev/null @@ -1,125 +0,0 @@ -include: - - template: Container-Scanning.gitlab-ci.yml - - template: License-Scanning.gitlab-ci.yml - - template: SAST.gitlab-ci.yml - -stages: - - prepare - - compile - - test - - build - - postbuild - -.commons: &elixir-commons - image: elixir:1.10.3 - cache: - key: ${CI_COMMIT_REF_SLUG} - paths: - - $CI_PROJECT_DIR/.mix - - $CI_PROJECT_DIR/priv/plts - - ~/.hex/ - before_script: - - mkdir -p $CI_PROJECT_DIR/priv/plts/ - - mix local.hex --force && mix local.rebar --force - - chmod +x .gitlab/build-scripts/* - - source .gitlab/build-scripts/docker.gitlab.sh - -deps: - <<: *elixir-commons - stage: prepare - variables: - MIX_HOME: $CI_PROJECT_DIR/.mix - script: - - mix deps.get - dependencies: [] - artifacts: - paths: - - mix.lock - - deps - -compile: - <<: *elixir-commons - stage: compile - script: - - mix compile - dependencies: - - deps - artifacts: - paths: - - mix.lock - - _build - - deps - - -license_scanning: - stage: compile - dependencies: - - deps - -sast: - stage: compile - -test:ex_unit: - <<: *elixir-commons - services: - - postgres - - name: yandex/clickhouse-server:20.3.9.70 - alias: clickhouse - stage: test - variables: - POSTGRES_PASSWORD: postgres - POSTGRES_USER: postgres - DATABASE_URL: postgres://postgres:postgres@postgres:5432/plausible_test?currentSchema=default - CLICKHOUSE_DATABASE_HOST: clickhouse - CLICKHOUSE_DATABASE_NAME: plausible_test - MIX_HOME: $CI_PROJECT_DIR/.mix - before_script: - - apt update && apt install -y clickhouse-client - - clickhouse-client --host clickhouse --query "CREATE DATABASE IF NOT EXISTS plausible_test" - script: - - mix test --cover - coverage: '/\[TOTAL\]\s+(\d+\.\d+)%/' - dependencies: - - compile - artifacts: - reports: - junit: plausible-report.xml - -build:docker: - <<: *elixir-commons - image: - name: gcr.io/kaniko-project/executor:debug - entrypoint: [""] - stage: build - variables: - MIX_ENV: prod - MIX_HOME: $CI_PROJECT_DIR/.mix/ - APP_VERSION: $CI_COMMIT_SHORT_SHA - before_script: - - chmod +x .gitlab/build-scripts/* - - source .gitlab/build-scripts/docker.gitlab.sh - - docker_create_config - script: - - docker_build_image - dependencies: - - compile - only: - - master - -deploy:plausible: - stage: postbuild - script: - - "curl -X POST -F token=$PLAUSIBLE_DEPLOY_TOKEN -F ref=master -F variables[IMAGE_TAG]=${CI_COMMIT_REF_SLUG}-${CI_COMMIT_SHORT_SHA} $PLAUSIBLE_DEPLOY_PROJECT" - only: - - master - -container_scanning: - stage: postbuild - image: registry.gitlab.com/gitlab-org/security-products/analyzers/klar:$CS_MAJOR_VERSION - variables: - CS_MAJOR_VERSION: 2 - KLAR_TRACE: "true" - CLAIR_TRACE: "true" - CLAIR_OUTPUT: "medium" - CI_APPLICATION_REPOSITORY: $CI_REGISTRY_IMAGE - CI_APPLICATION_TAG: ${CI_COMMIT_REF_SLUG}-$CI_COMMIT_SHORT_SHA diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 000000000..82fd85dd9 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,7 @@ +# Changelog +All notable changes to this project will be documented in this file. + +## [1.0.0] - Unreleased + +### Changed +- Replace configuration parameters `CLICKHOUSE_DATABASE_{HOST,NAME,USER,PASSWORD}` with a single `CLICKHOUSE_DATABASE_URL` diff --git a/HOSTING.md b/HOSTING.md index ca512b165..6c8187678 100644 --- a/HOSTING.md +++ b/HOSTING.md @@ -169,12 +169,10 @@ Plausible uses [postgresql as database](https://www.tutorialspoint.com/postgresq For performance reasons, all the analytics events are stored in [clickhouse](https://clickhouse.tech/docs/en/getting-started/tutorial/): -- CLICKHOUSE_DATABASE_HOST (*String*) -- CLICKHOUSE_DATABASE_NAME (*String*) -- CLICKHOUSE_DATABASE_USER (*String*) -- CLICKHOUSE_DATABASE_PASSWORD (*String*) +- CLICKHOUSE_DATABASE_URL (*String*) + - Connection string for Clickhouse. The protocol is either `http` or `https` depending on your setup. - CLICKHOUSE_DATABASE_POOLSIZE (*Number*) - - A default pool size for connecting to the database, defaults to *10*, a higher number is recommended for a production system. + - A default pool size for connecting to the database, defaults to *10*, a higher number is recommended for a production system. ### IP Geolocation diff --git a/config/config.exs b/config/config.exs index 3a578fb1a..b551d9489 100644 --- a/config/config.exs +++ b/config/config.exs @@ -5,7 +5,7 @@ config :plausible, admin_email: System.get_env("ADMIN_USER_EMAIL", "admin@plausible.local"), mailer_email: System.get_env("MAILER_EMAIL", "hello@plausible.local"), admin_pwd: System.get_env("ADMIN_USER_PWD", "!@d3in"), - ecto_repos: [Plausible.Repo], + ecto_repos: [Plausible.Repo, Plausible.ClickhouseRepo], environment: System.get_env("ENVIRONMENT", "dev") disable_auth = String.to_existing_atom(System.get_env("DISABLE_AUTH", "false")) @@ -19,13 +19,6 @@ config :plausible, :selfhost, else: String.to_existing_atom(System.get_env("DISABLE_REGISTRATION", "false")) ) -config :plausible, :clickhouse, - hostname: System.get_env("CLICKHOUSE_DATABASE_HOST", "localhost"), - database: System.get_env("CLICKHOUSE_DATABASE_NAME", "plausible_dev"), - username: System.get_env("CLICKHOUSE_DATABASE_USER"), - password: System.get_env("CLICKHOUSE_DATABASE_PASSWORD"), - pool_size: 10 - # Configures the endpoint config :plausible, PlausibleWeb.Endpoint, url: [ @@ -82,6 +75,14 @@ config :plausible, :paddle, vendor_id: "49430", vendor_auth_code: System.get_env("PADDLE_VENDOR_AUTH_CODE") +config :plausible, Plausible.ClickhouseRepo, + loggers: [Ecto.LogEntry], + pool_size: String.to_integer(System.get_env("CLICKHOUSE_DATABASE_POOLSIZE", "5")), + url: System.get_env( + "CLICKHOUSE_DATABASE_URL", + "http://127.0.0.1:8123/plausible_test" + ) + config :plausible, Plausible.Repo, pool_size: String.to_integer(System.get_env("DATABASE_POOLSIZE", "10")), diff --git a/config/releases.exs b/config/releases.exs index 54fc6dbaa..b4c3ee2af 100644 --- a/config/releases.exs +++ b/config/releases.exs @@ -19,7 +19,7 @@ db_pool_size = String.to_integer(System.get_env("DATABASE_POOLSIZE", "10")) db_url = System.get_env( "DATABASE_URL", - "postgres://postgres:postgres@127.0.0.1:5432/plausible_test?currentSchema=default" + "postgres://postgres:postgres@127.0.0.1:5432/plausible_dev" ) db_tls_enabled? = String.to_existing_atom(System.get_env("DATABASE_TLS_ENABLED", "false")) @@ -30,11 +30,8 @@ env = System.get_env("ENVIRONMENT", "prod") mailer_adapter = System.get_env("MAILER_ADAPTER", "Bamboo.PostmarkAdapter") mailer_email = System.get_env("MAILER_EMAIL", "hello@plausible.local") app_version = System.get_env("APP_VERSION", "0.0.1") -ck_host = System.get_env("CLICKHOUSE_DATABASE_HOST", "localhost") -ck_db = System.get_env("CLICKHOUSE_DATABASE_NAME", "plausible_dev") -ck_db_user = System.get_env("CLICKHOUSE_DATABASE_USER") -ck_db_pwd = System.get_env("CLICKHOUSE_DATABASE_PASSWORD") -ck_db_pool = String.to_integer(System.get_env("CLICKHOUSE_DATABASE_POOLSIZE", "10")) +ch_db_url = System.get_env("CLICKHOUSE_DATABASE_URL", "http://localhost:8123/plausible_dev") +ch_db_pool = String.to_integer(System.get_env("CLICKHOUSE_DATABASE_POOLSIZE", "10")) ### Mandatory params End sentry_dsn = System.get_env("SENTRY_DSN") @@ -86,11 +83,11 @@ config :plausible, PlausibleWeb.Endpoint, code_reloader: false config :plausible, - Plausible.Repo, - pool_size: db_pool_size, - url: db_url, - adapter: Ecto.Adapters.Postgres, - ssl: db_tls_enabled? + Plausible.Repo, + pool_size: db_pool_size, + url: db_url, + adapter: Ecto.Adapters.Postgres, + ssl: db_tls_enabled? config :sentry, dsn: sentry_dsn, @@ -107,12 +104,10 @@ config :plausible, :google, config :plausible, :slack, webhook: slack_hook_url -config :plausible, :clickhouse, - hostname: ck_host, - database: ck_db, - username: ck_db_user, - password: ck_db_pwd, - pool_size: ck_db_pool +config :plausible, Plausible.ClickhouseRepo, + loggers: [Ecto.LogEntry], + url: ch_db_url, + pool_size: ch_db_pool case mailer_adapter do "Bamboo.PostmarkAdapter" -> diff --git a/config/test.exs b/config/test.exs index 3725595fd..bb927b8bc 100644 --- a/config/test.exs +++ b/config/test.exs @@ -18,16 +18,17 @@ config :plausible, url: System.get_env( "DATABASE_URL", - "postgres://postgres:postgres@127.0.0.1:5432/plausible_test=default" + "postgres://postgres:postgres@127.0.0.1:5432/plausible_test" ), pool: Ecto.Adapters.SQL.Sandbox -config :plausible, :clickhouse, - hostname: System.get_env("CLICKHOUSE_DATABASE_HOST", "localhost"), - database: System.get_env("CLICKHOUSE_DATABASE_NAME", "plausible_test"), - username: System.get_env("CLICKHOUSE_DATABASE_USER"), - password: System.get_env("CLICKHOUSE_DATABASE_PASSWORD"), - pool_size: 10 +config :plausible, Plausible.ClickhouseRepo, + loggers: [Ecto.LogEntry], + pool_size: String.to_integer(System.get_env("CLICKHOUSE_DATABASE_POOLSIZE", "5")), + url: System.get_env( + "CLICKHOUSE_DATABASE_URL", + "http://127.0.0.1:8123/plausible_test" + ) config :plausible, Plausible.Mailer, adapter: Bamboo.TestAdapter diff --git a/lib/mix/tasks/clean_clickhouse.ex b/lib/mix/tasks/clean_clickhouse.ex new file mode 100644 index 000000000..02c4c97c8 --- /dev/null +++ b/lib/mix/tasks/clean_clickhouse.ex @@ -0,0 +1,10 @@ +defmodule Mix.Tasks.CleanClickhouse do + use Mix.Task + + def run(_) do + clean_events = "ALTER TABLE events DELETE WHERE 1" + clean_sessions = "ALTER TABLE sessions DELETE WHERE 1" + Ecto.Adapters.SQL.query!(Plausible.ClickhouseRepo, clean_events) + Ecto.Adapters.SQL.query!(Plausible.ClickhouseRepo, clean_sessions) + end +end diff --git a/lib/plausible/application.ex b/lib/plausible/application.ex index 199d59b7d..100aab6bd 100644 --- a/lib/plausible/application.ex +++ b/lib/plausible/application.ex @@ -4,16 +4,12 @@ defmodule Plausible.Application do use Application def start(_type, _args) do - clickhouse_config = Application.get_env(:plausible, :clickhouse) - children = [ Plausible.Repo, + Plausible.ClickhouseRepo, PlausibleWeb.Endpoint, Plausible.Event.WriteBuffer, Plausible.Session.WriteBuffer, - Clickhousex.child_spec( - Keyword.merge([scheme: :http, port: 8123, name: :clickhouse], clickhouse_config) - ), Plausible.Session.Store, Plausible.Session.Salts, {Oban, Application.get_env(:plausible, Oban)} diff --git a/lib/plausible/clickhouse.ex b/lib/plausible/clickhouse.ex deleted file mode 100644 index 6a5ed38cf..000000000 --- a/lib/plausible/clickhouse.ex +++ /dev/null @@ -1,117 +0,0 @@ -defmodule Plausible.Clickhouse do - def all(query) do - {q, params} = Ecto.Adapters.SQL.to_sql(:all, Plausible.Repo, query) - params = Enum.map(params, &escape_quote/1) - q = String.replace(q, ~r/\$[0-9]+/, "?") - res = Clickhousex.query!(:clickhouse, q, params, log: {Plausible.Clickhouse, :log, []}) - - Enum.map(res.rows, fn row -> - Enum.zip(res.columns, row) - |> Enum.into(%{}) - end) - end - - def delete_stats!(site) do - delete_events = "ALTER TABLE events DELETE WHERE domain = ?" - delete_sessions = "ALTER TABLE sessions DELETE WHERE domain = ?" - - Clickhousex.query!(:clickhouse, delete_events, [site.domain], - log: {Plausible.Clickhouse, :log, []} - ) - - Clickhousex.query!(:clickhouse, delete_sessions, [site.domain], - log: {Plausible.Clickhouse, :log, []} - ) - end - - def insert_events(events) do - insert = - """ - INSERT INTO events (name, timestamp, domain, user_id, session_id, hostname, pathname, referrer, referrer_source, country_code, screen_size, browser, operating_system) - VALUES - """ <> String.duplicate(" (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?),", length(events)) - - args = - Enum.reduce(events, [], fn event, acc -> - [ - escape_quote(event.name), - event.timestamp, - event.domain, - event.user_id, - event.session_id, - event.hostname, - escape_quote(event.pathname), - escape_quote(event.referrer || ""), - escape_quote(event.referrer_source || ""), - event.country_code || "", - event.screen_size || "", - event.browser || "", - event.operating_system || "" - ] ++ acc - end) - - Clickhousex.query(:clickhouse, insert, args, log: {Plausible.Clickhouse, :log, []}) - end - - def insert_sessions(sessions) do - insert = - """ - INSERT INTO sessions (sign, session_id, domain, user_id, timestamp, hostname, start, is_bounce, entry_page, exit_page, events, pageviews, duration, referrer, referrer_source, country_code, screen_size, browser, operating_system) - VALUES - """ <> - String.duplicate( - " (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?),", - Enum.count(sessions) - ) - - args = - Enum.reduce(sessions, [], fn session, acc -> - [ - session.sign, - session.session_id, - session.domain, - session.user_id, - session.timestamp, - session.hostname, - session.start, - (session.is_bounce && 1) || 0, - escape_quote(session.entry_page), - escape_quote(session.exit_page), - session.events, - session.pageviews, - session.duration, - escape_quote(session.referrer || ""), - escape_quote(session.referrer_source || ""), - session.country_code || "", - session.screen_size || "", - session.browser || "", - session.operating_system || "" - ] ++ acc - end) - - Clickhousex.query(:clickhouse, insert, args, log: {Plausible.Clickhouse, :log, []}) - end - - def escape_quote(l) when is_list(l), do: Enum.map(l, &escape_quote/1) - def escape_quote(s) when is_binary(s), do: String.replace(s, "'", "''") - def escape_quote(thing), do: thing - - def log(query) do - require Logger - - case query.result do - {:ok, _q, _res} -> - timing = System.convert_time_unit(query.connection_time, :native, :millisecond) - Logger.info("Clickhouse query OK db=#{timing}ms") - - e -> - Logger.error("Clickhouse query ERROR") - Logger.error(inspect(e)) - end - - Logger.debug(fn -> - statement = String.replace(query.query.statement, "\n", " ") - "#{statement} #{inspect(query.params)}" - end) - end -end diff --git a/lib/plausible/clickhouse_repo.ex b/lib/plausible/clickhouse_repo.ex new file mode 100644 index 000000000..6304c4950 --- /dev/null +++ b/lib/plausible/clickhouse_repo.ex @@ -0,0 +1,20 @@ +defmodule Plausible.ClickhouseRepo do + use Ecto.Repo, + otp_app: :plausible, + adapter: ClickhouseEcto + + defmacro __using__(_) do + quote do + alias Plausible.ClickhouseRepo + import Ecto + import Ecto.Query, only: [from: 1, from: 2] + end + end + + def clear_stats_for(domain) do + events_sql = "ALTER TABLE events DELETE WHERE domain = ?" + sessions_sql = "ALTER TABLE sessions DELETE WHERE domain = ?" + Ecto.Adapters.SQL.query!(__MODULE__, events_sql, [domain]) + Ecto.Adapters.SQL.query!(__MODULE__, sessions_sql, [domain]) + end +end diff --git a/lib/plausible/event/write_buffer.ex b/lib/plausible/event/write_buffer.ex index 2811c6a0b..723ca1fe3 100644 --- a/lib/plausible/event/write_buffer.ex +++ b/lib/plausible/event/write_buffer.ex @@ -1,7 +1,6 @@ defmodule Plausible.Event.WriteBuffer do use GenServer require Logger - alias Plausible.Clickhouse @flush_interval_ms 5_000 @max_buffer_size 10_000 @@ -64,7 +63,8 @@ defmodule Plausible.Event.WriteBuffer do events -> Logger.info("Flushing #{length(events)} events") - Clickhouse.insert_events(events) + events = Enum.map(events, &(Map.from_struct(&1) |> Map.delete(:__meta__))) + Plausible.ClickhouseRepo.insert_all(Plausible.ClickhouseEvent, events) end end end diff --git a/lib/plausible/session/store.ex b/lib/plausible/session/store.ex index e16703ba8..f2d57a370 100644 --- a/lib/plausible/session/store.ex +++ b/lib/plausible/session/store.ex @@ -26,14 +26,13 @@ defmodule Plausible.Session.Store do sessions = try do - Plausible.Clickhouse.all( + Plausible.ClickhouseRepo.all( from s in Plausible.ClickhouseSession, join: ls in subquery(latest_sessions), on: s.session_id == ls.session_id and s.timestamp == ls.timestamp, order_by: s.timestamp ) - |> Enum.map(fn s -> Map.new(s, fn {k, v} -> {String.to_atom(k), v} end) end) - |> Enum.map(fn s -> {s[:user_id], struct(Plausible.ClickhouseSession, s)} end) + |> Enum.map(fn s -> {s.user_id, s} end) |> Enum.into(%{}) rescue _e -> %{} @@ -106,6 +105,7 @@ defmodule Plausible.Session.Store do referrer: event.referrer, referrer_source: event.referrer_source, country_code: event.country_code, + screen_size: event.screen_size, operating_system: event.operating_system, browser: event.browser, timestamp: event.timestamp, diff --git a/lib/plausible/session/write_buffer.ex b/lib/plausible/session/write_buffer.ex index a0736aba8..24e9222c9 100644 --- a/lib/plausible/session/write_buffer.ex +++ b/lib/plausible/session/write_buffer.ex @@ -51,7 +51,8 @@ defmodule Plausible.Session.WriteBuffer do sessions -> Logger.info("Flushing #{length(sessions)} sessions") - Plausible.Clickhouse.insert_sessions(sessions) + sessions = Enum.map(sessions, &(Map.from_struct(&1) |> Map.delete(:__meta__))) + Plausible.ClickhouseRepo.insert_all(Plausible.ClickhouseSession, sessions) end end end diff --git a/lib/plausible/stats/clickhouse.ex b/lib/plausible/stats/clickhouse.ex index 34c4f1469..fbed5e1c0 100644 --- a/lib/plausible/stats/clickhouse.ex +++ b/lib/plausible/stats/clickhouse.ex @@ -1,7 +1,7 @@ defmodule Plausible.Stats.Clickhouse do use Plausible.Repo + use Plausible.ClickhouseRepo alias Plausible.Stats.Query - alias Plausible.Clickhouse @no_ref "Direct / None" def compare_pageviews_and_visitors(site, query, {pageviews, visitors}) do @@ -33,16 +33,14 @@ defmodule Plausible.Stats.Clickhouse do end) groups = - Clickhouse.all( + ClickhouseRepo.all( from e in base_query(site, query), select: {fragment("toStartOfMonth(toTimeZone(?, ?)) as month", e.timestamp, ^site.timezone), - fragment("uniq(?) as visitors", e.user_id)}, + fragment("uniq(?)", e.user_id)}, group_by: fragment("month"), order_by: fragment("month") - ) - |> Enum.map(fn row -> {row["month"], row["visitors"]} end) - |> Enum.into(%{}) + ) |> Enum.into(%{}) present_index = Enum.find_index(steps, fn step -> @@ -59,16 +57,14 @@ defmodule Plausible.Stats.Clickhouse do steps = Enum.into(query.date_range, []) groups = - Clickhouse.all( + ClickhouseRepo.all( from e in base_query(site, query), select: {fragment("toDate(toTimeZone(?, ?)) as day", e.timestamp, ^site.timezone), - fragment("uniq(?) as visitors", e.user_id)}, + fragment("uniq(?)", e.user_id)}, group_by: fragment("day"), order_by: fragment("day") - ) - |> Enum.map(fn row -> {row["day"], row["visitors"]} end) - |> Enum.into(%{}) + ) |> Enum.into(%{}) present_index = Enum.find_index(steps, fn step -> step == Timex.now(site.timezone) |> Timex.to_date() end) @@ -84,16 +80,14 @@ defmodule Plausible.Stats.Clickhouse do steps = 0..23 groups = - Clickhouse.all( + ClickhouseRepo.all( from e in base_query(site, query), select: {fragment("toHour(toTimeZone(?, ?)) as hour", e.timestamp, ^site.timezone), - fragment("uniq(?) as visitors", e.user_id)}, + fragment("uniq(?)", e.user_id)}, group_by: fragment("hour"), order_by: fragment("hour") - ) - |> Enum.map(fn row -> {row["hour"], row["visitors"]} end) - |> Enum.into(%{}) + ) |> Enum.into(%{}) now = Timex.now(site.timezone) is_today = Timex.to_date(now) == query.date_range.first @@ -115,17 +109,15 @@ defmodule Plausible.Stats.Clickhouse do query = %Query{query | period: "30m"} groups = - Clickhouse.all( + ClickhouseRepo.all( from e in base_query(site, query), select: { fragment("dateDiff('minute', now(), ?) as relativeMinute", e.timestamp), - fragment("count(*) as pageviews") + fragment("count(*)") }, group_by: fragment("relativeMinute"), order_by: fragment("relativeMinute") - ) - |> Enum.map(fn row -> {row["relativeMinute"], row["pageviews"]} end) - |> Enum.into(%{}) + ) |> Enum.into(%{}) labels = Enum.into(-30..-1, []) plot = Enum.map(labels, fn label -> groups[label] || 0 end) @@ -133,97 +125,78 @@ defmodule Plausible.Stats.Clickhouse do end def bounce_rate(site, query) do - [res] = - Clickhouse.all( - from s in base_session_query(site, query), - select: {fragment("round(sum(is_bounce * sign) / sum(sign) * 100) as bounce_rate")} - ) - - res["bounce_rate"] || 0 + ClickhouseRepo.one( + from s in base_session_query(site, query), + select: fragment("round(sum(is_bounce * sign) / sum(sign) * 100)") + ) || 0 end def visit_duration(site, query) do - [res] = - Clickhouse.all( - from s in base_session_query(site, query), - select: {fragment("round(avg(duration * sign)) as visit_duration")} - ) - - res["visit_duration"] || 0 + ClickhouseRepo.one( + from s in base_session_query(site, query), + select: fragment("round(avg(duration * sign))") + ) || 0 end def total_pageviews(site, %Query{period: "realtime"} = query) do query = %Query{query | period: "30m"} - [res] = - Clickhouse.all( - from e in base_session_query(site, query), - select: fragment("sum(sign * pageviews) as pageviews") - ) - - res["pageviews"] + ClickhouseRepo.one( + from e in base_session_query(site, query), + select: fragment("sum(sign * pageviews)") + ) end def total_events(site, query) do - [res] = - Clickhouse.all( - from e in base_query(site, query), - select: fragment("count(*) as events") - ) - - res["events"] + ClickhouseRepo.one( + from e in base_query(site, query), + select: fragment("count(*) as events") + ) end def pageviews_and_visitors(site, query) do - [res] = - Clickhouse.all( - from e in base_query_w_sessions(site, query), - select: - {fragment("count(*) as pageviews"), - fragment("uniq(user_id) as visitors")} - ) - - {res["pageviews"], res["visitors"]} + ClickhouseRepo.one( + from e in base_query_w_sessions(site, query), + select: {fragment("count(*)"), fragment("uniq(user_id)")} + ) end def unique_visitors(site, query) do - [res] = - Clickhouse.all( - from e in base_query(site, query), - select: fragment("uniq(user_id) as visitors") - ) - - res["visitors"] + ClickhouseRepo.one( + from e in base_query(site, query), + select: fragment("uniq(user_id)") + ) end def top_referrers_for_goal(site, query, limit, page) do - converted_sessions = - from( - from e in base_query(site, query), - select: %{session_id: e.session_id} - ) - offset = (page - 1) * limit - Plausible.Clickhouse.all( + + converted_sessions = + from(e in base_query(site, query), + select: %{session_id: e.session_id}) + + ClickhouseRepo.all( from s in Plausible.ClickhouseSession, - join: cs in subquery(converted_sessions), - on: s.session_id == cs.session_id, - select: - {fragment("? as name", s.referrer_source), fragment("any(?) as url", s.referrer), - fragment("uniq(user_id) as count")}, - where: s.referrer_source != "", - group_by: s.referrer_source, - order_by: [desc: fragment("count")], + join: cs in subquery(converted_sessions), + on: s.session_id == cs.session_id, + where: s.referrer_source != "", + group_by: s.referrer_source, + order_by: [desc: fragment("count")], limit: ^limit, - offset: ^offset - ) - |> Enum.map(fn ref -> - Map.update(ref, "url", nil, fn url -> url && URI.parse("http://" <> url).host end) + offset: ^offset, + select: %{ + name: s.referrer_source, + url: fragment("any(?)", s.referrer), + count: fragment("uniq(?) as count", s.user_id) + } + ) |> Enum.map(fn ref -> + Map.update(ref, :url, nil, fn url -> url && URI.parse("http://" <> url).host end) end) end def top_referrers(site, query, limit, page, show_noref \\ false, include \\ []) do offset = (page - 1) * limit + referrers = from(s in base_session_query(site, query), group_by: s.referrer_source, @@ -249,24 +222,28 @@ defmodule Plausible.Stats.Clickhouse do if "bounce_rate" in include do from( s in referrers, - select: - {fragment("if(empty(?), ?, ?) as name", s.referrer_source, @no_ref, s.referrer_source), fragment("any(?) as url", s.referrer), - fragment("uniq(user_id) as count"), - fragment("round(sum(is_bounce * sign) / sum(sign) * 100) as bounce_rate"), - fragment("round(avg(duration * sign)) as visit_duration")} + select: %{ + name: fragment("if(empty(?), ?, ?) as name", s.referrer_source, @no_ref, s.referrer_source), + url: fragment("any(?)", s.referrer), + count: fragment("uniq(user_id) as count"), + bounce_rate: fragment("round(sum(is_bounce * sign) / sum(sign) * 100) as bounce_rate"), + visit_duration: fragment("round(avg(duration * sign)) as visit_duration") + } ) else from( s in referrers, - select: - {fragment("if(empty(?), ?, ?) as name", s.referrer_source, @no_ref, s.referrer_source), fragment("any(?) as url", s.referrer), - fragment("uniq(user_id) as count")} + select: %{ + name: fragment("if(empty(?), ?, ?) as name", s.referrer_source, @no_ref, s.referrer_source), + url: fragment("any(?)", s.referrer), + count: fragment("uniq(user_id) as count") + } ) end - Clickhouse.all(referrers) + ClickhouseRepo.all(referrers) |> Enum.map(fn ref -> - Map.update(ref, "url", nil, fn url -> url && URI.parse("http://" <> url).host end) + Map.update(ref, :url, nil, fn url -> url && URI.parse("http://" <> url).host end) end) end @@ -277,16 +254,13 @@ defmodule Plausible.Stats.Clickhouse do select: %{session_id: e.session_id} ) - [res] = - Plausible.Clickhouse.all( - from s in Plausible.ClickhouseSession, - join: cs in subquery(converted_sessions), - on: s.session_id == cs.session_id, - where: s.referrer_source == ^referrer, - select: fragment("uniq(user_id) as visitors") - ) - - res["visitors"] + ClickhouseRepo.one( + from s in Plausible.ClickhouseSession, + join: cs in subquery(converted_sessions), + on: s.session_id == cs.session_id, + where: s.referrer_source == ^referrer, + select: fragment("uniq(user_id) as visitors") + ) end def referrer_drilldown(site, query, referrer, include, limit) do @@ -305,27 +279,30 @@ defmodule Plausible.Stats.Clickhouse do if "bounce_rate" in include do from( s in q, - select: - {fragment("if(empty(?), ?, ?) as name", s.referrer, @no_ref, s.referrer), - fragment("uniq(user_id) as count"), - fragment("round(sum(is_bounce * sign) / sum(sign) * 100) as bounce_rate"), - fragment("round(avg(duration * sign)) as visit_duration")} - ) + select: %{ + name: fragment("if(empty(?), ?, ?) as name", s.referrer, @no_ref, s.referrer), + count: fragment("uniq(user_id) as count"), + bounce_rate: fragment("round(sum(is_bounce * sign) / sum(sign) * 100) as bounce_rate"), + visit_duration: fragment("round(avg(duration * sign)) as visit_duration") + }) else from(s in q, - select: {fragment("if(empty(?), ?, ?) as name", s.referrer, @no_ref, s.referrer), fragment("uniq(user_id) as count")} + select: %{ + name: fragment("if(empty(?), ?, ?) as name", s.referrer, @no_ref, s.referrer), + count: fragment("uniq(user_id) as count") + } ) end referring_urls = - Clickhouse.all(q) + ClickhouseRepo.all(q) |> Enum.map(fn ref -> - url = if ref["name"] !== "", do: URI.parse("http://" <> ref["name"]).host - Map.put(ref, "url", url) + url = if ref[:name] !== "", do: URI.parse("http://" <> ref[:name]).host + Map.put(ref, :url, url) end) if referrer == "Twitter" do - urls = Enum.map(referring_urls, & &1["name"]) + urls = Enum.map(referring_urls, & &1[:name]) tweets = Repo.all( @@ -335,7 +312,7 @@ defmodule Plausible.Stats.Clickhouse do |> Enum.group_by(& &1.link) Enum.map(referring_urls, fn url -> - Map.put(url, "tweets", tweets[url["name"]]) + Map.put(url, :tweets, tweets[url[:name]]) end) else referring_urls @@ -349,15 +326,18 @@ defmodule Plausible.Stats.Clickhouse do select: %{session_id: e.session_id} ) - Plausible.Clickhouse.all( + Plausible.ClickhouseRepo.all( from s in Plausible.ClickhouseSession, - join: cs in subquery(converted_sessions), - on: s.session_id == cs.session_id, - select: {fragment("? as name", s.referrer), fragment("uniq(user_id) as count")}, - where: s.referrer_source == ^referrer, - group_by: s.referrer, - order_by: [desc: fragment("count")], - limit: 100 + join: cs in subquery(converted_sessions), + on: s.session_id == cs.session_id, + where: s.referrer_source == ^referrer, + group_by: s.referrer, + order_by: [desc: fragment("count")], + limit: 100, + select: %{ + name: s.referrer, + count: fragment("uniq(user_id) as count") + } ) end @@ -367,8 +347,10 @@ defmodule Plausible.Stats.Clickhouse do group_by: s.entry_page, order_by: [desc: fragment("count")], limit: ^limit, - select: - {fragment("? as name", s.entry_page), fragment("uniq(?) as count", s.user_id)} + select: %{ + name: s.entry_page, + count: fragment("uniq(?) as count", s.user_id) + } ) q = if query.filters["page"] do @@ -378,23 +360,26 @@ defmodule Plausible.Stats.Clickhouse do q end - pages = Clickhouse.all(q) + pages = ClickhouseRepo.all(q) if "bounce_rate" in include do bounce_rates = bounce_rates_by_page_url(site, query) - Enum.map(pages, fn url -> Map.put(url, "bounce_rate", bounce_rates[url["name"]]) end) + Enum.map(pages, fn url -> Map.put(url, :bounce_rate, bounce_rates[url[:name]]) end) else pages end end def top_pages(site, %Query{period: "realtime"} = query, limit, _include) do - Clickhouse.all( + ClickhouseRepo.all( from s in base_session_query(site, query), - select: {fragment("? as name", s.exit_page), fragment("uniq(?) as count", s.user_id)}, - group_by: s.exit_page, - order_by: [desc: fragment("count")], - limit: ^limit + group_by: s.exit_page, + order_by: [desc: fragment("count")], + limit: ^limit, + select: %{ + name: fragment("? as name", s.exit_page), + count: fragment("uniq(?) as count", s.user_id) + } ) end @@ -405,138 +390,141 @@ defmodule Plausible.Stats.Clickhouse do group_by: e.pathname, order_by: [desc: fragment("count")], limit: ^limit, - select: - {fragment("? as name", e.pathname), fragment("uniq(?) as count", e.user_id), - fragment("count(*) as pageviews")} + select: %{ + name: fragment("? as name", e.pathname), + count: fragment("uniq(?) as count", e.user_id), + pageviews: fragment("count(*) as pageviews") + } ) - pages = Clickhouse.all(q) + pages = ClickhouseRepo.all(q) if "bounce_rate" in include do bounce_rates = bounce_rates_by_page_url(site, query) - Enum.map(pages, fn url -> Map.put(url, "bounce_rate", bounce_rates[url["name"]]) end) + Enum.map(pages, fn url -> Map.put(url, :bounce_rate, bounce_rates[url[:name]]) end) else pages end end defp bounce_rates_by_page_url(site, query) do - Clickhouse.all( + ClickhouseRepo.all( from s in base_session_query(site, query), - select: - {s.entry_page, fragment("count(*) as total"), - fragment("round(sum(is_bounce * sign) / sum(sign) * 100) as bounce_rate")}, - group_by: s.entry_page, - order_by: [desc: fragment("total")], - limit: 100 + group_by: s.entry_page, + order_by: [desc: fragment("total")], + limit: 100, + select: %{ + entry_page: s.entry_page, + total: fragment("count(*) as total"), + bounce_rate: fragment("round(sum(is_bounce * sign) / sum(sign) * 100) as bounce_rate") + } ) - |> Enum.map(fn row -> {row["entry_page"], row["bounce_rate"]} end) + |> Enum.map(fn row -> {row[:entry_page], row[:bounce_rate]} end) |> Enum.into(%{}) end defp add_percentages(stat_list) do - total = Enum.reduce(stat_list, 0, fn %{"count" => count}, total -> total + count end) + total = Enum.reduce(stat_list, 0, fn %{count: count}, total -> total + count end) Enum.map(stat_list, fn stat -> - Map.put(stat, "percentage", round(stat["count"] / total * 100)) + Map.put(stat, :percentage, round(stat[:count] / total * 100)) end) end def top_screen_sizes(site, query) do - Clickhouse.all( + ClickhouseRepo.all( from e in base_query(site, query), - select: {fragment("? as name", e.screen_size), fragment("uniq(user_id) as count")}, - group_by: e.screen_size, - where: e.screen_size != "", - order_by: [desc: fragment("count")] - ) - |> add_percentages + group_by: e.screen_size, + where: e.screen_size != "", + order_by: [desc: fragment("count")], + select: %{ + name: e.screen_size, + count: fragment("uniq(user_id) as count") + } + ) |> add_percentages end def countries(site, query) do - Clickhouse.all( + ClickhouseRepo.all( from e in base_query(site, query), - select: {fragment("? as name", e.country_code), fragment("uniq(user_id) as count")}, - group_by: e.country_code, - where: e.country_code != "\0\0", - order_by: [desc: fragment("count")] + group_by: e.country_code, + where: e.country_code != "\0\0", + order_by: [desc: fragment("count")], + select: %{ + name: e.country_code, + count: fragment("uniq(user_id) as count") + } ) |> Enum.map(fn stat -> - two_letter_code = stat["name"] + two_letter_code = stat[:name] stat - |> Map.put("name", Plausible.Stats.CountryName.to_alpha3(two_letter_code)) - |> Map.put("full_country_name", Plausible.Stats.CountryName.from_iso3166(two_letter_code)) - end) - |> add_percentages + |> Map.put(:name, Plausible.Stats.CountryName.to_alpha3(two_letter_code)) + |> Map.put(:full_country_name, Plausible.Stats.CountryName.from_iso3166(two_letter_code)) + end) |> add_percentages end def browsers(site, query, limit \\ 5) do - Clickhouse.all( + ClickhouseRepo.all( from e in base_query(site, query), - select: {fragment("? as name", e.browser), fragment("uniq(user_id) as count")}, - group_by: e.browser, - where: e.browser != "", - order_by: [desc: fragment("count")] + group_by: e.browser, + where: e.browser != "", + order_by: [desc: fragment("count")], + select: %{ + name: e.browser, + count: fragment("uniq(user_id) as count") + } ) |> add_percentages |> Enum.take(limit) end def operating_systems(site, query, limit \\ 5) do - Clickhouse.all( + ClickhouseRepo.all( from e in base_query(site, query), - select: {fragment("? as name", e.operating_system), fragment("uniq(user_id) as count")}, - group_by: e.operating_system, - where: e.operating_system != "", - order_by: [desc: fragment("count")] + group_by: e.operating_system, + where: e.operating_system != "", + order_by: [desc: fragment("count")], + select: %{ + name: e.operating_system, + count: fragment("uniq(user_id) as count") + } ) |> add_percentages |> Enum.take(limit) end def current_visitors(site, query) do - [res] = - Clickhouse.all( - from s in base_query(site, query), - select: fragment("uniq(user_id) as visitors") - ) - - res["visitors"] + Plausible.ClickhouseRepo.one( + from s in base_query(site, query), + select: fragment("uniq(user_id)") + ) end def has_pageviews?([]), do: false def has_pageviews?(domains) when is_list(domains) do - res = - Clickhouse.all( - from e in "events", - select: e.timestamp, - where: fragment("? IN tuple(?)", e.domain, ^domains), - limit: 1 - ) - - !Enum.empty?(res) + ClickhouseRepo.exists?( + from e in "events", + select: e.timestamp, + where: fragment("? IN tuple(?)", e.domain, ^domains) + ) end def has_pageviews?(site) do - res = - Clickhouse.all( - from e in "events", - select: e.timestamp, - where: e.domain == ^site.domain, - limit: 1 - ) - - !Enum.empty?(res) + ClickhouseRepo.exists?(from e in "events", where: e.domain == ^site.domain) end def goal_conversions(site, %Query{filters: %{"goal" => goal}} = query) when is_binary(goal) do - Clickhouse.all( + ClickhouseRepo.all( from e in base_query(site, query), - select: {e.name, fragment("uniq(user_id) as count"), fragment("count(*) as total_count")}, - group_by: e.name, - order_by: [desc: fragment("count")] + group_by: e.name, + order_by: [desc: fragment("count")], + select: %{ + name: e.name, + count: fragment("uniq(user_id) as count"), + total_count: fragment("count(*) as total_count") + } ) end @@ -563,8 +551,12 @@ defmodule Plausible.Stats.Clickhouse do where: e.domain == ^site.domain, where: e.timestamp >= ^first_datetime and e.timestamp < ^last_datetime, where: fragment("? IN tuple(?)", e.name, ^events), - select: {e.name, fragment("uniq(user_id) as count"), fragment("count(*) as total_count")}, - group_by: e.name + group_by: e.name, + select: %{ + name: e.name, + count: fragment("uniq(user_id) as count"), + total_count: fragment("count(*) as total_count") + } ) q = @@ -589,7 +581,7 @@ defmodule Plausible.Stats.Clickhouse do q end - Clickhouse.all(q) + ClickhouseRepo.all(q) else [] end @@ -610,10 +602,11 @@ defmodule Plausible.Stats.Clickhouse do where: e.timestamp >= ^first_datetime and e.timestamp < ^last_datetime, where: fragment("? IN tuple(?)", e.pathname, ^pages), group_by: e.pathname, - select: - {fragment("concat('Visit ', ?) as name", e.pathname), - fragment("uniq(user_id) as count"), - fragment("count(*) as total_count") } + select: %{ + name: fragment("concat('Visit ', ?) as name", e.pathname), + count: fragment("uniq(user_id) as count"), + total_count: fragment("count(*) as total_count") + } ) q = @@ -638,14 +631,14 @@ defmodule Plausible.Stats.Clickhouse do q end - Clickhouse.all(q) + ClickhouseRepo.all(q) else [] end end defp sort_conversions(conversions) do - Enum.sort_by(conversions, fn conversion -> -conversion["count"] end) + Enum.sort_by(conversions, fn conversion -> -conversion[:count] end) end defp base_query_w_sessions(site, query) do diff --git a/lib/plausible_release.ex b/lib/plausible_release.ex index 33cd1208c..9caaa98e4 100644 --- a/lib/plausible_release.ex +++ b/lib/plausible_release.ex @@ -3,6 +3,7 @@ defmodule Plausible.Release do @app :plausible @start_apps [ :postgrex, + :clickhousex, :ecto ] @@ -37,8 +38,6 @@ defmodule Plausible.Release do def migrate do prepare() Enum.each(repos(), &run_migrations_for/1) - prepare_clickhouse() - run_migrations_for_ch() IO.puts("Migrations successful!") end @@ -53,8 +52,6 @@ defmodule Plausible.Release do def createdb do prepare() do_create_db() - prepare_clickhouse(:default_db) - do_create_ch_db() IO.puts("Creation of Db successful!") end @@ -115,79 +112,16 @@ defmodule Plausible.Release do end defp run_migrations_for(repo) do - app = Keyword.get(repo.config, :otp_app) - IO.puts("Running migrations for #{app}") + IO.puts("Running migrations for #{repo}") {:ok, _, _} = Ecto.Migrator.with_repo(repo, &Ecto.Migrator.run(&1, :up, all: true)) end - defp run_migrations_for_ch() do - db = Keyword.get(Application.get_env(:plausible, :clickhouse), :database) - - tb_events = """ - CREATE TABLE IF NOT EXISTS #{db}.events ( - timestamp DateTime, - name String, - domain String, - user_id UInt64, - session_id UInt64, - hostname String, - pathname String, - referrer String, - referrer_source String, - country_code LowCardinality(FixedString(2)), - screen_size LowCardinality(String), - operating_system LowCardinality(String), - browser LowCardinality(String) - ) ENGINE = MergeTree() - PARTITION BY toYYYYMM(timestamp) - ORDER BY (name, domain, user_id, timestamp) - SETTINGS index_granularity = 8192 - """ - - Clickhousex.query(:clickhouse, tb_events, []) - - tb_sessions = """ - CREATE TABLE IF NOT EXISTS #{db}.sessions ( - session_id UInt64, - sign Int8, - domain String, - user_id UInt64, - hostname String, - timestamp DateTime, - start DateTime, - is_bounce UInt8, - entry_page String, - exit_page String, - pageviews Int32, - events Int32, - duration UInt32, - referrer String, - referrer_source String, - country_code LowCardinality(FixedString(2)), - screen_size LowCardinality(String), - operating_system LowCardinality(String), - browser LowCardinality(String) - ) ENGINE = CollapsingMergeTree(sign) - PARTITION BY toYYYYMM(start) - ORDER BY (domain, user_id, session_id, start) - SETTINGS index_granularity = 8192 - """ - - Clickhousex.query(:clickhouse, tb_sessions, []) - end - defp do_create_db do for repo <- repos() do :ok = ensure_repo_created(repo) end end - defp do_create_ch_db() do - db_to_create = Keyword.get(Application.get_env(:plausible, :clickhouse), :database) - IO.puts("create #{inspect(db_to_create)} clickhouse database/tables if it doesn't exist") - Clickhousex.query(:clickhouse, "CREATE DATABASE IF NOT EXISTS #{db_to_create}", []) - end - defp ensure_repo_created(repo) do IO.puts("create #{inspect(repo)} database if it doesn't exist") @@ -220,37 +154,6 @@ defmodule Plausible.Release do Enum.each(repos(), & &1.start_link(pool_size: 2)) end - # connect to the default db for creating the required db - defp prepare_clickhouse(:default_db) do - Application.ensure_all_started(:db_connection) - Application.ensure_all_started(:hackney) - - Clickhousex.start_link( - scheme: :http, - port: 8123, - name: :clickhouse, - database: "default", - username: "default", - hostname: Keyword.get(Application.get_env(:plausible, :clickhouse), :hostname), - password: Keyword.get(Application.get_env(:plausible, :clickhouse), :password) - ) - end - - defp prepare_clickhouse() do - Application.ensure_all_started(:db_connection) - Application.ensure_all_started(:hackney) - - Clickhousex.start_link( - scheme: :http, - port: 8123, - name: :clickhouse, - username: Keyword.get(Application.get_env(:plausible, :clickhouse), :username), - database: Keyword.get(Application.get_env(:plausible, :clickhouse), :database), - hostname: Keyword.get(Application.get_env(:plausible, :clickhouse), :hostname), - password: Keyword.get(Application.get_env(:plausible, :clickhouse), :password) - ) - end - defp seeds_path(repo), do: priv_path_for(repo, "seeds.exs") defp priv_path_for(repo, filename) do diff --git a/lib/plausible_web/controllers/api/external_controller.ex b/lib/plausible_web/controllers/api/external_controller.ex index 9c6b870d2..228727321 100644 --- a/lib/plausible_web/controllers/api/external_controller.ex +++ b/lib/plausible_web/controllers/api/external_controller.ex @@ -36,7 +36,7 @@ defmodule PlausibleWeb.Api.ExternalController do end clickhouse_health = - case Clickhousex.query(:clickhouse, "SELECT 1", []) do + case Ecto.Adapters.SQL.query(Plausible.ClickhouseRepo, "SELECT 1", []) do {:ok, _} -> "ok" e -> "error: #{inspect(e)}" end @@ -80,18 +80,18 @@ defmodule PlausibleWeb.Api.ExternalController do salts = Plausible.Session.Salts.fetch() event_attrs = %{ - timestamp: NaiveDateTime.utc_now(), + timestamp: NaiveDateTime.utc_now() |> NaiveDateTime.truncate(:second), name: params["name"], hostname: strip_www(uri && uri.host), domain: strip_www(params["domain"]) || strip_www(uri && uri.host), pathname: get_pathname(uri, params["hash_mode"]), user_id: generate_user_id(conn, params, salts[:current]), - country_code: country_code, - operating_system: ua && os_name(ua), - browser: ua && browser_name(ua), - referrer_source: get_referrer_source(uri, ref), - referrer: clean_referrer(ref), - screen_size: calculate_screen_size(params["screen_width"]) + country_code: country_code || "", + operating_system: ua && os_name(ua) || "", + browser: ua && browser_name(ua) || "", + referrer_source: get_referrer_source(uri, ref) || "", + referrer: clean_referrer(ref) || "", + screen_size: calculate_screen_size(params["screen_width"]) || "" } changeset = Plausible.ClickhouseEvent.changeset(%Plausible.ClickhouseEvent{}, event_attrs) diff --git a/lib/plausible_web/controllers/site_controller.ex b/lib/plausible_web/controllers/site_controller.ex index 445a77972..95edc0ad0 100644 --- a/lib/plausible_web/controllers/site_controller.ex +++ b/lib/plausible_web/controllers/site_controller.ex @@ -161,7 +161,7 @@ defmodule PlausibleWeb.SiteController do def reset_stats(conn, %{"website" => website}) do site = Sites.get_for_user!(conn.assigns[:current_user].id, website) - Plausible.Clickhouse.delete_stats!(site) + Plausible.ClickhouseRepo.clear_stats_for(site.domain) conn |> put_flash(:success, "#{site.domain} stats will be reset in a few minutes") @@ -180,7 +180,7 @@ defmodule PlausibleWeb.SiteController do end Repo.delete!(site) - Plausible.Clickhouse.delete_stats!(site) + Plausible.ClickhouseRepo.clear_stats_for(site.domain) conn |> put_flash(:success, "Site deleted succesfully along with all pageviews") diff --git a/lib/plausible_web/templates/email/weekly_report.html.eex b/lib/plausible_web/templates/email/weekly_report.html.eex index 4b6324cf0..be6ae610b 100644 --- a/lib/plausible_web/templates/email/weekly_report.html.eex +++ b/lib/plausible_web/templates/email/weekly_report.html.eex @@ -434,7 +434,7 @@ body {
-

<%= referrer["name"] %>

+

<%= referrer[:name] %>

@@ -453,7 +453,7 @@ body {
-

<%= PlausibleWeb.StatsView.large_number_format(referrer["count"]) %>

+

<%= PlausibleWeb.StatsView.large_number_format(referrer[:count]) %>

@@ -563,7 +563,7 @@ body {
-

<%= page["name"] %>

+

<%= page[:name] %>

@@ -582,7 +582,7 @@ body {
-

<%= PlausibleWeb.StatsView.large_number_format(page["count"]) %>

+

<%= PlausibleWeb.StatsView.large_number_format(page[:count]) %>

diff --git a/lib/workers/fetch_tweets.ex b/lib/workers/fetch_tweets.ex index 1471d1c79..bf6714950 100644 --- a/lib/workers/fetch_tweets.ex +++ b/lib/workers/fetch_tweets.ex @@ -1,13 +1,12 @@ defmodule Plausible.Workers.FetchTweets do use Plausible.Repo - alias Plausible.Clickhouse alias Plausible.Twitter.Tweet use Oban.Worker, queue: :fetch_tweets @impl Oban.Worker def perform(_args, _job, twitter_api \\ Plausible.Twitter.Api) do new_links = - Clickhouse.all( + Plausible.ClickhouseRepo.all( from e in Plausible.ClickhouseEvent, where: e.timestamp > fragment("(now() - INTERVAL 6 day)") and @@ -18,7 +17,6 @@ defmodule Plausible.Workers.FetchTweets do distinct: true, select: e.referrer ) - |> Enum.map(fn event -> event["referrer"] end) for link <- new_links do results = twitter_api.search(link) diff --git a/mix.exs b/mix.exs index 69f19f77b..f0aa03bd4 100644 --- a/mix.exs +++ b/mix.exs @@ -89,8 +89,8 @@ defmodule Plausible.MixProject do {:oban, "~> 1.2"}, {:sshex, "2.2.1"}, {:geolix, "~> 1.0"}, - {:geolix_adapter_mmdb2, "~> 0.5.0"}, - {:clickhousex, [git: "https://github.com/atlas-forks/clickhousex.git"]} + {:clickhouse_ecto, git: "https://github.com/plausible/clickhouse_ecto.git"}, + {:geolix_adapter_mmdb2, "~> 0.5.0"} ] end @@ -104,7 +104,7 @@ defmodule Plausible.MixProject do [ "ecto.setup": ["ecto.create", "ecto.migrate", "run priv/repo/seeds.exs"], "ecto.reset": ["ecto.drop", "ecto.setup"], - test: ["ecto.create --quiet", "ecto.migrate", "test"] + test: ["ecto.create --quiet", "ecto.migrate", "test", "clean_clickhouse"] ] end end diff --git a/mix.lock b/mix.lock index 0124f72ed..8ec80679a 100644 --- a/mix.lock +++ b/mix.lock @@ -5,6 +5,7 @@ "bcrypt_elixir": {:hex, :bcrypt_elixir, "2.2.0", "3df902b81ce7fa8867a2ae30d20a1da6877a2c056bfb116fd0bc8a5f0190cea4", [:make, :mix], [{:comeonin, "~> 5.3", [hex: :comeonin, repo: "hexpm", optional: false]}, {:elixir_make, "~> 0.6", [hex: :elixir_make, repo: "hexpm", optional: false]}], "hexpm", "762be3fcb779f08207531bc6612cca480a338e4b4357abb49f5ce00240a77d1e"}, "browser": {:hex, :browser, "0.4.4", "bd6436961a6b2299c6cb38d0e49761c1161d869cd0db46369cef2bf6b77c3665", [:mix], [{:plug, "~> 1.2", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "d476ca309d4a4b19742b870380390aabbcb323c1f6f8745e2da2dfd079b4f8d7"}, "certifi": {:hex, :certifi, "2.5.1", "867ce347f7c7d78563450a18a6a28a8090331e77fa02380b4a21962a65d36ee5", [:rebar3], [{:parse_trans, "~>3.3", [hex: :parse_trans, repo: "hexpm", optional: false]}], "hexpm", "805abd97539caf89ec6d4732c91e62ba9da0cda51ac462380bbd28ee697a8c42"}, + "clickhouse_ecto": {:git, "https://github.com/plausible/clickhouse_ecto.git", "221e6f0ce17613db83baef362b46a49d4d2d4504", []}, "clickhousex": {:git, "https://github.com/atlas-forks/clickhousex.git", "e010c4eaa6cb6b659e44790a3bea2ec7703ceb31", []}, "combine": {:hex, :combine, "0.10.0", "eff8224eeb56498a2af13011d142c5e7997a80c8f5b97c499f84c841032e429f", [:mix], [], "hexpm", "1b1dbc1790073076580d0d1d64e42eae2366583e7aecd455d1215b0d16f2451b"}, "comeonin": {:hex, :comeonin, "5.3.1", "7fe612b739c78c9c1a75186ef2d322ce4d25032d119823269d0aa1e2f1e20025", [:mix], [], "hexpm", "d6222483060c17f0977fad1b7401ef0c5863c985a64352755f366aee3799c245"}, @@ -14,9 +15,9 @@ "cowlib": {:hex, :cowlib, "2.8.0", "fd0ff1787db84ac415b8211573e9a30a3ebe71b5cbff7f720089972b2319c8a4", [:rebar3], [], "hexpm", "79f954a7021b302186a950a32869dbc185523d99d3e44ce430cd1f3289f41ed4"}, "csv": {:hex, :csv, "2.3.1", "9ce11eff5a74a07baf3787b2b19dd798724d29a9c3a492a41df39f6af686da0e", [:mix], [{:parallel_stream, "~> 1.0.4", [hex: :parallel_stream, repo: "hexpm", optional: false]}], "hexpm", "86626e1c89a4ad9a96d0d9c638f9e88c2346b89b4ba1611988594ebe72b5d5ee"}, "db_connection": {:hex, :db_connection, "2.2.2", "3bbca41b199e1598245b716248964926303b5d4609ff065125ce98bcd368939e", [:mix], [{:connection, "~> 1.0.2", [hex: :connection, repo: "hexpm", optional: false]}], "hexpm", "642af240d8a8affb93b4ba5a6fcd2bbcbdc327e1a524b825d383711536f8070c"}, - "decimal": {:hex, :decimal, "1.8.1", "a4ef3f5f3428bdbc0d35374029ffcf4ede8533536fa79896dd450168d9acdf3c", [:mix], [], "hexpm", "3cb154b00225ac687f6cbd4acc4b7960027c757a5152b369923ead9ddbca7aec"}, + "decimal": {:hex, :decimal, "1.9.0", "83e8daf59631d632b171faabafb4a9f4242c514b0a06ba3df493951c08f64d07", [:mix], [], "hexpm", "b1f2343568eed6928f3e751cf2dffde95bfaa19dd95d09e8a9ea92ccfd6f7d85"}, "double": {:hex, :double, "0.7.0", "a7ee4c3488a0acc6d2ad9b69b6c7d3ddf3da2b54488d0f7c2d6ceb3a995887ca", [:mix], [], "hexpm", "f0c387a2266b4452da7bab03598feec11aef8b2acab061ea947dae81bb257329"}, - "ecto": {:hex, :ecto, "3.4.4", "a2c881e80dc756d648197ae0d936216c0308370332c5e77a2325a10293eef845", [:mix], [{:decimal, "~> 1.6 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "cc4bd3ad62abc3b21fb629f0f7a3dab23a192fca837d257dd08449fba7373561"}, + "ecto": {:hex, :ecto, "3.4.6", "08f7afad3257d6eb8613309af31037e16c36808dfda5a3cd0cb4e9738db030e4", [:mix], [{:decimal, "~> 1.6 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "6f13a9e2a62e75c2dcfc7207bfc65645ab387af8360db4c89fee8b5a4bf3f70b"}, "ecto_sql": {:hex, :ecto_sql, "3.4.4", "d28bac2d420f708993baed522054870086fd45016a9d09bb2cd521b9c48d32ea", [:mix], [{:db_connection, "~> 2.2", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.4.3", [hex: :ecto, repo: "hexpm", optional: false]}, {:myxql, "~> 0.3.0 or ~> 0.4.0", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.15.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:tds, "~> 2.1.0", [hex: :tds, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "edb49af715dd72f213b66adfd0f668a43c17ed510b5d9ac7528569b23af57fe8"}, "elixir_make": {:hex, :elixir_make, "0.6.0", "38349f3e29aff4864352084fc736fa7fa0f2995a819a737554f7ebd28b85aaab", [:mix], [], "hexpm", "d522695b93b7f0b4c0fcb2dfe73a6b905b1c301226a5a55cb42e5b14d509e050"}, "elixir_uuid": {:hex, :elixir_uuid, "1.2.1", "dce506597acb7e6b0daeaff52ff6a9043f5919a4c3315abb4143f0b00378c097", [:mix], [], "hexpm", "f7eba2ea6c3555cea09706492716b0d87397b88946e6380898c2889d68585752"}, @@ -54,14 +55,14 @@ "plug_crypto": {:hex, :plug_crypto, "1.1.2", "bdd187572cc26dbd95b87136290425f2b580a116d3fb1f564216918c9730d227", [:mix], [], "hexpm", "6b8b608f895b6ffcfad49c37c7883e8df98ae19c6a28113b02aa1e9c5b22d6b5"}, "poison": {:hex, :poison, "3.1.0", "d9eb636610e096f86f25d9a46f35a9facac35609a7591b3be3326e99a0484665", [:mix], [], "hexpm", "fec8660eb7733ee4117b85f55799fd3833eb769a6df71ccf8903e8dc5447cfce"}, "poolboy": {:hex, :poolboy, "1.5.2", "392b007a1693a64540cead79830443abf5762f5d30cf50bc95cb2c1aaafa006b", [:rebar3], [], "hexpm", "dad79704ce5440f3d5a3681c8590b9dc25d1a561e8f5a9c995281012860901e3"}, - "postgrex": {:hex, :postgrex, "0.15.4", "5d691c25fc79070705a2ff0e35ce0822b86a0ee3c6fdb7a4fb354623955e1aed", [:mix], [{:connection, "~> 1.0", [hex: :connection, repo: "hexpm", optional: false]}, {:db_connection, "~> 2.1", [hex: :db_connection, repo: "hexpm", optional: false]}, {:decimal, "~> 1.5", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm", "306515b9d975fcb2478dc337a1d27dc3bf8af7cd71017c333fe9db3a3d211b0a"}, + "postgrex": {:hex, :postgrex, "0.15.5", "aec40306a622d459b01bff890fa42f1430dac61593b122754144ad9033a2152f", [:mix], [{:connection, "~> 1.0", [hex: :connection, repo: "hexpm", optional: false]}, {:db_connection, "~> 2.1", [hex: :db_connection, repo: "hexpm", optional: false]}, {:decimal, "~> 1.5", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm", "ed90c81e1525f65a2ba2279dbcebf030d6d13328daa2f8088b9661eb9143af7f"}, "ranch": {:hex, :ranch, "1.7.1", "6b1fab51b49196860b733a49c07604465a47bdb78aa10c1c16a3d199f7f8c881", [:rebar3], [], "hexpm", "451d8527787df716d99dc36162fca05934915db0b6141bbdac2ea8d3c7afc7d7"}, "ref_inspector": {:hex, :ref_inspector, "1.3.1", "bb0489a4c4299dcd633f2b7a60c41a01f5590789d0b28225a60be484e1fbe777", [:mix], [{:hackney, "~> 1.0", [hex: :hackney, repo: "hexpm", optional: false]}, {:yamerl, "~> 0.7", [hex: :yamerl, repo: "hexpm", optional: false]}], "hexpm", "3172eb1b08e5c69966f796e3fe0e691257546fa143a5eb0ecc18a6e39b233854"}, "sentry": {:hex, :sentry, "7.2.4", "b5bc90b594d40c2e653581e797a5fd2fdf994f2568f6bd66b7fa4971598be8d5", [:mix], [{:hackney, "~> 1.8 or 1.6.5", [hex: :hackney, repo: "hexpm", optional: false]}, {:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: true]}, {:phoenix, "~> 1.3", [hex: :phoenix, repo: "hexpm", optional: true]}, {:plug, "~> 1.6", [hex: :plug, repo: "hexpm", optional: true]}, {:plug_cowboy, "~> 1.0 or ~> 2.0", [hex: :plug_cowboy, repo: "hexpm", optional: true]}], "hexpm", "4ee4d368b5013076afcc8b73ed028bdc8ee9db84ea987e3591101e194c1fc24b"}, "siphash": {:hex, :siphash, "3.2.0", "ec03fd4066259218c85e2a4b8eec4bb9663bc02b127ea8a0836db376ba73f2ed", [:make, :mix], [], "hexpm", "ba3810701c6e95637a745e186e8a4899087c3b079ba88fb8f33df054c3b0b7c3"}, "sshex": {:hex, :sshex, "2.2.1", "e1270b8345ea2a66a11c2bb7aed22c93e3bc7bc813486f4ffd0a980e4a898160", [:mix], [], "hexpm", "45b2caa5011dc850e70a2d77e3b62678a3e8bcb903eab6f3e7afb2ea897b13db"}, "ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.5", "6eaf7ad16cb568bb01753dbbd7a95ff8b91c7979482b95f38443fe2c8852a79b", [:make, :mix, :rebar3], [], "hexpm", "13104d7897e38ed7f044c4de953a6c28597d1c952075eb2e328bc6d6f2bfc496"}, - "telemetry": {:hex, :telemetry, "0.4.1", "ae2718484892448a24470e6aa341bc847c3277bfb8d4e9289f7474d752c09c7f", [:rebar3], [], "hexpm", "4738382e36a0a9a2b6e25d67c960e40e1a2c95560b9f936d8e29de8cd858480f"}, + "telemetry": {:hex, :telemetry, "0.4.2", "2808c992455e08d6177322f14d3bdb6b625fbcfd233a73505870d8738a2f4599", [:rebar3], [], "hexpm", "2d1419bd9dda6a206d7b5852179511722e2b18812310d304620c7bd92a13fcef"}, "timex": {:hex, :timex, "3.6.2", "845cdeb6119e2fef10751c0b247b6c59d86d78554c83f78db612e3290f819bc2", [:mix], [{:combine, "~> 0.10", [hex: :combine, repo: "hexpm", optional: false]}, {:gettext, "~> 0.10", [hex: :gettext, repo: "hexpm", optional: false]}, {:tzdata, "~> 0.1.8 or ~> 0.5 or ~> 1.0.0", [hex: :tzdata, repo: "hexpm", optional: false]}], "hexpm", "26030b46199d02a590be61c2394b37ea25a3664c02fafbeca0b24c972025d47a"}, "tzdata": {:hex, :tzdata, "1.0.3", "73470ad29dde46e350c60a66e6b360d3b99d2d18b74c4c349dbebbc27a09a3eb", [:mix], [{:hackney, "~> 1.0", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm", "a6e1ee7003c4d04ecbd21dd3ec690d4c6662db5d3bbdd7262d53cdf5e7c746c1"}, "ua_inspector": {:hex, :ua_inspector, "0.20.0", "01939baf5706f7d6c2dc0affbbd7f5e14309ba43ebf8967aa6479ee2204f23bc", [:mix], [{:hackney, "~> 1.0", [hex: :hackney, repo: "hexpm", optional: false]}, {:poolboy, "~> 1.0", [hex: :poolboy, repo: "hexpm", optional: false]}, {:yamerl, "~> 0.7", [hex: :yamerl, repo: "hexpm", optional: false]}], "hexpm", "30e8623b9f55e7d58be12fc2afd50be8792ec14192c289701d3cc93ad6027f26"}, diff --git a/plausible-variables.sample.env b/plausible-variables.sample.env index ce3804e0b..d7142dad2 100644 --- a/plausible-variables.sample.env +++ b/plausible-variables.sample.env @@ -5,6 +5,7 @@ SIGNING_SALT=PL/THF0VMOzuv1bOcldjDzYFBLryvXNs HOST=localhost DATABASE_URL=postgres://postgres:postgres@plausible_db:5432/plausible_db DATABASE_TLS_ENABLED=false +CLICKHOUSE_DATABASE_URL=http://default:@plausible_events_db/plausible_events_db ADMIN_USER_NAME=admin ADMIN_USER_EMAIL=admin@plausible.local ADMIN_USER_PWD=admin@1234! @@ -16,9 +17,5 @@ SMTP_USER_NAME=fakeuser@plausible.local SMTP_USER_PWD=password SMTP_HOST_SSL_ENABLED=false SMTP_MX_LOOKUPS_ENABLED=false -CLICKHOUSE_DATABASE_HOST=plausible_events_db -CLICKHOUSE_DATABASE_NAME=plausible_events_db -CLICKHOUSE_DATABASE_USER=default -CLICKHOUSE_DATABASE_PASSWORD= DISABLE_AUTH=false DISABLE_REGISTRATION=false diff --git a/priv/clickhouse_repo/migrations/20200915070607_create_events_and_sessions.exs b/priv/clickhouse_repo/migrations/20200915070607_create_events_and_sessions.exs new file mode 100644 index 000000000..d846cf4f7 --- /dev/null +++ b/priv/clickhouse_repo/migrations/20200915070607_create_events_and_sessions.exs @@ -0,0 +1,52 @@ +defmodule Plausible.ClickhouseRepo.Migrations.CreateEventsAndSessions do + use Ecto.Migration + + def up do + create_events() + create_sessions() + end + + defp create_events() do + create_if_not_exists table(:events, engine: "MergeTree() PARTITION BY toYYYYMM(timestamp) ORDER BY (name, domain, user_id, timestamp) SETTINGS index_granularity = 8192") do + add :name, :string + add :domain, :string + add :user_id, :UInt64 + add :session_id, :UInt64 + add :hostname, :string + add :pathname, :string + add :referrer, :string + add :referrer_source, :string + add :country_code, :"LowCardinality(FixedString(2))" + add :screen_size, :"LowCardinality(String)" + add :operating_system, :"LowCardinality(String)" + add :browser, :"LowCardinality(String)" + + add :timestamp, :naive_datetime + end + end + + defp create_sessions() do + create_if_not_exists table(:sessions, engine: "CollapsingMergeTree(sign) PARTITION BY toYYYYMM(start) ORDER BY (domain, user_id, session_id, start) SETTINGS index_granularity = 8192") do + add :session_id, :UInt64 + add :sign, :"Int8" + add :domain, :string + add :user_id, :UInt64 + add :hostname, :string + add :is_bounce, :boolean + add :entry_page, :string + add :exit_page, :string + add :pageviews, :integer + add :events, :integer + add :duration, :"UInt32" + add :referrer, :string + add :referrer_source, :string + add :country_code, :"LowCardinality(FixedString(2))" + add :screen_size, :"LowCardinality(String)" + add :operating_system, :"LowCardinality(String)" + add :browser, :"LowCardinality(String)" + + add :start, :naive_datetime + add :timestamp, :naive_datetime + end + end +end diff --git a/test/plausible_web/controllers/api/external_controller_test.exs b/test/plausible_web/controllers/api/external_controller_test.exs index 786143322..9cdeaa9cf 100644 --- a/test/plausible_web/controllers/api/external_controller_test.exs +++ b/test/plausible_web/controllers/api/external_controller_test.exs @@ -1,19 +1,15 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do use PlausibleWeb.ConnCase - use Plausible.Repo + use Plausible.ClickhouseRepo defp get_event(domain) do Plausible.Event.WriteBuffer.flush() - events = - Plausible.Clickhouse.all( - from e in Plausible.ClickhouseEvent, - where: e.domain == ^domain, - order_by: [desc: e.timestamp], - limit: 1 - ) - - List.first(events) + ClickhouseRepo.one( + from e in Plausible.ClickhouseEvent, + where: e.domain == ^domain, + order_by: [desc: e.timestamp] + ) end @user_agent "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36" @@ -37,9 +33,9 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do pageview = get_event("external-controller-test-1.com") assert response(conn, 202) == "" - assert pageview["hostname"] == "gigride.live" - assert pageview["domain"] == "external-controller-test-1.com" - assert pageview["pathname"] == "/" + assert pageview.hostname == "gigride.live" + assert pageview.domain == "external-controller-test-1.com" + assert pageview.pathname == "/" end test "www. is stripped from domain", %{conn: conn} do @@ -55,7 +51,7 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do pageview = get_event("external-controller-test-2.com") - assert pageview["domain"] == "external-controller-test-2.com" + assert pageview.domain == "external-controller-test-2.com" end test "www. is stripped from hostname", %{conn: conn} do @@ -71,7 +67,7 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do pageview = get_event("external-controller-test-3.com") - assert pageview["hostname"] == "example.com" + assert pageview.hostname == "example.com" end test "empty path defaults to /", %{conn: conn} do @@ -87,7 +83,7 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do pageview = get_event("external-controller-test-4.com") - assert pageview["pathname"] == "/" + assert pageview.pathname == "/" end test "bots and crawlers are ignored", %{conn: conn} do @@ -121,8 +117,8 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do pageview = get_event("external-controller-test-6.com") assert response(conn, 202) == "" - assert pageview["operating_system"] == "Mac" - assert pageview["browser"] == "Chrome" + assert pageview.operating_system == "Mac" + assert pageview.browser == "Chrome" end test "parses referrer", %{conn: conn} do @@ -142,7 +138,7 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do pageview = get_event("external-controller-test-7.com") assert response(conn, 202) == "" - assert pageview["referrer_source"] == "Facebook" + assert pageview.referrer_source == "Facebook" end test "strips trailing slash from referrer", %{conn: conn} do @@ -162,8 +158,8 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do pageview = get_event("external-controller-test-8.com") assert response(conn, 202) == "" - assert pageview["referrer"] == "facebook.com/page" - assert pageview["referrer_source"] == "Facebook" + assert pageview.referrer == "facebook.com/page" + assert pageview.referrer_source == "Facebook" end test "ignores when referrer is internal", %{conn: conn} do @@ -183,7 +179,7 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do pageview = get_event("external-controller-test-9.com") assert response(conn, 202) == "" - assert pageview["referrer_source"] == "" + assert pageview.referrer_source == "" end test "ignores localhost referrer", %{conn: conn} do @@ -203,7 +199,7 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do pageview = get_event("external-controller-test-10.com") assert response(conn, 202) == "" - assert pageview["referrer_source"] == "" + assert pageview.referrer_source == "" end test "parses subdomain referrer", %{conn: conn} do @@ -223,7 +219,7 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do pageview = get_event("external-controller-test-11.com") assert response(conn, 202) == "" - assert pageview["referrer_source"] == "blog.gigride.live" + assert pageview.referrer_source == "blog.gigride.live" end test "referrer is cleaned", %{conn: conn} do @@ -240,7 +236,7 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do pageview = get_event("external-controller-test-12.com") - assert pageview["referrer"] == "indiehackers.com/page" + assert pageview.referrer == "indiehackers.com/page" end test "utm_source overrides referrer source", %{conn: conn} do @@ -257,7 +253,7 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do pageview = get_event("external-controller-test-13.com") - assert pageview["referrer_source"] == "betalist" + assert pageview.referrer_source == "betalist" end test "if it's an :unknown referrer, just the domain is used", %{conn: conn} do @@ -277,7 +273,7 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do pageview = get_event("external-controller-test-14.com") assert response(conn, 202) == "" - assert pageview["referrer_source"] == "indiehackers.com" + assert pageview.referrer_source == "indiehackers.com" end test "if the referrer is not http or https, it is ignored", %{conn: conn} do @@ -297,7 +293,7 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do pageview = get_event("external-controller-test-15.com") assert response(conn, 202) == "" - assert pageview["referrer_source"] == "" + assert pageview.referrer_source == "" end end @@ -318,7 +314,7 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do pageview = get_event("external-controller-test-16.com") assert response(conn, 202) == "" - assert pageview["screen_size"] == "Mobile" + assert pageview.screen_size == "Mobile" end test "screen size is nil if screen_width is missing", %{conn: conn} do @@ -337,7 +333,7 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do pageview = get_event("external-controller-test-17.com") assert response(conn, 202) == "" - assert pageview["screen_size"] == "" + assert pageview.screen_size == "" end test "can trigger a custom event", %{conn: conn} do @@ -356,7 +352,7 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do event = get_event("external-controller-test-18.com") assert response(conn, 202) == "" - assert event["name"] == "custom event" + assert event.name == "custom event" end test "ignores a malformed referrer URL", %{conn: conn} do @@ -376,7 +372,7 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do event = get_event("external-controller-test-19.com") assert response(conn, 202) == "" - assert event["referrer"] == "" + assert event.referrer == "" end # Fake data is set up in config/test.exs @@ -394,7 +390,7 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do pageview = get_event("external-controller-test-20.com") - assert pageview["country_code"] == "US" + assert pageview.country_code == "US" end test "URL is decoded", %{conn: conn} do @@ -410,7 +406,7 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do pageview = get_event("external-controller-test-21.com") - assert pageview["pathname"] == "/opportunity/category/جوائز-ومسابقات" + assert pageview.pathname == "/opportunity/category/جوائز-ومسابقات" end test "accepts shorthand map keys", %{conn: conn} do @@ -428,10 +424,10 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do pageview = get_event("external-controller-test-22.com") - assert pageview["pathname"] == "/opportunity" - assert pageview["referrer_source"] == "Facebook" - assert pageview["referrer"] == "facebook.com/page" - assert pageview["screen_size"] == "Mobile" + assert pageview.pathname == "/opportunity" + assert pageview.referrer_source == "Facebook" + assert pageview.referrer == "facebook.com/page" + assert pageview.screen_size == "Mobile" end test "records hash when in hash mode", %{conn: conn} do @@ -448,7 +444,7 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do pageview = get_event("external-controller-test-23.com") - assert pageview["pathname"] == "/#page-a" + assert pageview.pathname == "/#page-a" end test "responds 400 when required fields are missing", %{conn: conn} do diff --git a/test/plausible_web/controllers/api/stats_controller/referrers_test.exs b/test/plausible_web/controllers/api/stats_controller/referrers_test.exs index 42a8cb1a7..8c20deb63 100644 --- a/test/plausible_web/controllers/api/stats_controller/referrers_test.exs +++ b/test/plausible_web/controllers/api/stats_controller/referrers_test.exs @@ -98,7 +98,7 @@ defmodule PlausibleWeb.Api.StatsController.ReferrersTest do conn = get(conn, "/api/stats/#{site.domain}/referrers/10words?period=day&date=2019-01-01&filters=#{filters}") assert json_response(conn, 200) == %{ - "total_visitors" => 6, + "total_visitors" => 2, "referrers" => [ %{"name" => "10words.com/page1", "url" => "10words.com", "count" => 2} ] @@ -114,7 +114,7 @@ defmodule PlausibleWeb.Api.StatsController.ReferrersTest do ) assert json_response(conn, 200) == %{ - "total_visitors" => 6, + "total_visitors" => 2, "referrers" => [ %{ "name" => "10words.com/page1", diff --git a/test/support/clickhouse_setup.ex b/test/support/clickhouse_setup.ex index aecac94f3..d25414dc8 100644 --- a/test/support/clickhouse_setup.ex +++ b/test/support/clickhouse_setup.ex @@ -1,76 +1,8 @@ defmodule Plausible.Test.ClickhouseSetup do - def run() do - create_events() - create_sessions() - load_fixtures() - end - - def create_events() do - drop = "DROP TABLE events" - - create = """ - CREATE TABLE events ( - timestamp DateTime, - name String, - domain String, - user_id UInt64, - session_id UInt64, - hostname String, - pathname String, - referrer String, - referrer_source String, - country_code LowCardinality(FixedString(2)), - screen_size LowCardinality(String), - operating_system LowCardinality(String), - browser LowCardinality(String) - ) ENGINE = MergeTree() - PARTITION BY toYYYYMM(timestamp) - ORDER BY (name, domain, user_id, timestamp) - SETTINGS index_granularity = 8192 - """ - - Clickhousex.query(:clickhouse, drop, [], log: {Plausible.Clickhouse, :log, []}) - Clickhousex.query(:clickhouse, create, [], log: {Plausible.Clickhouse, :log, []}) - end - - def create_sessions() do - drop = "DROP TABLE sessions" - - create = """ - CREATE TABLE sessions ( - session_id UInt64, - sign Int8, - domain String, - user_id UInt64, - hostname String, - timestamp DateTime, - start DateTime, - is_bounce UInt8, - entry_page String, - exit_page String, - pageviews Int32, - events Int32, - duration UInt32, - referrer String, - referrer_source String, - country_code LowCardinality(FixedString(2)), - screen_size LowCardinality(String), - operating_system LowCardinality(String), - browser LowCardinality(String) - ) ENGINE = CollapsingMergeTree(sign) - PARTITION BY toYYYYMM(start) - ORDER BY (domain, user_id, session_id, start) - SETTINGS index_granularity = 8192 - """ - - Clickhousex.query(:clickhouse, drop, [], log: {Plausible.Clickhouse, :log, []}) - Clickhousex.query(:clickhouse, create, [], log: {Plausible.Clickhouse, :log, []}) - end - @conversion_1_session_id 123 @conversion_2_session_id 234 - def load_fixtures() do + def run() do Plausible.TestUtils.create_events([ %{ name: "pageview", diff --git a/test/support/factory.ex b/test/support/factory.ex index c32b96596..c70a905d1 100644 --- a/test/support/factory.ex +++ b/test/support/factory.ex @@ -38,13 +38,19 @@ defmodule Plausible.Factory do user_id: SipHash.hash!(@hash_key, UUID.uuid4()), hostname: hostname, domain: hostname, + referrer: "", + referrer_source: "", entry_page: "/", pageviews: 1, events: 1, duration: 0, start: Timex.now(), timestamp: Timex.now(), - is_bounce: false + is_bounce: false, + browser: "", + country_code: "", + screen_size: "", + operating_system: "" } end @@ -66,7 +72,13 @@ defmodule Plausible.Factory do pathname: "/", timestamp: Timex.now(), user_id: SipHash.hash!(@hash_key, UUID.uuid4()), - session_id: SipHash.hash!(@hash_key, UUID.uuid4()) + session_id: SipHash.hash!(@hash_key, UUID.uuid4()), + referrer: "", + referrer_source: "", + browser: "", + country_code: "", + screen_size: "", + operating_system: "" } end diff --git a/test/support/test_utils.ex b/test/support/test_utils.ex index 25538ab14..09746509c 100644 --- a/test/support/test_utils.ex +++ b/test/support/test_utils.ex @@ -12,24 +12,27 @@ defmodule Plausible.TestUtils do end def create_pageviews(pageviews) do - Enum.map(pageviews, fn pageview -> - Factory.build(:pageview, pageview) + pageviews = Enum.map(pageviews, fn pageview -> + Factory.build(:pageview, pageview) |> Map.from_struct() |> Map.delete(:__meta__) end) - |> Plausible.Clickhouse.insert_events() + + Plausible.ClickhouseRepo.insert_all("events", pageviews) end def create_events(events) do - Enum.map(events, fn event -> - Factory.build(:event, event) + events = Enum.map(events, fn event -> + Factory.build(:event, event) |> Map.from_struct() |> Map.delete(:__meta__) end) - |> Plausible.Clickhouse.insert_events() + + Plausible.ClickhouseRepo.insert_all("events", events) end def create_sessions(sessions) do - Enum.map(sessions, fn session -> - Factory.build(:ch_session, session) + sessions = Enum.map(sessions, fn session -> + Factory.build(:ch_session, session) |> Map.from_struct() |> Map.delete(:__meta__) end) - |> Plausible.Clickhouse.insert_sessions() + + Plausible.ClickhouseRepo.insert_all("sessions", sessions) end def log_in(%{user: user, conn: conn}) do