From 3115c6e7a81eeac431c963e36014f5a6a83d161a Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Thu, 4 Apr 2024 13:54:23 +0300 Subject: [PATCH] Reducing JOINs in queries (#3966) * Move experimental_session_count? logic to within query object * WIP new querying system for deciding what tables to query * both -> either * Include sample_percent in both tables * Remove a hanging TODO * Allow filtering by visit props on event queries if flag is on * Make default sessions join more conditional * Simplify events_join_sessions? * Add some TODOs * Fix assignment * Handle entry/exit page visit props separately from props stored in events table * Update test which created sessions/events differently from everyone else * Make query_events private * Dont filter by session properties on events table if querying sessions and joining in events * Handle visits, pageviews, events and visitors metrics from other table * both -> either * events, pageviews are strictly event metrics * Add support for (plain) breakdowns deciding which table to use * Run tests with experimental_reduced_joins as a separate job Also refactor which tests are run with postgres:15 to reduce number of jobs * moduledocs for TableDecider * Fix matrix * Custom build name * Move TEST_EXPERIMENTAL_REDUCED_JOINS check * Handle percentage separately from other metrics * Remove debug code * TableDecider tests * both => sample_percent * Improve naming * Simplify code * Breakdowns retain old behavior if getting metric visitors * Unify behavior of entry/exit page hostnames with rest * Fix test naming --- .github/workflows/elixir.yml | 17 +- lib/plausible/stats/aggregate.ex | 20 +-- lib/plausible/stats/base.ex | 63 ++++--- lib/plausible/stats/breakdown.ex | 35 +++- lib/plausible/stats/filters/filters.ex | 9 + lib/plausible/stats/query.ex | 45 ++++- lib/plausible/stats/table_decider.ex | 111 ++++++++++++ lib/plausible/stats/timeseries.ex | 8 +- test/plausible/stats/table_decider_test.exs | 164 ++++++++++++++++++ .../aggregate_test.exs | 46 +---- test/support/test_utils.ex | 25 --- test/test_helper.exs | 8 + 12 files changed, 420 insertions(+), 131 deletions(-) create mode 100644 lib/plausible/stats/table_decider.ex create mode 100644 test/plausible/stats/table_decider_test.exs diff --git a/.github/workflows/elixir.yml b/.github/workflows/elixir.yml index a84c53136..18162bc90 100644 --- a/.github/workflows/elixir.yml +++ b/.github/workflows/elixir.yml @@ -15,14 +15,25 @@ env: jobs: build: - name: Build and test + name: "Build and test (${{ matrix.mix_env }}, ${{ matrix.postgres_image }}${{ matrix.test_experimental_reduced_joins == '1' && ', experimental_reduced_joins' || '' }})" runs-on: ubuntu-latest strategy: matrix: mix_env: ['test', 'small_test'] - postgres_image: ['postgres:15', 'postgres:16'] + postgres_image: ['postgres:16'] + test_experimental_reduced_joins: ['0'] + + include: + - mix_env: 'test' + postgres_image: 'postgres:15' + test_experimental_reduced_joins: '0' + - mix_env: 'test' + postgres_image: 'postgres:16' + test_experimental_reduced_joins: '1' + env: MIX_ENV: ${{ matrix.mix_env }} + TEST_EXPERIMENTAL_REDUCED_JOINS: ${{ matrix.test_experimental_reduced_joins }} services: postgres: image: ${{ matrix.postgres_image }} @@ -132,4 +143,4 @@ jobs: - run: mix format --check-formatted - run: mix deps.unlock --check-unused - run: mix credo diff --from-git-merge-base origin/master - - run: mix dialyzer + - run: mix dialyzer diff --git a/lib/plausible/stats/aggregate.ex b/lib/plausible/stats/aggregate.ex index 68b96183a..45e3010d7 100644 --- a/lib/plausible/stats/aggregate.ex +++ b/lib/plausible/stats/aggregate.ex @@ -5,19 +5,6 @@ defmodule Plausible.Stats.Aggregate do import Ecto.Query alias Plausible.Stats.{Query, Util} - @revenue_metrics on_full_build(do: Plausible.Stats.Goal.Revenue.revenue_metrics(), else: []) - - @event_metrics [ - :visitors, - :pageviews, - :events, - :sample_percent, - :conversion_rate, - :total_visitors - ] ++ @revenue_metrics - - @session_metrics [:visits, :bounce_rate, :visit_duration, :views_per_visit, :sample_percent] - def aggregate(site, query, metrics) do {currency, metrics} = on_full_build do @@ -28,18 +15,17 @@ defmodule Plausible.Stats.Aggregate do Query.trace(query, metrics) - event_metrics = + {event_metrics, session_metrics, other_metrics} = metrics |> Util.maybe_add_visitors_metric() - |> Enum.filter(&(&1 in @event_metrics)) + |> Plausible.Stats.TableDecider.partition_metrics(query) event_task = fn -> aggregate_events(site, query, event_metrics) end - session_metrics = Enum.filter(metrics, &(&1 in @session_metrics)) session_task = fn -> aggregate_sessions(site, query, session_metrics) end time_on_page_task = - if :time_on_page in metrics do + if :time_on_page in other_metrics do fn -> aggregate_time_on_page(site, query) end else fn -> %{} end diff --git a/lib/plausible/stats/base.ex b/lib/plausible/stats/base.ex index 050744b18..923ab53b3 100644 --- a/lib/plausible/stats/base.ex +++ b/lib/plausible/stats/base.ex @@ -3,7 +3,7 @@ defmodule Plausible.Stats.Base do use Plausible use Plausible.Stats.Fragments - alias Plausible.Stats.{Query, Filters} + alias Plausible.Stats.{Query, Filters, TableDecider} alias Plausible.Timezones import Ecto.Query @@ -15,7 +15,7 @@ defmodule Plausible.Stats.Base do def base_event_query(site, query) do events_q = query_events(site, query) - if Enum.any?(Filters.visit_props(), &query.filters["visit:" <> &1]) do + if TableDecider.events_join_sessions?(query) do sessions_q = from( s in query_sessions(site, query), @@ -30,11 +30,15 @@ defmodule Plausible.Stats.Base do on: e.session_id == sq.session_id ) else - events_q + if query.experimental_reduced_joins? do + events_q |> filter_by_visit_props(Filters.event_table_visit_props(), query) + else + events_q + end end end - def query_events(site, query) do + defp query_events(site, query) do {first_datetime, last_datetime} = utc_boundaries(query, site) q = @@ -125,6 +129,27 @@ defmodule Plausible.Stats.Base do q end + def query_sessions(site, query) do + {first_datetime, last_datetime} = + utc_boundaries(query, site) + + q = from(s in "sessions_v2", where: s.site_id == ^site.id) + + sessions_q = + if query.experimental_session_count? do + from s in q, where: s.timestamp >= ^first_datetime and s.start < ^last_datetime + else + from s in q, where: s.start >= ^first_datetime and s.start < ^last_datetime + end + + on_full_build do + sessions_q = Plausible.Stats.Sampling.add_query_hint(sessions_q, query) + end + + filter_by_entry_props(sessions_q, query) + |> filter_by_visit_props(Filters.visit_props(), query) + end + @api_prop_name_to_db %{ "source" => "referrer_source", "device" => "screen_size", @@ -137,27 +162,8 @@ defmodule Plausible.Stats.Base do "entry_page_hostname" => "hostname" } - def query_sessions(site, query) do - {first_datetime, last_datetime} = - utc_boundaries(query, site) - - q = from(s in "sessions_v2", where: s.site_id == ^site.id) - - sessions_q = - if FunWithFlags.enabled?(:experimental_session_count, for: site) or - query.experimental_session_count? do - from s in q, where: s.timestamp >= ^first_datetime and s.start < ^last_datetime - else - from s in q, where: s.start >= ^first_datetime and s.start < ^last_datetime - end - - on_full_build do - sessions_q = Plausible.Stats.Sampling.add_query_hint(sessions_q, query) - end - - sessions_q = filter_by_entry_props(sessions_q, query) - - Enum.reduce(Filters.visit_props(), sessions_q, fn prop_name, sessions_q -> + defp filter_by_visit_props(q, visit_props, query) do + Enum.reduce(visit_props, q, fn prop_name, sessions_q -> filter_key = "visit:" <> prop_name db_field = @@ -241,6 +247,13 @@ defmodule Plausible.Stats.Base do } end + defp select_event_metric(:visits) do + %{ + visits: + dynamic([e], fragment("toUInt64(round(uniq(?) * any(_sample_factor)))", e.session_id)) + } + end + on_full_build do defp select_event_metric(:total_revenue) do %{total_revenue: Plausible.Stats.Goal.Revenue.total_revenue_query()} diff --git a/lib/plausible/stats/breakdown.ex b/lib/plausible/stats/breakdown.ex index 7231aa4f0..693ccfae4 100644 --- a/lib/plausible/stats/breakdown.ex +++ b/lib/plausible/stats/breakdown.ex @@ -6,7 +6,7 @@ defmodule Plausible.Stats.Breakdown do import Plausible.Stats.{Base, Imported} import Ecto.Query require OpenTelemetry.Tracer, as: Tracer - alias Plausible.Stats.{Query, Util} + alias Plausible.Stats.{Query, Util, TableDecider} @no_ref "Direct / None" @not_set "(not set)" @@ -103,7 +103,6 @@ defmodule Plausible.Stats.Breakdown do {event_q, page_q} -> from( e in subquery(union_all(event_q, ^page_q)), - # :TODO: Handle other orderings order_by: [desc: e.visitors] ) |> apply_pagination(pagination) @@ -200,9 +199,35 @@ defmodule Plausible.Stats.Breakdown do metrics_to_select = Util.maybe_add_visitors_metric(metrics) -- @computed_metrics - breakdown_sessions(site, query, property, metrics_to_select) - |> maybe_add_group_conversion_rate(&breakdown_sessions/4, site, query, property, metrics) - |> paginate_and_execute(metrics, pagination) + case breakdown_table(query, metrics, property) do + :session -> + breakdown_sessions(site, query, property, metrics_to_select) + |> maybe_add_group_conversion_rate(&breakdown_sessions/4, site, query, property, metrics) + |> paginate_and_execute(metrics, pagination) + + :event -> + breakdown_events(site, query, property, metrics_to_select) + |> maybe_add_group_conversion_rate(&breakdown_events/4, site, query, property, metrics) + |> paginate_and_execute(metrics, pagination) + end + end + + # Backwards compatibility + # defp breakdown_table(%Query{experimental_reduced_joins?: false}, _, _), do: :session + + defp breakdown_table(_query, _metrics, "visit:entry_page"), do: :session + defp breakdown_table(_query, _metrics, "visit:entry_page_hostname"), do: :session + defp breakdown_table(_query, _metrics, "visit:exit_page"), do: :session + defp breakdown_table(_query, _metrics, "visit:exit_page_hostname"), do: :session + + defp breakdown_table(query, metrics, property) do + {_, session_metrics, _} = TableDecider.partition_metrics(metrics, query, property) + + if not Enum.empty?(session_metrics) do + :session + else + :event + end end defp zip_results(event_result, session_result, property, metrics) do diff --git a/lib/plausible/stats/filters/filters.ex b/lib/plausible/stats/filters/filters.ex index ca5c964a6..732369686 100644 --- a/lib/plausible/stats/filters/filters.ex +++ b/lib/plausible/stats/filters/filters.ex @@ -29,6 +29,15 @@ defmodule Plausible.Stats.Filters do ] def visit_props(), do: @visit_props |> Enum.map(&to_string/1) + @event_table_visit_props @visit_props -- + [ + :entry_page, + :exit_page, + :entry_page_hostname, + :exit_page_hostname + ] + def event_table_visit_props(), do: @event_table_visit_props |> Enum.map(&to_string/1) + @event_props [:name, :page, :goal, :hostname] def event_props(), do: @event_props |> Enum.map(&to_string/1) diff --git a/lib/plausible/stats/query.ex b/lib/plausible/stats/query.ex index 96011b041..97b700ef0 100644 --- a/lib/plausible/stats/query.ex +++ b/lib/plausible/stats/query.ex @@ -10,6 +10,7 @@ defmodule Plausible.Stats.Query do include_imported: false, now: nil, experimental_session_count?: false, + experimental_reduced_joins?: false, experimental_hostname_filter?: false require OpenTelemetry.Tracer, as: Tracer @@ -23,7 +24,9 @@ defmodule Plausible.Stats.Query do query = __MODULE__ |> struct!(now: now) - |> put_experimental_flags(params) + |> put_experimental_session_count(site, params) + |> put_experimental_reduced_joins(site, params) + |> put_experimental_hostname_filter(params) |> put_period(site, params) |> put_interval(params) |> put_parsed_filters(params) @@ -37,14 +40,38 @@ defmodule Plausible.Stats.Query do query end - defp put_experimental_flags(query, params) do - %{ - "experimental_session_count" => :experimental_session_count?, - "experimental_hostname_filter" => :experimental_hostname_filter? - } - |> Enum.reduce(query, fn {param, flag}, query -> - if Map.get(params, param) == "true", do: Map.put(query, flag, true), else: query - end) + defp put_experimental_session_count(query, site, params) do + if Map.has_key?(params, "experimental_session_count") do + struct!(query, + experimental_session_count?: Map.get(params, "experimental_session_count") == "true" + ) + else + struct!(query, + experimental_session_count?: FunWithFlags.enabled?(:experimental_session_count, for: site) + ) + end + end + + defp put_experimental_reduced_joins(query, site, params) do + if Map.has_key?(params, "experimental_reduced_joins") do + struct!(query, + experimental_reduced_joins?: Map.get(params, "experimental_reduced_joins") == "true" + ) + else + struct!(query, + experimental_reduced_joins?: FunWithFlags.enabled?(:experimental_reduced_joins, for: site) + ) + end + end + + defp put_experimental_hostname_filter(query, params) do + if Map.has_key?(params, "experimental_hostname_filter") do + struct!(query, + experimental_hostname_filter?: Map.get(params, "experimental_hostname_filter") == "true" + ) + else + query + end end defp put_period(query, site, %{"period" => "realtime"}) do diff --git a/lib/plausible/stats/table_decider.ex b/lib/plausible/stats/table_decider.ex new file mode 100644 index 000000000..ef976a060 --- /dev/null +++ b/lib/plausible/stats/table_decider.ex @@ -0,0 +1,111 @@ +defmodule Plausible.Stats.TableDecider do + @moduledoc """ + This module contains logic for deciding which tables need to be queried given a query + and metrics, with the purpose of reducing the number of queries and JOINs needed to perform. + """ + + import Enum, only: [empty?: 1] + + alias Plausible.Stats.Query + + def events_join_sessions?(query) do + Enum.any?(query.filters, &(filters_partitioner(query, &1) == :session)) + end + + def partition_metrics(metrics, query, breakdown_property \\ nil) do + %{ + event: event_only_metrics, + session: session_only_metrics, + either: either_metrics, + other: other_metrics, + sample_percent: sample_percent + } = + partition(metrics, query, &metric_partitioner/2) + + # Treat breakdown property as yet another filter + filters = + if breakdown_property do + Map.put(query.filters, breakdown_property, nil) + else + query.filters + end + + %{event: event_only_filters, session: session_only_filters} = + partition(filters, query, &filters_partitioner/2) + + cond do + # Only one table needs to be queried + empty?(event_only_metrics) && empty?(event_only_filters) -> + {[], session_only_metrics ++ either_metrics ++ sample_percent, other_metrics} + + empty?(session_only_metrics) && empty?(session_only_filters) -> + {event_only_metrics ++ either_metrics ++ sample_percent, [], other_metrics} + + # Filters on both events and sessions, but only one kind of metric + empty?(event_only_metrics) -> + {[], session_only_metrics ++ either_metrics ++ sample_percent, other_metrics} + + empty?(session_only_metrics) -> + {event_only_metrics ++ either_metrics ++ sample_percent, [], other_metrics} + + # Default: prefer sessions + true -> + {event_only_metrics ++ sample_percent, + session_only_metrics ++ either_metrics ++ sample_percent, other_metrics} + end + end + + defp metric_partitioner(_, :conversion_rate), do: :event + defp metric_partitioner(_, :average_revenue), do: :event + defp metric_partitioner(_, :total_revenue), do: :event + defp metric_partitioner(_, :pageviews), do: :event + defp metric_partitioner(_, :events), do: :event + defp metric_partitioner(_, :bounce_rate), do: :session + defp metric_partitioner(_, :visit_duration), do: :session + defp metric_partitioner(_, :views_per_visit), do: :session + + # Metrics which used to only be queried from one table but can be calculated from either + defp metric_partitioner(%Query{experimental_reduced_joins?: true}, :visits), do: :either + defp metric_partitioner(%Query{experimental_reduced_joins?: true}, :visitors), do: :either + + defp metric_partitioner(_, :visits), do: :session + defp metric_partitioner(_, :visitors), do: :event + # Calculated metrics - handled on callsite separately from other metrics. + defp metric_partitioner(_, :time_on_page), do: :other + defp metric_partitioner(_, :total_visitors), do: :other + defp metric_partitioner(_, :percentage), do: :other + # Sample percentage is included in both tables if queried. + defp metric_partitioner(_, :sample_percent), do: :sample_percent + + defp metric_partitioner(%Query{experimental_reduced_joins?: false}, unknown) do + raise ArgumentError, "Metric #{unknown} not supported without experimental_reduced_joins?" + end + + defp metric_partitioner(_, _), do: :either + + defp filters_partitioner(_, {"event:" <> _, _}), do: :event + defp filters_partitioner(_, {"visit:entry_page", _}), do: :session + defp filters_partitioner(_, {"visit:entry_page_hostname", _}), do: :session + defp filters_partitioner(_, {"visit:exit_page", _}), do: :session + defp filters_partitioner(_, {"visit:exit_page_hostname", _}), do: :session + + defp filters_partitioner(%Query{experimental_reduced_joins?: true}, {"visit:" <> _, _}), + do: :either + + defp filters_partitioner(_, {"visit:" <> _, _}), + do: :session + + defp filters_partitioner(%Query{experimental_reduced_joins?: false}, {unknown, _}) do + raise ArgumentError, "Filter #{unknown} not supported without experimental_reduced_joins?" + end + + defp filters_partitioner(_, _), do: :either + + @default %{event: [], session: [], either: [], other: [], sample_percent: []} + defp partition(values, query, partitioner) do + Enum.reduce(values, @default, fn value, acc -> + key = partitioner.(query, value) + Map.put(acc, key, Map.fetch!(acc, key) ++ [value]) + end) + end +end diff --git a/lib/plausible/stats/timeseries.ex b/lib/plausible/stats/timeseries.ex index 2efa6418b..16594594b 100644 --- a/lib/plausible/stats/timeseries.ex +++ b/lib/plausible/stats/timeseries.ex @@ -18,15 +18,11 @@ defmodule Plausible.Stats.Timeseries do @typep value :: nil | integer() | float() @type results :: nonempty_list(%{required(:date) => Date.t(), required(metric()) => value()}) - @revenue_metrics on_full_build(do: Plausible.Stats.Goal.Revenue.revenue_metrics(), else: []) - - @event_metrics [:visitors, :pageviews, :events, :conversion_rate] ++ @revenue_metrics - @session_metrics [:visits, :bounce_rate, :visit_duration, :views_per_visit] def timeseries(site, query, metrics) do steps = buckets(query) - event_metrics = Enum.filter(metrics, &(&1 in @event_metrics)) - session_metrics = Enum.filter(metrics, &(&1 in @session_metrics)) + {event_metrics, session_metrics, _} = + Plausible.Stats.TableDecider.partition_metrics(metrics, query) {currency, event_metrics} = on_full_build do diff --git a/test/plausible/stats/table_decider_test.exs b/test/plausible/stats/table_decider_test.exs new file mode 100644 index 000000000..c7d85bbe7 --- /dev/null +++ b/test/plausible/stats/table_decider_test.exs @@ -0,0 +1,164 @@ +defmodule Plausible.Stats.TableDeciderTest do + use Plausible.DataCase, async: true + alias Plausible.Stats.Query + + import Plausible.Stats.TableDecider + + test "events_join_sessions? with experimental_reduced_joins disabled" do + assert not events_join_sessions?(make_query(false, %{})) + assert not events_join_sessions?(make_query(false, %{name: "pageview"})) + assert events_join_sessions?(make_query(false, %{source: "Google"})) + assert events_join_sessions?(make_query(false, %{entry_page: "/"})) + assert events_join_sessions?(make_query(false, %{exit_page: "/"})) + end + + test "events_join_sessions? with experimental_reduced_joins enabled" do + assert not events_join_sessions?(make_query(true, %{})) + assert not events_join_sessions?(make_query(true, %{name: "pageview"})) + assert not events_join_sessions?(make_query(true, %{source: "Google"})) + assert events_join_sessions?(make_query(true, %{entry_page: "/"})) + assert events_join_sessions?(make_query(true, %{exit_page: "/"})) + end + + describe "partition_metrics" do + test "with no metrics or filters" do + query = make_query(false, %{}) + + assert partition_metrics([], query) == {[], [], []} + end + + test "session-only metrics accordingly" do + query = make_query(false, %{}) + + assert partition_metrics([:bounce_rate, :views_per_visit], query) == + {[], [:bounce_rate, :views_per_visit], []} + end + + test "event-only metrics accordingly" do + query = make_query(false, %{}) + + assert partition_metrics([:total_revenue, :visitors], query) == + {[:total_revenue, :visitors], [], []} + end + + test "filters from both, event-only metrics" do + query = make_query(false, %{name: "pageview", source: "Google"}) + + assert partition_metrics([:total_revenue], query) == {[:total_revenue], [], []} + end + + test "filters from both, session-only metrics" do + query = make_query(false, %{name: "pageview", source: "Google"}) + + assert partition_metrics([:bounce_rate], query) == {[], [:bounce_rate], []} + end + + test "session filters but no session metrics" do + query = make_query(false, %{source: "Google"}) + + assert partition_metrics([:total_revenue], query) == {[:total_revenue], [], []} + end + + test "sample_percent is added to both types of metrics" do + query = make_query(false, %{}) + + assert partition_metrics([:total_revenue, :sample_percent], query) == + {[:total_revenue, :sample_percent], [], []} + + assert partition_metrics([:bounce_rate, :sample_percent], query) == + {[], [:bounce_rate, :sample_percent], []} + + assert partition_metrics([:total_revenue, :bounce_rate, :sample_percent], query) == + {[:total_revenue, :sample_percent], [:bounce_rate, :sample_percent], []} + end + + test "other metrics put in its own result" do + query = make_query(false, %{}) + + assert partition_metrics([:time_on_page, :percentage, :total_visitors], query) == + {[], [], [:time_on_page, :percentage, :total_visitors]} + end + + test "raises if unknown metric" do + query = make_query(false, %{}) + + assert_raise ArgumentError, fn -> + partition_metrics([:foobar], query) + end + end + end + + describe "partition_metrics with experimental_reduced_joins enabled" do + test "metrics that can be calculated on either when event-only metrics" do + query = make_query(true, %{}) + + assert partition_metrics([:total_revenue, :visitors], query) == + {[:total_revenue, :visitors], [], []} + + assert partition_metrics([:pageviews, :visits], query) == {[:pageviews, :visits], [], []} + end + + test "metrics that can be calculated on either when session-only metrics" do + query = make_query(true, %{}) + + assert partition_metrics([:bounce_rate, :visitors], query) == + {[], [:bounce_rate, :visitors], []} + + assert partition_metrics([:visit_duration, :visits], query) == + {[], [:visit_duration, :visits], []} + end + + test "metrics that can be calculated on either are biased to sessions" do + query = make_query(true, %{}) + + assert partition_metrics([:bounce_rate, :total_revenue, :visitors], query) == + {[:total_revenue], [:bounce_rate, :visitors], []} + end + + test "sample_percent is handled with either metrics" do + query = make_query(true, %{}) + + assert partition_metrics([:visitors, :sample_percent], query) == + {[], [:visitors, :sample_percent], []} + end + + test "metric can be calculated on either, but filtering on events" do + query = make_query(true, %{name: "pageview"}) + + assert partition_metrics([:visitors], query) == {[:visitors], [], []} + end + + test "metric can be calculated on either, but filtering on events and sessions" do + query = make_query(true, %{name: "pageview", exit_page: "/"}) + + assert partition_metrics([:visitors], query) == {[], [:visitors], []} + end + + test "metric can be calculated on either, filtering on either" do + query = make_query(true, %{source: "Google"}) + + assert partition_metrics([:visitors], query) == {[], [:visitors], []} + end + + test "metric can be calculated on either, filtering on sessions" do + query = make_query(true, %{exit_page: "/"}) + + assert partition_metrics([:visitors], query) == {[], [:visitors], []} + end + + test "breakdown value leans metric" do + query = make_query(true, %{}) + + assert partition_metrics([:visitors], query, "event:name") == {[:visitors], [], []} + assert partition_metrics([:visitors], query, "visit:source") == {[], [:visitors], []} + assert partition_metrics([:visitors], query, "visit:exit_page") == {[], [:visitors], []} + end + end + + defp make_query(experimental_reduced_joins?, filters) do + Query.from(build(:site), %{ + "experimental_reduced_joins" => to_string(experimental_reduced_joins?), + "filters" => Jason.encode!(filters) + }) + end +end diff --git a/test/plausible_web/controllers/api/external_stats_controller/aggregate_test.exs b/test/plausible_web/controllers/api/external_stats_controller/aggregate_test.exs index 6f0e35092..42fe1417a 100644 --- a/test/plausible_web/controllers/api/external_stats_controller/aggregate_test.exs +++ b/test/plausible_web/controllers/api/external_stats_controller/aggregate_test.exs @@ -1355,50 +1355,14 @@ defmodule PlausibleWeb.Api.ExternalStatsController.AggregateTest do assert json_response(conn, 200)["results"] == %{"visitors" => %{"value" => 3}} end - test "joins correctly with the sessions (CollapsingMergeTree) table", %{ + test "handles filtering by visit country", %{ conn: conn, site: site } do - create_sessions([ - %{ - site_id: site.id, - session_id: 1000, - country_code: "EE", - sign: 1, - events: 1 - }, - %{ - site_id: site.id, - session_id: 1000, - country_code: "EE", - sign: -1, - events: 1 - }, - %{ - site_id: site.id, - session_id: 1000, - country_code: "EE", - sign: 1, - events: 2 - } - ]) - - create_events([ - %{ - site_id: site.id, - session_id: 1000, - name: "pageview" - }, - %{ - site_id: site.id, - session_id: 1000, - name: "pageview" - }, - %{ - site_id: site.id, - session_id: 1000, - name: "pageview" - } + populate_stats(site, [ + build(:pageview, country_code: "EE"), + build(:pageview, country_code: "EE"), + build(:pageview, country_code: "EE") ]) conn = diff --git a/test/support/test_utils.ex b/test/support/test_utils.ex index 702c0e303..2c8da18d3 100644 --- a/test/support/test_utils.ex +++ b/test/support/test_utils.ex @@ -90,31 +90,6 @@ defmodule Plausible.TestUtils do Plausible.IngestRepo.insert_all(Plausible.ClickhouseEventV2, pageviews) end - def create_events(events) do - events = - Enum.map(events, fn event -> - Factory.build(:event, event) - |> Map.from_struct() - |> Map.delete(:__meta__) - |> update_in([:timestamp], &to_naive_truncate/1) - end) - - Plausible.IngestRepo.insert_all(Plausible.ClickhouseEventV2, events) - end - - def create_sessions(sessions) do - sessions = - Enum.map(sessions, fn session -> - Factory.build(:ch_session, session) - |> Map.from_struct() - |> Map.delete(:__meta__) - |> update_in([:timestamp], &to_naive_truncate/1) - |> update_in([:start], &to_naive_truncate/1) - end) - - Plausible.IngestRepo.insert_all(Plausible.ClickhouseSessionV2, sessions) - end - def log_in(%{user: user, conn: conn}) do conn = init_session(conn) diff --git a/test/test_helper.exs b/test/test_helper.exs index 338e967f2..581b98b5f 100644 --- a/test/test_helper.exs +++ b/test/test_helper.exs @@ -3,6 +3,14 @@ Mox.defmock(Plausible.HTTPClient.Mock, for: Plausible.HTTPClient.Interface) Application.ensure_all_started(:double) FunWithFlags.enable(:imports_exports) FunWithFlags.enable(:shield_pages) + +# Temporary flag to test `experimental_reduced_joins` flag on all tests. +if System.get_env("TEST_EXPERIMENTAL_REDUCED_JOINS") == "1" do + FunWithFlags.enable(:experimental_reduced_joins) +else + FunWithFlags.disable(:experimental_reduced_joins) +end + Ecto.Adapters.SQL.Sandbox.mode(Plausible.Repo, :manual) if Mix.env() == :small_test do