From 7b11b71dcf1c682b3590c30be5d6e3dd050f59a3 Mon Sep 17 00:00:00 2001 From: RobertJoonas <56999674+RobertJoonas@users.noreply.github.com> Date: Mon, 10 Jun 2024 11:03:12 +0100 Subject: [PATCH] Minor refactor: Remove skip_refresh_imported_opts from query modifying functions (#4204) * Remove skip_refresh_imported_opts from query modifying functions + add test coverage for timeseries conversion rate. * format * improve comment --- lib/plausible/stats/base.ex | 31 +++++++++++---- lib/plausible/stats/query.ex | 22 +++++------ lib/plausible/stats/timeseries.ex | 7 +++- .../timeseries_test.exs | 38 +++++++++++++++++++ 4 files changed, 77 insertions(+), 21 deletions(-) diff --git a/lib/plausible/stats/base.ex b/lib/plausible/stats/base.ex index 108e29f7f..c067ea20f 100644 --- a/lib/plausible/stats/base.ex +++ b/lib/plausible/stats/base.ex @@ -291,7 +291,20 @@ defmodule Plausible.Stats.Base do |> select([e], total_visitors: fragment(@uniq_users_expression, e.user_id)) end - defp total_visitors_subquery(site, %Query{include_imported: true} = query) do + # `total_visitors_subquery` returns a subquery which selects `total_visitors` - + # the number used as the denominator in the calculation of `conversion_rate` and + # `percentage` metrics. + + # Usually, when calculating the totals, a new query is passed into this function, + # where certain filters (e.g. goal, props) are removed. That might make the query + # able to include imported data. However, we always want to include imported data + # only if it's included in the base query - otherwise the total will be based on + # a different data set, making the metric inaccurate. This is why we're using an + # explicit `include_imported` argument here. + @spec total_visitors_subquery(Plausible.Site.t(), Query.t(), boolean()) :: :ok + defp total_visitors_subquery(site, query, include_imported) + + defp total_visitors_subquery(site, query, true = _include_imported) do dynamic( [e], selected_as( @@ -302,16 +315,18 @@ defmodule Plausible.Stats.Base do ) end - defp total_visitors_subquery(site, query) do + defp total_visitors_subquery(site, query, false = _include_imported) do dynamic([e], selected_as(subquery(total_visitors(site, query)), :__total_visitors)) end def add_percentage_metric(q, site, query, metrics) do if :percentage in metrics do - total_query = Query.set_property(query, nil, skip_refresh_imported_opts: true) + total_query = Query.set_property(query, nil) q - |> select_merge(^%{__total_visitors: total_visitors_subquery(site, total_query)}) + |> select_merge( + ^%{__total_visitors: total_visitors_subquery(site, total_query, query.include_imported)} + ) |> select_merge(%{ percentage: fragment( @@ -333,12 +348,14 @@ defmodule Plausible.Stats.Base do if :conversion_rate in metrics do total_query = query - |> Query.remove_filters(["event:goal", "event:props"], skip_refresh_imported_opts: true) - |> Query.set_property(nil, skip_refresh_imported_opts: true) + |> Query.remove_filters(["event:goal", "event:props"]) + |> Query.set_property(nil) # :TRICKY: Subquery is used due to event:goal breakdown above doing an UNION ALL subquery(q) - |> select_merge(^%{total_visitors: total_visitors_subquery(site, total_query)}) + |> select_merge( + ^%{total_visitors: total_visitors_subquery(site, total_query, query.include_imported)} + ) |> select_merge([e], %{ conversion_rate: fragment( diff --git a/lib/plausible/stats/query.ex b/lib/plausible/stats/query.ex index 9786f20b4..760416902 100644 --- a/lib/plausible/stats/query.ex +++ b/lib/plausible/stats/query.ex @@ -203,13 +203,11 @@ defmodule Plausible.Stats.Query do struct!(query, filters: Filters.parse(params["filters"])) end - @spec set_property(t(), String.t() | nil, Keyword.t()) :: t() - def set_property(query, property, opts \\ []) do - query = struct!(query, property: property) - - if Keyword.get(opts, :skip_refresh_imported_opts), - do: query, - else: refresh_imported_opts(query) + @spec set_property(t(), String.t() | nil) :: t() + def set_property(query, property) do + query + |> struct!(property: property) + |> refresh_imported_opts() end def put_filter(query, filter) do @@ -218,17 +216,15 @@ defmodule Plausible.Stats.Query do |> refresh_imported_opts() end - def remove_filters(query, prefixes, opts \\ []) do + def remove_filters(query, prefixes) do new_filters = Enum.reject(query.filters, fn [_, filter_key | _rest] -> Enum.any?(prefixes, &String.starts_with?(filter_key, &1)) end) - query = struct!(query, filters: new_filters) - - if Keyword.get(opts, :skip_refresh_imported_opts), - do: query, - else: refresh_imported_opts(query) + query + |> struct!(filters: new_filters) + |> refresh_imported_opts() end def exclude_imported(query) do diff --git a/lib/plausible/stats/timeseries.ex b/lib/plausible/stats/timeseries.ex index bab37c6e2..69e3dc528 100644 --- a/lib/plausible/stats/timeseries.ex +++ b/lib/plausible/stats/timeseries.ex @@ -314,9 +314,14 @@ defmodule Plausible.Stats.Timeseries do defp maybe_add_timeseries_conversion_rate(q, site, query, metrics) do if :conversion_rate in metrics do + # Having removed some filters, the query might become eligible + # for including imported data. However, we still want to make + # sure that that include_imported is in sync between original + # and the totals query. totals_query = query - |> Query.remove_filters(["event:goal", "event:props"], skip_refresh_imported_opts: true) + |> Query.remove_filters(["event:goal", "event:props"]) + |> struct!(include_imported: query.include_imported) totals_timeseries_q = from(e in base_event_query(site, totals_query), diff --git a/test/plausible_web/controllers/api/external_stats_controller/timeseries_test.exs b/test/plausible_web/controllers/api/external_stats_controller/timeseries_test.exs index 9149711aa..2a60ea2f7 100644 --- a/test/plausible_web/controllers/api/external_stats_controller/timeseries_test.exs +++ b/test/plausible_web/controllers/api/external_stats_controller/timeseries_test.exs @@ -1801,6 +1801,44 @@ defmodule PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do ] end + test "ignores imported data in conversion rate total calculation when imported data cannot be included", + %{ + conn: conn, + site: site + } do + site_import = insert(:site_import, site: site) + + insert(:goal, site: site, event_name: "Signup") + + populate_stats(site, site_import.id, [ + build(:event, name: "Signup", pathname: "/register", timestamp: ~N[2021-01-01 00:00:00]), + build(:imported_custom_events, name: "Signup", date: ~D[2021-01-01], visitors: 1), + build(:imported_pages, page: "/register", date: ~D[2021-01-01], visitors: 2) + ]) + + %{"results" => results, "warning" => warning} = + conn + |> get("/api/v1/stats/timeseries", %{ + "site_id" => site.domain, + "period" => "custom", + "date" => "2021-01-01,2021-01-01", + "interval" => "date", + "metrics" => "conversion_rate", + "filters" => "event:goal==Signup;event:page==/register", + "with_imported" => "true" + }) + |> json_response(200) + + assert results == [ + %{ + "date" => "2021-01-01", + "conversion_rate" => 100.0 + } + ] + + assert warning =~ "Imported stats are not included in the results" + end + test "returns conversion rate timeseries with a goal + custom prop filter", %{ conn: conn, site: site