Move add_percentage logic into clickhouse (#3854)

* Remove `add_percentage`, calculate percentages in clickhouse queries

This simplifies querying logic and avoids doing extra queries and avoids
race conditions.

* Remove special none handling from breakdowns, handling percentages correctly

* Add (failing) test showing expected add_percentage behavior for user making multiple sessions

* Update add_percentage behavior to use separate subqueries
This commit is contained in:
Karl-Aksel Puulmann 2024-03-06 11:08:25 +02:00 committed by GitHub
parent c60a2faee4
commit c6d98397a8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 179 additions and 113 deletions

View File

@ -10,6 +10,8 @@ defmodule Plausible.Stats.Base do
@no_ref "Direct / None" @no_ref "Direct / None"
@not_set "(not set)" @not_set "(not set)"
@uniq_users_expression "toUInt64(round(uniq(?) * any(_sample_factor)))"
def base_event_query(site, query) do def base_event_query(site, query) do
events_q = query_events(site, query) events_q = query_events(site, query)
@ -229,7 +231,7 @@ defmodule Plausible.Stats.Base do
def select_event_metrics(q, [:visitors | rest]) do def select_event_metrics(q, [:visitors | rest]) do
from(e in q, from(e in q,
select_merge: %{ select_merge: %{
visitors: fragment("toUInt64(round(uniq(?) * any(_sample_factor)))", e.user_id) visitors: selected_as(fragment(@uniq_users_expression, e.user_id), :visitors)
} }
) )
|> select_event_metrics(rest) |> select_event_metrics(rest)
@ -259,6 +261,10 @@ defmodule Plausible.Stats.Base do
|> select_event_metrics(rest) |> select_event_metrics(rest)
end end
def select_event_metrics(q, [:percentage | rest]) do
q |> select_event_metrics(rest)
end
def select_event_metrics(_, [unknown | _]), do: raise("Unknown metric: #{unknown}") def select_event_metrics(_, [unknown | _]), do: raise("Unknown metric: #{unknown}")
def select_session_metrics(q, [], _query), do: q def select_session_metrics(q, [], _query), do: q
@ -315,7 +321,11 @@ defmodule Plausible.Stats.Base do
def select_session_metrics(q, [:visitors | rest], query) do def select_session_metrics(q, [:visitors | rest], query) do
from(s in q, from(s in q,
select_merge: %{ select_merge: %{
visitors: fragment("toUInt64(round(uniq(?) * any(_sample_factor)))", s.user_id) visitors:
selected_as(
fragment("toUInt64(round(uniq(?) * any(_sample_factor)))", s.user_id),
:visitors
)
} }
) )
|> select_session_metrics(rest, query) |> select_session_metrics(rest, query)
@ -353,6 +363,10 @@ defmodule Plausible.Stats.Base do
|> select_session_metrics(rest, query) |> select_session_metrics(rest, query)
end end
def select_session_metrics(q, [:percentage | rest], query) do
q |> select_session_metrics(rest, query)
end
def dynamic_filter_condition(query, filter_key, db_field) do def dynamic_filter_condition(query, filter_key, db_field) do
case query && query.filters && query.filters[filter_key] do case query && query.filters && query.filters[filter_key] do
{:is, value} -> {:is, value} ->
@ -566,4 +580,42 @@ defmodule Plausible.Stats.Base do
fragment("arrayExists(k -> match(?, k), ?)", get_by_key(e, :meta, ^prop_name), ^regexes) fragment("arrayExists(k -> match(?, k), ?)", get_by_key(e, :meta, ^prop_name), ^regexes)
) )
end end
defp total_visitors(site, query) do
base_event_query(site, query)
|> select([e], total_visitors: fragment(@uniq_users_expression, e.user_id))
end
defp total_visitors_subquery(site, %Query{include_imported: true} = query) do
dynamic(
[e],
selected_as(
subquery(total_visitors(site, query)) +
subquery(Plausible.Stats.Imported.total_imported_visitors(site, query)),
:__total_visitors
)
)
end
defp total_visitors_subquery(site, query) do
dynamic([e], selected_as(subquery(total_visitors(site, query)), :__total_visitors))
end
def add_percentage_metric(q, site, query, metrics) do
if :percentage in metrics do
q
|> select_merge(^%{__total_visitors: total_visitors_subquery(site, query)})
|> select_merge(%{
percentage:
fragment(
"if(? > 0, round(? / ? * 100, 1), null)",
selected_as(:__total_visitors),
selected_as(:visitors),
selected_as(:__total_visitors)
)
})
else
q
end
end
end end

View File

@ -14,7 +14,7 @@ defmodule Plausible.Stats.Breakdown do
@revenue_metrics on_full_build(do: Plausible.Stats.Goal.Revenue.revenue_metrics(), else: []) @revenue_metrics on_full_build(do: Plausible.Stats.Goal.Revenue.revenue_metrics(), else: [])
@event_metrics [:visitors, :pageviews, :events] ++ @revenue_metrics @event_metrics [:visitors, :pageviews, :events, :percentage] ++ @revenue_metrics
# These metrics can be asked from the `breakdown/5` function, # These metrics can be asked from the `breakdown/5` function,
# but they are different from regular metrics such as `visitors`, # but they are different from regular metrics such as `visitors`,
@ -108,27 +108,9 @@ defmodule Plausible.Stats.Breakdown do
metrics_to_select = Util.maybe_add_visitors_metric(metrics) -- @computed_metrics metrics_to_select = Util.maybe_add_visitors_metric(metrics) -- @computed_metrics
{_limit, page} = pagination
none_result =
if page == 1 && include_none_result?(query.filters[property]) do
none_query = Query.put_filter(query, property, {:is, "(none)"})
from(e in base_event_query(site, none_query),
select: %{},
select_merge: %{^custom_prop => "(none)"},
having: fragment("uniq(?)", e.user_id) > 0
)
|> select_event_metrics(metrics_to_select)
|> ClickhouseRepo.all()
else
[]
end
if !Keyword.get(opts, :skip_tracing), do: trace(query, property, metrics) if !Keyword.get(opts, :skip_tracing), do: trace(query, property, metrics)
breakdown_events(site, query, "event:props:" <> custom_prop, metrics_to_select, pagination) breakdown_events(site, query, "event:props:" <> custom_prop, metrics_to_select, pagination)
|> Kernel.++(none_result)
|> Enum.map(&cast_revenue_metrics_to_money(&1, currency)) |> Enum.map(&cast_revenue_metrics_to_money(&1, currency))
|> Enum.sort_by(& &1[sorting_key(metrics_to_select)], :desc) |> Enum.sort_by(& &1[sorting_key(metrics_to_select)], :desc)
|> maybe_add_cr(site, query, nil, metrics) |> maybe_add_cr(site, query, nil, metrics)
@ -215,14 +197,6 @@ defmodule Plausible.Stats.Breakdown do
|> Enum.sort_by(& &1[sorting_key(metrics)], :desc) |> Enum.sort_by(& &1[sorting_key(metrics)], :desc)
end end
defp include_none_result?({:is, value}), do: value == "(none)"
defp include_none_result?({:is_not, "(none)"}), do: false
defp include_none_result?({:member, values}), do: Enum.member?(values, "(none)")
defp include_none_result?({:not_member, values}), do: !Enum.member?(values, "(none)")
defp include_none_result?({:matches, _}), do: false
defp include_none_result?({:matches_member, _}), do: false
defp include_none_result?(_), do: true
defp breakdown_sessions(_, _, _, [], _), do: [] defp breakdown_sessions(_, _, _, [], _), do: []
defp breakdown_sessions(site, query, property, metrics, pagination) do defp breakdown_sessions(site, query, property, metrics, pagination) do
@ -234,6 +208,7 @@ defmodule Plausible.Stats.Breakdown do
|> do_group_by(property) |> do_group_by(property)
|> select_session_metrics(metrics, query) |> select_session_metrics(metrics, query)
|> merge_imported(site, query, property, metrics) |> merge_imported(site, query, property, metrics)
|> add_percentage_metric(site, query, metrics)
|> apply_pagination(pagination) |> apply_pagination(pagination)
|> ClickhouseRepo.all() |> ClickhouseRepo.all()
|> transform_keys(%{operating_system: :os}) |> transform_keys(%{operating_system: :os})
@ -250,9 +225,11 @@ defmodule Plausible.Stats.Breakdown do
|> do_group_by(property) |> do_group_by(property)
|> select_event_metrics(metrics) |> select_event_metrics(metrics)
|> merge_imported(site, query, property, metrics) |> merge_imported(site, query, property, metrics)
|> add_percentage_metric(site, query, metrics)
|> apply_pagination(pagination) |> apply_pagination(pagination)
|> ClickhouseRepo.all() |> ClickhouseRepo.all()
|> transform_keys(%{operating_system: :os}) |> transform_keys(%{operating_system: :os})
|> Util.keep_requested_metrics(metrics)
end end
defp maybe_add_time_on_page(event_results, site, query, metrics) do defp maybe_add_time_on_page(event_results, site, query, metrics) do
@ -445,10 +422,19 @@ defmodule Plausible.Stats.Breakdown do
) do ) do
from( from(
e in q, e in q,
where: has_key(e, :meta, ^prop), select_merge: %{
select_merge: %{^prop => get_by_key(e, :meta, ^prop)}, ^prop =>
group_by: get_by_key(e, :meta, ^prop), selected_as(
order_by: {:asc, get_by_key(e, :meta, ^prop)} fragment(
"if(not empty(?), ?, '(none)')",
get_by_key(e, :meta, ^prop),
get_by_key(e, :meta, ^prop)
),
:breakdown_prop_value
)
},
group_by: selected_as(:breakdown_prop_value),
order_by: {:asc, selected_as(:breakdown_prop_value)}
) )
end end

View File

@ -321,16 +321,8 @@ defmodule Plausible.Stats.Imported do
end end
def merge_imported(q, site, query, :aggregate, metrics) do def merge_imported(q, site, query, :aggregate, metrics) do
import_ids = site.complete_import_ids
imported_q = imported_q =
from( imported_visitors(site, query)
i in "imported_visitors",
where: i.site_id == ^site.id,
where: i.import_id in ^import_ids,
where: i.date >= ^query.date_range.first and i.date <= ^query.date_range.last,
select: %{}
)
|> select_imported_metrics(metrics) |> select_imported_metrics(metrics)
from( from(
@ -343,6 +335,23 @@ defmodule Plausible.Stats.Imported do
def merge_imported(q, _, _, _, _), do: q def merge_imported(q, _, _, _, _), do: q
def total_imported_visitors(site, query) do
imported_visitors(site, query)
|> select_merge([i], %{total_visitors: fragment("sum(?)", i.visitors)})
end
defp imported_visitors(site, query) do
import_ids = site.complete_import_ids
from(
i in "imported_visitors",
where: i.site_id == ^site.id,
where: i.import_id in ^import_ids,
where: i.date >= ^query.date_range.first and i.date <= ^query.date_range.last,
select: %{}
)
end
defp select_imported_metrics(q, []), do: q defp select_imported_metrics(q, []), do: q
defp select_imported_metrics(q, [:visitors | rest]) do defp select_imported_metrics(q, [:visitors | rest]) do
@ -457,7 +466,11 @@ defmodule Plausible.Stats.Imported do
defp select_joined_metrics(q, [:visitors | rest]) do defp select_joined_metrics(q, [:visitors | rest]) do
q q
|> select_merge([s, i], %{ |> select_merge([s, i], %{
:visitors => fragment("coalesce(?, 0) + coalesce(?, 0)", s.visitors, i.visitors) :visitors =>
selected_as(
fragment("coalesce(?, 0) + coalesce(?, 0)", s.visitors, i.visitors),
:visitors
)
}) })
|> select_joined_metrics(rest) |> select_joined_metrics(rest)
end end

View File

@ -3,7 +3,12 @@ defmodule Plausible.Stats.Util do
Utilities for modifying stat results Utilities for modifying stat results
""" """
@manually_removable_metrics [:__internal_visits, :visitors] @manually_removable_metrics [
:__internal_visits,
:visitors,
:__total_visitors,
:__breakdown_value
]
@doc """ @doc """
Sometimes we need to manually add metrics in order to calculate the value for Sometimes we need to manually add metrics in order to calculate the value for

View File

@ -827,12 +827,11 @@ defmodule PlausibleWeb.Api.StatsController do
site = conn.assigns[:site] site = conn.assigns[:site]
query = site |> Query.from(params) query = site |> Query.from(params)
pagination = parse_pagination(params) pagination = parse_pagination(params)
metrics = breakdown_metrics(query) metrics = breakdown_metrics(query, [:percentage])
countries = countries =
Stats.breakdown(site, query, "visit:country", metrics, pagination) Stats.breakdown(site, query, "visit:country", metrics, pagination)
|> transform_keys(%{country: :code}) |> transform_keys(%{country: :code})
|> add_percentages(site, query)
if params["csv"] do if params["csv"] do
countries = countries =
@ -952,12 +951,11 @@ defmodule PlausibleWeb.Api.StatsController do
site = conn.assigns[:site] site = conn.assigns[:site]
query = Query.from(site, params) query = Query.from(site, params)
pagination = parse_pagination(params) pagination = parse_pagination(params)
metrics = breakdown_metrics(query) metrics = breakdown_metrics(query, [:percentage])
browsers = browsers =
Stats.breakdown(site, query, "visit:browser", metrics, pagination) Stats.breakdown(site, query, "visit:browser", metrics, pagination)
|> transform_keys(%{browser: :name}) |> transform_keys(%{browser: :name})
|> add_percentages(site, query)
if params["csv"] do if params["csv"] do
if Map.has_key?(query.filters, "event:goal") do if Map.has_key?(query.filters, "event:goal") do
@ -976,12 +974,11 @@ defmodule PlausibleWeb.Api.StatsController do
site = conn.assigns[:site] site = conn.assigns[:site]
query = Query.from(site, params) query = Query.from(site, params)
pagination = parse_pagination(params) pagination = parse_pagination(params)
metrics = breakdown_metrics(query) metrics = breakdown_metrics(query, [:percentage])
versions = versions =
Stats.breakdown(site, query, "visit:browser_version", metrics, pagination) Stats.breakdown(site, query, "visit:browser_version", metrics, pagination)
|> transform_keys(%{browser_version: :name}) |> transform_keys(%{browser_version: :name})
|> add_percentages(site, query)
if params["csv"] do if params["csv"] do
if Map.has_key?(query.filters, "event:goal") do if Map.has_key?(query.filters, "event:goal") do
@ -1006,12 +1003,11 @@ defmodule PlausibleWeb.Api.StatsController do
site = conn.assigns[:site] site = conn.assigns[:site]
query = Query.from(site, params) query = Query.from(site, params)
pagination = parse_pagination(params) pagination = parse_pagination(params)
metrics = breakdown_metrics(query) metrics = breakdown_metrics(query, [:percentage])
systems = systems =
Stats.breakdown(site, query, "visit:os", metrics, pagination) Stats.breakdown(site, query, "visit:os", metrics, pagination)
|> transform_keys(%{os: :name}) |> transform_keys(%{os: :name})
|> add_percentages(site, query)
if params["csv"] do if params["csv"] do
if Map.has_key?(query.filters, "event:goal") do if Map.has_key?(query.filters, "event:goal") do
@ -1030,12 +1026,11 @@ defmodule PlausibleWeb.Api.StatsController do
site = conn.assigns[:site] site = conn.assigns[:site]
query = Query.from(site, params) query = Query.from(site, params)
pagination = parse_pagination(params) pagination = parse_pagination(params)
metrics = breakdown_metrics(query) metrics = breakdown_metrics(query, [:percentage])
versions = versions =
Stats.breakdown(site, query, "visit:os_version", metrics, pagination) Stats.breakdown(site, query, "visit:os_version", metrics, pagination)
|> transform_keys(%{os_version: :name}) |> transform_keys(%{os_version: :name})
|> add_percentages(site, query)
if params["csv"] do if params["csv"] do
if Map.has_key?(query.filters, "event:goal") do if Map.has_key?(query.filters, "event:goal") do
@ -1056,12 +1051,11 @@ defmodule PlausibleWeb.Api.StatsController do
site = conn.assigns[:site] site = conn.assigns[:site]
query = Query.from(site, params) query = Query.from(site, params)
pagination = parse_pagination(params) pagination = parse_pagination(params)
metrics = breakdown_metrics(query) metrics = breakdown_metrics(query, [:percentage])
sizes = sizes =
Stats.breakdown(site, query, "visit:device", metrics, pagination) Stats.breakdown(site, query, "visit:device", metrics, pagination)
|> transform_keys(%{device: :name}) |> transform_keys(%{device: :name})
|> add_percentages(site, query)
if params["csv"] do if params["csv"] do
if Map.has_key?(query.filters, "event:goal") do if Map.has_key?(query.filters, "event:goal") do
@ -1172,7 +1166,7 @@ defmodule PlausibleWeb.Api.StatsController do
if query.filters["event:goal"] do if query.filters["event:goal"] do
[:visitors, :events, :conversion_rate] ++ @revenue_metrics [:visitors, :events, :conversion_rate] ++ @revenue_metrics
else else
[:visitors, :events] ++ @revenue_metrics [:visitors, :events, :percentage] ++ @revenue_metrics
end end
Stats.breakdown(site, query, prefixed_prop, metrics, pagination) Stats.breakdown(site, query, prefixed_prop, metrics, pagination)
@ -1181,7 +1175,6 @@ defmodule PlausibleWeb.Api.StatsController do
Enum.map(entry, &format_revenue_metric/1) Enum.map(entry, &format_revenue_metric/1)
|> Map.new() |> Map.new()
end) end)
|> add_percentages(site, query)
end end
def current_visitors(conn, _) do def current_visitors(conn, _) do
@ -1228,18 +1221,6 @@ defmodule PlausibleWeb.Api.StatsController do
defp to_int(_, default), do: default defp to_int(_, default), do: default
defp add_percentages([_ | _] = breakdown_result, site, query)
when not is_map_key(query.filters, "event:goal") do
%{visitors: %{value: total_visitors}} = Stats.aggregate(site, query, [:visitors])
breakdown_result
|> Enum.map(fn stat ->
Map.put(stat, :percentage, Float.round(stat.visitors / total_visitors * 100, 1))
end)
end
defp add_percentages(breakdown_result, _, _), do: breakdown_result
defp to_csv(list, columns), do: to_csv(list, columns, columns) defp to_csv(list, columns), do: to_csv(list, columns, columns)
defp to_csv(list, columns, column_names) do defp to_csv(list, columns, column_names) do

View File

@ -98,38 +98,7 @@ defmodule PlausibleWeb.Api.StatsController.CustomPropBreakdownTest do
] ]
end end
test "(none) value is added as +1 to pagination limit", %{conn: conn, site: site} do test "(none) value is included in pagination", %{conn: conn, site: site} do
prop_key = "parim_s6ber"
populate_stats(site, [
build(:pageview, "meta.key": [prop_key], "meta.value": ["K2sna Kalle"]),
build(:pageview, "meta.key": [prop_key], "meta.value": ["K2sna Kalle"]),
build(:pageview)
])
conn =
get(
conn,
"/api/stats/#{site.domain}/custom-prop-values/#{prop_key}?period=day&limit=1"
)
assert json_response(conn, 200) == [
%{
"visitors" => 2,
"name" => "K2sna Kalle",
"events" => 2,
"percentage" => 66.7
},
%{
"visitors" => 1,
"name" => "(none)",
"events" => 1,
"percentage" => 33.3
}
]
end
test "(none) value is only included on the first page of results", %{conn: conn, site: site} do
prop_key = "kaksik" prop_key = "kaksik"
populate_stats(site, [ populate_stats(site, [
@ -144,13 +113,13 @@ defmodule PlausibleWeb.Api.StatsController.CustomPropBreakdownTest do
conn1 = conn1 =
get( get(
conn, conn,
"/api/stats/#{site.domain}/custom-prop-values/#{prop_key}?period=day&limit=1&page=1" "/api/stats/#{site.domain}/custom-prop-values/#{prop_key}?period=day&limit=2&page=1"
) )
conn2 = conn2 =
get( get(
conn, conn,
"/api/stats/#{site.domain}/custom-prop-values/#{prop_key}?period=day&limit=1&page=2" "/api/stats/#{site.domain}/custom-prop-values/#{prop_key}?period=day&limit=2&page=2"
) )
assert json_response(conn1, 200) == [ assert json_response(conn1, 200) == [
@ -161,19 +130,19 @@ defmodule PlausibleWeb.Api.StatsController.CustomPropBreakdownTest do
"percentage" => 50.0 "percentage" => 50.0
}, },
%{ %{
"visitors" => 1, "visitors" => 2,
"name" => "(none)", "name" => "Teet",
"events" => 1, "events" => 2,
"percentage" => 16.7 "percentage" => 33.3
} }
] ]
assert json_response(conn2, 200) == [ assert json_response(conn2, 200) == [
%{ %{
"visitors" => 2, "visitors" => 1,
"name" => "Teet", "name" => "(none)",
"events" => 2, "events" => 1,
"percentage" => 33.3 "percentage" => 16.7
} }
] ]
end end

View File

@ -19,6 +19,32 @@ defmodule PlausibleWeb.Api.StatsController.ScreenSizesTest do
] ]
end end
test "returns screen sizes for user making multiple sessions", %{conn: conn, site: site} do
populate_stats(site, [
build(:pageview,
user_id: 1,
session_screen_size: "Desktop",
timestamp: ~N[2021-01-01 00:00:00]
),
build(:pageview,
user_id: 1,
session_screen_size: "Laptop",
timestamp: ~N[2021-01-01 05:00:00]
)
])
conn =
get(conn, "/api/stats/#{site.domain}/screen-sizes", %{
"period" => "day",
"date" => "2021-01-01"
})
assert json_response(conn, 200) == [
%{"name" => "Desktop", "visitors" => 1, "percentage" => 100},
%{"name" => "Laptop", "visitors" => 1, "percentage" => 100}
]
end
test "returns (not set) when appropriate", %{conn: conn, site: site} do test "returns (not set) when appropriate", %{conn: conn, site: site} do
populate_stats(site, [ populate_stats(site, [
build(:pageview, build(:pageview,
@ -144,6 +170,40 @@ defmodule PlausibleWeb.Api.StatsController.ScreenSizesTest do
] ]
end end
test "returns screen sizes for user making multiple sessions by no of visitors with imported data",
%{conn: conn, site: site} do
populate_stats(site, [
build(:pageview,
user_id: 1,
session_screen_size: "Desktop",
timestamp: ~N[2021-01-01 00:00:00]
),
build(:pageview,
user_id: 1,
session_screen_size: "Laptop",
timestamp: ~N[2021-01-01 05:00:00]
)
])
populate_stats(site, [
build(:imported_devices, device: "Desktop", date: ~D[2021-01-01]),
build(:imported_devices, device: "Laptop", date: ~D[2021-01-01]),
build(:imported_visitors, visitors: 1, date: ~D[2021-01-01])
])
conn =
get(conn, "/api/stats/#{site.domain}/screen-sizes", %{
"period" => "day",
"date" => "2021-01-01",
"with_imported" => "true"
})
assert json_response(conn, 200) == [
%{"name" => "Desktop", "visitors" => 2, "percentage" => 100},
%{"name" => "Laptop", "visitors" => 2, "percentage" => 100}
]
end
test "calculates conversion_rate when filtering for goal", %{conn: conn, site: site} do test "calculates conversion_rate when filtering for goal", %{conn: conn, site: site} do
populate_stats(site, [ populate_stats(site, [
build(:pageview, user_id: 1, screen_size: "Desktop"), build(:pageview, user_id: 1, screen_size: "Desktop"),

View File

@ -185,8 +185,8 @@ defmodule PlausibleWeb.StatsControllerTest do
assert parse_csv(result) == [ assert parse_csv(result) == [
["property", "value", "visitors", "events", "percentage"], ["property", "value", "visitors", "events", "percentage"],
["url", "b", "1", "1", "50.0"],
["url", "(none)", "1", "1", "50.0"], ["url", "(none)", "1", "1", "50.0"],
["url", "b", "1", "1", "50.0"],
[""] [""]
] ]
end end