Move add_percentage logic into clickhouse (#3854)

* Remove `add_percentage`, calculate percentages in clickhouse queries

This simplifies querying logic and avoids doing extra queries and avoids
race conditions.

* Remove special none handling from breakdowns, handling percentages correctly

* Add (failing) test showing expected add_percentage behavior for user making multiple sessions

* Update add_percentage behavior to use separate subqueries
This commit is contained in:
Karl-Aksel Puulmann 2024-03-06 11:08:25 +02:00 committed by GitHub
parent c60a2faee4
commit c6d98397a8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 179 additions and 113 deletions

View File

@ -10,6 +10,8 @@ defmodule Plausible.Stats.Base do
@no_ref "Direct / None"
@not_set "(not set)"
@uniq_users_expression "toUInt64(round(uniq(?) * any(_sample_factor)))"
def base_event_query(site, query) do
events_q = query_events(site, query)
@ -229,7 +231,7 @@ defmodule Plausible.Stats.Base do
def select_event_metrics(q, [:visitors | rest]) do
from(e in q,
select_merge: %{
visitors: fragment("toUInt64(round(uniq(?) * any(_sample_factor)))", e.user_id)
visitors: selected_as(fragment(@uniq_users_expression, e.user_id), :visitors)
}
)
|> select_event_metrics(rest)
@ -259,6 +261,10 @@ defmodule Plausible.Stats.Base do
|> select_event_metrics(rest)
end
def select_event_metrics(q, [:percentage | rest]) do
q |> select_event_metrics(rest)
end
def select_event_metrics(_, [unknown | _]), do: raise("Unknown metric: #{unknown}")
def select_session_metrics(q, [], _query), do: q
@ -315,7 +321,11 @@ defmodule Plausible.Stats.Base do
def select_session_metrics(q, [:visitors | rest], query) do
from(s in q,
select_merge: %{
visitors: fragment("toUInt64(round(uniq(?) * any(_sample_factor)))", s.user_id)
visitors:
selected_as(
fragment("toUInt64(round(uniq(?) * any(_sample_factor)))", s.user_id),
:visitors
)
}
)
|> select_session_metrics(rest, query)
@ -353,6 +363,10 @@ defmodule Plausible.Stats.Base do
|> select_session_metrics(rest, query)
end
def select_session_metrics(q, [:percentage | rest], query) do
q |> select_session_metrics(rest, query)
end
def dynamic_filter_condition(query, filter_key, db_field) do
case query && query.filters && query.filters[filter_key] do
{:is, value} ->
@ -566,4 +580,42 @@ defmodule Plausible.Stats.Base do
fragment("arrayExists(k -> match(?, k), ?)", get_by_key(e, :meta, ^prop_name), ^regexes)
)
end
defp total_visitors(site, query) do
base_event_query(site, query)
|> select([e], total_visitors: fragment(@uniq_users_expression, e.user_id))
end
defp total_visitors_subquery(site, %Query{include_imported: true} = query) do
dynamic(
[e],
selected_as(
subquery(total_visitors(site, query)) +
subquery(Plausible.Stats.Imported.total_imported_visitors(site, query)),
:__total_visitors
)
)
end
defp total_visitors_subquery(site, query) do
dynamic([e], selected_as(subquery(total_visitors(site, query)), :__total_visitors))
end
def add_percentage_metric(q, site, query, metrics) do
if :percentage in metrics do
q
|> select_merge(^%{__total_visitors: total_visitors_subquery(site, query)})
|> select_merge(%{
percentage:
fragment(
"if(? > 0, round(? / ? * 100, 1), null)",
selected_as(:__total_visitors),
selected_as(:visitors),
selected_as(:__total_visitors)
)
})
else
q
end
end
end

View File

@ -14,7 +14,7 @@ defmodule Plausible.Stats.Breakdown do
@revenue_metrics on_full_build(do: Plausible.Stats.Goal.Revenue.revenue_metrics(), else: [])
@event_metrics [:visitors, :pageviews, :events] ++ @revenue_metrics
@event_metrics [:visitors, :pageviews, :events, :percentage] ++ @revenue_metrics
# These metrics can be asked from the `breakdown/5` function,
# but they are different from regular metrics such as `visitors`,
@ -108,27 +108,9 @@ defmodule Plausible.Stats.Breakdown do
metrics_to_select = Util.maybe_add_visitors_metric(metrics) -- @computed_metrics
{_limit, page} = pagination
none_result =
if page == 1 && include_none_result?(query.filters[property]) do
none_query = Query.put_filter(query, property, {:is, "(none)"})
from(e in base_event_query(site, none_query),
select: %{},
select_merge: %{^custom_prop => "(none)"},
having: fragment("uniq(?)", e.user_id) > 0
)
|> select_event_metrics(metrics_to_select)
|> ClickhouseRepo.all()
else
[]
end
if !Keyword.get(opts, :skip_tracing), do: trace(query, property, metrics)
breakdown_events(site, query, "event:props:" <> custom_prop, metrics_to_select, pagination)
|> Kernel.++(none_result)
|> Enum.map(&cast_revenue_metrics_to_money(&1, currency))
|> Enum.sort_by(& &1[sorting_key(metrics_to_select)], :desc)
|> maybe_add_cr(site, query, nil, metrics)
@ -215,14 +197,6 @@ defmodule Plausible.Stats.Breakdown do
|> Enum.sort_by(& &1[sorting_key(metrics)], :desc)
end
defp include_none_result?({:is, value}), do: value == "(none)"
defp include_none_result?({:is_not, "(none)"}), do: false
defp include_none_result?({:member, values}), do: Enum.member?(values, "(none)")
defp include_none_result?({:not_member, values}), do: !Enum.member?(values, "(none)")
defp include_none_result?({:matches, _}), do: false
defp include_none_result?({:matches_member, _}), do: false
defp include_none_result?(_), do: true
defp breakdown_sessions(_, _, _, [], _), do: []
defp breakdown_sessions(site, query, property, metrics, pagination) do
@ -234,6 +208,7 @@ defmodule Plausible.Stats.Breakdown do
|> do_group_by(property)
|> select_session_metrics(metrics, query)
|> merge_imported(site, query, property, metrics)
|> add_percentage_metric(site, query, metrics)
|> apply_pagination(pagination)
|> ClickhouseRepo.all()
|> transform_keys(%{operating_system: :os})
@ -250,9 +225,11 @@ defmodule Plausible.Stats.Breakdown do
|> do_group_by(property)
|> select_event_metrics(metrics)
|> merge_imported(site, query, property, metrics)
|> add_percentage_metric(site, query, metrics)
|> apply_pagination(pagination)
|> ClickhouseRepo.all()
|> transform_keys(%{operating_system: :os})
|> Util.keep_requested_metrics(metrics)
end
defp maybe_add_time_on_page(event_results, site, query, metrics) do
@ -445,10 +422,19 @@ defmodule Plausible.Stats.Breakdown do
) do
from(
e in q,
where: has_key(e, :meta, ^prop),
select_merge: %{^prop => get_by_key(e, :meta, ^prop)},
group_by: get_by_key(e, :meta, ^prop),
order_by: {:asc, get_by_key(e, :meta, ^prop)}
select_merge: %{
^prop =>
selected_as(
fragment(
"if(not empty(?), ?, '(none)')",
get_by_key(e, :meta, ^prop),
get_by_key(e, :meta, ^prop)
),
:breakdown_prop_value
)
},
group_by: selected_as(:breakdown_prop_value),
order_by: {:asc, selected_as(:breakdown_prop_value)}
)
end

View File

@ -321,16 +321,8 @@ defmodule Plausible.Stats.Imported do
end
def merge_imported(q, site, query, :aggregate, metrics) do
import_ids = site.complete_import_ids
imported_q =
from(
i in "imported_visitors",
where: i.site_id == ^site.id,
where: i.import_id in ^import_ids,
where: i.date >= ^query.date_range.first and i.date <= ^query.date_range.last,
select: %{}
)
imported_visitors(site, query)
|> select_imported_metrics(metrics)
from(
@ -343,6 +335,23 @@ defmodule Plausible.Stats.Imported do
def merge_imported(q, _, _, _, _), do: q
def total_imported_visitors(site, query) do
imported_visitors(site, query)
|> select_merge([i], %{total_visitors: fragment("sum(?)", i.visitors)})
end
defp imported_visitors(site, query) do
import_ids = site.complete_import_ids
from(
i in "imported_visitors",
where: i.site_id == ^site.id,
where: i.import_id in ^import_ids,
where: i.date >= ^query.date_range.first and i.date <= ^query.date_range.last,
select: %{}
)
end
defp select_imported_metrics(q, []), do: q
defp select_imported_metrics(q, [:visitors | rest]) do
@ -457,7 +466,11 @@ defmodule Plausible.Stats.Imported do
defp select_joined_metrics(q, [:visitors | rest]) do
q
|> select_merge([s, i], %{
:visitors => fragment("coalesce(?, 0) + coalesce(?, 0)", s.visitors, i.visitors)
:visitors =>
selected_as(
fragment("coalesce(?, 0) + coalesce(?, 0)", s.visitors, i.visitors),
:visitors
)
})
|> select_joined_metrics(rest)
end

View File

@ -3,7 +3,12 @@ defmodule Plausible.Stats.Util do
Utilities for modifying stat results
"""
@manually_removable_metrics [:__internal_visits, :visitors]
@manually_removable_metrics [
:__internal_visits,
:visitors,
:__total_visitors,
:__breakdown_value
]
@doc """
Sometimes we need to manually add metrics in order to calculate the value for

View File

@ -827,12 +827,11 @@ defmodule PlausibleWeb.Api.StatsController do
site = conn.assigns[:site]
query = site |> Query.from(params)
pagination = parse_pagination(params)
metrics = breakdown_metrics(query)
metrics = breakdown_metrics(query, [:percentage])
countries =
Stats.breakdown(site, query, "visit:country", metrics, pagination)
|> transform_keys(%{country: :code})
|> add_percentages(site, query)
if params["csv"] do
countries =
@ -952,12 +951,11 @@ defmodule PlausibleWeb.Api.StatsController do
site = conn.assigns[:site]
query = Query.from(site, params)
pagination = parse_pagination(params)
metrics = breakdown_metrics(query)
metrics = breakdown_metrics(query, [:percentage])
browsers =
Stats.breakdown(site, query, "visit:browser", metrics, pagination)
|> transform_keys(%{browser: :name})
|> add_percentages(site, query)
if params["csv"] do
if Map.has_key?(query.filters, "event:goal") do
@ -976,12 +974,11 @@ defmodule PlausibleWeb.Api.StatsController do
site = conn.assigns[:site]
query = Query.from(site, params)
pagination = parse_pagination(params)
metrics = breakdown_metrics(query)
metrics = breakdown_metrics(query, [:percentage])
versions =
Stats.breakdown(site, query, "visit:browser_version", metrics, pagination)
|> transform_keys(%{browser_version: :name})
|> add_percentages(site, query)
if params["csv"] do
if Map.has_key?(query.filters, "event:goal") do
@ -1006,12 +1003,11 @@ defmodule PlausibleWeb.Api.StatsController do
site = conn.assigns[:site]
query = Query.from(site, params)
pagination = parse_pagination(params)
metrics = breakdown_metrics(query)
metrics = breakdown_metrics(query, [:percentage])
systems =
Stats.breakdown(site, query, "visit:os", metrics, pagination)
|> transform_keys(%{os: :name})
|> add_percentages(site, query)
if params["csv"] do
if Map.has_key?(query.filters, "event:goal") do
@ -1030,12 +1026,11 @@ defmodule PlausibleWeb.Api.StatsController do
site = conn.assigns[:site]
query = Query.from(site, params)
pagination = parse_pagination(params)
metrics = breakdown_metrics(query)
metrics = breakdown_metrics(query, [:percentage])
versions =
Stats.breakdown(site, query, "visit:os_version", metrics, pagination)
|> transform_keys(%{os_version: :name})
|> add_percentages(site, query)
if params["csv"] do
if Map.has_key?(query.filters, "event:goal") do
@ -1056,12 +1051,11 @@ defmodule PlausibleWeb.Api.StatsController do
site = conn.assigns[:site]
query = Query.from(site, params)
pagination = parse_pagination(params)
metrics = breakdown_metrics(query)
metrics = breakdown_metrics(query, [:percentage])
sizes =
Stats.breakdown(site, query, "visit:device", metrics, pagination)
|> transform_keys(%{device: :name})
|> add_percentages(site, query)
if params["csv"] do
if Map.has_key?(query.filters, "event:goal") do
@ -1172,7 +1166,7 @@ defmodule PlausibleWeb.Api.StatsController do
if query.filters["event:goal"] do
[:visitors, :events, :conversion_rate] ++ @revenue_metrics
else
[:visitors, :events] ++ @revenue_metrics
[:visitors, :events, :percentage] ++ @revenue_metrics
end
Stats.breakdown(site, query, prefixed_prop, metrics, pagination)
@ -1181,7 +1175,6 @@ defmodule PlausibleWeb.Api.StatsController do
Enum.map(entry, &format_revenue_metric/1)
|> Map.new()
end)
|> add_percentages(site, query)
end
def current_visitors(conn, _) do
@ -1228,18 +1221,6 @@ defmodule PlausibleWeb.Api.StatsController do
defp to_int(_, default), do: default
defp add_percentages([_ | _] = breakdown_result, site, query)
when not is_map_key(query.filters, "event:goal") do
%{visitors: %{value: total_visitors}} = Stats.aggregate(site, query, [:visitors])
breakdown_result
|> Enum.map(fn stat ->
Map.put(stat, :percentage, Float.round(stat.visitors / total_visitors * 100, 1))
end)
end
defp add_percentages(breakdown_result, _, _), do: breakdown_result
defp to_csv(list, columns), do: to_csv(list, columns, columns)
defp to_csv(list, columns, column_names) do

View File

@ -98,38 +98,7 @@ defmodule PlausibleWeb.Api.StatsController.CustomPropBreakdownTest do
]
end
test "(none) value is added as +1 to pagination limit", %{conn: conn, site: site} do
prop_key = "parim_s6ber"
populate_stats(site, [
build(:pageview, "meta.key": [prop_key], "meta.value": ["K2sna Kalle"]),
build(:pageview, "meta.key": [prop_key], "meta.value": ["K2sna Kalle"]),
build(:pageview)
])
conn =
get(
conn,
"/api/stats/#{site.domain}/custom-prop-values/#{prop_key}?period=day&limit=1"
)
assert json_response(conn, 200) == [
%{
"visitors" => 2,
"name" => "K2sna Kalle",
"events" => 2,
"percentage" => 66.7
},
%{
"visitors" => 1,
"name" => "(none)",
"events" => 1,
"percentage" => 33.3
}
]
end
test "(none) value is only included on the first page of results", %{conn: conn, site: site} do
test "(none) value is included in pagination", %{conn: conn, site: site} do
prop_key = "kaksik"
populate_stats(site, [
@ -144,13 +113,13 @@ defmodule PlausibleWeb.Api.StatsController.CustomPropBreakdownTest do
conn1 =
get(
conn,
"/api/stats/#{site.domain}/custom-prop-values/#{prop_key}?period=day&limit=1&page=1"
"/api/stats/#{site.domain}/custom-prop-values/#{prop_key}?period=day&limit=2&page=1"
)
conn2 =
get(
conn,
"/api/stats/#{site.domain}/custom-prop-values/#{prop_key}?period=day&limit=1&page=2"
"/api/stats/#{site.domain}/custom-prop-values/#{prop_key}?period=day&limit=2&page=2"
)
assert json_response(conn1, 200) == [
@ -161,19 +130,19 @@ defmodule PlausibleWeb.Api.StatsController.CustomPropBreakdownTest do
"percentage" => 50.0
},
%{
"visitors" => 1,
"name" => "(none)",
"events" => 1,
"percentage" => 16.7
"visitors" => 2,
"name" => "Teet",
"events" => 2,
"percentage" => 33.3
}
]
assert json_response(conn2, 200) == [
%{
"visitors" => 2,
"name" => "Teet",
"events" => 2,
"percentage" => 33.3
"visitors" => 1,
"name" => "(none)",
"events" => 1,
"percentage" => 16.7
}
]
end

View File

@ -19,6 +19,32 @@ defmodule PlausibleWeb.Api.StatsController.ScreenSizesTest do
]
end
test "returns screen sizes for user making multiple sessions", %{conn: conn, site: site} do
populate_stats(site, [
build(:pageview,
user_id: 1,
session_screen_size: "Desktop",
timestamp: ~N[2021-01-01 00:00:00]
),
build(:pageview,
user_id: 1,
session_screen_size: "Laptop",
timestamp: ~N[2021-01-01 05:00:00]
)
])
conn =
get(conn, "/api/stats/#{site.domain}/screen-sizes", %{
"period" => "day",
"date" => "2021-01-01"
})
assert json_response(conn, 200) == [
%{"name" => "Desktop", "visitors" => 1, "percentage" => 100},
%{"name" => "Laptop", "visitors" => 1, "percentage" => 100}
]
end
test "returns (not set) when appropriate", %{conn: conn, site: site} do
populate_stats(site, [
build(:pageview,
@ -144,6 +170,40 @@ defmodule PlausibleWeb.Api.StatsController.ScreenSizesTest do
]
end
test "returns screen sizes for user making multiple sessions by no of visitors with imported data",
%{conn: conn, site: site} do
populate_stats(site, [
build(:pageview,
user_id: 1,
session_screen_size: "Desktop",
timestamp: ~N[2021-01-01 00:00:00]
),
build(:pageview,
user_id: 1,
session_screen_size: "Laptop",
timestamp: ~N[2021-01-01 05:00:00]
)
])
populate_stats(site, [
build(:imported_devices, device: "Desktop", date: ~D[2021-01-01]),
build(:imported_devices, device: "Laptop", date: ~D[2021-01-01]),
build(:imported_visitors, visitors: 1, date: ~D[2021-01-01])
])
conn =
get(conn, "/api/stats/#{site.domain}/screen-sizes", %{
"period" => "day",
"date" => "2021-01-01",
"with_imported" => "true"
})
assert json_response(conn, 200) == [
%{"name" => "Desktop", "visitors" => 2, "percentage" => 100},
%{"name" => "Laptop", "visitors" => 2, "percentage" => 100}
]
end
test "calculates conversion_rate when filtering for goal", %{conn: conn, site: site} do
populate_stats(site, [
build(:pageview, user_id: 1, screen_size: "Desktop"),

View File

@ -185,8 +185,8 @@ defmodule PlausibleWeb.StatsControllerTest do
assert parse_csv(result) == [
["property", "value", "visitors", "events", "percentage"],
["url", "b", "1", "1", "50.0"],
["url", "(none)", "1", "1", "50.0"],
["url", "b", "1", "1", "50.0"],
[""]
]
end