Add support for querying the scroll depth metric

This commit is contained in:
Robert Joonas 2024-11-07 11:03:18 +01:00
parent 7ede7a3dbf
commit 15b14d3c49
6 changed files with 343 additions and 1 deletion

View File

@@ -245,6 +245,7 @@ defmodule Plausible.Stats.SQL.Expression do
# These metrics contribute no select expression of their own (empty map);
# presumably they are computed later as "special" metrics (see the
# SpecialMetrics pipeline in this commit) — TODO confirm against full file.
def event_metric(:percentage), do: %{}
def event_metric(:conversion_rate), do: %{}
def event_metric(:scroll_depth), do: %{}
def event_metric(:group_conversion_rate), do: %{}
def event_metric(:total_visitors), do: %{}

View File

@@ -126,7 +126,7 @@ defmodule Plausible.Stats.SQL.QueryBuilder do
|> Enum.reduce(%{}, &Map.merge/2)
end
# NOTE(review): diff artifact — the first head below is the removed `defp`
# clause and the second is the added `def`; this hunk makes build_group_by/3
# public (it is now called from SpecialMetrics via SQL.QueryBuilder.build_group_by).
# Adds a GROUP BY entry to `q` for every dimension of `query` on `table`.
defp build_group_by(q, table, query) do
def build_group_by(q, table, query) do
Enum.reduce(query.dimensions, q, &dimension_group_by(&2, table, query, &1))
end

View File

@@ -16,6 +16,7 @@ defmodule Plausible.Stats.SQL.SpecialMetrics do
|> maybe_add_percentage_metric(site, query)
|> maybe_add_global_conversion_rate(site, query)
|> maybe_add_group_conversion_rate(site, query)
|> maybe_add_scroll_depth(site, query)
end
defp maybe_add_percentage_metric(q, site, query) do
@@ -121,6 +122,71 @@ defmodule Plausible.Stats.SQL.SpecialMetrics do
end
end
# Adds the `scroll_depth` metric to query `q` when requested.
# No-op when :scroll_depth is not among the requested metrics; otherwise
# dispatches to the aggregate variant (no dimensions) or the grouped
# per-dimension variant.
def maybe_add_scroll_depth(q, site, query) do
cond do
:scroll_depth not in query.metrics -> q
query.dimensions == [] -> add_aggregate_scroll_depth(q, site, query)
true -> add_group_scroll_depth(q, site, query)
end
end
# Aggregate scroll depth: take each visitor's max scroll_depth over their
# "pageleave" events, then average those per-visitor maxima and merge the
# result into `q` as a scalar subquery.
# `ifNotFinite(..., 0)` yields 0 when there are no pageleave rows in range
# (presumably avg over no rows is non-finite in ClickHouse — see the
# "scroll depth is 0" tests in this commit).
defp add_aggregate_scroll_depth(q, site, query) do
# Per-visitor maximum scroll depth, restricted to pageleave events.
max_per_visitor_q =
Base.base_event_query(site, query)
|> where([e], e.name == "pageleave")
|> select([e], %{
user_id: e.user_id,
max_scroll_depth: max(e.scroll_depth)
})
|> group_by([e], e.user_id)
# Average of the per-visitor maxima, rounded into a UInt8 (0..100).
scroll_depth_q =
subquery(max_per_visitor_q)
|> select([p], fragment("toUInt8(round(ifNotFinite(avg(?), 0)))", p.max_scroll_depth))
select_merge_as(q, [e], %{scroll_depth: subquery(scroll_depth_q)})
end
# Grouped scroll depth: same per-visitor max + average computation as the
# aggregate variant, but grouped by every query dimension and LEFT-joined
# back onto the main query on all of those dimensions.
defp add_group_scroll_depth(q, site, query) do
# Per-visitor max scroll depth on pageleave events, additionally grouped
# by the query dimensions (hence the reuse of build_group_by/3).
max_per_visitor_q =
Base.base_event_query(site, query)
|> where([e], e.name == "pageleave")
|> select([e], %{
user_id: e.user_id,
max_scroll_depth: max(e.scroll_depth)
})
|> SQL.QueryBuilder.build_group_by(:events, query)
|> group_by([e], e.user_id)
# Shortnames of the query dimensions, used below for select, group and join.
dim_shortnames = Enum.map(query.dimensions, fn dim -> shortname(query, dim) end)
dim_select =
dim_shortnames
|> Enum.map(fn dim -> {dim, dynamic([p], field(p, ^dim))} end)
|> Map.new()
dim_group_by =
dim_shortnames
|> Enum.map(fn dim -> dynamic([p], field(p, ^dim)) end)
# Average of per-visitor maxima, computed per dimension group.
scroll_depth_q =
subquery(max_per_visitor_q)
|> select([p], %{
scroll_depth: fragment("toUInt8(round(ifNotFinite(avg(?), 0)))", p.max_scroll_depth)
})
|> select_merge(^dim_select)
|> group_by(^dim_group_by)
# Join condition: every selected dimension of the outer query must match
# the corresponding column of the scroll-depth subquery; conditions are
# AND-ed together.
join_on_dim_condition =
dim_shortnames
|> Enum.map(fn dim -> dynamic([_e, ..., s], selected_as(^dim) == field(s, ^dim)) end)
|> Enum.reduce(fn condition, acc -> dynamic([], ^acc and ^condition) end)
# `any(...)` collapses the joined value within each group of the outer query.
q
|> join(:left, [e], s in subquery(scroll_depth_q), on: ^join_on_dim_condition)
|> select_merge_as([_e, ..., s], %{scroll_depth: fragment("any(?)", s.scroll_depth)})
end
# `total_visitors_subquery` returns a subquery which selects `total_visitors` -
# the number used as the denominator in the calculation of `conversion_rate` and
# `percentage` metrics.

View File

@@ -74,6 +74,7 @@ defmodule Plausible.Stats.TableDecider do
# Routes each metric to the table it must be computed from:
# scroll_depth (added in this commit) is event-based, like revenue and
# pageview metrics; bounce_rate is session-based.
defp metric_partitioner(_, :average_revenue), do: :event
defp metric_partitioner(_, :total_revenue), do: :event
defp metric_partitioner(_, :scroll_depth), do: :event
defp metric_partitioner(_, :pageviews), do: :event
defp metric_partitioner(_, :events), do: :event
defp metric_partitioner(_, :bounce_rate), do: :session

View File

@@ -101,6 +101,61 @@ defmodule PlausibleWeb.Api.ExternalStatsController.QueryTest do
]
end
# Per-visitor maxima on "/": user 123 -> 60, user 456 -> 80; avg = 70.
test "can query scroll_depth metric with a page filter", %{conn: conn, site: site} do
populate_stats(site, [
build(:pageview, user_id: 123, timestamp: ~N[2021-01-01 00:00:00]),
build(:pageleave, user_id: 123, timestamp: ~N[2021-01-01 00:00:10], scroll_depth: 40),
build(:pageview, user_id: 123, timestamp: ~N[2021-01-01 00:00:10]),
build(:pageleave, user_id: 123, timestamp: ~N[2021-01-01 00:00:20], scroll_depth: 60),
build(:pageview, user_id: 456, timestamp: ~N[2021-01-01 00:00:00]),
build(:pageleave, user_id: 456, timestamp: ~N[2021-01-01 00:00:10], scroll_depth: 80)
])
conn =
post(conn, "/api/v2/query", %{
"site_id" => site.domain,
"filters" => [["is", "event:page", ["/"]]],
"date_range" => "all",
"metrics" => ["visitors", "scroll_depth"]
})
assert json_response(conn, 200)["results"] == [
%{"metrics" => [2, 70], "dimensions" => []}
]
end
# A pageview without any pageleave events: scroll_depth falls back to 0
# (the ifNotFinite(..., 0) default in the SQL), not null or an error.
test "scroll depth is 0 when no pageleave data in range", %{conn: conn, site: site} do
populate_stats(site, [
build(:pageview, timestamp: ~N[2021-01-01 00:00:00])
])
conn =
post(conn, "/api/v2/query", %{
"site_id" => site.domain,
"filters" => [["is", "event:page", ["/"]]],
"date_range" => "all",
"metrics" => ["visitors", "scroll_depth"]
})
assert json_response(conn, 200)["results"] == [
%{"metrics" => [1, 0], "dimensions" => []}
]
end
# Completely empty range: both visitors and scroll_depth report 0.
test "scroll depth is 0 when no data at all in range", %{conn: conn, site: site} do
conn =
post(conn, "/api/v2/query", %{
"site_id" => site.domain,
"filters" => [["is", "event:page", ["/"]]],
"date_range" => "all",
"metrics" => ["visitors", "scroll_depth"]
})
assert json_response(conn, 200)["results"] == [
%{"metrics" => [0, 0], "dimensions" => []}
]
end
test "does not count pageleave events towards the events metric in a simple aggregate query",
%{conn: conn, site: site} do
populate_stats(site, [
@@ -1127,6 +1182,40 @@ defmodule PlausibleWeb.Api.ExternalStatsController.QueryTest do
end
describe "timeseries" do
# time:day breakdown: day 1 per-user maxima are 20 (user 12) and 60
# (user 34) -> avg 40; day 2 has only user 56 with 20 -> 20.
test "scroll_depth metric in a time:day breakdown", %{conn: conn, site: site} do
t0 = ~N[2020-01-01 00:00:00]
[t1, t2, t3] = for i <- 1..3, do: NaiveDateTime.add(t0, i, :minute)
populate_stats(site, [
build(:pageview, user_id: 12, timestamp: t0),
build(:pageleave, user_id: 12, timestamp: t1, scroll_depth: 20),
build(:pageview, user_id: 34, timestamp: t0),
build(:pageleave, user_id: 34, timestamp: t1, scroll_depth: 17),
build(:pageview, user_id: 34, timestamp: t2),
build(:pageleave, user_id: 34, timestamp: t3, scroll_depth: 60),
build(:pageview, user_id: 56, timestamp: NaiveDateTime.add(t0, 1, :day)),
build(:pageleave,
user_id: 56,
timestamp: NaiveDateTime.add(t1, 1, :day),
scroll_depth: 20
)
])
conn =
post(conn, "/api/v2/query", %{
"site_id" => site.domain,
"metrics" => ["scroll_depth"],
"date_range" => "all",
"dimensions" => ["time:day"],
"filters" => [["is", "event:page", ["/"]]]
})
assert json_response(conn, 200)["results"] == [
%{"dimensions" => ["2020-01-01"], "metrics" => [40]},
%{"dimensions" => ["2020-01-02"], "metrics" => [20]}
]
end
test "shows hourly data for a certain date with time_labels", %{conn: conn, site: site} do
populate_stats(site, [
build(:pageview, user_id: @user_id, timestamp: ~N[2021-01-01 00:00:00]),
@@ -1716,6 +1805,187 @@ defmodule PlausibleWeb.Api.ExternalStatsController.QueryTest do
]
end
# event:page breakdown: /blog per-user maxima are 20, 60, 100 -> avg 60;
# /another per-user maxima are 24 and 26 -> avg 25.
test "breakdown by event:page with scroll_depth metric", %{conn: conn, site: site} do
t0 = ~N[2020-01-01 00:00:00]
[t1, t2, t3] = for i <- 1..3, do: NaiveDateTime.add(t0, i, :minute)
populate_stats(site, [
build(:pageview, user_id: 12, pathname: "/blog", timestamp: t0),
build(:pageleave, user_id: 12, pathname: "/blog", timestamp: t1, scroll_depth: 20),
build(:pageview, user_id: 12, pathname: "/another", timestamp: t1),
build(:pageleave, user_id: 12, pathname: "/another", timestamp: t2, scroll_depth: 24),
build(:pageview, user_id: 34, pathname: "/blog", timestamp: t0),
build(:pageleave, user_id: 34, pathname: "/blog", timestamp: t1, scroll_depth: 17),
build(:pageview, user_id: 34, pathname: "/another", timestamp: t1),
build(:pageleave, user_id: 34, pathname: "/another", timestamp: t2, scroll_depth: 26),
build(:pageview, user_id: 34, pathname: "/blog", timestamp: t2),
build(:pageleave, user_id: 34, pathname: "/blog", timestamp: t3, scroll_depth: 60),
build(:pageview, user_id: 56, pathname: "/blog", timestamp: t0),
build(:pageleave, user_id: 56, pathname: "/blog", timestamp: t1, scroll_depth: 100)
])
conn =
post(conn, "/api/v2/query", %{
"site_id" => site.domain,
"metrics" => ["visitors", "pageviews", "scroll_depth"],
"date_range" => "all",
"dimensions" => ["event:page"]
})
assert json_response(conn, 200)["results"] == [
%{"dimensions" => ["/blog"], "metrics" => [3, 4, 60]},
%{"dimensions" => ["/another"], "metrics" => [2, 2, 25]}
]
end
# Two-dimension breakdown (event:page + visit:source): scroll depth is
# averaged within each (page, source) group — exercises the grouped
# LEFT-join path with multiple join dimensions.
test "breakdown by event:page + visit:source with scroll_depth metric", %{
conn: conn,
site: site
} do
t0 = ~N[2020-01-01 00:00:00]
[t1, t2, t3] = for i <- 1..3, do: NaiveDateTime.add(t0, i, :minute)
populate_stats(site, [
build(:pageview, referrer_source: "Google", user_id: 12, pathname: "/blog", timestamp: t0),
build(:pageleave,
referrer_source: "Google",
user_id: 12,
pathname: "/blog",
timestamp: t1,
scroll_depth: 20
),
build(:pageview,
referrer_source: "Google",
user_id: 12,
pathname: "/another",
timestamp: t1
),
build(:pageleave,
referrer_source: "Google",
user_id: 12,
pathname: "/another",
timestamp: t2,
scroll_depth: 24
),
build(:pageview, referrer_source: "Google", user_id: 34, pathname: "/blog", timestamp: t0),
build(:pageleave,
referrer_source: "Google",
user_id: 34,
pathname: "/blog",
timestamp: t1,
scroll_depth: 17
),
build(:pageview,
referrer_source: "Google",
user_id: 34,
pathname: "/another",
timestamp: t1
),
build(:pageleave,
referrer_source: "Google",
user_id: 34,
pathname: "/another",
timestamp: t2,
scroll_depth: 26
),
build(:pageview, referrer_source: "Google", user_id: 34, pathname: "/blog", timestamp: t2),
build(:pageleave,
referrer_source: "Google",
user_id: 34,
pathname: "/blog",
timestamp: t3,
scroll_depth: 60
),
build(:pageview, referrer_source: "Twitter", user_id: 56, pathname: "/blog", timestamp: t0),
build(:pageleave,
referrer_source: "Twitter",
user_id: 56,
pathname: "/blog",
timestamp: t1,
scroll_depth: 20
),
build(:pageview,
referrer_source: "Twitter",
user_id: 56,
pathname: "/another",
timestamp: t1
),
build(:pageleave,
referrer_source: "Twitter",
user_id: 56,
pathname: "/another",
timestamp: t2,
scroll_depth: 24
)
])
conn =
post(conn, "/api/v2/query", %{
"site_id" => site.domain,
"metrics" => ["visitors", "pageviews", "scroll_depth"],
"date_range" => "all",
"dimensions" => ["event:page", "visit:source"]
})
assert json_response(conn, 200)["results"] == [
%{"dimensions" => ["/blog", "Google"], "metrics" => [2, 3, 40]},
%{"dimensions" => ["/another", "Google"], "metrics" => [2, 2, 25]},
%{"dimensions" => ["/blog", "Twitter"], "metrics" => [1, 1, 20]},
%{"dimensions" => ["/another", "Twitter"], "metrics" => [1, 1, 24]}
]
end
# Combined event:page + time:day breakdown: averages are computed per
# (page, day) pair — exercises a time dimension inside the grouped join.
test "breakdown by event:page + time:day with scroll_depth metric", %{conn: conn, site: site} do
t0 = ~N[2020-01-01 00:00:00]
[t1, t2, t3] = for i <- 1..3, do: NaiveDateTime.add(t0, i, :minute)
populate_stats(site, [
build(:pageview, user_id: 12, pathname: "/blog", timestamp: t0),
build(:pageleave, user_id: 12, pathname: "/blog", timestamp: t1, scroll_depth: 20),
build(:pageview, user_id: 12, pathname: "/another", timestamp: t1),
build(:pageleave, user_id: 12, pathname: "/another", timestamp: t2, scroll_depth: 24),
build(:pageview, user_id: 34, pathname: "/blog", timestamp: t0),
build(:pageleave, user_id: 34, pathname: "/blog", timestamp: t1, scroll_depth: 17),
build(:pageview, user_id: 34, pathname: "/another", timestamp: t1),
build(:pageleave, user_id: 34, pathname: "/another", timestamp: t2, scroll_depth: 26),
build(:pageview, user_id: 34, pathname: "/blog", timestamp: t2),
build(:pageleave, user_id: 34, pathname: "/blog", timestamp: t3, scroll_depth: 60),
build(:pageview, user_id: 56, pathname: "/blog", timestamp: NaiveDateTime.add(t0, 1, :day)),
build(:pageleave,
user_id: 56,
pathname: "/blog",
timestamp: NaiveDateTime.add(t1, 1, :day),
scroll_depth: 20
),
build(:pageview,
user_id: 56,
pathname: "/another",
timestamp: NaiveDateTime.add(t1, 1, :day)
),
build(:pageleave,
user_id: 56,
pathname: "/another",
timestamp: NaiveDateTime.add(t2, 1, :day),
scroll_depth: 24
)
])
conn =
post(conn, "/api/v2/query", %{
"site_id" => site.domain,
"metrics" => ["scroll_depth"],
"date_range" => "all",
"dimensions" => ["event:page", "time:day"]
})
assert json_response(conn, 200)["results"] == [
%{"dimensions" => ["/blog", "2020-01-01"], "metrics" => [40]},
%{"dimensions" => ["/another", "2020-01-01"], "metrics" => [25]},
%{"dimensions" => ["/another", "2020-01-02"], "metrics" => [24]},
%{"dimensions" => ["/blog", "2020-01-02"], "metrics" => [20]}
]
end
test "attempting to breakdown by event:hostname returns an error", %{conn: conn, site: site} do
populate_stats(site, [
build(:pageview, hostname: "a.example.com"),

View File

@@ -139,6 +139,10 @@ defmodule Plausible.Factory do
Map.put(event_factory(), :name, "pageview")
end
# Factory for a "pageleave" event: a generic event with its name fixed.
def pageleave_factory do
event_factory() |> Map.put(:name, "pageleave")
end
def event_factory do
hostname = sequence(:domain, &"example-#{&1}.com")