Return session in each time bucket its active in for hourly/minute timeseries (#4052)

* Fix typo in test name

* Update test_helper, enable experimental_session_count together with experimental_reduced_joins

* Return session in each time bucket its active in for hourly/minute timeseries

The behavior is behind experimental_session_count flag

This results in more accurate visitor showing compared to previous approach of showing each user only
active the _last_ time they did a pageview.

Were not doing this for monthly/weekly graphs due to query performance cost and it having a small effect there.
See also https://3.basecamp.com/5308029/buckets/35611491/messages/7085680123

* Add tests for new behavior

Note the new behavior mimics the old one precisely, these tests fail if only
experimental_reduced_joins is on, but not experimental_session_count

* Type erasure

* Dead comment remove

* Expected_plot change
This commit is contained in:
Karl-Aksel Puulmann 2024-05-05 11:44:43 +03:00 committed by GitHub
parent 850a843d82
commit 17f812443d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 211 additions and 10 deletions

View File

@ -55,7 +55,7 @@ defmodule Plausible.Stats.Timeseries do
metrics = Util.maybe_add_visitors_metric(metrics) metrics = Util.maybe_add_visitors_metric(metrics)
from(e in base_event_query(site, query), select: ^select_event_metrics(metrics)) from(e in base_event_query(site, query), select: ^select_event_metrics(metrics))
|> select_bucket(site, query) |> select_bucket(:events, site, query)
|> maybe_add_timeseries_conversion_rate(site, query, metrics) |> maybe_add_timeseries_conversion_rate(site, query, metrics)
|> Plausible.Stats.Imported.merge_imported_timeseries(site, query, metrics) |> Plausible.Stats.Imported.merge_imported_timeseries(site, query, metrics)
|> ClickhouseRepo.all(label: :event_timeseries) |> ClickhouseRepo.all(label: :event_timeseries)
@ -66,7 +66,7 @@ defmodule Plausible.Stats.Timeseries do
defp sessions_timeseries(site, query, metrics) do defp sessions_timeseries(site, query, metrics) do
from(e in query_sessions(site, query), select: ^select_session_metrics(metrics, query)) from(e in query_sessions(site, query), select: ^select_session_metrics(metrics, query))
|> filter_converted_sessions(site, query) |> filter_converted_sessions(site, query)
|> select_bucket(site, query) |> select_bucket(:sessions, site, query)
|> Plausible.Stats.Imported.merge_imported_timeseries(site, query, metrics) |> Plausible.Stats.Imported.merge_imported_timeseries(site, query, metrics)
|> ClickhouseRepo.all(label: :session_timeseries) |> ClickhouseRepo.all(label: :session_timeseries)
|> Util.keep_requested_metrics(metrics) |> Util.keep_requested_metrics(metrics)
@ -152,7 +152,7 @@ defmodule Plausible.Stats.Timeseries do
date date
end end
defp select_bucket(q, site, %Query{interval: "month"}) do defp select_bucket(q, _table, site, %Query{interval: "month"}) do
from( from(
e in q, e in q,
group_by: fragment("toStartOfMonth(toTimeZone(?, ?))", e.timestamp, ^site.timezone), group_by: fragment("toStartOfMonth(toTimeZone(?, ?))", e.timestamp, ^site.timezone),
@ -163,7 +163,7 @@ defmodule Plausible.Stats.Timeseries do
) )
end end
defp select_bucket(q, site, %Query{interval: "week"} = query) do defp select_bucket(q, _table, site, %Query{interval: "week"} = query) do
{first_datetime, _} = utc_boundaries(query, site) {first_datetime, _} = utc_boundaries(query, site)
from( from(
@ -174,7 +174,7 @@ defmodule Plausible.Stats.Timeseries do
) )
end end
defp select_bucket(q, site, %Query{interval: "date"}) do defp select_bucket(q, _table, site, %Query{interval: "date"}) do
from( from(
e in q, e in q,
group_by: fragment("toDate(toTimeZone(?, ?))", e.timestamp, ^site.timezone), group_by: fragment("toDate(toTimeZone(?, ?))", e.timestamp, ^site.timezone),
@ -185,7 +185,14 @@ defmodule Plausible.Stats.Timeseries do
) )
end end
defp select_bucket(q, site, %Query{interval: "hour"}) do defp select_bucket(q, :sessions, site, %Query{
interval: "hour",
experimental_session_count?: true
}) do
bucket_with_timeslots(q, site, 3600)
end
defp select_bucket(q, _table, site, %Query{interval: "hour"}) do
from( from(
e in q, e in q,
group_by: fragment("toStartOfHour(toTimeZone(?, ?))", e.timestamp, ^site.timezone), group_by: fragment("toStartOfHour(toTimeZone(?, ?))", e.timestamp, ^site.timezone),
@ -196,7 +203,30 @@ defmodule Plausible.Stats.Timeseries do
) )
end end
defp select_bucket(q, _site, %Query{interval: "minute", period: "30m"}) do defp select_bucket(q, :sessions, _site, %Query{
interval: "minute",
period: "30m",
experimental_session_count?: true
}) do
from(
s in q,
array_join:
bucket in fragment(
"timeSlots(?, toUInt32(timeDiff(?, ?)), ?)",
s.start,
s.start,
s.timestamp,
60
),
group_by: fragment("dateDiff('minute', now(), ?)", bucket),
order_by: fragment("dateDiff('minute', now(), ?)", bucket),
select_merge: %{
date: fragment("dateDiff('minute', now(), ?)", bucket)
}
)
end
defp select_bucket(q, _table, _site, %Query{interval: "minute", period: "30m"}) do
from( from(
e in q, e in q,
group_by: fragment("dateDiff('minute', now(), ?)", e.timestamp), group_by: fragment("dateDiff('minute', now(), ?)", e.timestamp),
@ -207,7 +237,7 @@ defmodule Plausible.Stats.Timeseries do
) )
end end
defp select_bucket(q, site, %Query{interval: "minute"}) do defp select_bucket(q, _table, site, %Query{interval: "minute"}) do
from( from(
e in q, e in q,
group_by: fragment("toStartOfMinute(toTimeZone(?, ?))", e.timestamp, ^site.timezone), group_by: fragment("toStartOfMinute(toTimeZone(?, ?))", e.timestamp, ^site.timezone),
@ -218,6 +248,35 @@ defmodule Plausible.Stats.Timeseries do
) )
end end
defp select_bucket(q, :sessions, site, %Query{
interval: "minute",
experimental_session_count?: true
}) do
bucket_with_timeslots(q, site, 60)
end
# Includes session in _every_ time bucket it was active in.
# Only done in hourly and minute graphs for performance reasons.
defp bucket_with_timeslots(q, site, period_in_seconds) do
from(
s in q,
array_join:
bucket in fragment(
"timeSlots(toTimeZone(?, ?), toUInt32(timeDiff(?, ?)), toUInt32(?))",
s.start,
^site.timezone,
s.start,
s.timestamp,
^period_in_seconds
),
group_by: bucket,
order_by: bucket,
select_merge: %{
date: fragment("?", bucket)
}
)
end
defp date_or_weekstart(date, query) do defp date_or_weekstart(date, query) do
weekstart = Timex.beginning_of_week(date) weekstart = Timex.beginning_of_week(date)
@ -261,7 +320,7 @@ defmodule Plausible.Stats.Timeseries do
from(e in base_event_query(site, totals_query), from(e in base_event_query(site, totals_query),
select: ^select_event_metrics([:visitors]) select: ^select_event_metrics([:visitors])
) )
|> select_bucket(site, query) |> select_bucket(:events, site, query)
from(e in subquery(q), from(e in subquery(q),
left_join: c in subquery(totals_timeseries_q), left_join: c in subquery(totals_timeseries_q),

View File

@ -19,7 +19,7 @@ defmodule PlausibleWeb.Api.StatsController.MainGraphTest do
assert Enum.any?(plot, fn pageviews -> pageviews > 0 end) assert Enum.any?(plot, fn pageviews -> pageviews > 0 end)
end end
test "displays visitors for a day", %{conn: conn, site: site} do test "displays pageviews for a day", %{conn: conn, site: site} do
populate_stats(site, [ populate_stats(site, [
build(:pageview, timestamp: ~N[2021-01-01 00:00:00]), build(:pageview, timestamp: ~N[2021-01-01 00:00:00]),
build(:pageview, timestamp: ~N[2021-01-01 23:00:00]) build(:pageview, timestamp: ~N[2021-01-01 23:00:00])
@ -433,6 +433,146 @@ defmodule PlausibleWeb.Api.StatsController.MainGraphTest do
end end
end end
describe "GET /api/stats/main-graph - visitors plot" do
setup [:create_user, :log_in, :create_new_site, :create_legacy_site_import]
test "displays visitors per hour with short visits", %{
conn: conn,
site: site
} do
populate_stats(site, [
build(:pageview, timestamp: ~N[2021-01-01 00:00:00], user_id: 1),
build(:pageview, timestamp: ~N[2021-01-01 00:10:00], user_id: 1),
build(:pageview, timestamp: ~N[2021-01-01 00:20:00])
])
conn =
get(
conn,
"/api/stats/#{site.domain}/main-graph?period=day&date=2021-01-01&metric=visitors&interval=hour"
)
assert %{"plot" => plot} = json_response(conn, 200)
assert Enum.count(plot) == 24
assert List.first(plot) == 2
assert Enum.sum(plot) == 2
end
test "displays visitors realtime with visits spanning multiple minutes", %{
conn: conn,
site: site
} do
populate_stats(site, [
build(:pageview, timestamp: relative_time(minutes: -35), user_id: 1),
build(:pageview, timestamp: relative_time(minutes: -20), user_id: 1),
build(:pageview, timestamp: relative_time(minutes: -25), user_id: 2),
build(:pageview, timestamp: relative_time(minutes: -15), user_id: 2),
build(:pageview, timestamp: relative_time(minutes: -5), user_id: 3),
build(:pageview, timestamp: relative_time(minutes: -3), user_id: 3)
])
conn =
get(
conn,
"/api/stats/#{site.domain}/main-graph?period=realtime&metric=visitors"
)
assert %{"plot" => plot} = json_response(conn, 200)
expected_plot =
if FunWithFlags.enabled?(:experimental_session_count) do
~w[1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 0 0 1 1 1 0 0]
else
~w[0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0]
end
assert plot == Enum.map(expected_plot, &String.to_integer/1)
end
test "displays visitors per hour with visits spanning multiple hours", %{
conn: conn,
site: site
} do
populate_stats(site, [
build(:pageview, timestamp: ~N[2020-12-31 23:45:00], user_id: 1),
build(:pageview, timestamp: ~N[2021-01-01 00:10:00], user_id: 1),
build(:pageview, timestamp: ~N[2021-01-01 00:15:00], user_id: 1),
build(:pageview, timestamp: ~N[2021-01-01 00:35:00], user_id: 2),
build(:pageview, timestamp: ~N[2021-01-01 01:00:00], user_id: 2),
build(:pageview, timestamp: ~N[2021-01-01 01:25:00], user_id: 2),
build(:pageview, timestamp: ~N[2021-01-01 01:50:00], user_id: 2),
build(:pageview, timestamp: ~N[2021-01-01 02:05:00], user_id: 2),
build(:pageview, timestamp: ~N[2021-01-01 23:45:00], user_id: 3),
build(:pageview, timestamp: ~N[2021-01-02 00:05:00], user_id: 3)
])
conn =
get(
conn,
"/api/stats/#{site.domain}/main-graph?period=day&date=2021-01-01&metric=visitors&interval=hour"
)
assert %{"plot" => plot} = json_response(conn, 200)
zeroes = List.duplicate(0, 20)
assert [2, 1, 1] ++ zeroes ++ [1] == plot
end
test "displays visitors per day with visits showed only in last time bucket", %{
conn: conn,
site: site
} do
populate_stats(site, [
build(:pageview, timestamp: ~N[2020-12-31 23:45:00], user_id: 1),
build(:pageview, timestamp: ~N[2021-01-01 00:10:00], user_id: 1),
build(:pageview, timestamp: ~N[2020-01-02 23:45:00], user_id: 2),
build(:pageview, timestamp: ~N[2021-01-03 00:10:00], user_id: 2),
build(:pageview, timestamp: ~N[2020-01-03 23:45:00], user_id: 3),
build(:pageview, timestamp: ~N[2021-01-04 00:10:00], user_id: 3),
build(:pageview, timestamp: ~N[2020-01-07 23:45:00], user_id: 4),
build(:pageview, timestamp: ~N[2021-01-08 00:10:00], user_id: 4)
])
conn =
get(
conn,
"/api/stats/#{site.domain}/main-graph?period=7d&date=2021-01-07&metric=visitors&interval=date"
)
assert %{"plot" => plot} = json_response(conn, 200)
assert plot == [1, 0, 1, 1, 0, 0, 0]
end
test "displays visitors per week with visits showed only in last time bucket", %{
conn: conn,
site: site
} do
populate_stats(site, [
build(:pageview, timestamp: ~N[2020-12-31 23:45:00], user_id: 1),
build(:pageview, timestamp: ~N[2021-01-01 00:10:00], user_id: 1),
build(:pageview, timestamp: ~N[2020-01-03 23:45:00], user_id: 2),
build(:pageview, timestamp: ~N[2021-01-04 00:10:00], user_id: 2),
build(:pageview, timestamp: ~N[2021-01-31 23:45:00], user_id: 3),
build(:pageview, timestamp: ~N[2021-02-01 00:05:00], user_id: 3)
])
conn =
get(
conn,
"/api/stats/#{site.domain}/main-graph?period=month&date=2021-01-01&metric=visitors&interval=week"
)
assert %{"plot" => plot} = json_response(conn, 200)
if FunWithFlags.enabled?(:experimental_session_count) do
assert plot == [1, 1, 0, 0, 0]
else
assert plot == [1, 1, 0, 0, 1]
end
end
end
describe "GET /api/stats/main-graph - conversion_rate plot" do describe "GET /api/stats/main-graph - conversion_rate plot" do
setup [:create_user, :log_in, :create_new_site] setup [:create_user, :log_in, :create_new_site]

View File

@ -11,8 +11,10 @@ FunWithFlags.enable(:csv_imports_exports)
# Temporary flag to test `experimental_reduced_joins` flag on all tests. # Temporary flag to test `experimental_reduced_joins` flag on all tests.
if System.get_env("TEST_EXPERIMENTAL_REDUCED_JOINS") == "1" do if System.get_env("TEST_EXPERIMENTAL_REDUCED_JOINS") == "1" do
FunWithFlags.enable(:experimental_reduced_joins) FunWithFlags.enable(:experimental_reduced_joins)
FunWithFlags.enable(:experimental_session_count)
else else
FunWithFlags.disable(:experimental_reduced_joins) FunWithFlags.disable(:experimental_reduced_joins)
FunWithFlags.disable(:experimental_session_count)
end end
Ecto.Adapters.SQL.Sandbox.mode(Plausible.Repo, :manual) Ecto.Adapters.SQL.Sandbox.mode(Plausible.Repo, :manual)