mirror of
https://github.com/plausible/analytics.git
synced 2025-01-05 17:16:44 +03:00
Return session in each time bucket its active in for hourly/minute timeseries (#4052)
* Fix typo in test name * Update test_helper, enable experimental_session_count together with experimental_reduced_joins * Return session in each time bucket its active in for hourly/minute timeseries The behavior is behind experimental_session_count flag This results in more accurate visitor showing compared to previous approach of showing each user only active the _last_ time they did a pageview. Were not doing this for monthly/weekly graphs due to query performance cost and it having a small effect there. See also https://3.basecamp.com/5308029/buckets/35611491/messages/7085680123 * Add tests for new behavior Note the new behavior mimics the old one precisely, these tests fail if only experimental_reduced_joins is on, but not experimental_session_count * Type erasure * Dead comment remove * Expected_plot change
This commit is contained in:
parent
850a843d82
commit
17f812443d
@ -55,7 +55,7 @@ defmodule Plausible.Stats.Timeseries do
|
|||||||
metrics = Util.maybe_add_visitors_metric(metrics)
|
metrics = Util.maybe_add_visitors_metric(metrics)
|
||||||
|
|
||||||
from(e in base_event_query(site, query), select: ^select_event_metrics(metrics))
|
from(e in base_event_query(site, query), select: ^select_event_metrics(metrics))
|
||||||
|> select_bucket(site, query)
|
|> select_bucket(:events, site, query)
|
||||||
|> maybe_add_timeseries_conversion_rate(site, query, metrics)
|
|> maybe_add_timeseries_conversion_rate(site, query, metrics)
|
||||||
|> Plausible.Stats.Imported.merge_imported_timeseries(site, query, metrics)
|
|> Plausible.Stats.Imported.merge_imported_timeseries(site, query, metrics)
|
||||||
|> ClickhouseRepo.all(label: :event_timeseries)
|
|> ClickhouseRepo.all(label: :event_timeseries)
|
||||||
@ -66,7 +66,7 @@ defmodule Plausible.Stats.Timeseries do
|
|||||||
defp sessions_timeseries(site, query, metrics) do
|
defp sessions_timeseries(site, query, metrics) do
|
||||||
from(e in query_sessions(site, query), select: ^select_session_metrics(metrics, query))
|
from(e in query_sessions(site, query), select: ^select_session_metrics(metrics, query))
|
||||||
|> filter_converted_sessions(site, query)
|
|> filter_converted_sessions(site, query)
|
||||||
|> select_bucket(site, query)
|
|> select_bucket(:sessions, site, query)
|
||||||
|> Plausible.Stats.Imported.merge_imported_timeseries(site, query, metrics)
|
|> Plausible.Stats.Imported.merge_imported_timeseries(site, query, metrics)
|
||||||
|> ClickhouseRepo.all(label: :session_timeseries)
|
|> ClickhouseRepo.all(label: :session_timeseries)
|
||||||
|> Util.keep_requested_metrics(metrics)
|
|> Util.keep_requested_metrics(metrics)
|
||||||
@ -152,7 +152,7 @@ defmodule Plausible.Stats.Timeseries do
|
|||||||
date
|
date
|
||||||
end
|
end
|
||||||
|
|
||||||
defp select_bucket(q, site, %Query{interval: "month"}) do
|
defp select_bucket(q, _table, site, %Query{interval: "month"}) do
|
||||||
from(
|
from(
|
||||||
e in q,
|
e in q,
|
||||||
group_by: fragment("toStartOfMonth(toTimeZone(?, ?))", e.timestamp, ^site.timezone),
|
group_by: fragment("toStartOfMonth(toTimeZone(?, ?))", e.timestamp, ^site.timezone),
|
||||||
@ -163,7 +163,7 @@ defmodule Plausible.Stats.Timeseries do
|
|||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
defp select_bucket(q, site, %Query{interval: "week"} = query) do
|
defp select_bucket(q, _table, site, %Query{interval: "week"} = query) do
|
||||||
{first_datetime, _} = utc_boundaries(query, site)
|
{first_datetime, _} = utc_boundaries(query, site)
|
||||||
|
|
||||||
from(
|
from(
|
||||||
@ -174,7 +174,7 @@ defmodule Plausible.Stats.Timeseries do
|
|||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
defp select_bucket(q, site, %Query{interval: "date"}) do
|
defp select_bucket(q, _table, site, %Query{interval: "date"}) do
|
||||||
from(
|
from(
|
||||||
e in q,
|
e in q,
|
||||||
group_by: fragment("toDate(toTimeZone(?, ?))", e.timestamp, ^site.timezone),
|
group_by: fragment("toDate(toTimeZone(?, ?))", e.timestamp, ^site.timezone),
|
||||||
@ -185,7 +185,14 @@ defmodule Plausible.Stats.Timeseries do
|
|||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
defp select_bucket(q, site, %Query{interval: "hour"}) do
|
defp select_bucket(q, :sessions, site, %Query{
|
||||||
|
interval: "hour",
|
||||||
|
experimental_session_count?: true
|
||||||
|
}) do
|
||||||
|
bucket_with_timeslots(q, site, 3600)
|
||||||
|
end
|
||||||
|
|
||||||
|
defp select_bucket(q, _table, site, %Query{interval: "hour"}) do
|
||||||
from(
|
from(
|
||||||
e in q,
|
e in q,
|
||||||
group_by: fragment("toStartOfHour(toTimeZone(?, ?))", e.timestamp, ^site.timezone),
|
group_by: fragment("toStartOfHour(toTimeZone(?, ?))", e.timestamp, ^site.timezone),
|
||||||
@ -196,7 +203,30 @@ defmodule Plausible.Stats.Timeseries do
|
|||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
defp select_bucket(q, _site, %Query{interval: "minute", period: "30m"}) do
|
defp select_bucket(q, :sessions, _site, %Query{
|
||||||
|
interval: "minute",
|
||||||
|
period: "30m",
|
||||||
|
experimental_session_count?: true
|
||||||
|
}) do
|
||||||
|
from(
|
||||||
|
s in q,
|
||||||
|
array_join:
|
||||||
|
bucket in fragment(
|
||||||
|
"timeSlots(?, toUInt32(timeDiff(?, ?)), ?)",
|
||||||
|
s.start,
|
||||||
|
s.start,
|
||||||
|
s.timestamp,
|
||||||
|
60
|
||||||
|
),
|
||||||
|
group_by: fragment("dateDiff('minute', now(), ?)", bucket),
|
||||||
|
order_by: fragment("dateDiff('minute', now(), ?)", bucket),
|
||||||
|
select_merge: %{
|
||||||
|
date: fragment("dateDiff('minute', now(), ?)", bucket)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
end
|
||||||
|
|
||||||
|
defp select_bucket(q, _table, _site, %Query{interval: "minute", period: "30m"}) do
|
||||||
from(
|
from(
|
||||||
e in q,
|
e in q,
|
||||||
group_by: fragment("dateDiff('minute', now(), ?)", e.timestamp),
|
group_by: fragment("dateDiff('minute', now(), ?)", e.timestamp),
|
||||||
@ -207,7 +237,7 @@ defmodule Plausible.Stats.Timeseries do
|
|||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
defp select_bucket(q, site, %Query{interval: "minute"}) do
|
defp select_bucket(q, _table, site, %Query{interval: "minute"}) do
|
||||||
from(
|
from(
|
||||||
e in q,
|
e in q,
|
||||||
group_by: fragment("toStartOfMinute(toTimeZone(?, ?))", e.timestamp, ^site.timezone),
|
group_by: fragment("toStartOfMinute(toTimeZone(?, ?))", e.timestamp, ^site.timezone),
|
||||||
@ -218,6 +248,35 @@ defmodule Plausible.Stats.Timeseries do
|
|||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
defp select_bucket(q, :sessions, site, %Query{
|
||||||
|
interval: "minute",
|
||||||
|
experimental_session_count?: true
|
||||||
|
}) do
|
||||||
|
bucket_with_timeslots(q, site, 60)
|
||||||
|
end
|
||||||
|
|
||||||
|
# Includes session in _every_ time bucket it was active in.
|
||||||
|
# Only done in hourly and minute graphs for performance reasons.
|
||||||
|
defp bucket_with_timeslots(q, site, period_in_seconds) do
|
||||||
|
from(
|
||||||
|
s in q,
|
||||||
|
array_join:
|
||||||
|
bucket in fragment(
|
||||||
|
"timeSlots(toTimeZone(?, ?), toUInt32(timeDiff(?, ?)), toUInt32(?))",
|
||||||
|
s.start,
|
||||||
|
^site.timezone,
|
||||||
|
s.start,
|
||||||
|
s.timestamp,
|
||||||
|
^period_in_seconds
|
||||||
|
),
|
||||||
|
group_by: bucket,
|
||||||
|
order_by: bucket,
|
||||||
|
select_merge: %{
|
||||||
|
date: fragment("?", bucket)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
end
|
||||||
|
|
||||||
defp date_or_weekstart(date, query) do
|
defp date_or_weekstart(date, query) do
|
||||||
weekstart = Timex.beginning_of_week(date)
|
weekstart = Timex.beginning_of_week(date)
|
||||||
|
|
||||||
@ -261,7 +320,7 @@ defmodule Plausible.Stats.Timeseries do
|
|||||||
from(e in base_event_query(site, totals_query),
|
from(e in base_event_query(site, totals_query),
|
||||||
select: ^select_event_metrics([:visitors])
|
select: ^select_event_metrics([:visitors])
|
||||||
)
|
)
|
||||||
|> select_bucket(site, query)
|
|> select_bucket(:events, site, query)
|
||||||
|
|
||||||
from(e in subquery(q),
|
from(e in subquery(q),
|
||||||
left_join: c in subquery(totals_timeseries_q),
|
left_join: c in subquery(totals_timeseries_q),
|
||||||
|
@ -19,7 +19,7 @@ defmodule PlausibleWeb.Api.StatsController.MainGraphTest do
|
|||||||
assert Enum.any?(plot, fn pageviews -> pageviews > 0 end)
|
assert Enum.any?(plot, fn pageviews -> pageviews > 0 end)
|
||||||
end
|
end
|
||||||
|
|
||||||
test "displays visitors for a day", %{conn: conn, site: site} do
|
test "displays pageviews for a day", %{conn: conn, site: site} do
|
||||||
populate_stats(site, [
|
populate_stats(site, [
|
||||||
build(:pageview, timestamp: ~N[2021-01-01 00:00:00]),
|
build(:pageview, timestamp: ~N[2021-01-01 00:00:00]),
|
||||||
build(:pageview, timestamp: ~N[2021-01-01 23:00:00])
|
build(:pageview, timestamp: ~N[2021-01-01 23:00:00])
|
||||||
@ -433,6 +433,146 @@ defmodule PlausibleWeb.Api.StatsController.MainGraphTest do
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
describe "GET /api/stats/main-graph - visitors plot" do
|
||||||
|
setup [:create_user, :log_in, :create_new_site, :create_legacy_site_import]
|
||||||
|
|
||||||
|
test "displays visitors per hour with short visits", %{
|
||||||
|
conn: conn,
|
||||||
|
site: site
|
||||||
|
} do
|
||||||
|
populate_stats(site, [
|
||||||
|
build(:pageview, timestamp: ~N[2021-01-01 00:00:00], user_id: 1),
|
||||||
|
build(:pageview, timestamp: ~N[2021-01-01 00:10:00], user_id: 1),
|
||||||
|
build(:pageview, timestamp: ~N[2021-01-01 00:20:00])
|
||||||
|
])
|
||||||
|
|
||||||
|
conn =
|
||||||
|
get(
|
||||||
|
conn,
|
||||||
|
"/api/stats/#{site.domain}/main-graph?period=day&date=2021-01-01&metric=visitors&interval=hour"
|
||||||
|
)
|
||||||
|
|
||||||
|
assert %{"plot" => plot} = json_response(conn, 200)
|
||||||
|
|
||||||
|
assert Enum.count(plot) == 24
|
||||||
|
assert List.first(plot) == 2
|
||||||
|
assert Enum.sum(plot) == 2
|
||||||
|
end
|
||||||
|
|
||||||
|
test "displays visitors realtime with visits spanning multiple minutes", %{
|
||||||
|
conn: conn,
|
||||||
|
site: site
|
||||||
|
} do
|
||||||
|
populate_stats(site, [
|
||||||
|
build(:pageview, timestamp: relative_time(minutes: -35), user_id: 1),
|
||||||
|
build(:pageview, timestamp: relative_time(minutes: -20), user_id: 1),
|
||||||
|
build(:pageview, timestamp: relative_time(minutes: -25), user_id: 2),
|
||||||
|
build(:pageview, timestamp: relative_time(minutes: -15), user_id: 2),
|
||||||
|
build(:pageview, timestamp: relative_time(minutes: -5), user_id: 3),
|
||||||
|
build(:pageview, timestamp: relative_time(minutes: -3), user_id: 3)
|
||||||
|
])
|
||||||
|
|
||||||
|
conn =
|
||||||
|
get(
|
||||||
|
conn,
|
||||||
|
"/api/stats/#{site.domain}/main-graph?period=realtime&metric=visitors"
|
||||||
|
)
|
||||||
|
|
||||||
|
assert %{"plot" => plot} = json_response(conn, 200)
|
||||||
|
|
||||||
|
expected_plot =
|
||||||
|
if FunWithFlags.enabled?(:experimental_session_count) do
|
||||||
|
~w[1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 0 0 1 1 1 0 0]
|
||||||
|
else
|
||||||
|
~w[0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0]
|
||||||
|
end
|
||||||
|
|
||||||
|
assert plot == Enum.map(expected_plot, &String.to_integer/1)
|
||||||
|
end
|
||||||
|
|
||||||
|
test "displays visitors per hour with visits spanning multiple hours", %{
|
||||||
|
conn: conn,
|
||||||
|
site: site
|
||||||
|
} do
|
||||||
|
populate_stats(site, [
|
||||||
|
build(:pageview, timestamp: ~N[2020-12-31 23:45:00], user_id: 1),
|
||||||
|
build(:pageview, timestamp: ~N[2021-01-01 00:10:00], user_id: 1),
|
||||||
|
build(:pageview, timestamp: ~N[2021-01-01 00:15:00], user_id: 1),
|
||||||
|
build(:pageview, timestamp: ~N[2021-01-01 00:35:00], user_id: 2),
|
||||||
|
build(:pageview, timestamp: ~N[2021-01-01 01:00:00], user_id: 2),
|
||||||
|
build(:pageview, timestamp: ~N[2021-01-01 01:25:00], user_id: 2),
|
||||||
|
build(:pageview, timestamp: ~N[2021-01-01 01:50:00], user_id: 2),
|
||||||
|
build(:pageview, timestamp: ~N[2021-01-01 02:05:00], user_id: 2),
|
||||||
|
build(:pageview, timestamp: ~N[2021-01-01 23:45:00], user_id: 3),
|
||||||
|
build(:pageview, timestamp: ~N[2021-01-02 00:05:00], user_id: 3)
|
||||||
|
])
|
||||||
|
|
||||||
|
conn =
|
||||||
|
get(
|
||||||
|
conn,
|
||||||
|
"/api/stats/#{site.domain}/main-graph?period=day&date=2021-01-01&metric=visitors&interval=hour"
|
||||||
|
)
|
||||||
|
|
||||||
|
assert %{"plot" => plot} = json_response(conn, 200)
|
||||||
|
|
||||||
|
zeroes = List.duplicate(0, 20)
|
||||||
|
assert [2, 1, 1] ++ zeroes ++ [1] == plot
|
||||||
|
end
|
||||||
|
|
||||||
|
test "displays visitors per day with visits showed only in last time bucket", %{
|
||||||
|
conn: conn,
|
||||||
|
site: site
|
||||||
|
} do
|
||||||
|
populate_stats(site, [
|
||||||
|
build(:pageview, timestamp: ~N[2020-12-31 23:45:00], user_id: 1),
|
||||||
|
build(:pageview, timestamp: ~N[2021-01-01 00:10:00], user_id: 1),
|
||||||
|
build(:pageview, timestamp: ~N[2020-01-02 23:45:00], user_id: 2),
|
||||||
|
build(:pageview, timestamp: ~N[2021-01-03 00:10:00], user_id: 2),
|
||||||
|
build(:pageview, timestamp: ~N[2020-01-03 23:45:00], user_id: 3),
|
||||||
|
build(:pageview, timestamp: ~N[2021-01-04 00:10:00], user_id: 3),
|
||||||
|
build(:pageview, timestamp: ~N[2020-01-07 23:45:00], user_id: 4),
|
||||||
|
build(:pageview, timestamp: ~N[2021-01-08 00:10:00], user_id: 4)
|
||||||
|
])
|
||||||
|
|
||||||
|
conn =
|
||||||
|
get(
|
||||||
|
conn,
|
||||||
|
"/api/stats/#{site.domain}/main-graph?period=7d&date=2021-01-07&metric=visitors&interval=date"
|
||||||
|
)
|
||||||
|
|
||||||
|
assert %{"plot" => plot} = json_response(conn, 200)
|
||||||
|
assert plot == [1, 0, 1, 1, 0, 0, 0]
|
||||||
|
end
|
||||||
|
|
||||||
|
test "displays visitors per week with visits showed only in last time bucket", %{
|
||||||
|
conn: conn,
|
||||||
|
site: site
|
||||||
|
} do
|
||||||
|
populate_stats(site, [
|
||||||
|
build(:pageview, timestamp: ~N[2020-12-31 23:45:00], user_id: 1),
|
||||||
|
build(:pageview, timestamp: ~N[2021-01-01 00:10:00], user_id: 1),
|
||||||
|
build(:pageview, timestamp: ~N[2020-01-03 23:45:00], user_id: 2),
|
||||||
|
build(:pageview, timestamp: ~N[2021-01-04 00:10:00], user_id: 2),
|
||||||
|
build(:pageview, timestamp: ~N[2021-01-31 23:45:00], user_id: 3),
|
||||||
|
build(:pageview, timestamp: ~N[2021-02-01 00:05:00], user_id: 3)
|
||||||
|
])
|
||||||
|
|
||||||
|
conn =
|
||||||
|
get(
|
||||||
|
conn,
|
||||||
|
"/api/stats/#{site.domain}/main-graph?period=month&date=2021-01-01&metric=visitors&interval=week"
|
||||||
|
)
|
||||||
|
|
||||||
|
assert %{"plot" => plot} = json_response(conn, 200)
|
||||||
|
|
||||||
|
if FunWithFlags.enabled?(:experimental_session_count) do
|
||||||
|
assert plot == [1, 1, 0, 0, 0]
|
||||||
|
else
|
||||||
|
assert plot == [1, 1, 0, 0, 1]
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
describe "GET /api/stats/main-graph - conversion_rate plot" do
|
describe "GET /api/stats/main-graph - conversion_rate plot" do
|
||||||
setup [:create_user, :log_in, :create_new_site]
|
setup [:create_user, :log_in, :create_new_site]
|
||||||
|
|
||||||
|
@ -11,8 +11,10 @@ FunWithFlags.enable(:csv_imports_exports)
|
|||||||
# Temporary flag to test `experimental_reduced_joins` flag on all tests.
|
# Temporary flag to test `experimental_reduced_joins` flag on all tests.
|
||||||
if System.get_env("TEST_EXPERIMENTAL_REDUCED_JOINS") == "1" do
|
if System.get_env("TEST_EXPERIMENTAL_REDUCED_JOINS") == "1" do
|
||||||
FunWithFlags.enable(:experimental_reduced_joins)
|
FunWithFlags.enable(:experimental_reduced_joins)
|
||||||
|
FunWithFlags.enable(:experimental_session_count)
|
||||||
else
|
else
|
||||||
FunWithFlags.disable(:experimental_reduced_joins)
|
FunWithFlags.disable(:experimental_reduced_joins)
|
||||||
|
FunWithFlags.disable(:experimental_session_count)
|
||||||
end
|
end
|
||||||
|
|
||||||
Ecto.Adapters.SQL.Sandbox.mode(Plausible.Repo, :manual)
|
Ecto.Adapters.SQL.Sandbox.mode(Plausible.Repo, :manual)
|
||||||
|
Loading…
Reference in New Issue
Block a user