Safeguard queries relying on sign from faulty old session entries (#4555)

* Safeguard session queries relying on `sign` from faulty old session entries

* Comment updated metric

Co-authored-by: Karl-Aksel Puulmann <macobo@users.noreply.github.com>

* Apply safeguards to `bounce_rate` metric only

* Add note to bounce rate definition in SQL fragments as well

* Add test for graceful bounce rate handling in breakdown

* Make user_id more unique

* Add a note to the test

* Move regression test to APIv2 tests

---------

Co-authored-by: Karl-Aksel Puulmann <macobo@users.noreply.github.com>
This commit is contained in:
Adrian Gruntkowski 2024-09-10 13:22:49 +02:00 committed by GitHub
parent c105ebceaf
commit 8ba5f7d32f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 70 additions and 2 deletions

View File

@ -252,7 +252,10 @@ defmodule Plausible.Stats.SQL.Expression do
wrap_alias([], %{
bounce_rate:
fragment(
"toUInt32(ifNotFinite(round(sumIf(is_bounce * sign, ?) / sumIf(sign, ?) * 100), 0))",
# :TRICKY: Before PR #4493, we could have sessions where `sum(is_bounce * sign)`
# is negative, leading to an underflow and >100% bounce rate. This works around
# that issue.
"toUInt32(greatest(ifNotFinite(round(sumIf(is_bounce * sign, ?) / sumIf(sign, ?) * 100), 0), 0))",
^condition,
^condition
),

View File

@ -30,7 +30,12 @@ defmodule Plausible.Stats.SQL.Fragments do
defmacro bounce_rate() do
quote do
fragment("toUInt32(ifNotFinite(round(sum(is_bounce * sign) / sum(sign) * 100), 0))")
fragment(
# :TRICKY: Before PR #4493, we could have sessions where `sum(is_bounce * sign)`
# is negative, leading to an underflow and >100% bounce rate. This works around
# that issue.
"toUInt32(greatest(ifNotFinite(round(sum(is_bounce * sign) / sum(sign) * 100), 0), 0))"
)
end
end

View File

@ -3159,6 +3159,66 @@ defmodule PlausibleWeb.Api.ExternalStatsController.QueryTest do
]
end
test "bounce rate calculation handles invalid session data gracefully", %{
conn: conn,
site: site
} do
# NOTE: At the time of this test is added, it appears it does _not_
# catch the regression on MacOS (ARM), regardless if Clickhouse is run
# natively or from a docker container. The test still does catch
# that regression when ran on Linux for instance (including CI).
user_id = System.unique_integer([:positive])
populate_stats(site, [
build(:pageview,
user_id: user_id,
pathname: "/",
timestamp: ~N[2021-01-01 00:00:00]
)
])
first_session = Plausible.Cache.Adapter.get(:sessions, {site.id, user_id})
populate_stats(site, [
build(:pageview,
user_id: user_id,
pathname: "/",
timestamp: ~N[2021-01-01 00:01:00]
)
])
Plausible.Cache.Adapter.put(:sessions, {site.id, user_id}, first_session)
populate_stats(site, [
build(:pageview,
user_id: user_id,
pathname: "/",
timestamp: ~N[2021-01-01 00:01:00]
)
])
conn =
get(conn, "/api/v1/stats/breakdown", %{
"site_id" => site.domain,
"period" => "day",
"date" => "2021-01-01",
"property" => "event:page",
"metrics" => "bounce_rate"
})
conn =
post(conn, "/api/v2/query", %{
"site_id" => site.domain,
"date_range" => "all",
"metrics" => ["bounce_rate"],
"dimensions" => ["event:page"]
})
assert json_response(conn, 200)["results"] == [
%{"dimensions" => ["/"], "metrics" => [0]}
]
end
describe "using the returned query object in a new POST request" do
test "yields the same results for a simple aggregate query", %{conn: conn, site: site} do
Plausible.Site.changeset(site, %{timezone: "Europe/Tallinn"})