Stop treating page filter as an entry_page filter (#2752)

* remove dead code

* stop treating page filter as entry page filter in breakdown queries

* stop treating page filter as entry page filter in aggregate queries

* stop treating page filter as entry page filter in timeseries queries

* mix format

* update changelog

* break code down to smaller functions to keep credo happy

* remove unused functions
This commit is contained in:
RobertJoonas 2023-03-15 18:12:59 +02:00 committed by GitHub
parent 4654a50365
commit d0895efbdd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 118 additions and 136 deletions

View File

@ -8,6 +8,7 @@ All notable changes to this project will be documented in this file.
- Add support for more Bamboo adapters, i.e. `Bamboo.MailgunAdapter`, `Bamboo.MandrillAdapter`, `Bamboo.SendGridAdapter` plausible/analytics#2649 - Add support for more Bamboo adapters, i.e. `Bamboo.MailgunAdapter`, `Bamboo.MandrillAdapter`, `Bamboo.SendGridAdapter` plausible/analytics#2649
### Fixed ### Fixed
- Stop treating page filter as an entry page filter
- City report showing N/A instead of city names with imported data plausible/analytics#2675 - City report showing N/A instead of city names with imported data plausible/analytics#2675
- Empty values for Screen Size, OS and Browser are uniformly replaced with "(not set)" - Empty values for Screen Size, OS and Browser are uniformly replaced with "(not set)"
- Fix [more pageviews with session prop filter than with no filters](https://github.com/plausible/analytics/issues/1666) - Fix [more pageviews with session prop filter than with no filters](https://github.com/plausible/analytics/issues/1666)

View File

@ -38,8 +38,6 @@ defmodule Plausible.Stats.Aggregate do
defp aggregate_sessions(_, _, []), do: %{} defp aggregate_sessions(_, _, []), do: %{}
defp aggregate_sessions(site, query, metrics) do defp aggregate_sessions(site, query, metrics) do
query = Query.treat_page_filter_as_entry_page(query)
from(e in query_sessions(site, query), select: %{}) from(e in query_sessions(site, query), select: %{})
|> filter_converted_sessions(site, query) |> filter_converted_sessions(site, query)
|> select_session_metrics(metrics) |> select_session_metrics(metrics)

View File

@ -11,7 +11,7 @@ defmodule Plausible.Stats.Base do
def base_event_query(site, query) do def base_event_query(site, query) do
events_q = query_events(site, query) events_q = query_events(site, query)
if Enum.any?(Filters.visit_props() ++ ["goal", "page"], &query.filters["visit:" <> &1]) do if Enum.any?(Filters.visit_props(), &query.filters["visit:" <> &1]) do
sessions_q = sessions_q =
from( from(
s in query_sessions(site, query), s in query_sessions(site, query),
@ -154,72 +154,7 @@ defmodule Plausible.Stats.Base do
where: s.start >= ^first_datetime and s.start < ^last_datetime where: s.start >= ^first_datetime and s.start < ^last_datetime
) )
|> add_sample_hint(query) |> add_sample_hint(query)
|> filter_by_entry_props(query)
sessions_q =
case {query.filters["visit:goal"], query.filters["visit:page"]} do
{nil, nil} ->
sessions_q
{goal_filter, page_filter} ->
events_query =
Query.put_filter(query, "event:goal", goal_filter)
|> Query.put_filter("event:name", nil)
|> Query.put_filter("event:page", page_filter)
events_q =
from(
s in query_events(site, events_query),
select: %{session_id: fragment("DISTINCT ?", s.session_id)}
)
from(
s in sessions_q,
join: sq in subquery(events_q),
on: s.session_id == sq.session_id
)
end
sessions_q =
case Query.get_filter_by_prefix(query, "visit:entry_props:") do
nil ->
sessions_q
{"visit:entry_props:" <> prop_name, filter_value} ->
case filter_value do
{:is, "(none)"} ->
from(
s in sessions_q,
where: fragment("not has(?, ?)", field(s, :"entry_meta.key"), ^prop_name)
)
{:is, value} ->
from(
s in sessions_q,
inner_lateral_join: meta in "entry_meta",
as: :meta,
where: meta.key == ^prop_name and meta.value == ^value
)
{:is_not, "(none)"} ->
from(
s in sessions_q,
where: fragment("has(?, ?)", field(s, :"entry_meta.key"), ^prop_name)
)
{:is_not, value} ->
from(
s in sessions_q,
left_lateral_join: meta in "entry_meta",
as: :meta,
where:
(meta.key == ^prop_name and meta.value != ^value) or
fragment("not has(?, ?)", field(s, :"entry_meta.key"), ^prop_name)
)
_ ->
sessions_q
end
end
Enum.reduce(Filters.visit_props(), sessions_q, fn prop_name, sessions_q -> Enum.reduce(Filters.visit_props(), sessions_q, fn prop_name, sessions_q ->
filter = query.filters["visit:" <> prop_name] filter = query.filters["visit:" <> prop_name]
@ -258,6 +193,52 @@ defmodule Plausible.Stats.Base do
end) end)
end end
def filter_by_entry_props(sessions_q, query) do
case Query.get_filter_by_prefix(query, "visit:entry_props:") do
nil ->
sessions_q
{"visit:entry_props:" <> prop_name, filter_value} ->
apply_entry_prop_filter(sessions_q, prop_name, filter_value)
end
end
def apply_entry_prop_filter(sessions_q, prop_name, {:is, "(none)"}) do
from(
s in sessions_q,
where: fragment("not has(?, ?)", field(s, :"entry_meta.key"), ^prop_name)
)
end
def apply_entry_prop_filter(sessions_q, prop_name, {:is, value}) do
from(
s in sessions_q,
inner_lateral_join: meta in "entry_meta",
as: :meta,
where: meta.key == ^prop_name and meta.value == ^value
)
end
def apply_entry_prop_filter(sessions_q, prop_name, {:is_not, "(none)"}) do
from(
s in sessions_q,
where: fragment("has(?, ?)", field(s, :"entry_meta.key"), ^prop_name)
)
end
def apply_entry_prop_filter(sessions_q, prop_name, {:is_not, value}) do
from(
s in sessions_q,
left_lateral_join: meta in "entry_meta",
as: :meta,
where:
(meta.key == ^prop_name and meta.value != ^value) or
fragment("not has(?, ?)", field(s, :"entry_meta.key"), ^prop_name)
)
end
def apply_entry_prop_filter(sessions_q, _, _), do: sessions_q
def select_event_metrics(q, []), do: q def select_event_metrics(q, []), do: q
def select_event_metrics(q, [:pageviews | rest]) do def select_event_metrics(q, [:pageviews | rest]) do

View File

@ -151,24 +151,6 @@ defmodule Plausible.Stats.Breakdown do
breakdown_events(site, query, property, metrics, pagination) breakdown_events(site, query, property, metrics, pagination)
end end
def breakdown(site, query, property, metrics, pagination)
when property in [
"visit:source",
"visit:utm_medium",
"visit:utm_source",
"visit:utm_campaign",
"visit:utm_content",
"visit:utm_term"
] do
query =
query
|> Query.treat_page_filter_as_entry_page()
|> Query.treat_prop_filter_as_entry_prop()
trace(query, property, metrics)
breakdown_sessions(site, query, property, metrics, pagination)
end
def breakdown(site, query, property, metrics, pagination) do def breakdown(site, query, property, metrics, pagination) do
trace(query, property, metrics) trace(query, property, metrics)
breakdown_sessions(site, query, property, metrics, pagination) breakdown_sessions(site, query, property, metrics, pagination)

View File

@ -208,30 +208,6 @@ defmodule Plausible.Stats.Query do
} }
end end
def treat_page_filter_as_entry_page(%__MODULE__{filters: %{"visit:entry_page" => _}} = q), do: q
def treat_page_filter_as_entry_page(%__MODULE__{filters: %{"event:page" => f}} = q) do
q
|> put_filter("visit:entry_page", f)
|> put_filter("event:page", nil)
end
def treat_page_filter_as_entry_page(q), do: q
def treat_prop_filter_as_entry_prop(%__MODULE__{filters: filters} = q) do
prop_filter = get_filter_by_prefix(q, "event:props:")
case {filters["event:goal"], prop_filter} do
{nil, {"event:props:" <> prop, filter_value}} ->
q
|> remove_event_filters([:props])
|> put_filter("visit:entry_props:" <> prop, filter_value)
_ ->
q
end
end
def remove_event_filters(query, opts) do def remove_event_filters(query, opts) do
new_filters = new_filters =
Enum.filter(query.filters, fn {filter_key, _} -> Enum.filter(query.filters, fn {filter_key, _} ->

View File

@ -42,9 +42,8 @@ defmodule Plausible.Stats.Timeseries do
defp sessions_timeseries(_, _, []), do: [] defp sessions_timeseries(_, _, []), do: []
defp sessions_timeseries(site, query, metrics) do defp sessions_timeseries(site, query, metrics) do
query = Query.treat_page_filter_as_entry_page(query)
from(e in query_sessions(site, query), select: %{}) from(e in query_sessions(site, query), select: %{})
|> filter_converted_sessions(site, query)
|> select_bucket(site, query) |> select_bucket(site, query)
|> select_session_metrics(metrics) |> select_session_metrics(metrics)
|> Plausible.Stats.Imported.merge_imported_timeseries(site, query, metrics) |> Plausible.Stats.Imported.merge_imported_timeseries(site, query, metrics)

View File

@ -1,5 +1,5 @@
date,visitors,pageviews,bounce_rate,visit_duration date,visitors,pageviews,bounce_rate,visit_duration
2021-09-20,0,0,100,0 2021-09-20,0,0,,
2021-09-21,0,0,, 2021-09-21,0,0,,
2021-09-22,0,0,, 2021-09-22,0,0,,
2021-09-23,0,0,, 2021-09-23,0,0,,
@ -29,4 +29,4 @@ date,visitors,pageviews,bounce_rate,visit_duration
2021-10-17,0,0,, 2021-10-17,0,0,,
2021-10-18,0,0,, 2021-10-18,0,0,,
2021-10-19,1,0,100,0 2021-10-19,1,0,100,0
2021-10-20,0,0,0,60 2021-10-20,0,0,,

1 date visitors pageviews bounce_rate visit_duration
2 2021-09-20 0 0 100 0
3 2021-09-21 0 0
4 2021-09-22 0 0
5 2021-09-23 0 0
29 2021-10-17 0 0
30 2021-10-18 0 0
31 2021-10-19 1 0 100 0
32 2021-10-20 0 0 0 60

View File

@ -1 +1,2 @@
name,visitors,bounce_rate,visit_duration name,visitors,bounce_rate,visit_duration
Google,1,0,60

1 name visitors bounce_rate visit_duration
2 Google 1 0 60

View File

@ -1 +1,2 @@
name,visitors,bounce_rate,visit_duration name,visitors,bounce_rate,visit_duration
Direct / None,1,0,60

1 name visitors bounce_rate visit_duration
2 Direct / None 1 0 60

View File

@ -1 +1,2 @@
name,visitors,bounce_rate,visit_duration name,visitors,bounce_rate,visit_duration
Direct / None,1,0,60

1 name visitors bounce_rate visit_duration
2 Direct / None 1 0 60

View File

@ -1 +1,2 @@
name,visitors,bounce_rate,visit_duration name,visitors,bounce_rate,visit_duration
Direct / None,1,0,60

1 name visitors bounce_rate visit_duration
2 Direct / None 1 0 60

View File

@ -1 +1,2 @@
name,visitors,bounce_rate,visit_duration name,visitors,bounce_rate,visit_duration
Direct / None,1,0,60

1 name visitors bounce_rate visit_duration
2 Direct / None 1 0 60

View File

@ -1 +1,2 @@
name,visitors,bounce_rate,visit_duration name,visitors,bounce_rate,visit_duration
Direct / None,1,0,60

1 name visitors bounce_rate visit_duration
2 Direct / None 1 0 60

View File

@ -29,4 +29,4 @@ date,visitors,pageviews,visits,views_per_visit,bounce_rate,visit_duration
2021-10-17,0,0,0,0.0,, 2021-10-17,0,0,0,0.0,,
2021-10-18,0,0,0,0.0,, 2021-10-18,0,0,0,0.0,,
2021-10-19,0,0,0,0.0,, 2021-10-19,0,0,0,0.0,,
2021-10-20,1,1,0,0.0,, 2021-10-20,1,1,1,2.0,0,60

1 date visitors pageviews visits views_per_visit bounce_rate visit_duration
29 2021-10-17 0 0 0 0.0
30 2021-10-18 0 0 0 0.0
31 2021-10-19 0 0 0 0.0
32 2021-10-20 1 1 0 1 0.0 2.0 0 60

View File

@ -638,19 +638,19 @@ defmodule PlausibleWeb.Api.ExternalStatsController.AggregateTest do
} }
end end
test "when filtering by page, session metrics treat is like entry_page", %{ test "can filter by page", %{
conn: conn, conn: conn,
site: site site: site
} do } do
populate_stats([ populate_stats([
build(:pageview, build(:pageview,
pathname: "/blogpost",
user_id: @user_id, user_id: @user_id,
domain: site.domain, domain: site.domain,
timestamp: ~N[2021-01-01 00:00:00] timestamp: ~N[2021-01-01 00:00:00]
), ),
build(:pageview, build(:pageview,
user_id: @user_id, user_id: @user_id,
pathname: "/blogpost",
domain: site.domain, domain: site.domain,
timestamp: ~N[2021-01-01 00:25:00] timestamp: ~N[2021-01-01 00:25:00]
), ),

View File

@ -1257,23 +1257,30 @@ defmodule PlausibleWeb.Api.ExternalStatsController.BreakdownTest do
} }
end end
test "event:page filter shows traffic sources directly to that page", %{ test "event:page filter shows sources of sessions that have visited that page", %{
conn: conn, conn: conn,
site: site site: site
} do } do
populate_stats(site, [ populate_stats(site, [
build(:pageview, build(:pageview,
pathname: "/ignore", pathname: "/",
referrer_source: "Should not show up", referrer_source: "Twitter",
utm_medium: "Should not show up", utm_medium: "Twitter",
utm_source: "Should not show up", utm_source: "Twitter",
utm_campaign: "Should not show up", utm_campaign: "Twitter",
user_id: @user_id user_id: @user_id
), ),
build(:pageview, build(:pageview,
pathname: "/plausible.io", pathname: "/plausible.io",
user_id: @user_id user_id: @user_id
), ),
build(:pageview,
pathname: "/plausible.io",
referrer_source: "Google",
utm_medium: "Google",
utm_source: "Google",
utm_campaign: "Google"
),
build(:pageview, build(:pageview,
pathname: "/plausible.io", pathname: "/plausible.io",
referrer_source: "Google", referrer_source: "Google",
@ -1294,7 +1301,8 @@ defmodule PlausibleWeb.Api.ExternalStatsController.BreakdownTest do
assert json_response(conn, 200) == %{ assert json_response(conn, 200) == %{
"results" => [ "results" => [
%{property => "Google", "visitors" => 1} %{property => "Google", "visitors" => 2},
%{property => "Twitter", "visitors" => 1}
] ]
} }
end end

View File

@ -681,13 +681,12 @@ defmodule PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do
} }
end end
test "filtering by page - session metrics consider it like entry_page", %{ test "can filter by page", %{
conn: conn, conn: conn,
site: site site: site
} do } do
populate_stats([ populate_stats([
build(:pageview, build(:pageview,
pathname: "/hello",
user_id: @user_id, user_id: @user_id,
domain: site.domain, domain: site.domain,
timestamp: ~N[2021-01-01 00:00:00] timestamp: ~N[2021-01-01 00:00:00]
@ -716,7 +715,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do
"period" => "month", "period" => "month",
"date" => "2021-01-01", "date" => "2021-01-01",
"filters" => "event:page==/hello", "filters" => "event:page==/hello",
"metrics" => "visitors,pageviews,bounce_rate,visit_duration" "metrics" => "visitors,visits,pageviews,views_per_visit,bounce_rate,visit_duration"
}) })
res = json_response(conn, 200)["results"] res = json_response(conn, 200)["results"]
@ -724,7 +723,9 @@ defmodule PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do
assert List.first(res) == %{ assert List.first(res) == %{
"date" => "2021-01-01", "date" => "2021-01-01",
"visitors" => 2, "visitors" => 2,
"pageviews" => 3, "visits" => 2,
"pageviews" => 2,
"views_per_visit" => 1.5,
"bounce_rate" => 50, "bounce_rate" => 50,
"visit_duration" => 150 "visit_duration" => 150
} }

View File

@ -61,6 +61,13 @@ defmodule PlausibleWeb.Api.StatsController.SourcesTest do
"meta.value": ["John Doe"], "meta.value": ["John Doe"],
timestamp: ~N[2021-01-01 00:00:00] timestamp: ~N[2021-01-01 00:00:00]
), ),
build(:pageview,
referrer_source: "Google",
referrer: "google.com",
"meta.key": ["author"],
"meta.value": ["John Doe"],
timestamp: ~N[2021-01-01 00:00:00]
),
build(:pageview, build(:pageview,
referrer_source: "Facebook", referrer_source: "Facebook",
referrer: "facebook.com", referrer: "facebook.com",
@ -77,7 +84,8 @@ defmodule PlausibleWeb.Api.StatsController.SourcesTest do
) )
assert json_response(conn, 200) == [ assert json_response(conn, 200) == [
%{"name" => "Google", "visitors" => 1} %{"name" => "Google", "visitors" => 2},
%{"name" => "DuckDuckGo", "visitors" => 1}
] ]
end end
@ -107,9 +115,16 @@ defmodule PlausibleWeb.Api.StatsController.SourcesTest do
"meta.value": ["other"], "meta.value": ["other"],
timestamp: ~N[2021-01-01 00:00:00] timestamp: ~N[2021-01-01 00:00:00]
), ),
build(:pageview,
referrer_source: "Google",
referrer: "google.com",
timestamp: ~N[2021-01-01 00:00:00]
),
build(:pageview, build(:pageview,
referrer_source: "Facebook", referrer_source: "Facebook",
referrer: "facebook.com", referrer: "facebook.com",
"meta.key": ["author"],
"meta.value": ["John Doe"],
timestamp: ~N[2021-01-01 00:00:00] timestamp: ~N[2021-01-01 00:00:00]
) )
]) ])
@ -123,8 +138,8 @@ defmodule PlausibleWeb.Api.StatsController.SourcesTest do
) )
assert json_response(conn, 200) == [ assert json_response(conn, 200) == [
%{"name" => "Facebook", "visitors" => 1}, %{"name" => "Google", "visitors" => 2},
%{"name" => "Google", "visitors" => 1} %{"name" => "DuckDuckGo", "visitors" => 1}
] ]
end end
@ -152,6 +167,11 @@ defmodule PlausibleWeb.Api.StatsController.SourcesTest do
"meta.value": ["other"], "meta.value": ["other"],
timestamp: ~N[2021-01-01 00:00:00] timestamp: ~N[2021-01-01 00:00:00]
), ),
build(:pageview,
referrer_source: "Facebook",
referrer: "facebook.com",
timestamp: ~N[2021-01-01 00:00:00]
),
build(:pageview, build(:pageview,
referrer_source: "Facebook", referrer_source: "Facebook",
referrer: "facebook.com", referrer: "facebook.com",
@ -168,7 +188,8 @@ defmodule PlausibleWeb.Api.StatsController.SourcesTest do
) )
assert json_response(conn, 200) == [ assert json_response(conn, 200) == [
%{"name" => "Facebook", "visitors" => 1} %{"name" => "Facebook", "visitors" => 2},
%{"name" => "DuckDuckGo", "visitors" => 1}
] ]
end end
@ -198,6 +219,13 @@ defmodule PlausibleWeb.Api.StatsController.SourcesTest do
"meta.value": ["other"], "meta.value": ["other"],
timestamp: ~N[2021-01-01 00:00:00] timestamp: ~N[2021-01-01 00:00:00]
), ),
build(:pageview,
referrer_source: "Google",
referrer: "google.com",
"meta.key": ["author"],
"meta.value": ["another"],
timestamp: ~N[2021-01-01 00:00:00]
),
build(:pageview, build(:pageview,
referrer_source: "Facebook", referrer_source: "Facebook",
referrer: "facebook.com", referrer: "facebook.com",
@ -214,7 +242,8 @@ defmodule PlausibleWeb.Api.StatsController.SourcesTest do
) )
assert json_response(conn, 200) == [ assert json_response(conn, 200) == [
%{"name" => "Google", "visitors" => 1} %{"name" => "Google", "visitors" => 2},
%{"name" => "DuckDuckGo", "visitors" => 1}
] ]
end end
@ -442,7 +471,7 @@ defmodule PlausibleWeb.Api.StatsController.SourcesTest do
test "shows sources for a page", %{conn: conn, site: site} do test "shows sources for a page", %{conn: conn, site: site} do
populate_stats(site, [ populate_stats(site, [
build(:pageview, pathname: "/page1", referrer_source: "Google"), build(:pageview, pathname: "/page1", referrer_source: "Google"),
build(:pageview, pathname: "/page2", referrer_source: "Google"), build(:pageview, pathname: "/page1", referrer_source: "Google"),
build(:pageview, user_id: 1, pathname: "/page2", referrer_source: "DuckDuckGo"), build(:pageview, user_id: 1, pathname: "/page2", referrer_source: "DuckDuckGo"),
build(:pageview, user_id: 1, pathname: "/page1", referrer_source: "DuckDuckGo") build(:pageview, user_id: 1, pathname: "/page1", referrer_source: "DuckDuckGo")
]) ])
@ -451,7 +480,8 @@ defmodule PlausibleWeb.Api.StatsController.SourcesTest do
conn = get(conn, "/api/stats/#{site.domain}/sources?filters=#{filters}") conn = get(conn, "/api/stats/#{site.domain}/sources?filters=#{filters}")
assert json_response(conn, 200) == [ assert json_response(conn, 200) == [
%{"name" => "Google", "visitors" => 1} %{"name" => "Google", "visitors" => 2},
%{"name" => "DuckDuckGo", "visitors" => 1}
] ]
end end
end end