From d0895efbddb0e4e3e6e0d36bf97c5b43302e2fca Mon Sep 17 00:00:00 2001 From: RobertJoonas <56999674+RobertJoonas@users.noreply.github.com> Date: Wed, 15 Mar 2023 18:12:59 +0200 Subject: [PATCH] Stop treating page filter as an entry_page filter (#2752) * remove dead code * stop treating page filter as entry page filter in breakdown queries * stop treating page filter as entry page filter in aggregate queries * stop treating page filter as entry page filter in timeseries queries * mix format * update changelog * break code down to smaller functions to keep credo happy * remove unused functions --- CHANGELOG.md | 1 + lib/plausible/stats/aggregate.ex | 2 - lib/plausible/stats/base.ex | 115 ++++++++---------- lib/plausible/stats/breakdown.ex | 18 --- lib/plausible/stats/query.ex | 24 ---- lib/plausible/stats/timeseries.ex | 3 +- .../CSVs/30d-filter-goal/visitors.csv | 4 +- .../CSVs/30d-filter-path/sources.csv | 1 + .../CSVs/30d-filter-path/utm_campaigns.csv | 1 + .../CSVs/30d-filter-path/utm_contents.csv | 1 + .../CSVs/30d-filter-path/utm_mediums.csv | 1 + .../CSVs/30d-filter-path/utm_sources.csv | 1 + .../CSVs/30d-filter-path/utm_terms.csv | 1 + .../CSVs/30d-filter-path/visitors.csv | 2 +- .../aggregate_test.exs | 4 +- .../breakdown_test.exs | 22 ++-- .../timeseries_test.exs | 9 +- .../api/stats_controller/sources_test.exs | 44 +++++-- 18 files changed, 118 insertions(+), 136 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ddb898069..2ad4fd3b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ All notable changes to this project will be documented in this file. - Add support for more Bamboo adapters, i.e. `Bamboo.MailgunAdapter`, `Bamboo.MandrillAdapter`, `Bamboo.SendGridAdapter` plausible/analytics#2649 ### Fixed +- Stop treating page filter as an entry page filter - City report showing N/A instead of city names with imported data plausible/analytics#2675 - Empty values for Screen Size, OS and Browser are uniformly replaced with "(not set)" - Fix [more pageviews with session prop filter than with no filters](https://github.com/plausible/analytics/issues/1666) diff --git a/lib/plausible/stats/aggregate.ex b/lib/plausible/stats/aggregate.ex index 975552346..146d9c644 100644 --- a/lib/plausible/stats/aggregate.ex +++ b/lib/plausible/stats/aggregate.ex @@ -38,8 +38,6 @@ defmodule Plausible.Stats.Aggregate do defp aggregate_sessions(_, _, []), do: %{} defp aggregate_sessions(site, query, metrics) do - query = Query.treat_page_filter_as_entry_page(query) - from(e in query_sessions(site, query), select: %{}) |> filter_converted_sessions(site, query) |> select_session_metrics(metrics) diff --git a/lib/plausible/stats/base.ex b/lib/plausible/stats/base.ex index 8cbc748d1..0a464c65e 100644 --- a/lib/plausible/stats/base.ex +++ b/lib/plausible/stats/base.ex @@ -11,7 +11,7 @@ defmodule Plausible.Stats.Base do def base_event_query(site, query) do events_q = query_events(site, query) - if Enum.any?(Filters.visit_props() ++ ["goal", "page"], &query.filters["visit:" <> &1]) do + if Enum.any?(Filters.visit_props(), &query.filters["visit:" <> &1]) do sessions_q = from( s in query_sessions(site, query), @@ -154,72 +154,7 @@ defmodule Plausible.Stats.Base do where: s.start >= ^first_datetime and s.start < ^last_datetime ) |> add_sample_hint(query) - - sessions_q = - case {query.filters["visit:goal"], query.filters["visit:page"]} do - {nil, nil} -> - sessions_q - - {goal_filter, page_filter} -> - events_query = - Query.put_filter(query, "event:goal", goal_filter) - |> Query.put_filter("event:name", nil) - |> Query.put_filter("event:page", page_filter) - - events_q = - from( - s in query_events(site, events_query), - select: %{session_id: fragment("DISTINCT ?", s.session_id)} - ) - - from( - s in sessions_q, - join: sq in subquery(events_q), - on: s.session_id == sq.session_id - ) - end - - sessions_q = - case Query.get_filter_by_prefix(query, "visit:entry_props:") do - nil -> - sessions_q - - {"visit:entry_props:" <> prop_name, filter_value} -> - case filter_value do - {:is, "(none)"} -> - from( - s in sessions_q, - where: fragment("not has(?, ?)", field(s, :"entry_meta.key"), ^prop_name) - ) - - {:is, value} -> - from( - s in sessions_q, - inner_lateral_join: meta in "entry_meta", - as: :meta, - where: meta.key == ^prop_name and meta.value == ^value - ) - - {:is_not, "(none)"} -> - from( - s in sessions_q, - where: fragment("has(?, ?)", field(s, :"entry_meta.key"), ^prop_name) - ) - - {:is_not, value} -> - from( - s in sessions_q, - left_lateral_join: meta in "entry_meta", - as: :meta, - where: - (meta.key == ^prop_name and meta.value != ^value) or - fragment("not has(?, ?)", field(s, :"entry_meta.key"), ^prop_name) - ) - - _ -> - sessions_q - end - end + |> filter_by_entry_props(query) Enum.reduce(Filters.visit_props(), sessions_q, fn prop_name, sessions_q -> filter = query.filters["visit:" <> prop_name] @@ -258,6 +193,52 @@ defmodule Plausible.Stats.Base do end) end + def filter_by_entry_props(sessions_q, query) do + case Query.get_filter_by_prefix(query, "visit:entry_props:") do + nil -> + sessions_q + + {"visit:entry_props:" <> prop_name, filter_value} -> + apply_entry_prop_filter(sessions_q, prop_name, filter_value) + end + end + + def apply_entry_prop_filter(sessions_q, prop_name, {:is, "(none)"}) do + from( + s in sessions_q, + where: fragment("not has(?, ?)", field(s, :"entry_meta.key"), ^prop_name) + ) + end + + def apply_entry_prop_filter(sessions_q, prop_name, {:is, value}) do + from( + s in sessions_q, + inner_lateral_join: meta in "entry_meta", + as: :meta, + where: meta.key == ^prop_name and meta.value == ^value + ) + end + + def apply_entry_prop_filter(sessions_q, prop_name, {:is_not, "(none)"}) do + from( + s in sessions_q, + where: fragment("has(?, ?)", field(s, :"entry_meta.key"), ^prop_name) + ) + end + + def apply_entry_prop_filter(sessions_q, prop_name, {:is_not, value}) do + from( + s in sessions_q, + left_lateral_join: meta in "entry_meta", + as: :meta, + where: + (meta.key == ^prop_name and meta.value != ^value) or + fragment("not has(?, ?)", field(s, :"entry_meta.key"), ^prop_name) + ) + end + + def apply_entry_prop_filter(sessions_q, _, _), do: sessions_q + def select_event_metrics(q, []), do: q def select_event_metrics(q, [:pageviews | rest]) do diff --git a/lib/plausible/stats/breakdown.ex b/lib/plausible/stats/breakdown.ex index 776847947..0abe33552 100644 --- a/lib/plausible/stats/breakdown.ex +++ b/lib/plausible/stats/breakdown.ex @@ -151,24 +151,6 @@ defmodule Plausible.Stats.Breakdown do breakdown_events(site, query, property, metrics, pagination) end - def breakdown(site, query, property, metrics, pagination) - when property in [ - "visit:source", - "visit:utm_medium", - "visit:utm_source", - "visit:utm_campaign", - "visit:utm_content", - "visit:utm_term" - ] do - query = - query - |> Query.treat_page_filter_as_entry_page() - |> Query.treat_prop_filter_as_entry_prop() - - trace(query, property, metrics) - breakdown_sessions(site, query, property, metrics, pagination) - end - def breakdown(site, query, property, metrics, pagination) do trace(query, property, metrics) breakdown_sessions(site, query, property, metrics, pagination) diff --git a/lib/plausible/stats/query.ex b/lib/plausible/stats/query.ex index 64b79cbaf..c083c36c5 100644 --- a/lib/plausible/stats/query.ex +++ b/lib/plausible/stats/query.ex @@ -208,30 +208,6 @@ defmodule Plausible.Stats.Query do } end - def treat_page_filter_as_entry_page(%__MODULE__{filters: %{"visit:entry_page" => _}} = q), do: q - - def treat_page_filter_as_entry_page(%__MODULE__{filters: %{"event:page" => f}} = q) do - q - |> put_filter("visit:entry_page", f) - |> put_filter("event:page", nil) - end - - def treat_page_filter_as_entry_page(q), do: q - - def treat_prop_filter_as_entry_prop(%__MODULE__{filters: filters} = q) do - prop_filter = get_filter_by_prefix(q, "event:props:") - - case {filters["event:goal"], prop_filter} do - {nil, {"event:props:" <> prop, filter_value}} -> - q - |> remove_event_filters([:props]) - |> put_filter("visit:entry_props:" <> prop, filter_value) - - _ -> - q - end - end - def remove_event_filters(query, opts) do new_filters = Enum.filter(query.filters, fn {filter_key, _} -> diff --git a/lib/plausible/stats/timeseries.ex b/lib/plausible/stats/timeseries.ex index 9b3cdf19e..76fcec917 100644 --- a/lib/plausible/stats/timeseries.ex +++ b/lib/plausible/stats/timeseries.ex @@ -42,9 +42,8 @@ defmodule Plausible.Stats.Timeseries do defp sessions_timeseries(_, _, []), do: [] defp sessions_timeseries(site, query, metrics) do - query = Query.treat_page_filter_as_entry_page(query) - from(e in query_sessions(site, query), select: %{}) + |> filter_converted_sessions(site, query) |> select_bucket(site, query) |> select_session_metrics(metrics) |> Plausible.Stats.Imported.merge_imported_timeseries(site, query, metrics) diff --git a/test/plausible_web/controllers/CSVs/30d-filter-goal/visitors.csv b/test/plausible_web/controllers/CSVs/30d-filter-goal/visitors.csv index f7e11600a..0887396c4 100644 --- a/test/plausible_web/controllers/CSVs/30d-filter-goal/visitors.csv +++ b/test/plausible_web/controllers/CSVs/30d-filter-goal/visitors.csv @@ -1,5 +1,5 @@ date,visitors,pageviews,bounce_rate,visit_duration -2021-09-20,0,0,100,0 +2021-09-20,0,0,, 2021-09-21,0,0,, 2021-09-22,0,0,, 2021-09-23,0,0,, @@ -29,4 +29,4 @@ date,visitors,pageviews,bounce_rate,visit_duration 2021-10-17,0,0,, 2021-10-18,0,0,, 2021-10-19,1,0,100,0 -2021-10-20,0,0,0,60 +2021-10-20,0,0,, diff --git a/test/plausible_web/controllers/CSVs/30d-filter-path/sources.csv b/test/plausible_web/controllers/CSVs/30d-filter-path/sources.csv index 773a58cf7..3b22e30a0 100644 --- a/test/plausible_web/controllers/CSVs/30d-filter-path/sources.csv +++ b/test/plausible_web/controllers/CSVs/30d-filter-path/sources.csv @@ -1 +1,2 @@ name,visitors,bounce_rate,visit_duration +Google,1,0,60 diff --git a/test/plausible_web/controllers/CSVs/30d-filter-path/utm_campaigns.csv b/test/plausible_web/controllers/CSVs/30d-filter-path/utm_campaigns.csv index 773a58cf7..324c36e7f 100644 --- a/test/plausible_web/controllers/CSVs/30d-filter-path/utm_campaigns.csv +++ b/test/plausible_web/controllers/CSVs/30d-filter-path/utm_campaigns.csv @@ -1 +1,2 @@ name,visitors,bounce_rate,visit_duration +Direct / None,1,0,60 diff --git a/test/plausible_web/controllers/CSVs/30d-filter-path/utm_contents.csv b/test/plausible_web/controllers/CSVs/30d-filter-path/utm_contents.csv index 773a58cf7..324c36e7f 100644 --- a/test/plausible_web/controllers/CSVs/30d-filter-path/utm_contents.csv +++ b/test/plausible_web/controllers/CSVs/30d-filter-path/utm_contents.csv @@ -1 +1,2 @@ name,visitors,bounce_rate,visit_duration +Direct / None,1,0,60 diff --git a/test/plausible_web/controllers/CSVs/30d-filter-path/utm_mediums.csv b/test/plausible_web/controllers/CSVs/30d-filter-path/utm_mediums.csv index 773a58cf7..324c36e7f 100644 --- a/test/plausible_web/controllers/CSVs/30d-filter-path/utm_mediums.csv +++ b/test/plausible_web/controllers/CSVs/30d-filter-path/utm_mediums.csv @@ -1 +1,2 @@ name,visitors,bounce_rate,visit_duration +Direct / None,1,0,60 diff --git a/test/plausible_web/controllers/CSVs/30d-filter-path/utm_sources.csv b/test/plausible_web/controllers/CSVs/30d-filter-path/utm_sources.csv index 773a58cf7..324c36e7f 100644 --- a/test/plausible_web/controllers/CSVs/30d-filter-path/utm_sources.csv +++ b/test/plausible_web/controllers/CSVs/30d-filter-path/utm_sources.csv @@ -1 +1,2 @@ name,visitors,bounce_rate,visit_duration +Direct / None,1,0,60 diff --git a/test/plausible_web/controllers/CSVs/30d-filter-path/utm_terms.csv b/test/plausible_web/controllers/CSVs/30d-filter-path/utm_terms.csv index 773a58cf7..324c36e7f 100644 --- a/test/plausible_web/controllers/CSVs/30d-filter-path/utm_terms.csv +++ b/test/plausible_web/controllers/CSVs/30d-filter-path/utm_terms.csv @@ -1 +1,2 @@ name,visitors,bounce_rate,visit_duration +Direct / None,1,0,60 diff --git a/test/plausible_web/controllers/CSVs/30d-filter-path/visitors.csv b/test/plausible_web/controllers/CSVs/30d-filter-path/visitors.csv index c835217e9..a49423c67 100644 --- a/test/plausible_web/controllers/CSVs/30d-filter-path/visitors.csv +++ b/test/plausible_web/controllers/CSVs/30d-filter-path/visitors.csv @@ -29,4 +29,4 @@ date,visitors,pageviews,visits,views_per_visit,bounce_rate,visit_duration 2021-10-17,0,0,0,0.0,, 2021-10-18,0,0,0,0.0,, 2021-10-19,0,0,0,0.0,, -2021-10-20,1,1,0,0.0,, +2021-10-20,1,1,1,2.0,0,60 diff --git a/test/plausible_web/controllers/api/external_stats_controller/aggregate_test.exs b/test/plausible_web/controllers/api/external_stats_controller/aggregate_test.exs index c0c355d48..15dbc469e 100644 --- a/test/plausible_web/controllers/api/external_stats_controller/aggregate_test.exs +++ b/test/plausible_web/controllers/api/external_stats_controller/aggregate_test.exs @@ -638,19 +638,19 @@ defmodule PlausibleWeb.Api.ExternalStatsController.AggregateTest do } end - test "when filtering by page, session metrics treat is like entry_page", %{ + test "can filter by page", %{ conn: conn, site: site } do populate_stats([ build(:pageview, - pathname: "/blogpost", user_id: @user_id, domain: site.domain, timestamp: ~N[2021-01-01 00:00:00] ), build(:pageview, user_id: @user_id, + pathname: "/blogpost", domain: site.domain, timestamp: ~N[2021-01-01 00:25:00] ), diff --git a/test/plausible_web/controllers/api/external_stats_controller/breakdown_test.exs b/test/plausible_web/controllers/api/external_stats_controller/breakdown_test.exs index 60c697c3c..9f930dfcf 100644 --- a/test/plausible_web/controllers/api/external_stats_controller/breakdown_test.exs +++ b/test/plausible_web/controllers/api/external_stats_controller/breakdown_test.exs @@ -1257,23 +1257,30 @@ defmodule PlausibleWeb.Api.ExternalStatsController.BreakdownTest do } end - test "event:page filter shows traffic sources directly to that page", %{ + test "event:page filter shows sources of sessions that have visited that page", %{ conn: conn, site: site } do populate_stats(site, [ build(:pageview, - pathname: "/ignore", - referrer_source: "Should not show up", - utm_medium: "Should not show up", - utm_source: "Should not show up", - utm_campaign: "Should not show up", + pathname: "/", + referrer_source: "Twitter", + utm_medium: "Twitter", + utm_source: "Twitter", + utm_campaign: "Twitter", user_id: @user_id ), build(:pageview, pathname: "/plausible.io", user_id: @user_id ), + build(:pageview, + pathname: "/plausible.io", + referrer_source: "Google", + utm_medium: "Google", + utm_source: "Google", + utm_campaign: "Google" + ), build(:pageview, pathname: "/plausible.io", referrer_source: "Google", @@ -1294,7 +1301,8 @@ defmodule PlausibleWeb.Api.ExternalStatsController.BreakdownTest do assert json_response(conn, 200) == %{ "results" => [ - %{property => "Google", "visitors" => 1} + %{property => "Google", "visitors" => 2}, + %{property => "Twitter", "visitors" => 1} ] } end diff --git a/test/plausible_web/controllers/api/external_stats_controller/timeseries_test.exs b/test/plausible_web/controllers/api/external_stats_controller/timeseries_test.exs index c1b313e8b..b84c3cf1c 100644 --- a/test/plausible_web/controllers/api/external_stats_controller/timeseries_test.exs +++ b/test/plausible_web/controllers/api/external_stats_controller/timeseries_test.exs @@ -681,13 +681,12 @@ defmodule PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do } end - test "filtering by page - session metrics consider it like entry_page", %{ + test "can filter by page", %{ conn: conn, site: site } do populate_stats([ build(:pageview, - pathname: "/hello", user_id: @user_id, domain: site.domain, timestamp: ~N[2021-01-01 00:00:00] @@ -716,7 +715,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do "period" => "month", "date" => "2021-01-01", "filters" => "event:page==/hello", - "metrics" => "visitors,pageviews,bounce_rate,visit_duration" + "metrics" => "visitors,visits,pageviews,views_per_visit,bounce_rate,visit_duration" }) res = json_response(conn, 200)["results"] @@ -724,7 +723,9 @@ defmodule PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do assert List.first(res) == %{ "date" => "2021-01-01", "visitors" => 2, - "pageviews" => 3, + "visits" => 2, + "pageviews" => 2, + "views_per_visit" => 1.5, "bounce_rate" => 50, "visit_duration" => 150 } diff --git a/test/plausible_web/controllers/api/stats_controller/sources_test.exs b/test/plausible_web/controllers/api/stats_controller/sources_test.exs index f2a694479..5b8f82e75 100644 --- a/test/plausible_web/controllers/api/stats_controller/sources_test.exs +++ b/test/plausible_web/controllers/api/stats_controller/sources_test.exs @@ -61,6 +61,13 @@ defmodule PlausibleWeb.Api.StatsController.SourcesTest do "meta.value": ["John Doe"], timestamp: ~N[2021-01-01 00:00:00] ), + build(:pageview, + referrer_source: "Google", + referrer: "google.com", + "meta.key": ["author"], + "meta.value": ["John Doe"], + timestamp: ~N[2021-01-01 00:00:00] + ), build(:pageview, referrer_source: "Facebook", referrer: "facebook.com", @@ -77,7 +84,8 @@ defmodule PlausibleWeb.Api.StatsController.SourcesTest do ) assert json_response(conn, 200) == [ - %{"name" => "Google", "visitors" => 1} + %{"name" => "Google", "visitors" => 2}, + %{"name" => "DuckDuckGo", "visitors" => 1} ] end @@ -107,9 +115,16 @@ defmodule PlausibleWeb.Api.StatsController.SourcesTest do "meta.value": ["other"], timestamp: ~N[2021-01-01 00:00:00] ), + build(:pageview, + referrer_source: "Google", + referrer: "google.com", + timestamp: ~N[2021-01-01 00:00:00] + ), build(:pageview, referrer_source: "Facebook", referrer: "facebook.com", + "meta.key": ["author"], + "meta.value": ["John Doe"], timestamp: ~N[2021-01-01 00:00:00] ) ]) @@ -123,8 +138,8 @@ defmodule PlausibleWeb.Api.StatsController.SourcesTest do ) assert json_response(conn, 200) == [ - %{"name" => "Facebook", "visitors" => 1}, - %{"name" => "Google", "visitors" => 1} + %{"name" => "Google", "visitors" => 2}, + %{"name" => "DuckDuckGo", "visitors" => 1} ] end @@ -152,6 +167,11 @@ defmodule PlausibleWeb.Api.StatsController.SourcesTest do "meta.value": ["other"], timestamp: ~N[2021-01-01 00:00:00] ), + build(:pageview, + referrer_source: "Facebook", + referrer: "facebook.com", + timestamp: ~N[2021-01-01 00:00:00] + ), build(:pageview, referrer_source: "Facebook", referrer: "facebook.com", @@ -168,7 +188,8 @@ defmodule PlausibleWeb.Api.StatsController.SourcesTest do ) assert json_response(conn, 200) == [ - %{"name" => "Facebook", "visitors" => 1} + %{"name" => "Facebook", "visitors" => 2}, + %{"name" => "DuckDuckGo", "visitors" => 1} ] end @@ -198,6 +219,13 @@ defmodule PlausibleWeb.Api.StatsController.SourcesTest do "meta.value": ["other"], timestamp: ~N[2021-01-01 00:00:00] ), + build(:pageview, + referrer_source: "Google", + referrer: "google.com", + "meta.key": ["author"], + "meta.value": ["another"], + timestamp: ~N[2021-01-01 00:00:00] + ), build(:pageview, referrer_source: "Facebook", referrer: "facebook.com", @@ -214,7 +242,8 @@ defmodule PlausibleWeb.Api.StatsController.SourcesTest do ) assert json_response(conn, 200) == [ - %{"name" => "Google", "visitors" => 1} + %{"name" => "Google", "visitors" => 2}, + %{"name" => "DuckDuckGo", "visitors" => 1} ] end @@ -442,7 +471,7 @@ defmodule PlausibleWeb.Api.StatsController.SourcesTest do test "shows sources for a page", %{conn: conn, site: site} do populate_stats(site, [ build(:pageview, pathname: "/page1", referrer_source: "Google"), - build(:pageview, pathname: "/page2", referrer_source: "Google"), + build(:pageview, pathname: "/page1", referrer_source: "Google"), build(:pageview, user_id: 1, pathname: "/page2", referrer_source: "DuckDuckGo"), build(:pageview, user_id: 1, pathname: "/page1", referrer_source: "DuckDuckGo") ]) @@ -451,7 +480,8 @@ defmodule PlausibleWeb.Api.StatsController.SourcesTest do conn = get(conn, "/api/stats/#{site.domain}/sources?filters=#{filters}") assert json_response(conn, 200) == [ - %{"name" => "Google", "visitors" => 1} + %{"name" => "Google", "visitors" => 2}, + %{"name" => "DuckDuckGo", "visitors" => 1} ] end end