mirror of
https://github.com/plausible/analytics.git
synced 2024-12-23 09:33:19 +03:00
Refactor spike detection top sources query (#3770)
* ORDER BY referrer_source for spikes job. This is more consistent with the rest of the queries.
* Refactor top_sources -> top_sources_for_spike
* Remove more dead code
* Remove unused arguments
* Remove unused select arguments
* Add a test for top_sources_for_spike
This commit is contained in:
parent
d1fe184cb7
commit
f3509f2a17
@ -9,8 +9,6 @@ defmodule Plausible.Stats.Clickhouse do
|
|||||||
alias Plausible.Stats.Query
|
alias Plausible.Stats.Query
|
||||||
alias Plausible.Timezones
|
alias Plausible.Timezones
|
||||||
|
|
||||||
@no_ref "Direct / None"
|
|
||||||
|
|
||||||
@spec pageview_start_date_local(Plausible.Site.t()) :: Date.t() | nil
|
@spec pageview_start_date_local(Plausible.Site.t()) :: Date.t() | nil
|
||||||
def pageview_start_date_local(site) do
|
def pageview_start_date_local(site) do
|
||||||
datetime =
|
datetime =
|
||||||
@ -83,89 +81,32 @@ defmodule Plausible.Stats.Clickhouse do
|
|||||||
|
|
||||||
def usage_breakdown([], _date_range), do: {0, 0}
|
def usage_breakdown([], _date_range), do: {0, 0}
|
||||||
|
|
||||||
def top_sources(site, query, limit, page, show_noref \\ false, include_details) do
|
def top_sources_for_spike(site, query, limit, page) do
|
||||||
offset = (page - 1) * limit
|
offset = (page - 1) * limit
|
||||||
|
|
||||||
|
{first_datetime, last_datetime} = utc_boundaries(query, site)
|
||||||
|
|
||||||
referrers =
|
referrers =
|
||||||
from(s in base_session_query(site, query),
|
from(s in "sessions_v2",
|
||||||
|
select: %{
|
||||||
|
name: s.referrer_source,
|
||||||
|
count: uniq(s.user_id)
|
||||||
|
},
|
||||||
|
where: s.site_id == ^site.id,
|
||||||
|
# Note: This query intentionally uses session end timestamp to get currently active users
|
||||||
|
where: s.timestamp >= ^first_datetime and s.start < ^last_datetime,
|
||||||
|
where: s.referrer_source != "",
|
||||||
group_by: s.referrer_source,
|
group_by: s.referrer_source,
|
||||||
order_by: [desc: uniq(s.user_id), asc: fragment("min(start)")],
|
order_by: [desc: uniq(s.user_id), asc: s.referrer_source],
|
||||||
limit: ^limit,
|
limit: ^limit,
|
||||||
offset: ^offset
|
offset: ^offset
|
||||||
)
|
)
|
||||||
|> filter_converted_sessions(site, query)
|
|
||||||
|
|
||||||
referrers =
|
on_full_build do
|
||||||
if show_noref do
|
referrers = Plausible.Stats.Sampling.add_query_hint(referrers, 10_000_000)
|
||||||
referrers
|
|
||||||
else
|
|
||||||
from(s in referrers, where: s.referrer_source != "")
|
|
||||||
end
|
|
||||||
|
|
||||||
referrers = apply_page_as_entry_page(referrers, site, query)
|
|
||||||
|
|
||||||
referrers =
|
|
||||||
if include_details do
|
|
||||||
from(
|
|
||||||
s in referrers,
|
|
||||||
select: %{
|
|
||||||
name:
|
|
||||||
fragment(
|
|
||||||
"if(empty(?), ?, ?) as name",
|
|
||||||
s.referrer_source,
|
|
||||||
@no_ref,
|
|
||||||
s.referrer_source
|
|
||||||
),
|
|
||||||
url: fragment("any(?)", s.referrer),
|
|
||||||
count: uniq(s.user_id),
|
|
||||||
bounce_rate: bounce_rate(),
|
|
||||||
visit_duration: visit_duration()
|
|
||||||
}
|
|
||||||
)
|
|
||||||
else
|
|
||||||
from(
|
|
||||||
s in referrers,
|
|
||||||
select: %{
|
|
||||||
name:
|
|
||||||
fragment(
|
|
||||||
"if(empty(?), ?, ?) as name",
|
|
||||||
s.referrer_source,
|
|
||||||
@no_ref,
|
|
||||||
s.referrer_source
|
|
||||||
),
|
|
||||||
url: fragment("any(?)", s.referrer),
|
|
||||||
count: uniq(s.user_id)
|
|
||||||
}
|
|
||||||
)
|
|
||||||
end
|
end
|
||||||
|
|
||||||
ClickhouseRepo.all(referrers)
|
ClickhouseRepo.all(referrers)
|
||||||
|> Enum.map(fn ref ->
|
|
||||||
Map.update(ref, :url, nil, fn url -> url && URI.parse("http://" <> url).host end)
|
|
||||||
end)
|
|
||||||
end
|
|
||||||
|
|
||||||
defp filter_converted_sessions(db_query, site, query) do
|
|
||||||
goal = query.filters["goal"]
|
|
||||||
page = query.filters[:page]
|
|
||||||
|
|
||||||
if is_binary(goal) || is_binary(page) do
|
|
||||||
converted_sessions =
|
|
||||||
from(e in base_query(site, query),
|
|
||||||
select: %{session_id: fragment("DISTINCT ?", e.session_id)}
|
|
||||||
)
|
|
||||||
|
|
||||||
from(s in db_query,
|
|
||||||
join: cs in subquery(converted_sessions),
|
|
||||||
on: s.session_id == cs.session_id
|
|
||||||
)
|
|
||||||
else
|
|
||||||
db_query
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
defp apply_page_as_entry_page(db_query, _site, query) do
|
|
||||||
include_path_filter_entry(db_query, query.filters[:page])
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def current_visitors(site, query) do
|
def current_visitors(site, query) do
|
||||||
@ -299,128 +240,6 @@ defmodule Plausible.Stats.Clickhouse do
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
defp base_session_query(site, query) do
|
|
||||||
{first_datetime, last_datetime} = utc_boundaries(query, site)
|
|
||||||
|
|
||||||
q =
|
|
||||||
from(s in "sessions_v2",
|
|
||||||
where: s.site_id == ^site.id,
|
|
||||||
where: s.timestamp >= ^first_datetime and s.start < ^last_datetime
|
|
||||||
)
|
|
||||||
|
|
||||||
on_full_build do
|
|
||||||
q = Plausible.Stats.Sampling.add_query_hint(q, 10_000_000)
|
|
||||||
end
|
|
||||||
|
|
||||||
q =
|
|
||||||
if query.filters["source"] do
|
|
||||||
source = query.filters["source"]
|
|
||||||
source = if source == @no_ref, do: "", else: source
|
|
||||||
from(s in q, where: s.referrer_source == ^source)
|
|
||||||
else
|
|
||||||
q
|
|
||||||
end
|
|
||||||
|
|
||||||
q =
|
|
||||||
if query.filters["screen"] do
|
|
||||||
size = query.filters["screen"]
|
|
||||||
from(s in q, where: s.screen_size == ^size)
|
|
||||||
else
|
|
||||||
q
|
|
||||||
end
|
|
||||||
|
|
||||||
q =
|
|
||||||
if query.filters["browser"] do
|
|
||||||
browser = query.filters["browser"]
|
|
||||||
from(s in q, where: s.browser == ^browser)
|
|
||||||
else
|
|
||||||
q
|
|
||||||
end
|
|
||||||
|
|
||||||
q =
|
|
||||||
if query.filters["browser_version"] do
|
|
||||||
version = query.filters["browser_version"]
|
|
||||||
from(s in q, where: s.browser_version == ^version)
|
|
||||||
else
|
|
||||||
q
|
|
||||||
end
|
|
||||||
|
|
||||||
q =
|
|
||||||
if query.filters["os"] do
|
|
||||||
os = query.filters["os"]
|
|
||||||
from(s in q, where: s.operating_system == ^os)
|
|
||||||
else
|
|
||||||
q
|
|
||||||
end
|
|
||||||
|
|
||||||
q =
|
|
||||||
if query.filters["os_version"] do
|
|
||||||
version = query.filters["os_version"]
|
|
||||||
from(s in q, where: s.operating_system_version == ^version)
|
|
||||||
else
|
|
||||||
q
|
|
||||||
end
|
|
||||||
|
|
||||||
q =
|
|
||||||
if query.filters["country"] do
|
|
||||||
country = query.filters["country"]
|
|
||||||
from(s in q, where: s.country_code == ^country)
|
|
||||||
else
|
|
||||||
q
|
|
||||||
end
|
|
||||||
|
|
||||||
q =
|
|
||||||
if query.filters["utm_medium"] do
|
|
||||||
utm_medium = query.filters["utm_medium"]
|
|
||||||
from(s in q, where: s.utm_medium == ^utm_medium)
|
|
||||||
else
|
|
||||||
q
|
|
||||||
end
|
|
||||||
|
|
||||||
q =
|
|
||||||
if query.filters["utm_source"] do
|
|
||||||
utm_source = query.filters["utm_source"]
|
|
||||||
from(s in q, where: s.utm_source == ^utm_source)
|
|
||||||
else
|
|
||||||
q
|
|
||||||
end
|
|
||||||
|
|
||||||
q =
|
|
||||||
if query.filters["utm_campaign"] do
|
|
||||||
utm_campaign = query.filters["utm_campaign"]
|
|
||||||
from(s in q, where: s.utm_campaign == ^utm_campaign)
|
|
||||||
else
|
|
||||||
q
|
|
||||||
end
|
|
||||||
|
|
||||||
q =
|
|
||||||
if query.filters["utm_content"] do
|
|
||||||
utm_content = query.filters["utm_content"]
|
|
||||||
from(s in q, where: s.utm_content == ^utm_content)
|
|
||||||
else
|
|
||||||
q
|
|
||||||
end
|
|
||||||
|
|
||||||
q =
|
|
||||||
if query.filters["utm_term"] do
|
|
||||||
utm_term = query.filters["utm_term"]
|
|
||||||
from(s in q, where: s.utm_term == ^utm_term)
|
|
||||||
else
|
|
||||||
q
|
|
||||||
end
|
|
||||||
|
|
||||||
q = include_path_filter_entry(q, query.filters["entry_page"])
|
|
||||||
|
|
||||||
q = include_path_filter_exit(q, query.filters["exit_page"])
|
|
||||||
|
|
||||||
if query.filters["referrer"] do
|
|
||||||
ref = query.filters["referrer"]
|
|
||||||
from(s in q, where: s.referrer == ^ref)
|
|
||||||
else
|
|
||||||
q
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
defp base_query_bare(site, query) do
|
defp base_query_bare(site, query) do
|
||||||
{first_datetime, last_datetime} = utc_boundaries(query, site)
|
{first_datetime, last_datetime} = utc_boundaries(query, site)
|
||||||
|
|
||||||
@ -672,52 +491,6 @@ defmodule Plausible.Stats.Clickhouse do
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
defp include_path_filter_entry(db_query, path) do
|
|
||||||
if path do
|
|
||||||
{negated, path} = check_negated_filter(path)
|
|
||||||
{contains_regex, path_regex} = convert_path_regex(path)
|
|
||||||
|
|
||||||
if contains_regex do
|
|
||||||
if negated do
|
|
||||||
from(e in db_query, where: fragment("not(match(?, ?))", e.entry_page, ^path_regex))
|
|
||||||
else
|
|
||||||
from(e in db_query, where: fragment("match(?, ?)", e.entry_page, ^path_regex))
|
|
||||||
end
|
|
||||||
else
|
|
||||||
if negated do
|
|
||||||
from(e in db_query, where: e.entry_page != ^path)
|
|
||||||
else
|
|
||||||
from(e in db_query, where: e.entry_page == ^path)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
else
|
|
||||||
db_query
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
defp include_path_filter_exit(db_query, path) do
|
|
||||||
if path do
|
|
||||||
{negated, path} = check_negated_filter(path)
|
|
||||||
{contains_regex, path_regex} = convert_path_regex(path)
|
|
||||||
|
|
||||||
if contains_regex do
|
|
||||||
if negated do
|
|
||||||
from(e in db_query, where: fragment("not(match(?, ?))", e.exit_page, ^path_regex))
|
|
||||||
else
|
|
||||||
from(e in db_query, where: fragment("match(?, ?)", e.exit_page, ^path_regex))
|
|
||||||
end
|
|
||||||
else
|
|
||||||
if negated do
|
|
||||||
from(e in db_query, where: e.exit_page != ^path)
|
|
||||||
else
|
|
||||||
from(e in db_query, where: e.exit_page == ^path)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
else
|
|
||||||
db_query
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
defp beginning_of_time(candidate, site_creation_date) do
|
defp beginning_of_time(candidate, site_creation_date) do
|
||||||
if Timex.after?(site_creation_date, candidate) do
|
if Timex.after?(site_creation_date, candidate) do
|
||||||
site_creation_date
|
site_creation_date
|
||||||
|
@ -23,7 +23,7 @@ defmodule Plausible.Workers.SpikeNotifier do
|
|||||||
current_visitors = clickhouse.current_visitors(notification.site, query)
|
current_visitors = clickhouse.current_visitors(notification.site, query)
|
||||||
|
|
||||||
if current_visitors >= notification.threshold do
|
if current_visitors >= notification.threshold do
|
||||||
sources = clickhouse.top_sources(notification.site, query, 3, 1, true)
|
sources = clickhouse.top_sources_for_spike(notification.site, query, 3, 1)
|
||||||
notify(notification, current_visitors, sources)
|
notify(notification, current_visitors, sources)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -219,4 +219,39 @@ defmodule Plausible.Stats.ClickhouseTest do
|
|||||||
} = Clickhouse.last_24h_visitors_hourly_intervals([site], fixed_now)[site.domain]
|
} = Clickhouse.last_24h_visitors_hourly_intervals([site], fixed_now)[site.domain]
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
describe "top_sources_for_spike/4" do
|
||||||
|
test "gets named sources" do
|
||||||
|
site = insert(:site)
|
||||||
|
query = Plausible.Stats.Query.from(site, %{"period" => "all"})
|
||||||
|
|
||||||
|
populate_stats(site, [
|
||||||
|
build(:pageview,
|
||||||
|
pathname: "/",
|
||||||
|
referrer_source: "Twitter"
|
||||||
|
),
|
||||||
|
build(:pageview,
|
||||||
|
pathname: "/plausible.io"
|
||||||
|
),
|
||||||
|
build(:pageview,
|
||||||
|
pathname: "/plausible.io",
|
||||||
|
referrer_source: "Google"
|
||||||
|
),
|
||||||
|
build(:pageview,
|
||||||
|
pathname: "/plausible.io",
|
||||||
|
referrer_source: "Google"
|
||||||
|
),
|
||||||
|
build(:pageview,
|
||||||
|
pathname: "/plausible.io",
|
||||||
|
referrer_source: "Bing"
|
||||||
|
)
|
||||||
|
])
|
||||||
|
|
||||||
|
assert [
|
||||||
|
%{count: 2, name: "Google"},
|
||||||
|
%{count: 1, name: "Bing"},
|
||||||
|
%{count: 1, name: "Twitter"}
|
||||||
|
] = Clickhouse.top_sources_for_spike(site, query, 5, 1)
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
@ -15,7 +15,7 @@ defmodule Plausible.Workers.SpikeNotifierTest do
|
|||||||
|
|
||||||
clickhouse_stub =
|
clickhouse_stub =
|
||||||
stub(Plausible.Stats.Clickhouse, :current_visitors, fn _site, _query -> 5 end)
|
stub(Plausible.Stats.Clickhouse, :current_visitors, fn _site, _query -> 5 end)
|
||||||
|> stub(:top_sources, fn _site, _query, _limit, _page, _show_noref -> [] end)
|
|> stub(:top_sources_for_spike, fn _site, _query, _limit, _page -> [] end)
|
||||||
|
|
||||||
SpikeNotifier.perform(nil, clickhouse_stub)
|
SpikeNotifier.perform(nil, clickhouse_stub)
|
||||||
|
|
||||||
@ -33,7 +33,7 @@ defmodule Plausible.Workers.SpikeNotifierTest do
|
|||||||
|
|
||||||
clickhouse_stub =
|
clickhouse_stub =
|
||||||
stub(Plausible.Stats.Clickhouse, :current_visitors, fn _site, _query -> 10 end)
|
stub(Plausible.Stats.Clickhouse, :current_visitors, fn _site, _query -> 10 end)
|
||||||
|> stub(:top_sources, fn _site, _query, _limit, _page, _show_noref -> [] end)
|
|> stub(:top_sources_for_spike, fn _site, _query, _limit, _page -> [] end)
|
||||||
|
|
||||||
SpikeNotifier.perform(nil, clickhouse_stub)
|
SpikeNotifier.perform(nil, clickhouse_stub)
|
||||||
|
|
||||||
@ -59,7 +59,7 @@ defmodule Plausible.Workers.SpikeNotifierTest do
|
|||||||
|
|
||||||
clickhouse_stub =
|
clickhouse_stub =
|
||||||
stub(Plausible.Stats.Clickhouse, :current_visitors, fn _site, _query -> 10 end)
|
stub(Plausible.Stats.Clickhouse, :current_visitors, fn _site, _query -> 10 end)
|
||||||
|> stub(:top_sources, fn _site, _query, _limit, _page, _show_noref -> [] end)
|
|> stub(:top_sources_for_spike, fn _site, _query, _limit, _page -> [] end)
|
||||||
|
|
||||||
SpikeNotifier.perform(nil, clickhouse_stub)
|
SpikeNotifier.perform(nil, clickhouse_stub)
|
||||||
|
|
||||||
@ -72,7 +72,7 @@ defmodule Plausible.Workers.SpikeNotifierTest do
|
|||||||
|
|
||||||
clickhouse_stub =
|
clickhouse_stub =
|
||||||
stub(Plausible.Stats.Clickhouse, :current_visitors, fn _site, _query -> 10 end)
|
stub(Plausible.Stats.Clickhouse, :current_visitors, fn _site, _query -> 10 end)
|
||||||
|> stub(:top_sources, fn _site, _query, _limit, _page, _show_noref -> [] end)
|
|> stub(:top_sources_for_spike, fn _site, _query, _limit, _page -> [] end)
|
||||||
|
|
||||||
SpikeNotifier.perform(nil, clickhouse_stub)
|
SpikeNotifier.perform(nil, clickhouse_stub)
|
||||||
|
|
||||||
@ -93,7 +93,7 @@ defmodule Plausible.Workers.SpikeNotifierTest do
|
|||||||
|
|
||||||
clickhouse_stub =
|
clickhouse_stub =
|
||||||
stub(Plausible.Stats.Clickhouse, :current_visitors, fn _site, _query -> 10 end)
|
stub(Plausible.Stats.Clickhouse, :current_visitors, fn _site, _query -> 10 end)
|
||||||
|> stub(:top_sources, fn _site, _query, _limit, _page, _show_noref -> [] end)
|
|> stub(:top_sources_for_spike, fn _site, _query, _limit, _page -> [] end)
|
||||||
|
|
||||||
SpikeNotifier.perform(nil, clickhouse_stub)
|
SpikeNotifier.perform(nil, clickhouse_stub)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user