Filter by hostnames (#3963)

* CH Migration: exit/entry hostnames in sessions_v2

* Leave only exit_page_hostname, we already record hostnames

* Use ClickHouse DDL in favour of ecto so that cluster is included

* Compress with ZSTD(3)

* Expose Hostname filter in the dashboard dropdown

* Add `exit_page_hostname` to ClickHouse `sessions_v2` schema

* Start tracking hostname changes in sessions

* Implement hostname filter suggestions

* Enable filtering by `event:hostname`

* Add tests for filtering by hostnames

* Ensure filter suggestions work for exit pages too

* Allow overriding hostnames with `send_pageview` mix task

* Remove `:window_time_on_page` flag

It seems that we can remove it after all?

* Initialize `experimental_hostname_filter` query parameter

* Rewrite cache store behaviour with regards to session hostnames

* Work around inconsistent session merging

So that `populate_stats` can get closer to actual ingestion

* Improve top stats test

* Make it possible to filter sessions by entry/exit hostnames

* Update pages tests

* Expose `experimental_hostname_filtering` temporarily in the UI

* Untested yet: also apply experimental filtering to sources

* Introduce `hostname_filter` feature flag

* Format

* Test top sources with hostname filter + experimental flag
This commit is contained in:
hq1 2024-04-04 10:48:30 +02:00 committed by GitHub
parent e6d83e946f
commit 6af80dd246
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
26 changed files with 921 additions and 147 deletions

View File

@ -44,6 +44,7 @@ export function serializeQuery(query, extraQuery = []) {
if (query.to) { queryObj.to = formatISO(query.to) }
if (query.filters) { queryObj.filters = serializeFilters(query.filters) }
if (query.experimental_session_count) { queryObj.experimental_session_count = query.experimental_session_count }
if (query.experimental_hostname_filter) { queryObj.experimental_hostname_filter = query.experimental_hostname_filter }
if (query.with_imported) { queryObj.with_imported = query.with_imported }
if (SHARED_LINK_AUTH) { queryObj.auth = SHARED_LINK_AUTH }

View File

@ -40,6 +40,7 @@ export function parseQuery(querystring, site) {
match_day_of_week: matchDayOfWeek == 'true',
with_imported: q.get('with_imported') ? q.get('with_imported') === 'true' : true,
experimental_session_count: q.get('experimental_session_count'),
experimental_hostname_filter: q.get('experimental_hostname_filter'),
filters: {
'goal': q.get('goal'),
'props': JSON.parse(q.get('props')),
@ -59,6 +60,7 @@ export function parseQuery(querystring, site) {
'region': q.get('region'),
'city': q.get('city'),
'page': q.get('page'),
'hostname': q.get('hostname'),
'entry_page': q.get('entry_page'),
'exit_page': q.get('exit_page')
}

View File

@ -1,3 +1,5 @@
const flags = JSON.parse(document.getElementById('stats-react-container').dataset.flags)
export const FILTER_GROUPS = {
'page': ['page', 'entry_page', 'exit_page'],
'source': ['source', 'referrer'],
@ -7,10 +9,12 @@ export const FILTER_GROUPS = {
'os': ['os', 'os_version'],
'utm': ['utm_medium', 'utm_source', 'utm_campaign', 'utm_term', 'utm_content'],
'goal': ['goal'],
'props': ['prop_key', 'prop_value']
'props': ['prop_key', 'prop_value'],
...(flags.hostname_filter ? { 'hostname': ['hostname', 'experimental_hostname_filter'] } : {})
}
export const NO_CONTAINS_OPERATOR = new Set(['goal', 'screen'].concat(FILTER_GROUPS['location']))
export const NO_CONTAINS_OPERATOR = new Set(['experimental_hostname_filter', 'goal', 'screen'].concat(FILTER_GROUPS['location']))
export const FILTER_OPERATIONS = {
isNot: 'is not',
@ -25,7 +29,7 @@ export const OPERATION_PREFIX = {
};
export function supportsIsNot(filterName) {
return !['goal', 'prop_key'].includes(filterName)
return !['goal', 'prop_key', 'experimental_hostname_filter'].includes(filterName)
}
export function isFreeChoiceFilter(filterName) {
@ -37,7 +41,7 @@ export function isFreeChoiceFilter(filterName) {
let NON_ESCAPED_PIPE_REGEX;
try {
NON_ESCAPED_PIPE_REGEX = new RegExp("(?<!\\\\)\\|", "g")
} catch(_e) {
} catch (_e) {
NON_ESCAPED_PIPE_REGEX = '|'
}
@ -64,19 +68,19 @@ export function parsePrefix(rawValue) {
.filter((clause) => !!clause)
.map((val) => val.replaceAll(ESCAPED_PIPE, '|'))
return {type, values}
return { type, values }
}
export function parseQueryPropsFilter(query) {
return Object.entries(query.filters['props']).map(([key, propVal]) => {
const {type, values} = parsePrefix(propVal)
const clauses = values.map(val => { return {value: val, label: val}})
const { type, values } = parsePrefix(propVal)
const clauses = values.map(val => { return { value: val, label: val } })
return { propKey: { label: key, value: key }, type, clauses }
})
}
export function parseQueryFilter(query, filter) {
const {type, values} = parsePrefix(query.filters[filter] || '')
const { type, values } = parsePrefix(query.filters[filter] || '')
let labels = values
@ -95,9 +99,9 @@ export function parseQueryFilter(query, filter) {
labels = rawLabel.split('|').filter(label => !!label)
}
const clauses = values.map((value, index) => { return {value, label: labels[index]}})
const clauses = values.map((value, index) => { return { value, label: labels[index] } })
return {type, clauses}
return { type, clauses }
}
export function isFilteringOnFixedValue(query, filter) {
@ -152,6 +156,8 @@ export const formattedFilters = {
'region': 'Region',
'city': 'City',
'page': 'Page',
'hostname': 'Hostname',
'experimental_hostname_filter': 'Treat hostname as entry/exit hostname',
'entry_page': 'Entry Page',
'exit_page': 'Exit Page'
'exit_page': 'Exit Page',
}

View File

@ -24,6 +24,7 @@ defmodule Mix.Tasks.SendPageview do
page: :string,
referrer: :string,
host: :string,
hostname: :string,
event: :string,
props: :string,
revenue_currency: :string,
@ -86,6 +87,7 @@ defmodule Mix.Tasks.SendPageview do
referrer = Keyword.get(opts, :referrer, @default_referrer)
event = Keyword.get(opts, :event, @default_event)
props = Keyword.get(opts, :props, @default_props)
hostname = Keyword.get(opts, :hostname, domain)
revenue =
if Keyword.get(opts, :revenue_currency) do
@ -97,7 +99,7 @@ defmodule Mix.Tasks.SendPageview do
%{
name: event,
url: "http://#{domain}#{page}",
url: "http://#{hostname}#{page}",
domain: domain,
referrer: referrer,
props: props,

View File

@ -65,4 +65,28 @@ defmodule Plausible.ClickhouseEventV2 do
)
|> validate_required([:name, :site_id, :hostname, :pathname, :user_id, :timestamp])
end
# Session fields that merge_session/2 copies from a session onto an event.
@session_properties [
:session_id,
:referrer,
:referrer_source,
:utm_medium,
:utm_source,
:utm_campaign,
:utm_content,
:utm_term,
:country_code,
:subdivision1_code,
:subdivision2_code,
:city_geoname_id,
:screen_size,
:operating_system,
:operating_system_version,
:browser,
:browser_version
]
@doc """
Copies the fields listed in `@session_properties` from `session` onto `event`.

Only keys actually present in `session` are copied; every other field of
`event` is left untouched.
"""
def merge_session(%__MODULE__{} = event, session) do
  session_fields = Map.take(session, @session_properties)
  Map.merge(event, session_fields)
end
end

View File

@ -43,6 +43,7 @@ defmodule Plausible.ClickhouseSessionV2 do
field :is_bounce, BoolUInt8
field :entry_page, :string
field :exit_page, :string
field :exit_page_hostname, :string
field :pageviews, Ch, type: "Int32"
field :events, Ch, type: "Int32"
field :sign, Ch, type: "Int8"

View File

@ -44,26 +44,6 @@ defmodule Plausible.Ingestion.Event do
changeset: %Ecto.Changeset{}
}
@session_properties [
:session_id,
:referrer,
:referrer_source,
:utm_medium,
:utm_source,
:utm_campaign,
:utm_content,
:utm_term,
:country_code,
:subdivision1_code,
:subdivision2_code,
:city_geoname_id,
:screen_size,
:operating_system,
:operating_system_version,
:browser,
:browser_version
]
@spec build_and_buffer(Request.t()) :: {:ok, %{buffered: [t()], dropped: [t()]}}
def build_and_buffer(%Request{domains: domains} = request) do
processed_events =
@ -359,8 +339,7 @@ defmodule Plausible.Ingestion.Event do
%{
event
| clickhouse_event:
Map.merge(event.clickhouse_event, Map.take(session, @session_properties))
| clickhouse_event: ClickhouseEventV2.merge_session(event.clickhouse_event, session)
}
end

View File

@ -47,7 +47,14 @@ defmodule Plausible.Session.CacheStore do
do: event.pathname,
else: session.entry_page
),
hostname:
if(event.name == "pageview" and session.hostname == "",
do: event.hostname,
else: session.hostname
),
exit_page: if(event.name == "pageview", do: event.pathname, else: session.exit_page),
exit_page_hostname:
if(event.name == "pageview", do: event.hostname, else: session.exit_page_hostname),
is_bounce: false,
duration: Timex.diff(event.timestamp, session.start, :second) |> abs,
pageviews:
@ -60,11 +67,12 @@ defmodule Plausible.Session.CacheStore do
%Plausible.ClickhouseSessionV2{
sign: 1,
session_id: Plausible.ClickhouseSessionV2.random_uint64(),
hostname: event.hostname,
hostname: if(event.name == "pageview", do: event.hostname, else: ""),
site_id: event.site_id,
user_id: event.user_id,
entry_page: if(event.name == "pageview", do: event.pathname, else: ""),
exit_page: if(event.name == "pageview", do: event.pathname, else: ""),
exit_page_hostname: if(event.name == "pageview", do: event.hostname, else: ""),
is_bounce: true,
duration: 0,
pageviews: if(event.name == "pageview", do: 1, else: 0),

View File

@ -48,7 +48,10 @@ defmodule Plausible.Stats.Base do
q = Plausible.Stats.Sampling.add_query_hint(q, query)
end
q = from(e in q, where: ^dynamic_filter_condition(query, "event:page", :pathname))
q =
q
|> where([e], ^dynamic_filter_condition(query, "event:page", :pathname))
|> where([e], ^dynamic_filter_condition(query, "event:hostname", :hostname))
q =
case query.filters["event:name"] do
@ -130,7 +133,8 @@ defmodule Plausible.Stats.Base do
"os_version" => "operating_system_version",
"country" => "country_code",
"region" => "subdivision1_code",
"city" => "city_geoname_id"
"city" => "city_geoname_id",
"entry_page_hostname" => "hostname"
}
def query_sessions(site, query) do

View File

@ -273,94 +273,6 @@ defmodule Plausible.Stats.Breakdown do
end
defp breakdown_time_on_page(site, query, pages) do
if FunWithFlags.enabled?(:window_time_on_page) do
window_breakdown_time_on_page(site, query, pages)
else
neighbor_breakdown_time_on_page(site, query, pages)
end
end
defp neighbor_breakdown_time_on_page(site, query, pages) do
q =
from(
e in base_event_query(site, Query.remove_event_filters(query, [:page, :props])),
select: {
fragment("? as p", e.pathname),
fragment("? as t", e.timestamp),
fragment("? as s", e.session_id)
},
order_by: [e.session_id, e.timestamp]
)
{base_query_raw, base_query_raw_params} = ClickhouseRepo.to_sql(:all, q)
select =
if query.include_imported do
"sum(td), count(case when p2 != p then 1 end)"
else
"round(sum(td)/count(case when p2 != p then 1 end))"
end
pages_idx = length(base_query_raw_params)
params = base_query_raw_params ++ [pages]
time_query = "
SELECT
p,
#{select}
FROM
(SELECT
p,
p2,
sum(t2-t) as td
FROM
(SELECT
*,
neighbor(t, 1) as t2,
neighbor(p, 1) as p2,
neighbor(s, 1) as s2
FROM (#{base_query_raw}))
WHERE s=s2 AND p IN {$#{pages_idx}:Array(String)}
GROUP BY p,p2,s)
GROUP BY p"
{:ok, res} = ClickhouseRepo.query(time_query, params)
if query.include_imported do
# Imported page views have pre-calculated values
res =
res.rows
|> Enum.map(fn [page, time, visits] -> {page, {time, visits}} end)
|> Enum.into(%{})
from(
i in "imported_pages",
group_by: i.page,
where: i.site_id == ^site.id,
where: i.date >= ^query.date_range.first and i.date <= ^query.date_range.last,
where: i.page in ^pages,
select: %{
page: i.page,
pageviews: fragment("sum(?) - sum(?)", i.pageviews, i.exits),
time_on_page: sum(i.time_on_page)
}
)
|> ClickhouseRepo.all()
|> Enum.reduce(res, fn %{page: page, pageviews: pageviews, time_on_page: time}, res ->
{restime, resviews} = Map.get(res, page, {0, 0})
Map.put(res, page, {restime + time, resviews + pageviews})
end)
|> Enum.map(fn
{page, {_, 0}} -> {page, nil}
{page, {time, pageviews}} -> {page, time / pageviews}
end)
|> Enum.into(%{})
else
res.rows |> Enum.map(fn [page, time] -> {page, time} end) |> Enum.into(%{})
end
end
defp window_breakdown_time_on_page(site, query, pages) do
import Ecto.Query
windowed_pages_q =

View File

@ -118,6 +118,10 @@ defmodule Plausible.Stats.FilterSuggestions do
end)
end
# The experimental hostname toggle only has two fixed suggestions, so the
# site, the query and the search term are all ignored.
def filter_suggestions(_site, _query, "experimental_hostname_filter", _filter_search) do
  wrap_suggestions(~w(true false))
end
def filter_suggestions(site, _query, "goal", filter_search) do
site
|> Plausible.Goals.for_site()
@ -208,11 +212,12 @@ defmodule Plausible.Stats.FilterSuggestions do
"operating_system" -> :operating_system
"operating_system_version" -> :operating_system_version
"screen_size" -> :screen_size
"hostname" -> :hostname
_ -> :unknown
end
q =
if(filter_name == :pathname,
if(filter_name == :pathname or filter_name == :hostname,
do: base_event_query(site, query),
else: query_sessions(site, query)
)
@ -230,6 +235,12 @@ defmodule Plausible.Stats.FilterSuggestions do
where: fragment("? ilike ?", e.pathname, ^filter_query)
)
:hostname ->
from(e in q,
select: e.hostname,
where: fragment("? ilike ?", e.hostname, ^filter_query)
)
:entry_page ->
from(e in q,
select: e.entry_page,

View File

@ -31,7 +31,7 @@ defmodule Plausible.Stats.Filters.DashboardFilterParser do
{is_negated, val} = parse_negated_prefix(val)
{is_contains, val} = parse_contains_prefix(val)
is_list = list_expression?(val)
is_wildcard = String.contains?(key, ["page", "goal"]) && wildcard_expression?(val)
is_wildcard = String.contains?(key, ["page", "goal", "hostname"]) && wildcard_expression?(val)
val = if is_list, do: parse_member_list(val), else: remove_escape_chars(val)
val = if key == "goal", do: wrap_goal_value(val), else: val

View File

@ -23,11 +23,13 @@ defmodule Plausible.Stats.Filters do
:region,
:city,
:entry_page,
:exit_page
:exit_page,
:entry_page_hostname,
:exit_page_hostname
]
def visit_props(), do: @visit_props |> Enum.map(&to_string/1)
@event_props [:name, :page, :goal]
@event_props [:name, :page, :goal, :hostname]
def event_props(), do: @event_props |> Enum.map(&to_string/1)

View File

@ -9,7 +9,8 @@ defmodule Plausible.Stats.Query do
imported_data_requested: false,
include_imported: false,
now: nil,
experimental_session_count?: false
experimental_session_count?: false,
experimental_hostname_filter?: false
require OpenTelemetry.Tracer, as: Tracer
alias Plausible.Stats.{Filters, Interval}
@ -22,7 +23,7 @@ defmodule Plausible.Stats.Query do
query =
__MODULE__
|> struct!(now: now)
|> put_experimental_session_count(params)
|> put_experimental_flags(params)
|> put_period(site, params)
|> put_interval(params)
|> put_parsed_filters(params)
@ -36,12 +37,14 @@ defmodule Plausible.Stats.Query do
query
end
defp put_experimental_session_count(query, params) do
if Map.get(params, "experimental_session_count") == "true" do
struct!(query, experimental_session_count?: true)
else
query
end
# Switches on the experimental query fields requested through `params`.
#
# Each supported parameter maps to one boolean field of the query struct.
# A field is set to `true` only when its parameter is exactly "true";
# otherwise the field keeps whatever value `query` already carries.
defp put_experimental_flags(query, params) do
  enabled_flags =
    for {param, flag} <- [
          {"experimental_session_count", :experimental_session_count?},
          {"experimental_hostname_filter", :experimental_hostname_filter?}
        ],
        Map.get(params, param) == "true",
        do: {flag, true}

  # struct!/2 instead of Map.put/3: a flag name that is not a field of the
  # struct raises instead of silently adding a stray key to it.
  struct!(query, enabled_flags)
end
defp put_period(query, site, %{"period" => "realtime"}) do

View File

@ -423,6 +423,13 @@ defmodule PlausibleWeb.Api.StatsController do
query = Query.from(site, params)
pagination = parse_pagination(params)
query =
if query.experimental_hostname_filter? and query.filters["event:hostname"] do
Query.put_filter(query, "visit:entry_page_hostname", query.filters["event:hostname"])
else
query
end
extra_metrics =
if params["detailed"], do: [:bounce_rate, :visit_duration], else: []
@ -728,6 +735,13 @@ defmodule PlausibleWeb.Api.StatsController do
pagination = parse_pagination(params)
metrics = breakdown_metrics(query, [:visits, :visit_duration])
query =
if query.experimental_hostname_filter? and query.filters["event:hostname"] do
Query.put_filter(query, "visit:entry_page_hostname", query.filters["event:hostname"])
else
query
end
entry_pages =
Stats.breakdown(site, query, "visit:entry_page", metrics, pagination)
|> transform_keys(%{entry_page: :name})
@ -758,6 +772,13 @@ defmodule PlausibleWeb.Api.StatsController do
{limit, page} = parse_pagination(params)
metrics = breakdown_metrics(query, [:visits])
query =
if query.experimental_hostname_filter? and query.filters["event:hostname"] do
Query.put_filter(query, "visit:exit_page_hostname", query.filters["event:hostname"])
else
query
end
exit_pages =
Stats.breakdown(site, query, "visit:exit_page", metrics, {limit, page})
|> add_exit_rate(site, query, limit)

View File

@ -70,7 +70,7 @@ defmodule PlausibleWeb.StatsController do
native_stats_start_date: NaiveDateTime.to_date(site.native_stats_start_at),
title: title(conn, site),
demo: demo,
flags: get_flags(conn.assigns[:current_user]),
flags: get_flags(conn.assigns[:current_user], site),
is_dbip: is_dbip(),
dogfood_page_path: dogfood_page_path,
load_dashboard_js: true
@ -330,7 +330,7 @@ defmodule PlausibleWeb.StatsController do
embedded: conn.params["embed"] == "true",
background: conn.params["background"],
theme: conn.params["theme"],
flags: get_flags(conn.assigns[:current_user]),
flags: get_flags(conn.assigns[:current_user], shared_link.site),
is_dbip: is_dbip(),
load_dashboard_js: true
)
@ -348,8 +348,12 @@ defmodule PlausibleWeb.StatsController do
defp shared_link_cookie_name(slug), do: "shared-link-" <> slug
defp get_flags(_user) do
%{}
# Builds the `flags` map for the dashboard: `hostname_filter` is on when the
# :hostname_filter flag is enabled for either the user or the site.
defp get_flags(user, site) do
  hostname_filter_enabled? =
    Enum.any?([user, site], fn actor ->
      FunWithFlags.enabled?(:hostname_filter, for: actor)
    end)

  %{hostname_filter: hostname_filter_enabled?}
end
defp is_dbip() do

View File

@ -12,6 +12,7 @@
FunWithFlags.enable(:imports_exports)
FunWithFlags.enable(:shield_pages)
FunWithFlags.enable(:hostname_filter)
user = Plausible.Factory.insert(:user, email: "user@plausible.test", password: "plausible")
@ -161,7 +162,7 @@ native_stats_range
[
site_id: site.id,
hostname: site.domain,
hostname: Enum.random(["en.dummy.site", "es.dummy.site", "dummy.site"]),
timestamp: put_random_time.(date, index),
referrer_source: Enum.random(["", "Facebook", "Twitter", "DuckDuckGo", "Google"]),
browser: Enum.random(["Edge", "Chrome", "Safari", "Firefox", "Vivaldi"]),
@ -196,7 +197,7 @@ native_stats_range
[
name: goal4.event_name,
site_id: site.id,
hostname: site.domain,
hostname: Enum.random(["en.dummy.site", "es.dummy.site", "dummy.site"]),
timestamp: put_random_time.(date, index),
referrer_source: Enum.random(["", "Facebook", "Twitter", "DuckDuckGo", "Google"]),
browser: Enum.random(["Edge", "Chrome", "Safari", "Firefox", "Vivaldi"]),

View File

@ -89,6 +89,131 @@ defmodule Plausible.Session.CacheStoreTest do
assert session.events == 2
end
describe "hostname-related attributes" do
test "initial for non-pageview" do
site_id = new_site_id()
event =
build(:event,
name: "custom_event",
site_id: site_id,
pathname: "/path/1",
hostname: "example.com"
)
flush([event])
session = get_session(site_id)
assert session.hostname == ""
assert session.exit_page_hostname == ""
end
test "initial for pageview" do
site_id = new_site_id()
event =
build(:event,
name: "pageview",
site_id: site_id,
pathname: "/path/1",
hostname: "example.com"
)
flush([event])
session = get_session(site_id)
assert session.hostname == "example.com"
assert session.exit_page_hostname == "example.com"
end
test "subsequent pageview after custom_event" do
site_id = new_site_id()
events = [
build(:event,
name: "custom_event",
site_id: site_id,
pathname: "/path/1",
hostname: "whatever.example.com",
timestamp: Timex.shift(Timex.now(), seconds: -5),
user_id: 1
),
build(:event,
name: "pageview",
site_id: site_id,
pathname: "/path/2",
hostname: "example.com",
user_id: 1
)
]
flush(events)
session = get_session(site_id)
assert session.hostname == "example.com"
assert session.exit_page_hostname == "example.com"
end
test "hostname change" do
site_id = new_site_id()
events = [
build(:event,
name: "pageview",
site_id: site_id,
pathname: "/landing",
hostname: "example.com",
timestamp: Timex.shift(Timex.now(), seconds: -5),
user_id: 1
),
build(:event,
name: "pageview",
site_id: site_id,
pathname: "/post/1",
hostname: "blog.example.com",
user_id: 1
)
]
flush(events)
session = get_session(site_id)
assert session.hostname == "example.com"
assert session.exit_page_hostname == "blog.example.com"
end
test "hostname change with custom event in the middle" do
site_id = new_site_id()
events = [
build(:event,
name: "pageview",
site_id: site_id,
pathname: "/landing",
hostname: "example.com",
timestamp: Timex.shift(Timex.now(), seconds: -5),
user_id: 1
),
build(:event,
name: "custom_event",
site_id: site_id,
pathname: "/path/1",
hostname: "analytics.example.com",
timestamp: Timex.shift(Timex.now(), seconds: -3),
user_id: 1
),
build(:event,
name: "pageview",
site_id: site_id,
pathname: "/post/1",
hostname: "blog.example.com",
user_id: 1
)
]
flush(events)
session = get_session(site_id)
assert session.hostname == "example.com"
assert session.exit_page_hostname == "blog.example.com"
end
end
test "initial pageview-specific attributes" do
site_id = new_site_id()
@ -231,7 +356,12 @@ defmodule Plausible.Session.CacheStoreTest do
end
defp get_session(site_id) do
session_q = from s in Plausible.ClickhouseSessionV2, where: s.site_id == ^site_id
session_q =
from s in Plausible.ClickhouseSessionV2,
where: s.site_id == ^site_id,
order_by: [desc: :timestamp],
limit: 1
Plausible.ClickhouseRepo.one!(session_q)
end
end

View File

@ -67,6 +67,9 @@ defmodule Plausible.Stats.DashboardFilterParserTest do
%{"props" => %{"cta" => "Top"}}
|> assert_parsed(%{"event:props:cta" => {:is, "Top"}})
%{"hostname" => "dummy.site"}
|> assert_parsed(%{"event:hostname" => {:is, "dummy.site"}})
end
end

View File

@ -918,7 +918,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do
timestamp: ~N[2021-01-01 05:00:00]
),
build(:pageview,
pathname: "/goobye",
pathname: "/goodbye",
timestamp: ~N[2021-01-01 00:00:00]
)
])
@ -944,6 +944,50 @@ defmodule PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do
}
end
test "can filter by hostname", %{
conn: conn,
site: site
} do
populate_stats(site, [
build(:pageview,
user_id: @user_id,
hostname: "landing.example.com",
timestamp: ~N[2021-01-01 00:00:01]
),
build(:pageview,
user_id: @user_id,
hostname: "example.com",
timestamp: ~N[2021-01-01 00:00:02]
),
build(:pageview,
user_id: @user_id,
hostname: "example.com",
timestamp: ~N[2021-01-01 00:00:06]
)
])
conn =
get(conn, "/api/v1/stats/timeseries", %{
"site_id" => site.domain,
"period" => "day",
"date" => "2021-01-01",
"filters" => "event:hostname==example.com",
"metrics" => "visitors,visits,pageviews,bounce_rate,visit_duration"
})
res =
json_response(conn, 200)["results"]
assert List.first(res) == %{
"bounce_rate" => 0,
"date" => "2021-01-01 00:00:00",
"pageviews" => 2,
"visit_duration" => 5,
"visitors" => 1,
"visits" => 1
}
end
test "can filter by event:name", %{conn: conn, site: site} do
populate_stats(site, [
build(:event,

View File

@ -25,6 +25,37 @@ defmodule PlausibleWeb.Api.StatsController.PagesTest do
]
end
test "returns top pages by visitors by hostname", %{conn: conn1, site: site} do
populate_stats(site, [
build(:pageview, pathname: "/", hostname: "a.example.com"),
build(:pageview, pathname: "/", hostname: "b.example.com"),
build(:pageview, pathname: "/", hostname: "d.example.com"),
build(:pageview, pathname: "/landing", hostname: "x.example.com", user_id: 123),
build(:pageview, pathname: "/register", hostname: "d.example.com", user_id: 123),
build(:pageview, pathname: "/register", hostname: "d.example.com", user_id: 123),
build(:pageview, pathname: "/register", hostname: "d.example.com"),
build(:pageview, pathname: "/contact", hostname: "e.example.com")
])
filters = Jason.encode!(%{"hostname" => "*.example.com"})
conn = get(conn1, "/api/stats/#{site.domain}/pages?period=day&filters=#{filters}")
assert json_response(conn, 200) == [
%{"visitors" => 3, "name" => "/"},
%{"visitors" => 2, "name" => "/register"},
%{"visitors" => 1, "name" => "/contact"},
%{"visitors" => 1, "name" => "/landing"}
]
filters = Jason.encode!(%{"hostname" => "d.example.com"})
conn = get(conn1, "/api/stats/#{site.domain}/pages?period=day&filters=#{filters}")
assert json_response(conn, 200) == [
%{"visitors" => 2, "name" => "/register"},
%{"visitors" => 1, "name" => "/"}
]
end
test "returns top pages with :is filter on custom pageview props", %{conn: conn, site: site} do
populate_stats(site, [
build(:pageview,
@ -888,6 +919,132 @@ defmodule PlausibleWeb.Api.StatsController.PagesTest do
]
end
test "filtering by hostname, excludes a page on different hostname", %{conn: conn, site: site} do
populate_stats(site, [
build(:pageview,
timestamp: ~N[2021-01-01 05:01:00],
pathname: "/about",
hostname: "blog.example.com",
user_id: @user_id
),
build(:pageview,
timestamp: ~N[2021-01-01 05:01:02],
pathname: "/hello",
hostname: "example.com",
user_id: @user_id
),
build(:pageview,
timestamp: ~N[2021-01-01 05:01:02],
pathname: "/about",
hostname: "blog.example.com"
)
])
filters = Jason.encode!(%{"hostname" => "blog.example.com"})
conn =
get(
conn,
"/api/stats/#{site.domain}/pages?period=day&date=2021-01-01&detailed=true&filters=#{filters}"
)
assert json_response(conn, 200) == [
%{
"bounce_rate" => 50,
"name" => "/about",
"pageviews" => 2,
"time_on_page" => nil,
"visitors" => 2
}
]
end
test "calculates bounce rate and time on page for pages when filtered by hostname", %{
conn: conn,
site: site
} do
populate_stats(site, [
# session 1
build(:pageview,
pathname: "/about-blog",
hostname: "blog.example.com",
user_id: @user_id + 1,
timestamp: ~N[2021-01-01 00:01:00]
),
# session 2
build(:pageview,
pathname: "/about-blog",
hostname: "blog.example.com",
user_id: @user_id,
timestamp: ~N[2021-01-01 00:01:00]
),
build(:pageview,
pathname: "/about",
hostname: "example.com",
user_id: @user_id,
timestamp: ~N[2021-01-01 00:10:00]
),
build(:pageview,
pathname: "/about-blog",
hostname: "blog.example.com",
user_id: @user_id,
timestamp: ~N[2021-01-01 00:15:00]
),
build(:pageview,
pathname: "/exit-blog",
hostname: "blog.example.com",
timestamp: ~N[2021-01-01 00:20:00],
user_id: @user_id
),
build(:pageview,
pathname: "/about",
hostname: "example.com",
timestamp: ~N[2021-01-01 00:22:00],
user_id: @user_id
),
build(:pageview,
pathname: "/exit",
hostname: "example.com",
timestamp: ~N[2021-01-01 00:25:00],
user_id: @user_id
),
# session 3
build(:pageview,
pathname: "/about",
hostname: "example.com",
user_id: @user_id + 2,
timestamp: ~N[2021-01-01 00:01:00]
)
])
filters = Jason.encode!(%{"hostname" => "blog.example.com"})
conn =
get(
conn,
"/api/stats/#{site.domain}/pages?period=day&date=2021-01-01&detailed=true&filters=#{filters}"
)
assert json_response(conn, 200) == [
%{
"bounce_rate" => 50,
"name" => "/about-blog",
"pageviews" => 3,
"time_on_page" => 1140.0,
"visitors" => 2
},
%{
"bounce_rate" => nil,
"name" => "/exit-blog",
"pageviews" => 1,
"time_on_page" => nil,
"visitors" => 1
}
]
end
test "doesn't calculate time on page with only single page visits", %{conn: conn, site: site} do
populate_stats(site, [
build(:pageview, pathname: "/", user_id: @user_id, timestamp: ~N[2021-01-01 00:00:00]),
@ -1227,6 +1384,110 @@ defmodule PlausibleWeb.Api.StatsController.PagesTest do
]
end
test "returns top entry pages by visitors filtered by hostname", %{conn: conn, site: site} do
populate_stats(site, [
build(:pageview,
pathname: "/page1",
hostname: "en.example.com",
timestamp: ~N[2021-01-01 00:00:00]
),
build(:pageview,
pathname: "/page1",
hostname: "es.example.com",
timestamp: ~N[2021-01-01 00:00:00]
),
build(:pageview,
pathname: "/page2",
hostname: "en.example.com",
user_id: @user_id,
timestamp: ~N[2021-01-01 00:00:00]
),
build(:pageview,
pathname: "/page2",
hostname: "es.example.com",
user_id: @user_id,
timestamp: ~N[2021-01-01 00:15:00]
),
build(:pageview,
pathname: "/exit",
hostname: "es.example.com",
user_id: @user_id,
timestamp: ~N[2021-01-01 00:16:00]
),
build(:pageview,
pathname: "/page2",
hostname: "es.example.com",
timestamp: ~N[2021-01-01 23:15:00]
)
])
filters = Jason.encode!(%{"hostname" => "es.example.com"})
conn =
get(
conn,
"/api/stats/#{site.domain}/entry-pages?period=day&date=2021-01-01&filters=#{filters}"
)
assert json_response(conn, 200) == [
%{"name" => "/page2", "visit_duration" => 480, "visitors" => 2, "visits" => 2},
%{"name" => "/page1", "visit_duration" => 0, "visitors" => 1, "visits" => 1}
]
end
test "returns top entry pages by visitors filtered by hostname with experimental_hostname_filter",
%{conn: conn, site: site} do
populate_stats(site, [
build(:pageview,
pathname: "/page1",
hostname: "en.example.com",
timestamp: ~N[2021-01-01 00:00:00]
),
build(:pageview,
pathname: "/page1",
hostname: "es.example.com",
timestamp: ~N[2021-01-01 00:00:00]
),
build(:pageview,
pathname: "/page2",
hostname: "en.example.com",
user_id: @user_id,
timestamp: ~N[2021-01-01 00:00:00]
),
build(:pageview,
pathname: "/page2",
hostname: "es.example.com",
user_id: @user_id,
timestamp: ~N[2021-01-01 00:15:00]
),
build(:pageview,
pathname: "/exit",
hostname: "es.example.com",
user_id: @user_id,
timestamp: ~N[2021-01-01 00:16:00]
),
build(:pageview,
pathname: "/page2",
hostname: "es.example.com",
timestamp: ~N[2021-01-01 23:15:00]
)
])
filters = Jason.encode!(%{"hostname" => "es.example.com"})
conn =
get(
conn,
"/api/stats/#{site.domain}/entry-pages?period=day&date=2021-01-01&filters=#{filters}&experimental_hostname_filter=true"
)
# With experimental_hostname_filter, the entry pages endpoint only joins
# sessions where the entry hostname matches the filter
assert json_response(conn, 200) == [
%{"name" => "/page1", "visit_duration" => 0, "visitors" => 1, "visits" => 1},
%{"name" => "/page2", "visit_duration" => 0, "visitors" => 1, "visits" => 1}
]
end
test "bugfix: pagination on /pages filtered by goal", %{conn: conn, site: site} do
populate_stats(
site,
@ -1377,6 +1638,99 @@ defmodule PlausibleWeb.Api.StatsController.PagesTest do
]
end
test "returns top exit pages by visitors filtered by hostname", %{conn: conn, site: site} do
populate_stats(site, [
build(:pageview,
pathname: "/page1",
hostname: "en.example.com",
timestamp: ~N[2021-01-01 00:00:00]
),
build(:pageview,
pathname: "/page1",
hostname: "es.example.com",
timestamp: ~N[2021-01-01 00:00:00]
),
build(:pageview,
pathname: "/page1",
hostname: "en.example.com",
user_id: @user_id,
timestamp: ~N[2021-01-01 00:00:00]
),
build(:pageview,
pathname: "/page2",
hostname: "es.example.com",
user_id: @user_id,
timestamp: ~N[2021-01-01 00:15:00]
),
build(:pageview,
pathname: "/exit",
hostname: "en.example.com",
user_id: @user_id,
timestamp: ~N[2021-01-01 00:16:00]
)
])
filters = Jason.encode!(%{hostname: "es.example.com"})
conn =
get(
conn,
"/api/stats/#{site.domain}/exit-pages?period=day&date=2021-01-01&filters=#{filters}"
)
assert json_response(conn, 200) ==
[
%{"name" => "/exit", "visitors" => 1, "visits" => 1},
%{"name" => "/page1", "visitors" => 1, "visits" => 1}
]
end
test "returns top exit pages by visitors filtered by hostname with experimental_hostname_filter",
%{conn: conn, site: site} do
populate_stats(site, [
build(:pageview,
pathname: "/page1",
hostname: "en.example.com",
timestamp: ~N[2021-01-01 00:00:00]
),
build(:pageview,
pathname: "/page1",
hostname: "es.example.com",
timestamp: ~N[2021-01-01 00:00:00]
),
build(:pageview,
pathname: "/page1",
hostname: "en.example.com",
user_id: @user_id,
timestamp: ~N[2021-01-01 00:00:00]
),
build(:pageview,
pathname: "/page2",
hostname: "es.example.com",
user_id: @user_id,
timestamp: ~N[2021-01-01 00:15:00]
),
build(:pageview,
pathname: "/exit",
hostname: "en.example.com",
user_id: @user_id,
timestamp: ~N[2021-01-01 00:16:00]
)
])
filters = Jason.encode!(%{hostname: "es.example.com"})
conn =
get(
conn,
"/api/stats/#{site.domain}/exit-pages?period=day&date=2021-01-01&filters=#{filters}&experimental_hostname_filter=true"
)
# With experimental_hostname_filter, the exit pages endpoint only joins
# sessions where the exit hostname matches the filter
assert json_response(conn, 200) ==
[%{"name" => "/page1", "visitors" => 1, "visits" => 1}]
end
test "returns top exit pages filtered by custom pageview props", %{conn: conn, site: site} do
populate_stats(site, [
build(:pageview,

View File

@ -1221,6 +1221,121 @@ defmodule PlausibleWeb.Api.StatsController.SourcesTest do
]
end
test "returns top referrers for a custom goal and filtered by hostname", %{
  conn: conn,
  site: site
} do
  # A single visitor (@user_id) arrives from Facebook on blog.example.com,
  # then views /register and triggers "Signup" on app.example.com.
  events = [
    build(:pageview,
      hostname: "blog.example.com",
      referrer_source: "Facebook",
      user_id: @user_id
    ),
    build(:pageview,
      hostname: "app.example.com",
      pathname: "/register",
      user_id: @user_id
    ),
    build(:event,
      name: "Signup",
      hostname: "app.example.com",
      pathname: "/register",
      user_id: @user_id
    )
  ]

  populate_stats(site, events)

  goal_and_host = Jason.encode!(%{goal: "Signup", hostname: "app.example.com"})
  path = "/api/stats/#{site.domain}/sources?period=day&filters=#{goal_and_host}"

  sources =
    conn
    |> get(path)
    |> json_response(200)

  # Without the experimental flag the conversion is still attributed to the
  # Facebook source.
  expected = [
    %{
      "conversion_rate" => 100.0,
      "name" => "Facebook",
      "total_visitors" => 1,
      "visitors" => 1
    }
  ]

  assert sources == expected
end
test "returns no top referrers for a custom goal and filtered by hostname and experimental_hostname_filter",
     %{
       conn: conn,
       site: site
     } do
  # Same fixture as the non-experimental variant: the visitor's first
  # pageview (with the Facebook referrer) is on blog.example.com, while the
  # goal is completed on app.example.com.
  events = [
    build(:pageview,
      hostname: "blog.example.com",
      referrer_source: "Facebook",
      user_id: @user_id
    ),
    build(:pageview,
      hostname: "app.example.com",
      pathname: "/register",
      user_id: @user_id
    ),
    build(:event,
      name: "Signup",
      hostname: "app.example.com",
      pathname: "/register",
      user_id: @user_id
    )
  ]

  populate_stats(site, events)

  goal_and_host = Jason.encode!(%{goal: "Signup", hostname: "app.example.com"})

  path =
    "/api/stats/#{site.domain}/sources?period=day&filters=#{goal_and_host}&experimental_hostname_filter=true"

  sources =
    conn
    |> get(path)
    |> json_response(200)

  # NOTE(review): presumably empty because the session was entered on
  # blog.example.com, which does not match the hostname filter - confirm
  # against the experimental filtering implementation.
  assert sources == []
end
test "returns top referrers for a custom goal and filtered by hostname and experimental_hostname_filter",
     %{
       conn: conn,
       site: site
     } do
  # Here the Facebook-referred pageview and the "Signup" event both happen
  # on app.example.com, the hostname being filtered on.
  events = [
    build(:pageview,
      hostname: "app.example.com",
      referrer_source: "Facebook",
      pathname: "/register",
      user_id: @user_id
    ),
    build(:event,
      name: "Signup",
      hostname: "app.example.com",
      pathname: "/register",
      user_id: @user_id
    )
  ]

  populate_stats(site, events)

  goal_and_host = Jason.encode!(%{goal: "Signup", hostname: "app.example.com"})

  path =
    "/api/stats/#{site.domain}/sources?period=day&filters=#{goal_and_host}&experimental_hostname_filter=true"

  sources =
    conn
    |> get(path)
    |> json_response(200)

  expected = [
    %{
      "conversion_rate" => 100.0,
      "name" => "Facebook",
      "total_visitors" => 1,
      "visitors" => 1
    }
  ]

  assert sources == expected
end
test "returns top referrers with goal filter + :is prop filter", %{conn: conn, site: site} do
populate_stats(site, [
build(:pageview,

View File

@ -236,6 +236,46 @@ defmodule PlausibleWeb.Api.StatsController.SuggestionsTest do
assert json_response(conn, 200) == []
end
test "returns suggestions for hostnames", %{conn: conn1, user: user} do
  {:ok, [site: site]} = create_new_site(%{user: user})

  # Three distinct hostnames; the "bob" and "carol" pageviews share
  # user_id 123, the "alice" pageview is anonymous.
  pageviews = [
    build(:pageview,
      pathname: "/",
      hostname: "host-alice.example.com"
    ),
    build(:pageview,
      pathname: "/some-other-page",
      hostname: "host-bob.example.com",
      user_id: 123
    ),
    build(:pageview, pathname: "/exit", hostname: "host-carol.example.com", user_id: 123)
  ]

  populate_stats(site, pageviews)

  # A narrow query matches a single hostname.
  alice_only =
    conn1
    |> get("/api/stats/#{site.domain}/suggestions/hostname?q=alice")
    |> json_response(200)

  assert alice_only == [
           %{"value" => "host-alice.example.com", "label" => "host-alice.example.com"}
         ]

  # A broad query matches all three; the expected order is asserted exactly.
  all_hosts =
    conn1
    |> get("/api/stats/#{site.domain}/suggestions/hostname?q=host")
    |> json_response(200)

  assert all_hosts ==
           [
             %{"label" => "host-alice.example.com", "value" => "host-alice.example.com"},
             %{"label" => "host-carol.example.com", "value" => "host-carol.example.com"},
             %{"label" => "host-bob.example.com", "value" => "host-bob.example.com"}
           ]
end
test "returns suggestions for referrers", %{conn: conn, site: site} do
populate_stats(site, [
build(:pageview,

View File

@ -642,6 +642,38 @@ defmodule PlausibleWeb.Api.StatsController.TopStatsTest do
assert %{"name" => "Unique visitors", "value" => 2} in res["top_stats"]
end
test "returns only visitors with specific screen size for a given hostname", %{
  conn: conn,
  site: site
} do
  # Desktop pageviews on both hostnames (anonymous, @user_id and
  # @user_id + 1), plus one anonymous Mobile pageview on blog.example.com.
  pageviews = [
    build(:pageview, screen_size: "Desktop", hostname: "blog.example.com"),
    build(:pageview, screen_size: "Desktop", hostname: "example.com", user_id: @user_id),
    build(:pageview, screen_size: "Desktop", hostname: "blog.example.com", user_id: @user_id),
    build(:pageview,
      screen_size: "Desktop",
      hostname: "blog.example.com",
      user_id: @user_id + 1
    ),
    build(:pageview, screen_size: "Desktop", hostname: "example.com", user_id: @user_id + 1),
    build(:pageview, screen_size: "Mobile", hostname: "blog.example.com")
  ]

  populate_stats(site, pageviews)

  screen_and_host = Jason.encode!(%{screen: "Desktop", hostname: "blog.example.com"})
  path = "/api/stats/#{site.domain}/top-stats?period=month&filters=#{screen_and_host}"

  top_stats =
    conn
    |> get(path)
    |> json_response(200)
    |> Access.get("top_stats")

  assert %{"name" => "Unique visitors", "value" => 3} in top_stats
  assert %{"name" => "Total visits", "value" => 3} in top_stats
end
test "returns only visitors with specific browser", %{conn: conn, site: site} do
populate_stats(site, [
build(:pageview, browser: "Chrome"),
@ -738,6 +770,82 @@ defmodule PlausibleWeb.Api.StatsController.TopStatsTest do
_ -> false
end)
end
test "hostname exact filter", %{conn: conn, site: site} do
  # Two pageviews on example.com and two on blog.example.com; only the
  # former are expected to be counted.
  pageviews = [
    build(:pageview, pathname: "/index", hostname: "example.com"),
    build(:pageview, pathname: "/index", hostname: "example.com", user_id: @user_id),
    build(:pageview, pathname: "/blog/post1", hostname: "blog.example.com", user_id: @user_id),
    build(:pageview, pathname: "/blog/post2", hostname: "blog.example.com")
  ]

  populate_stats(site, pageviews)

  exact_host = Jason.encode!(%{hostname: "example.com"})
  path = "/api/stats/#{site.domain}/top-stats?period=month&filters=#{exact_host}"

  top_stats =
    conn
    |> get(path)
    |> json_response(200)
    |> Access.get("top_stats")

  assert %{"name" => "Unique visitors", "value" => 2} in top_stats
  assert %{"name" => "Total pageviews", "value" => 2} in top_stats
end
test "hostname glob filter", %{conn: conn, site: site} do
  # Six pageviews spread over example.com, blog.example.com and
  # about.example.com; the "*example.com" pattern is expected to match all.
  pageviews = [
    build(:pageview, pathname: "/index", hostname: "example.com"),
    build(:pageview, pathname: "/index", hostname: "example.com", user_id: @user_id),
    build(:pageview, pathname: "/blog/post1", hostname: "blog.example.com", user_id: @user_id),
    build(:pageview, pathname: "/blog/post2", hostname: "blog.example.com", user_id: @user_id),
    build(:pageview, pathname: "/blog/post2", hostname: "blog.example.com"),
    build(:pageview, pathname: "/blog/post2", hostname: "about.example.com")
  ]

  populate_stats(site, pageviews)

  glob_host = Jason.encode!(%{hostname: "*example.com"})
  path = "/api/stats/#{site.domain}/top-stats?period=month&filters=#{glob_host}"

  top_stats =
    conn
    |> get(path)
    |> json_response(200)
    |> Access.get("top_stats")

  assert %{"name" => "Unique visitors", "value" => 4} in top_stats
  assert %{"name" => "Total pageviews", "value" => 6} in top_stats
end
test "hostname glob subdomain filter", %{conn: conn, site: site} do
  # Two pageviews on the bare example.com domain and four on
  # blog.example.com (by @user_id, an anonymous visitor and a third user).
  populate_stats(site, [
    build(:pageview, pathname: "/index", hostname: "example.com"),
    build(:pageview, pathname: "/index", hostname: "example.com", user_id: @user_id),
    build(:pageview, pathname: "/blog/post1", hostname: "blog.example.com", user_id: @user_id),
    build(:pageview, pathname: "/blog/post2", hostname: "blog.example.com", user_id: @user_id),
    build(:pageview, pathname: "/blog/post3", hostname: "blog.example.com"),
    build(:pageview,
      pathname: "/blog/post2",
      hostname: "blog.example.com",
      user_id: 100_002_378_237
    )
  ])

  # "*.example.com" requires a subdomain, so the bare example.com pageviews
  # are expected to be excluded from the totals below.
  filters = Jason.encode!(%{hostname: "*.example.com"})

  conn =
    get(
      conn,
      "/api/stats/#{site.domain}/top-stats?period=month&filters=#{filters}"
    )

  res = json_response(conn, 200)

  assert %{"name" => "Unique visitors", "value" => 3} in res["top_stats"]
  assert %{"name" => "Total pageviews", "value" => 4} in res["top_stats"]
end
end
describe "GET /api/stats/top-stats - filtered for goal" do

View File

@ -198,7 +198,7 @@ defmodule Plausible.TestUtils do
session = Plausible.Session.CacheStore.on_event(event_params, event_params, nil)
event_params
|> Map.merge(session)
|> Plausible.ClickhouseEventV2.merge_session(session)
|> Plausible.Event.WriteBuffer.insert()
end

View File

@ -1,7 +1,6 @@
{:ok, _} = Application.ensure_all_started(:ex_machina)
Mox.defmock(Plausible.HTTPClient.Mock, for: Plausible.HTTPClient.Interface)
Application.ensure_all_started(:double)
FunWithFlags.enable(:window_time_on_page)
FunWithFlags.enable(:imports_exports)
FunWithFlags.enable(:shield_pages)
Ecto.Adapters.SQL.Sandbox.mode(Plausible.Repo, :manual)