Finish stats module (#1248)

* Fix small inconsistencies in stats module

* Format
This commit is contained in:
Uku Taht 2021-08-17 15:21:12 +03:00 committed by GitHub
parent 4fef567623
commit 669866a16b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 167 additions and 189 deletions

View File

@ -3,8 +3,8 @@ defmodule Plausible.Stats.Aggregate do
use Plausible.ClickhouseRepo
import Plausible.Stats.Base
@event_metrics ["visitors", "pageviews", "events"]
@session_metrics ["visits", "bounce_rate", "visit_duration"]
@event_metrics ["visitors", "pageviews", "events", "sample_percent"]
@session_metrics ["visits", "bounce_rate", "visit_duration", "sample_percent"]
def aggregate(site, query, metrics) do
event_metrics = Enum.filter(metrics, &(&1 in @event_metrics))
@ -19,11 +19,11 @@ defmodule Plausible.Stats.Aggregate do
Task.async(fn -> %{} end)
end
Task.await(event_task)
|> Map.merge(Task.await(session_task))
Task.await(session_task)
|> Map.merge(Task.await(event_task))
|> Map.merge(Task.await(time_on_page_task))
|> Enum.map(fn {metric, value} ->
{metric, %{value: round(value || 0)}}
{metric, %{"value" => round(value || 0)}}
end)
|> Enum.into(%{})
end
@ -31,54 +31,21 @@ defmodule Plausible.Stats.Aggregate do
defp aggregate_events(_, _, []), do: %{}
defp aggregate_events(site, query, metrics) do
q = from(e in base_event_query(site, query), select: %{})
Enum.reduce(metrics, q, &select_event_metric/2)
from(e in base_event_query(site, query), select: %{})
|> select_event_metrics(metrics)
|> ClickhouseRepo.one()
end
defp select_event_metric("pageviews", q) do
from(e in q,
select_merge: %{pageviews: fragment("countIf(? = 'pageview')", e.name)}
)
end
defp select_event_metric("events", q) do
from(e in q,
select_merge: %{events: fragment("count(*)")}
)
end
defp select_event_metric("visitors", q) do
from(e in q, select_merge: %{visitors: fragment("uniq(?)", e.user_id)})
end
defp aggregate_sessions(_, _, []), do: %{}
defp aggregate_sessions(site, query, metrics) do
query = Query.treat_page_filter_as_entry_page(query)
q = from(e in query_sessions(site, query), select: %{})
Enum.reduce(metrics, q, &select_session_metric/2)
from(e in query_sessions(site, query), select: %{})
|> select_session_metrics(metrics)
|> ClickhouseRepo.one()
end
defp select_session_metric("bounce_rate", q) do
from(s in q,
select_merge: %{bounce_rate: fragment("round(sum(is_bounce * sign) / sum(sign) * 100)")}
)
end
defp select_session_metric("visits", q) do
from(s in q,
select_merge: %{visits: fragment("sum(?)", s.sign)}
)
end
defp select_session_metric("visit_duration", q) do
from(s in q, select_merge: %{visit_duration: fragment("round(avg(duration * sign))")})
end
defp aggregate_time_on_page(site, query) do
q =
from(
@ -134,6 +101,6 @@ defmodule Plausible.Stats.Aggregate do
{:ok, res} = ClickhouseRepo.query(time_query, base_query_raw_params ++ [where_arg])
[[time_on_page]] = res.rows
%{time_on_page: time_on_page}
%{"time_on_page" => time_on_page}
end
end

View File

@ -28,7 +28,9 @@ defmodule Plausible.Stats.Base do
{first_datetime, last_datetime} = utc_boundaries(query, site.timezone)
q =
from(e in "events",
from(
e in "events",
hints: [sample: query.sample_threshold],
where: e.domain == ^site.domain,
where: e.timestamp >= ^first_datetime and e.timestamp < ^last_datetime
)
@ -113,7 +115,9 @@ defmodule Plausible.Stats.Base do
{first_datetime, last_datetime} = utc_boundaries(query, site.timezone)
sessions_q =
from(s in "sessions",
from(
s in "sessions",
hints: [sample: query.sample_threshold],
where: s.domain == ^site.domain,
where: s.timestamp >= ^first_datetime and s.start < ^last_datetime
)
@ -139,6 +143,10 @@ defmodule Plausible.Stats.Base do
fragment_data = [{String.to_existing_atom(prop_name), {:in, list}}]
from(s in sessions_q, where: fragment(^fragment_data))
{:matches, expr} ->
regex = page_regex(expr)
from(s in sessions_q, where: fragment("match(?, ?)", ^prop_name, ^regex))
nil ->
sessions_q
@ -148,6 +156,105 @@ defmodule Plausible.Stats.Base do
end)
end
# Adds the requested event-level metrics to the `select` of an events query.
#
# The metric list is consumed head-first: each clause merges one string-keyed
# column into the select via `select_merge` and then recurses on the remaining
# names. An empty list returns the query unchanged; an unrecognised name raises.
#
# Count-style metrics are multiplied by `any(_sample_factor)` and rounded to
# UInt64. NOTE(review): `_sample_factor` is presumably the ClickHouse sampling
# virtual column enabled by the `sample` hint on the base query - confirm.
def select_event_metrics(q, []), do: q

def select_event_metrics(q, ["pageviews" | remaining]) do
  with_pageviews =
    from(e in q,
      select_merge: %{
        "pageviews" =>
          fragment("toUInt64(round(countIf(? = 'pageview') * any(_sample_factor)))", e.name)
      }
    )

  select_event_metrics(with_pageviews, remaining)
end

def select_event_metrics(q, ["events" | remaining]) do
  with_events =
    from(e in q,
      select_merge: %{"events" => fragment("toUInt64(round(count(*) * any(_sample_factor)))")}
    )

  select_event_metrics(with_events, remaining)
end

def select_event_metrics(q, ["visitors" | remaining]) do
  with_visitors =
    from(e in q,
      select_merge: %{
        "visitors" => fragment("toUInt64(round(uniq(?) * any(_sample_factor)))", e.user_id)
      }
    )

  select_event_metrics(with_visitors, remaining)
end

def select_event_metrics(q, ["sample_percent" | remaining]) do
  # Yields round(100 / factor) when the sample factor exceeds 1, otherwise 100.
  with_sample_percent =
    from(e in q,
      select_merge: %{
        "sample_percent" =>
          fragment("if(any(_sample_factor) > 1, round(100 / any(_sample_factor)), 100)")
      }
    )

  select_event_metrics(with_sample_percent, remaining)
end

def select_event_metrics(_q, [unknown | _remaining]) do
  raise "Unknown metric " <> unknown
end
# Adds the requested session-level metrics to the `select` of a sessions query.
#
# The metric list is consumed head-first: each clause merges one string-keyed
# column into the select via `select_merge` and then recurses on the remaining
# names. An empty list returns the query unchanged; an unrecognised name raises
# a RuntimeError, mirroring `select_event_metrics/2`.
#
# Count-style metrics are multiplied by `any(_sample_factor)` and rounded to
# UInt64; ratio/average metrics are wrapped in `ifNotFinite(..., 0)` so a
# non-finite result (e.g. division by a zero `sum(sign)`) is reported as 0.
def select_session_metrics(q, []), do: q

def select_session_metrics(q, ["bounce_rate" | rest]) do
  from(s in q,
    select_merge: %{
      "bounce_rate" =>
        fragment("toUInt32(ifNotFinite(round(sum(is_bounce * sign) / sum(sign) * 100), 0))")
    }
  )
  |> select_session_metrics(rest)
end

def select_session_metrics(q, ["visits" | rest]) do
  from(s in q,
    select_merge: %{
      "visits" => fragment("toUInt64(round(sum(?) * any(_sample_factor)))", s.sign)
    }
  )
  |> select_session_metrics(rest)
end

def select_session_metrics(q, ["pageviews" | rest]) do
  from(s in q,
    select_merge: %{
      "pageviews" =>
        fragment("toUInt64(round(sum(? * ?) * any(_sample_factor)))", s.sign, s.pageviews)
    }
  )
  |> select_session_metrics(rest)
end

def select_session_metrics(q, ["visitors" | rest]) do
  from(s in q,
    select_merge: %{
      "visitors" => fragment("toUInt64(round(uniq(?) * any(_sample_factor)))", s.user_id)
    }
  )
  |> select_session_metrics(rest)
end

def select_session_metrics(q, ["visit_duration" | rest]) do
  from(s in q,
    select_merge: %{
      "visit_duration" => fragment("toUInt32(ifNotFinite(round(avg(duration * sign)), 0))")
    }
  )
  |> select_session_metrics(rest)
end

# Yields round(100 / factor) when the sample factor exceeds 1, otherwise 100.
# The recursion must stay on the session selector: handing the remaining
# metrics to `select_event_metrics/2` would build them with event-side SQL or
# raise for session-only metrics such as "bounce_rate".
def select_session_metrics(q, ["sample_percent" | rest]) do
  from(s in q,
    select_merge: %{
      "sample_percent" =>
        fragment("if(any(_sample_factor) > 1, round(100 / any(_sample_factor)), 100)")
    }
  )
  |> select_session_metrics(rest)
end

def select_session_metrics(_, [unknown | _]), do: raise("Unknown metric " <> unknown)
defp db_prop_val("referrer_source", @no_ref), do: ""
defp db_prop_val("utm_medium", @no_ref), do: ""
defp db_prop_val("utm_source", @no_ref), do: ""

View File

@ -101,10 +101,7 @@ defmodule Plausible.Stats.Breakdown do
query
pages ->
new_filters =
Map.put(query.filters, "event:page", {:member, Enum.map(pages, & &1["page"])})
%Query{query | filters: new_filters}
Query.put_filter(query, "visit:entry_page", {:member, Enum.map(pages, & &1["page"])})
end
{limit, _page} = pagination
@ -125,6 +122,11 @@ defmodule Plausible.Stats.Breakdown do
breakdown_events(site, query, property, metrics, pagination)
end
# Breakdown by traffic source: the query is first passed through
# `Query.treat_page_filter_as_entry_page/1` and the result is handed to the
# session breakdown.
# NOTE(review): presumably this rewrites an "event:page" filter into an
# entry-page filter so it can be applied to sessions - confirm in Query.
def breakdown(site, query, "visit:source" = property, metrics, pagination) do
  entry_page_query = Query.treat_page_filter_as_entry_page(query)
  breakdown_sessions(site, entry_page_query, property, metrics, pagination)
end

# Fallthrough: any property not matched by an earlier clause is broken down
# over sessions with the query left untouched.
def breakdown(site, query, property, metrics, pagination),
  do: breakdown_sessions(site, query, property, metrics, pagination)
@ -167,7 +169,7 @@ defmodule Plausible.Stats.Breakdown do
)
|> filter_converted_sessions(site, query)
|> do_group_by(property)
|> select_metrics(metrics)
|> select_session_metrics(metrics)
|> ClickhouseRepo.all()
end
@ -427,70 +429,6 @@ defmodule Plausible.Stats.Breakdown do
)
end
defp select_event_metrics(q, []), do: q
defp select_event_metrics(q, ["pageviews" | rest]) do
from(e in q,
select_merge: %{"pageviews" => fragment("countIf(? = 'pageview')", e.name)}
)
|> select_event_metrics(rest)
end
defp select_event_metrics(q, ["visitors" | rest]) do
from(e in q,
select_merge: %{"visitors" => fragment("uniq(?) as count", e.user_id)}
)
|> select_event_metrics(rest)
end
defp select_event_metrics(q, ["events" | rest]) do
from(e in q,
select_merge: %{"events" => fragment("count(*)")}
)
|> select_event_metrics(rest)
end
defp select_metrics(q, []), do: q
defp select_metrics(q, ["pageviews" | rest]) do
from(s in q,
select_merge: %{"pageviews" => fragment("sum(? * ?)", s.sign, s.pageviews)}
)
|> select_metrics(rest)
end
defp select_metrics(q, ["visitors" | rest]) do
from(s in q,
select_merge: %{"visitors" => fragment("uniq(?) as count", s.user_id)}
)
|> select_metrics(rest)
end
defp select_metrics(q, ["visits" | rest]) do
from(s in q,
select_merge: %{
"visits" => fragment("sum(?)", s.sign)
}
)
|> select_metrics(rest)
end
defp select_metrics(q, ["bounce_rate" | rest]) do
from(s in q,
select_merge: %{
"bounce_rate" => fragment("round(sum(? * ?) / sum(?) * 100)", s.is_bounce, s.sign, s.sign)
}
)
|> select_metrics(rest)
end
defp select_metrics(q, ["visit_duration" | rest]) do
from(s in q,
select_merge: %{"visit_duration" => fragment("round(avg(? * ?))", s.duration, s.sign)}
)
|> select_metrics(rest)
end
defp transform_keys(results, keys_to_replace) do
Enum.map(results, fn map ->
Enum.map(map, fn {key, val} ->

View File

@ -1,5 +1,9 @@
defmodule Plausible.Stats.Query do
defstruct date_range: nil, interval: nil, period: nil, filters: %{}
defstruct date_range: nil,
interval: nil,
period: nil,
filters: %{},
sample_threshold: 10_000_000
def shift_back(%__MODULE__{period: "month"} = query, site) do
# Querying current month to date
@ -212,10 +216,14 @@ defmodule Plausible.Stats.Query do
cond do
is_list && is_glob -> raise "Not implemented"
key == "visit:goal" -> {key, parse_goal_filter(val)}
is_list -> {key, {:member, String.split(val, "|")}}
is_glob -> {key, {:matches, val}}
is_negated -> {key, {:is_not, val}}
true -> {key, {:is, val}}
end
end
# Turns a raw goal filter value into a tagged tuple.
# A value starting with "Visit " is a pageview goal and yields
# `{:is, :page, page}` with the prefix stripped; anything else is treated as a
# custom event name and yields `{:is, :event, event}`.
defp parse_goal_filter(goal) do
  case goal do
    "Visit " <> page -> {:is, :page, page}
    event -> {:is, :event, event}
  end
end
end

View File

@ -37,6 +37,8 @@ defmodule Plausible.Stats.Timeseries do
end
defp sessions_timeseries(site, query, metrics) do
query = Query.treat_page_filter_as_entry_page(query)
from(e in query_sessions(site, query),
group_by: fragment("date"),
order_by: fragment("date"),
@ -110,49 +112,6 @@ defmodule Plausible.Stats.Timeseries do
)
end
defp select_event_metrics(q, []), do: q
defp select_event_metrics(q, ["pageviews" | rest]) do
from(e in q,
select_merge: %{"pageviews" => fragment("countIf(? = 'pageview')", e.name)}
)
|> select_event_metrics(rest)
end
defp select_event_metrics(q, ["visitors" | rest]) do
from(e in q,
select_merge: %{"visitors" => fragment("uniq(?) as count", e.user_id)}
)
|> select_event_metrics(rest)
end
defp select_session_metrics(q, []), do: q
defp select_session_metrics(q, ["bounce_rate" | rest]) do
from(s in q,
select_merge: %{
"bounce_rate" => bounce_rate()
}
)
|> select_session_metrics(rest)
end
defp select_session_metrics(q, ["visits" | rest]) do
from(s in q,
select_merge: %{
"visits" => fragment("sum(?)", s.sign)
}
)
|> select_session_metrics(rest)
end
defp select_session_metrics(q, ["visit_duration" | rest]) do
from(s in q,
select_merge: %{"visit_duration" => visit_duration()}
)
|> select_session_metrics(rest)
end
defp empty_row(date, metrics) do
Enum.reduce(metrics, %{"date" => date}, fn metric, row ->
case metric do

View File

@ -27,13 +27,13 @@ defmodule PlausibleWeb.Api.ExternalStatsController do
Task.async(fn -> Plausible.Stats.aggregate(site, query, metrics) end)
])
Enum.map(curr_result, fn {metric, %{value: current_val}} ->
%{value: prev_val} = prev_result[metric]
Enum.map(curr_result, fn {metric, %{"value" => current_val}} ->
%{"value" => prev_val} = prev_result[metric]
{metric,
%{
value: current_val,
change: percent_change(prev_val, current_val)
"value" => current_val,
"change" => percent_change(prev_val, current_val)
}}
end)
|> Enum.into(%{})

View File

@ -58,8 +58,8 @@ defmodule PlausibleWeb.Api.StatsController do
defp fetch_top_stats(site, %Query{period: "30m"} = query) do
%{
visitors: %{value: visitors},
pageviews: %{value: pageviews}
"visitors" => %{"value" => visitors},
"pageviews" => %{"value" => pageviews}
} = Stats.aggregate(site, query, ["visitors", "pageviews"])
stats = [
@ -77,7 +77,7 @@ defmodule PlausibleWeb.Api.StatsController do
}
]
{stats, 100}
{stats, nil}
end
defp fetch_top_stats(site, %Query{filters: %{"visit:goal" => _goal}} = query) do
@ -85,21 +85,21 @@ defmodule PlausibleWeb.Api.StatsController do
prev_query = Query.shift_back(query, site)
%{
visitors: %{value: unique_visitors}
"visitors" => %{"value" => unique_visitors}
} = Stats.aggregate(site, %{query | filters: total_filter}, ["visitors"])
%{
visitors: %{value: prev_unique_visitors}
"visitors" => %{"value" => prev_unique_visitors}
} = Stats.aggregate(site, %{prev_query | filters: total_filter}, ["visitors"])
%{
visitors: %{value: converted_visitors},
events: %{value: completions}
"visitors" => %{"value" => converted_visitors},
"events" => %{"value" => completions}
} = Stats.aggregate(site, query, ["visitors", "events"])
%{
visitors: %{value: prev_converted_visitors},
events: %{value: prev_completions}
"visitors" => %{"value" => prev_converted_visitors},
"events" => %{"value" => prev_completions}
} = Stats.aggregate(site, prev_query, ["visitors", "events"])
conversion_rate = calculate_cr(unique_visitors, converted_visitors)
@ -128,7 +128,7 @@ defmodule PlausibleWeb.Api.StatsController do
}
]
{stats, 0}
{stats, 100}
end
defp fetch_top_stats(site, query) do
@ -136,9 +136,9 @@ defmodule PlausibleWeb.Api.StatsController do
metrics =
if query.filters["event:page"] do
["visitors", "pageviews", "bounce_rate", "time_on_page"]
["visitors", "pageviews", "bounce_rate", "time_on_page", "sample_percent"]
else
["visitors", "pageviews", "bounce_rate", "visit_duration"]
["visitors", "pageviews", "bounce_rate", "visit_duration", "sample_percent"]
end
current_results = Stats.aggregate(site, query, metrics)
@ -146,28 +146,28 @@ defmodule PlausibleWeb.Api.StatsController do
stats =
[
top_stats_entry(current_results, prev_results, "Unique visitors", :visitors),
top_stats_entry(current_results, prev_results, "Total pageviews", :pageviews),
top_stats_entry(current_results, prev_results, "Bounce rate", :bounce_rate),
top_stats_entry(current_results, prev_results, "Visit duration", :visit_duration),
top_stats_entry(current_results, prev_results, "Time on page", :time_on_page)
top_stats_entry(current_results, prev_results, "Unique visitors", "visitors"),
top_stats_entry(current_results, prev_results, "Total pageviews", "pageviews"),
top_stats_entry(current_results, prev_results, "Bounce rate", "bounce_rate"),
top_stats_entry(current_results, prev_results, "Visit duration", "visit_duration"),
top_stats_entry(current_results, prev_results, "Time on page", "time_on_page")
]
|> Enum.filter(& &1)
{stats, 0}
{stats, current_results["sample_percent"]["value"]}
end
defp top_stats_entry(current_results, prev_results, name, key) do
if current_results[key] do
%{
name: name,
value: current_results[key][:value],
change: calculate_change(key, prev_results[key][:value], current_results[key][:value])
value: current_results[key]["value"],
change: calculate_change(key, prev_results[key]["value"], current_results[key]["value"])
}
end
end
defp calculate_change(:bounce_rate, old_count, new_count) do
defp calculate_change("bounce_rate", old_count, new_count) do
if old_count > 0, do: new_count - old_count
end
@ -195,7 +195,6 @@ defmodule PlausibleWeb.Api.StatsController do
Query.from(site.timezone, params)
|> Filters.add_prefix()
|> maybe_hide_noref("visit:source", params)
|> Query.treat_page_filter_as_entry_page()
pagination = parse_pagination(params)
@ -272,7 +271,7 @@ defmodule PlausibleWeb.Api.StatsController do
google_api().fetch_stats(site, query, params["limit"] || 9)
end
%{visitors: %{value: total_visitors}} = Stats.aggregate(site, query, ["visitors"])
%{"visitors" => %{"value" => total_visitors}} = Stats.aggregate(site, query, ["visitors"])
case search_terms do
nil ->
@ -306,7 +305,7 @@ defmodule PlausibleWeb.Api.StatsController do
Stats.breakdown(site, query, "visit:referrer", metrics, pagination)
|> transform_keys(%{"referrer" => "name", "visitors" => "count"})
%{visitors: %{value: total_visitors}} = Stats.aggregate(site, query, ["visitors"])
%{"visitors" => %{"value" => total_visitors}} = Stats.aggregate(site, query, ["visitors"])
json(conn, %{referrers: referrers, total_visitors: total_visitors})
end
@ -468,7 +467,7 @@ defmodule PlausibleWeb.Api.StatsController do
total_filter = Map.merge(query.filters, %{"visit:goal" => nil})
%{visitors: %{value: total_visitors}} =
%{"visitors" => %{"value" => total_visitors}} =
Stats.aggregate(site, %{query | filters: total_filter}, ["visitors"])
prop_names = Stats.props(site, query)
@ -492,7 +491,7 @@ defmodule PlausibleWeb.Api.StatsController do
total_filter = Map.merge(query.filters, %{"visit:goal" => nil})
%{visitors: %{value: unique_visitors}} =
%{"visitors" => %{"value" => unique_visitors}} =
Stats.aggregate(site, %{query | filters: total_filter}, ["visitors"])
prop_name = "event:props:" <> params["prop_name"]

View File

@ -12,7 +12,7 @@
"bunt": {:hex, :bunt, "0.2.0", "951c6e801e8b1d2cbe58ebbd3e616a869061ddadcc4863d0a2182541acae9a38", [:mix], [], "hexpm", "7af5c7e09fe1d40f76c8e4f9dd2be7cebd83909f31fee7cd0e9eadc567da8353"},
"cachex": {:hex, :cachex, "3.3.0", "6f2ebb8f27491fe39121bd207c78badc499214d76c695658b19d6079beeca5c2", [:mix], [{:eternal, "~> 1.2", [hex: :eternal, repo: "hexpm", optional: false]}, {:jumper, "~> 1.0", [hex: :jumper, repo: "hexpm", optional: false]}, {:sleeplocks, "~> 1.1", [hex: :sleeplocks, repo: "hexpm", optional: false]}, {:unsafe, "~> 1.0", [hex: :unsafe, repo: "hexpm", optional: false]}], "hexpm", "d90e5ee1dde14cef33f6b187af4335b88748b72b30c038969176cd4e6ccc31a1"},
"certifi": {:hex, :certifi, "2.6.1", "dbab8e5e155a0763eea978c913ca280a6b544bfa115633fa20249c3d396d9493", [:rebar3], [], "hexpm", "524c97b4991b3849dd5c17a631223896272c6b0af446778ba4675a1dff53bb7e"},
"clickhouse_ecto": {:git, "https://github.com/plausible/clickhouse_ecto.git", "07adb8da725346e4de6d376069192e9942fe7a5b", []},
"clickhouse_ecto": {:git, "https://github.com/plausible/clickhouse_ecto.git", "93d86c48230f85797555c348dbe9e8738d3b8cc2", []},
"clickhousex": {:git, "https://github.com/plausible/clickhousex", "0832dd4b1af1f0eba1d1018c231bf0d8d281f031", []},
"combination": {:hex, :combination, "0.0.3", "746aedca63d833293ec6e835aa1f34974868829b1486b1e1cb0685f0b2ae1f41", [:mix], [], "hexpm", "72b099f463df42ef7dc6371d250c7070b57b6c5902853f69deb894f79eda18ca"},
"combine": {:hex, :combine, "0.10.0", "eff8224eeb56498a2af13011d142c5e7997a80c8f5b97c499f84c841032e429f", [:mix], [], "hexpm", "1b1dbc1790073076580d0d1d64e42eae2366583e7aecd455d1215b0d16f2451b"},