From 669866a16bb2a9128c7703a8b943ff24f6405208 Mon Sep 17 00:00:00 2001 From: Uku Taht Date: Tue, 17 Aug 2021 15:21:12 +0300 Subject: [PATCH] Finish stats module (#1248) * Fix small inconsistencies in stats module * Format --- lib/plausible/stats/aggregate.ex | 53 ++------- lib/plausible/stats/base.ex | 111 +++++++++++++++++- lib/plausible/stats/breakdown.ex | 76 ++---------- lib/plausible/stats/query.ex | 10 +- lib/plausible/stats/timeseries.ex | 45 +------ .../api/external_stats_controller.ex | 8 +- .../controllers/api/stats_controller.ex | 51 ++++---- mix.lock | 2 +- 8 files changed, 167 insertions(+), 189 deletions(-) diff --git a/lib/plausible/stats/aggregate.ex b/lib/plausible/stats/aggregate.ex index 3416db8b6..6b38d34a9 100644 --- a/lib/plausible/stats/aggregate.ex +++ b/lib/plausible/stats/aggregate.ex @@ -3,8 +3,8 @@ defmodule Plausible.Stats.Aggregate do use Plausible.ClickhouseRepo import Plausible.Stats.Base - @event_metrics ["visitors", "pageviews", "events"] - @session_metrics ["visits", "bounce_rate", "visit_duration"] + @event_metrics ["visitors", "pageviews", "events", "sample_percent"] + @session_metrics ["visits", "bounce_rate", "visit_duration", "sample_percent"] def aggregate(site, query, metrics) do event_metrics = Enum.filter(metrics, &(&1 in @event_metrics)) @@ -19,11 +19,11 @@ defmodule Plausible.Stats.Aggregate do Task.async(fn -> %{} end) end - Task.await(event_task) - |> Map.merge(Task.await(session_task)) + Task.await(session_task) + |> Map.merge(Task.await(event_task)) |> Map.merge(Task.await(time_on_page_task)) |> Enum.map(fn {metric, value} -> - {metric, %{value: round(value || 0)}} + {metric, %{"value" => round(value || 0)}} end) |> Enum.into(%{}) end @@ -31,54 +31,21 @@ defmodule Plausible.Stats.Aggregate do defp aggregate_events(_, _, []), do: %{} defp aggregate_events(site, query, metrics) do - q = from(e in base_event_query(site, query), select: %{}) - - Enum.reduce(metrics, q, &select_event_metric/2) + from(e in 
base_event_query(site, query), select: %{}) + |> select_event_metrics(metrics) |> ClickhouseRepo.one() end - defp select_event_metric("pageviews", q) do - from(e in q, - select_merge: %{pageviews: fragment("countIf(? = 'pageview')", e.name)} - ) - end - - defp select_event_metric("events", q) do - from(e in q, - select_merge: %{events: fragment("count(*)")} - ) - end - - defp select_event_metric("visitors", q) do - from(e in q, select_merge: %{visitors: fragment("uniq(?)", e.user_id)}) - end - defp aggregate_sessions(_, _, []), do: %{} defp aggregate_sessions(site, query, metrics) do query = Query.treat_page_filter_as_entry_page(query) - q = from(e in query_sessions(site, query), select: %{}) - Enum.reduce(metrics, q, &select_session_metric/2) + from(e in query_sessions(site, query), select: %{}) + |> select_session_metrics(metrics) |> ClickhouseRepo.one() end - defp select_session_metric("bounce_rate", q) do - from(s in q, - select_merge: %{bounce_rate: fragment("round(sum(is_bounce * sign) / sum(sign) * 100)")} - ) - end - - defp select_session_metric("visits", q) do - from(s in q, - select_merge: %{visits: fragment("sum(?)", s.sign)} - ) - end - - defp select_session_metric("visit_duration", q) do - from(s in q, select_merge: %{visit_duration: fragment("round(avg(duration * sign))")}) - end - defp aggregate_time_on_page(site, query) do q = from( @@ -134,6 +101,6 @@ defmodule Plausible.Stats.Aggregate do {:ok, res} = ClickhouseRepo.query(time_query, base_query_raw_params ++ [where_arg]) [[time_on_page]] = res.rows - %{time_on_page: time_on_page} + %{"time_on_page" => time_on_page} end end diff --git a/lib/plausible/stats/base.ex b/lib/plausible/stats/base.ex index 48d3e0010..da502bcf1 100644 --- a/lib/plausible/stats/base.ex +++ b/lib/plausible/stats/base.ex @@ -28,7 +28,9 @@ defmodule Plausible.Stats.Base do {first_datetime, last_datetime} = utc_boundaries(query, site.timezone) q = - from(e in "events", + from( + e in "events", + hints: [sample: 
query.sample_threshold], where: e.domain == ^site.domain, where: e.timestamp >= ^first_datetime and e.timestamp < ^last_datetime ) @@ -113,7 +115,9 @@ defmodule Plausible.Stats.Base do {first_datetime, last_datetime} = utc_boundaries(query, site.timezone) sessions_q = - from(s in "sessions", + from( + s in "sessions", + hints: [sample: query.sample_threshold], where: s.domain == ^site.domain, where: s.timestamp >= ^first_datetime and s.start < ^last_datetime ) @@ -139,6 +143,10 @@ defmodule Plausible.Stats.Base do fragment_data = [{String.to_existing_atom(prop_name), {:in, list}}] from(s in sessions_q, where: fragment(^fragment_data)) + {:matches, expr} -> + regex = page_regex(expr) + from(s in sessions_q, where: fragment("match(?, ?)", ^prop_name, ^regex)) + nil -> sessions_q @@ -148,6 +156,105 @@ defmodule Plausible.Stats.Base do end) end + def select_event_metrics(q, []), do: q + + def select_event_metrics(q, ["pageviews" | rest]) do + from(e in q, + select_merge: %{ + "pageviews" => + fragment("toUInt64(round(countIf(? = 'pageview') * any(_sample_factor)))", e.name) + } + ) + |> select_event_metrics(rest) + end + + def select_event_metrics(q, ["events" | rest]) do + from(e in q, + select_merge: %{"events" => fragment("toUInt64(round(count(*) * any(_sample_factor)))")} + ) + |> select_event_metrics(rest) + end + + def select_event_metrics(q, ["visitors" | rest]) do + from(e in q, + select_merge: %{ + "visitors" => fragment("toUInt64(round(uniq(?) 
* any(_sample_factor)))", e.user_id) + } + ) + |> select_event_metrics(rest) + end + + def select_event_metrics(q, ["sample_percent" | rest]) do + from(e in q, + select_merge: %{ + "sample_percent" => + fragment("if(any(_sample_factor) > 1, round(100 / any(_sample_factor)), 100)") + } + ) + |> select_event_metrics(rest) + end + + def select_event_metrics(_, [unknown | _]), do: raise("Unknown metric " <> unknown) + + def select_session_metrics(q, []), do: q + + def select_session_metrics(q, ["bounce_rate" | rest]) do + from(s in q, + select_merge: %{ + "bounce_rate" => + fragment("toUInt32(ifNotFinite(round(sum(is_bounce * sign) / sum(sign) * 100), 0))") + } + ) + |> select_session_metrics(rest) + end + + def select_session_metrics(q, ["visits" | rest]) do + from(s in q, + select_merge: %{ + "visits" => fragment("toUInt64(round(sum(?) * any(_sample_factor)))", s.sign) + } + ) + |> select_session_metrics(rest) + end + + def select_session_metrics(q, ["pageviews" | rest]) do + from(s in q, + select_merge: %{ + "pageviews" => + fragment("toUInt64(round(sum(? * ?) * any(_sample_factor)))", s.sign, s.pageviews) + } + ) + |> select_session_metrics(rest) + end + + def select_session_metrics(q, ["visitors" | rest]) do + from(s in q, + select_merge: %{ + "visitors" => fragment("toUInt64(round(uniq(?) 
* any(_sample_factor)))", s.user_id) + } + ) + |> select_session_metrics(rest) + end + + def select_session_metrics(q, ["visit_duration" | rest]) do + from(s in q, + select_merge: %{ + "visit_duration" => fragment("toUInt32(ifNotFinite(round(avg(duration * sign)), 0))") + } + ) + |> select_session_metrics(rest) + end + + def select_session_metrics(q, ["sample_percent" | rest]) do + from(e in q, + select_merge: %{ + "sample_percent" => + fragment("if(any(_sample_factor) > 1, round(100 / any(_sample_factor)), 100)") + } + ) + |> select_session_metrics(rest) + end + defp db_prop_val("referrer_source", @no_ref), do: "" defp db_prop_val("utm_medium", @no_ref), do: "" defp db_prop_val("utm_source", @no_ref), do: "" diff --git a/lib/plausible/stats/breakdown.ex b/lib/plausible/stats/breakdown.ex index 7e66a8eae..522e475c1 100644 --- a/lib/plausible/stats/breakdown.ex +++ b/lib/plausible/stats/breakdown.ex @@ -101,10 +101,7 @@ defmodule Plausible.Stats.Breakdown do query pages -> - new_filters = - Map.put(query.filters, "event:page", {:member, Enum.map(pages, & &1["page"])}) - - %Query{query | filters: new_filters} + Query.put_filter(query, "visit:entry_page", {:member, Enum.map(pages, & &1["page"])}) end {limit, _page} = pagination @@ -125,6 +122,11 @@ defmodule Plausible.Stats.Breakdown do breakdown_events(site, query, property, metrics, pagination) end + def breakdown(site, query, "visit:source", metrics, pagination) do + query = Query.treat_page_filter_as_entry_page(query) + breakdown_sessions(site, query, "visit:source", metrics, pagination) + end + def breakdown(site, query, property, metrics, pagination) do breakdown_sessions(site, query, property, metrics, pagination) end @@ -167,7 +169,7 @@ defmodule Plausible.Stats.Breakdown do ) |> filter_converted_sessions(site, query) |> do_group_by(property) - |> select_metrics(metrics) + |> select_session_metrics(metrics) |> ClickhouseRepo.all() end @@ -427,70 +429,6 @@ defmodule Plausible.Stats.Breakdown do ) end - defp 
select_event_metrics(q, []), do: q - - defp select_event_metrics(q, ["pageviews" | rest]) do - from(e in q, - select_merge: %{"pageviews" => fragment("countIf(? = 'pageview')", e.name)} - ) - |> select_event_metrics(rest) - end - - defp select_event_metrics(q, ["visitors" | rest]) do - from(e in q, - select_merge: %{"visitors" => fragment("uniq(?) as count", e.user_id)} - ) - |> select_event_metrics(rest) - end - - defp select_event_metrics(q, ["events" | rest]) do - from(e in q, - select_merge: %{"events" => fragment("count(*)")} - ) - |> select_event_metrics(rest) - end - - defp select_metrics(q, []), do: q - - defp select_metrics(q, ["pageviews" | rest]) do - from(s in q, - select_merge: %{"pageviews" => fragment("sum(? * ?)", s.sign, s.pageviews)} - ) - |> select_metrics(rest) - end - - defp select_metrics(q, ["visitors" | rest]) do - from(s in q, - select_merge: %{"visitors" => fragment("uniq(?) as count", s.user_id)} - ) - |> select_metrics(rest) - end - - defp select_metrics(q, ["visits" | rest]) do - from(s in q, - select_merge: %{ - "visits" => fragment("sum(?)", s.sign) - } - ) - |> select_metrics(rest) - end - - defp select_metrics(q, ["bounce_rate" | rest]) do - from(s in q, - select_merge: %{ - "bounce_rate" => fragment("round(sum(? * ?) / sum(?) * 100)", s.is_bounce, s.sign, s.sign) - } - ) - |> select_metrics(rest) - end - - defp select_metrics(q, ["visit_duration" | rest]) do - from(s in q, - select_merge: %{"visit_duration" => fragment("round(avg(? 
* ?))", s.duration, s.sign)} - ) - |> select_metrics(rest) - end - defp transform_keys(results, keys_to_replace) do Enum.map(results, fn map -> Enum.map(map, fn {key, val} -> diff --git a/lib/plausible/stats/query.ex b/lib/plausible/stats/query.ex index 97e0d0c3a..11262bca3 100644 --- a/lib/plausible/stats/query.ex +++ b/lib/plausible/stats/query.ex @@ -1,5 +1,9 @@ defmodule Plausible.Stats.Query do - defstruct date_range: nil, interval: nil, period: nil, filters: %{} + defstruct date_range: nil, + interval: nil, + period: nil, + filters: %{}, + sample_threshold: 10_000_000 def shift_back(%__MODULE__{period: "month"} = query, site) do # Querying current month to date @@ -212,10 +216,14 @@ defmodule Plausible.Stats.Query do cond do is_list && is_glob -> raise "Not implemented" + key == "visit:goal" -> {key, parse_goal_filter(val)} is_list -> {key, {:member, String.split(val, "|")}} is_glob -> {key, {:matches, val}} is_negated -> {key, {:is_not, val}} true -> {key, {:is, val}} end end + + defp parse_goal_filter("Visit " <> page), do: {:is, :page, page} + defp parse_goal_filter(event), do: {:is, :event, event} end diff --git a/lib/plausible/stats/timeseries.ex b/lib/plausible/stats/timeseries.ex index 8d52c6f4a..4647cf6c9 100644 --- a/lib/plausible/stats/timeseries.ex +++ b/lib/plausible/stats/timeseries.ex @@ -37,6 +37,8 @@ defmodule Plausible.Stats.Timeseries do end defp sessions_timeseries(site, query, metrics) do + query = Query.treat_page_filter_as_entry_page(query) + from(e in query_sessions(site, query), group_by: fragment("date"), order_by: fragment("date"), @@ -110,49 +112,6 @@ defmodule Plausible.Stats.Timeseries do ) end - defp select_event_metrics(q, []), do: q - - defp select_event_metrics(q, ["pageviews" | rest]) do - from(e in q, - select_merge: %{"pageviews" => fragment("countIf(? 
= 'pageview')", e.name)} - ) - |> select_event_metrics(rest) - end - - defp select_event_metrics(q, ["visitors" | rest]) do - from(e in q, - select_merge: %{"visitors" => fragment("uniq(?) as count", e.user_id)} - ) - |> select_event_metrics(rest) - end - - defp select_session_metrics(q, []), do: q - - defp select_session_metrics(q, ["bounce_rate" | rest]) do - from(s in q, - select_merge: %{ - "bounce_rate" => bounce_rate() - } - ) - |> select_session_metrics(rest) - end - - defp select_session_metrics(q, ["visits" | rest]) do - from(s in q, - select_merge: %{ - "visits" => fragment("sum(?)", s.sign) - } - ) - |> select_session_metrics(rest) - end - - defp select_session_metrics(q, ["visit_duration" | rest]) do - from(s in q, - select_merge: %{"visit_duration" => visit_duration()} - ) - |> select_session_metrics(rest) - end - defp empty_row(date, metrics) do Enum.reduce(metrics, %{"date" => date}, fn metric, row -> case metric do diff --git a/lib/plausible_web/controllers/api/external_stats_controller.ex b/lib/plausible_web/controllers/api/external_stats_controller.ex index 8a752fb3e..e647442c9 100644 --- a/lib/plausible_web/controllers/api/external_stats_controller.ex +++ b/lib/plausible_web/controllers/api/external_stats_controller.ex @@ -27,13 +27,13 @@ defmodule PlausibleWeb.Api.ExternalStatsController do Task.async(fn -> Plausible.Stats.aggregate(site, query, metrics) end) ]) - Enum.map(curr_result, fn {metric, %{value: current_val}} -> - %{value: prev_val} = prev_result[metric] + Enum.map(curr_result, fn {metric, %{"value" => current_val}} -> + %{"value" => prev_val} = prev_result[metric] {metric, %{ - value: current_val, - change: percent_change(prev_val, current_val) + "value" => current_val, + "change" => percent_change(prev_val, current_val) }} end) |> Enum.into(%{}) diff --git a/lib/plausible_web/controllers/api/stats_controller.ex b/lib/plausible_web/controllers/api/stats_controller.ex index 3b221d18b..5134770e2 100644 --- 
a/lib/plausible_web/controllers/api/stats_controller.ex +++ b/lib/plausible_web/controllers/api/stats_controller.ex @@ -58,8 +58,8 @@ defmodule PlausibleWeb.Api.StatsController do defp fetch_top_stats(site, %Query{period: "30m"} = query) do %{ - visitors: %{value: visitors}, - pageviews: %{value: pageviews} + "visitors" => %{"value" => visitors}, + "pageviews" => %{"value" => pageviews} } = Stats.aggregate(site, query, ["visitors", "pageviews"]) stats = [ @@ -77,7 +77,7 @@ defmodule PlausibleWeb.Api.StatsController do } ] - {stats, 100} + {stats, nil} end defp fetch_top_stats(site, %Query{filters: %{"visit:goal" => _goal}} = query) do @@ -85,21 +85,21 @@ defmodule PlausibleWeb.Api.StatsController do prev_query = Query.shift_back(query, site) %{ - visitors: %{value: unique_visitors} + "visitors" => %{"value" => unique_visitors} } = Stats.aggregate(site, %{query | filters: total_filter}, ["visitors"]) %{ - visitors: %{value: prev_unique_visitors} + "visitors" => %{"value" => prev_unique_visitors} } = Stats.aggregate(site, %{prev_query | filters: total_filter}, ["visitors"]) %{ - visitors: %{value: converted_visitors}, - events: %{value: completions} + "visitors" => %{"value" => converted_visitors}, + "events" => %{"value" => completions} } = Stats.aggregate(site, query, ["visitors", "events"]) %{ - visitors: %{value: prev_converted_visitors}, - events: %{value: prev_completions} + "visitors" => %{"value" => prev_converted_visitors}, + "events" => %{"value" => prev_completions} } = Stats.aggregate(site, prev_query, ["visitors", "events"]) conversion_rate = calculate_cr(unique_visitors, converted_visitors) @@ -128,7 +128,7 @@ defmodule PlausibleWeb.Api.StatsController do } ] - {stats, 0} + {stats, 100} end defp fetch_top_stats(site, query) do @@ -136,9 +136,9 @@ defmodule PlausibleWeb.Api.StatsController do metrics = if query.filters["event:page"] do - ["visitors", "pageviews", "bounce_rate", "time_on_page"] + ["visitors", "pageviews", "bounce_rate", "time_on_page", 
"sample_percent"] else - ["visitors", "pageviews", "bounce_rate", "visit_duration"] + ["visitors", "pageviews", "bounce_rate", "visit_duration", "sample_percent"] end current_results = Stats.aggregate(site, query, metrics) @@ -146,28 +146,28 @@ defmodule PlausibleWeb.Api.StatsController do stats = [ - top_stats_entry(current_results, prev_results, "Unique visitors", :visitors), - top_stats_entry(current_results, prev_results, "Total pageviews", :pageviews), - top_stats_entry(current_results, prev_results, "Bounce rate", :bounce_rate), - top_stats_entry(current_results, prev_results, "Visit duration", :visit_duration), - top_stats_entry(current_results, prev_results, "Time on page", :time_on_page) + top_stats_entry(current_results, prev_results, "Unique visitors", "visitors"), + top_stats_entry(current_results, prev_results, "Total pageviews", "pageviews"), + top_stats_entry(current_results, prev_results, "Bounce rate", "bounce_rate"), + top_stats_entry(current_results, prev_results, "Visit duration", "visit_duration"), + top_stats_entry(current_results, prev_results, "Time on page", "time_on_page") ] |> Enum.filter(& &1) - {stats, 0} + {stats, current_results["sample_percent"]["value"]} end defp top_stats_entry(current_results, prev_results, name, key) do if current_results[key] do %{ name: name, - value: current_results[key][:value], - change: calculate_change(key, prev_results[key][:value], current_results[key][:value]) + value: current_results[key]["value"], + change: calculate_change(key, prev_results[key]["value"], current_results[key]["value"]) } end end - defp calculate_change(:bounce_rate, old_count, new_count) do + defp calculate_change("bounce_rate", old_count, new_count) do if old_count > 0, do: new_count - old_count end @@ -195,7 +195,6 @@ defmodule PlausibleWeb.Api.StatsController do Query.from(site.timezone, params) |> Filters.add_prefix() |> maybe_hide_noref("visit:source", params) - |> Query.treat_page_filter_as_entry_page() pagination = 
parse_pagination(params) @@ -272,7 +271,7 @@ defmodule PlausibleWeb.Api.StatsController do google_api().fetch_stats(site, query, params["limit"] || 9) end - %{visitors: %{value: total_visitors}} = Stats.aggregate(site, query, ["visitors"]) + %{"visitors" => %{"value" => total_visitors}} = Stats.aggregate(site, query, ["visitors"]) case search_terms do nil -> @@ -306,7 +305,7 @@ defmodule PlausibleWeb.Api.StatsController do Stats.breakdown(site, query, "visit:referrer", metrics, pagination) |> transform_keys(%{"referrer" => "name", "visitors" => "count"}) - %{visitors: %{value: total_visitors}} = Stats.aggregate(site, query, ["visitors"]) + %{"visitors" => %{"value" => total_visitors}} = Stats.aggregate(site, query, ["visitors"]) json(conn, %{referrers: referrers, total_visitors: total_visitors}) end @@ -468,7 +467,7 @@ defmodule PlausibleWeb.Api.StatsController do total_filter = Map.merge(query.filters, %{"visit:goal" => nil}) - %{visitors: %{value: total_visitors}} = + %{"visitors" => %{"value" => total_visitors}} = Stats.aggregate(site, %{query | filters: total_filter}, ["visitors"]) prop_names = Stats.props(site, query) @@ -492,7 +491,7 @@ defmodule PlausibleWeb.Api.StatsController do total_filter = Map.merge(query.filters, %{"visit:goal" => nil}) - %{visitors: %{value: unique_visitors}} = + %{"visitors" => %{"value" => unique_visitors}} = Stats.aggregate(site, %{query | filters: total_filter}, ["visitors"]) prop_name = "event:props:" <> params["prop_name"] diff --git a/mix.lock b/mix.lock index 4194ec35d..a98f902aa 100644 --- a/mix.lock +++ b/mix.lock @@ -12,7 +12,7 @@ "bunt": {:hex, :bunt, "0.2.0", "951c6e801e8b1d2cbe58ebbd3e616a869061ddadcc4863d0a2182541acae9a38", [:mix], [], "hexpm", "7af5c7e09fe1d40f76c8e4f9dd2be7cebd83909f31fee7cd0e9eadc567da8353"}, "cachex": {:hex, :cachex, "3.3.0", "6f2ebb8f27491fe39121bd207c78badc499214d76c695658b19d6079beeca5c2", [:mix], [{:eternal, "~> 1.2", [hex: :eternal, repo: "hexpm", optional: false]}, {:jumper, "~> 1.0", [hex: 
:jumper, repo: "hexpm", optional: false]}, {:sleeplocks, "~> 1.1", [hex: :sleeplocks, repo: "hexpm", optional: false]}, {:unsafe, "~> 1.0", [hex: :unsafe, repo: "hexpm", optional: false]}], "hexpm", "d90e5ee1dde14cef33f6b187af4335b88748b72b30c038969176cd4e6ccc31a1"}, "certifi": {:hex, :certifi, "2.6.1", "dbab8e5e155a0763eea978c913ca280a6b544bfa115633fa20249c3d396d9493", [:rebar3], [], "hexpm", "524c97b4991b3849dd5c17a631223896272c6b0af446778ba4675a1dff53bb7e"}, - "clickhouse_ecto": {:git, "https://github.com/plausible/clickhouse_ecto.git", "07adb8da725346e4de6d376069192e9942fe7a5b", []}, + "clickhouse_ecto": {:git, "https://github.com/plausible/clickhouse_ecto.git", "93d86c48230f85797555c348dbe9e8738d3b8cc2", []}, "clickhousex": {:git, "https://github.com/plausible/clickhousex", "0832dd4b1af1f0eba1d1018c231bf0d8d281f031", []}, "combination": {:hex, :combination, "0.0.3", "746aedca63d833293ec6e835aa1f34974868829b1486b1e1cb0685f0b2ae1f41", [:mix], [], "hexpm", "72b099f463df42ef7dc6371d250c7070b57b6c5902853f69deb894f79eda18ca"}, "combine": {:hex, :combine, "0.10.0", "eff8224eeb56498a2af13011d142c5e7997a80c8f5b97c499f84c841032e429f", [:mix], [], "hexpm", "1b1dbc1790073076580d0d1d64e42eae2366583e7aecd455d1215b0d16f2451b"},