Add new Pages / Visit metric to Stats API Aggregate and Timeseries (#2714)

* refactor metric validation

* link to the correct docs section

* add the new metric to aggregate API

* explicitly remove __internal_visits

The overall metric selection is well defined by
`Plausible.Stats.Timeseries.empty_row/2`. The only metric that needs
to be removed from the timeseries response is __internal_visits.

This commit also moves the `remove_internal_visits_metric` function to a
new Util module to be used by both breakdown and timeseries.

* add the new metric to timeseries API

* mix format

* add moduledoc to keep credo happy

* convert pages_per_visit to string straight away

* do rounding in db query

* query # of visits with sum(sign) instead

* stop converting pages_per_visit to string
This commit is contained in:
RobertJoonas 2023-03-06 12:07:53 +02:00 committed by GitHub
parent 43bf7dd09f
commit 0d6c72d50f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 248 additions and 54 deletions

View File

@ -4,7 +4,7 @@ defmodule Plausible.Stats.Aggregate do
import Plausible.Stats.{Base, Imported}
@event_metrics [:visitors, :pageviews, :events, :sample_percent]
@session_metrics [:visits, :bounce_rate, :visit_duration, :sample_percent]
@session_metrics [:visits, :bounce_rate, :visit_duration, :pages_per_visit, :sample_percent]
def aggregate(site, query, metrics) do
event_metrics = Enum.filter(metrics, &(&1 in @event_metrics))
@ -21,9 +21,8 @@ defmodule Plausible.Stats.Aggregate do
Plausible.ClickhouseRepo.parallel_tasks([session_task, event_task, time_on_page_task])
|> Enum.reduce(%{}, fn aggregate, task_result -> Map.merge(aggregate, task_result) end)
|> Enum.map(fn {metric, value} ->
{metric, %{value: round(value || 0)}}
end)
|> Enum.map(&maybe_round_value/1)
|> Enum.map(fn {metric, value} -> {metric, %{value: value}} end)
|> Enum.into(%{})
end
@ -109,4 +108,14 @@ defmodule Plausible.Stats.Aggregate do
[[time_on_page]] = res.rows
%{time_on_page: time_on_page}
end
@metrics_to_round [:bounce_rate, :time_on_page, :visit_duration, :sample_percent]
defp maybe_round_value({metric, nil}), do: {metric, 0}
defp maybe_round_value({metric, value}) when metric in @metrics_to_round do
{metric, round(value)}
end
defp maybe_round_value(entry), do: entry
end

View File

@ -314,7 +314,7 @@ defmodule Plausible.Stats.Base do
def select_session_metrics(q, [:visits | rest]) do
from(s in q,
select_merge: %{
visits: fragment("toUInt64(round(uniq(?) * any(_sample_factor)))", s.session_id)
visits: fragment("toUInt64(round(sum(?) * any(_sample_factor)))", s.sign)
}
)
|> select_session_metrics(rest)
@ -359,6 +359,16 @@ defmodule Plausible.Stats.Base do
|> select_session_metrics(rest)
end
def select_session_metrics(q, [:pages_per_visit | rest]) do
from(s in q,
select_merge: %{
pages_per_visit:
fragment("ifNotFinite(round(sum(? * ?) / sum(?), 2), 0)", s.sign, s.pageviews, s.sign)
}
)
|> select_session_metrics(rest)
end
def select_session_metrics(q, [:sample_percent | rest]) do
from(e in q,
select_merge: %{

View File

@ -1,6 +1,6 @@
defmodule Plausible.Stats.Breakdown do
use Plausible.ClickhouseRepo
import Plausible.Stats.{Base, Imported}
import Plausible.Stats.{Base, Imported, Util}
require OpenTelemetry.Tracer, as: Tracer
alias Plausible.Stats.Query
alias Plausible.Goals
@ -584,18 +584,6 @@ defmodule Plausible.Stats.Breakdown do
end)
end
defp remove_internal_visits_metric(results, metrics) do
# "__internal_visits" is fetched when querying bounce rate and visit duration, as it
# is needed to calculate these from imported data. Let's remove it from the
# result if it wasn't requested.
if :bounce_rate in metrics or :visit_duration in metrics do
results
|> Enum.map(&Map.delete(&1, :__internal_visits))
else
results
end
end
defp apply_pagination(q, {limit, page}) do
offset = (page - 1) * limit

View File

@ -1,11 +1,11 @@
defmodule Plausible.Stats.Timeseries do
use Plausible.ClickhouseRepo
alias Plausible.Stats.Query
import Plausible.Stats.Base
import Plausible.Stats.{Base, Util}
use Plausible.Stats.Fragments
@event_metrics [:visitors, :pageviews]
@session_metrics [:visits, :bounce_rate, :visit_duration]
@session_metrics [:visits, :bounce_rate, :visit_duration, :pages_per_visit]
def timeseries(site, query, metrics) do
steps = buckets(query)
@ -45,6 +45,7 @@ defmodule Plausible.Stats.Timeseries do
|> select_session_metrics(metrics)
|> Plausible.Stats.Imported.merge_imported_timeseries(site, query, metrics)
|> ClickhouseRepo.all()
|> remove_internal_visits_metric(metrics)
end
defp buckets(%Query{interval: "month"} = query) do
@ -191,6 +192,7 @@ defmodule Plausible.Stats.Timeseries do
:pageviews -> Map.merge(row, %{pageviews: 0})
:visitors -> Map.merge(row, %{visitors: 0})
:visits -> Map.merge(row, %{visits: 0})
:pages_per_visit -> Map.merge(row, %{pages_per_visit: 0.0})
:bounce_rate -> Map.merge(row, %{bounce_rate: nil})
:visit_duration -> Map.merge(row, %{:visit_duration => nil})
end

View File

@ -0,0 +1,19 @@
defmodule Plausible.Stats.Util do
@moduledoc """
Utilities for modifying stat results
"""
@doc """
`__internal_visits` is fetched when querying bounce rate and visit duration, as it
is needed to calculate these from imported data. This function removes that metric
from all entries in the results list.
"""
def remove_internal_visits_metric(results, metrics) do
if :bounce_rate in metrics or :visit_duration in metrics do
results
|> Enum.map(&Map.delete(&1, :__internal_visits))
else
results
end
end
end

View File

@ -17,7 +17,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController do
with :ok <- validate_period(params),
:ok <- validate_date(params),
query <- Query.from(site, params),
{:ok, metrics} <- parse_metrics(params, nil, query) do
{:ok, metrics} <- parse_and_validate_metrics(params, nil, query) do
results =
if params["compare"] == "previous_period" do
prev_query = Query.shift_back(query, site)
@ -42,7 +42,11 @@ defmodule PlausibleWeb.Api.ExternalStatsController do
Plausible.Stats.aggregate(site, query, metrics)
end
json(conn, %{results: Map.take(results, metrics)})
results =
results
|> Map.take(metrics)
json(conn, %{results: results})
else
{:error, msg} ->
conn
@ -59,7 +63,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController do
:ok <- validate_date(params),
{:ok, property} <- validate_property(params),
query <- Query.from(site, params),
{:ok, metrics} <- parse_metrics(params, property, query),
{:ok, metrics} <- parse_and_validate_metrics(params, property, query),
{:ok, limit} <- validate_or_default_limit(params) do
page = String.to_integer(Map.get(params, "page", "1"))
results = Plausible.Stats.breakdown(site, query, property, metrics, {limit, page})
@ -116,42 +120,53 @@ defmodule PlausibleWeb.Api.ExternalStatsController do
defp event_only_property?(_), do: false
@event_metrics ["visitors", "pageviews", "events"]
@session_metrics ["visits", "bounce_rate", "visit_duration"]
defp parse_metrics(params, property, query) do
@session_metrics ["visits", "bounce_rate", "visit_duration", "pages_per_visit"]
defp parse_and_validate_metrics(params, property, query) do
metrics =
Map.get(params, "metrics", "visitors")
|> String.split(",")
case validate_all_metrics(metrics, property, query) do
{:error, reason} -> {:error, reason}
metrics -> {:ok, Enum.map(metrics, &String.to_atom/1)}
end
end
defp validate_all_metrics(metrics, property, query) do
Enum.reduce_while(metrics, [], fn metric, acc ->
case validate_metric(metric, property, query) do
{:ok, metric} -> {:cont, acc ++ [metric]}
{:error, reason} -> {:halt, {:error, reason}}
end
end)
end
defp validate_metric(metric, _, _) when metric in @event_metrics, do: {:ok, metric}
defp validate_metric(metric, property, query) when metric in @session_metrics do
event_only_filter = Map.keys(query.filters) |> Enum.find(&event_only_property?/1)
valid_metrics =
if event_only_property?(property) || event_only_filter do
@event_metrics
else
@event_metrics ++ @session_metrics
end
cond do
metric == "pages_per_visit" && property != nil ->
{:error, "Metric `#{metric}` is not supported in breakdown queries"}
invalid_metric = Enum.find(metrics, fn metric -> metric not in valid_metrics end)
event_only_property?(property) ->
{:error, "Session metric `#{metric}` cannot be queried for breakdown by `#{property}`."}
if invalid_metric do
cond do
event_only_property?(property) && invalid_metric in @session_metrics ->
{:error,
"Session metric `#{invalid_metric}` cannot be queried for breakdown by `#{property}`."}
event_only_filter ->
{:error,
"Session metric `#{metric}` cannot be queried when using a filter on `#{event_only_filter}`."}
event_only_filter && invalid_metric in @session_metrics ->
{:error,
"Session metric `#{invalid_metric}` cannot be queried when using a filter on `#{event_only_filter}`."}
true ->
{:error,
"The metric `#{invalid_metric}` is not recognized. Find valid metrics from the documentation: https://plausible.io/docs/stats-api#get-apiv1statsbreakdown"}
end
else
{:ok, Enum.map(metrics, &String.to_atom/1)}
true ->
{:ok, metric}
end
end
defp validate_metric(metric, _, _) do
{:error,
"The metric `#{metric}` is not recognized. Find valid metrics from the documentation: https://plausible.io/docs/stats-api#metrics"}
end
def timeseries(conn, params) do
site = conn.assigns[:site]
params = Map.put(params, "sample_threshold", "infinite")
@ -160,10 +175,10 @@ defmodule PlausibleWeb.Api.ExternalStatsController do
:ok <- validate_date(params),
:ok <- validate_interval(params),
query <- Query.from(site, params),
{:ok, metrics} <- parse_metrics(params, nil, query) do
{:ok, metrics} <- parse_and_validate_metrics(params, nil, query) do
graph = Plausible.Stats.timeseries(site, query, metrics)
metrics = metrics ++ [:date]
json(conn, %{results: Enum.map(graph, &Map.take(&1, metrics))})
json(conn, %{results: graph})
else
{:error, msg} ->
conn

View File

@ -73,7 +73,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController.AggregateTest do
assert json_response(conn, 400) == %{
"error" =>
"The metric `led_zeppelin` is not recognized. Find valid metrics from the documentation: https://plausible.io/docs/stats-api#get-apiv1statsbreakdown"
"The metric `led_zeppelin` is not recognized. Find valid metrics from the documentation: https://plausible.io/docs/stats-api#metrics"
}
end
@ -116,7 +116,32 @@ defmodule PlausibleWeb.Api.ExternalStatsController.AggregateTest do
}
end
test "aggregates visitors, pageviews, visits, bounce rate and visit duration", %{
test "rounds pages_per_visit to two decimal places", %{
conn: conn,
site: site
} do
populate_stats([
build(:pageview, user_id: @user_id, domain: site.domain, timestamp: ~N[2021-01-01 00:00:00]),
build(:pageview, user_id: @user_id, domain: site.domain, timestamp: ~N[2021-01-01 00:25:00]),
build(:pageview, user_id: 456, domain: site.domain, timestamp: ~N[2021-01-01 00:00:00]),
build(:pageview, user_id: 456, domain: site.domain, timestamp: ~N[2021-01-01 00:25:00]),
build(:pageview, domain: site.domain, timestamp: ~N[2021-01-01 00:00:00])
])
conn =
get(conn, "/api/v1/stats/aggregate", %{
"site_id" => site.domain,
"period" => "day",
"date" => "2021-01-01",
"metrics" => "pages_per_visit"
})
assert json_response(conn, 200)["results"] == %{
"pages_per_visit" => %{"value" => 1.67}
}
end
test "aggregates all metrics in a single query", %{
conn: conn,
site: site
} do
@ -131,13 +156,14 @@ defmodule PlausibleWeb.Api.ExternalStatsController.AggregateTest do
"site_id" => site.domain,
"period" => "day",
"date" => "2021-01-01",
"metrics" => "pageviews,visits,visitors,bounce_rate,visit_duration"
"metrics" => "pageviews,visits,pages_per_visit,visitors,bounce_rate,visit_duration"
})
assert json_response(conn, 200)["results"] == %{
"pageviews" => %{"value" => 3},
"visitors" => %{"value" => 2},
"visits" => %{"value" => 2},
"pages_per_visit" => %{"value" => 1.5},
"bounce_rate" => %{"value" => 50},
"visit_duration" => %{"value" => 750}
}

View File

@ -67,7 +67,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController.BreakdownTest do
assert json_response(conn, 400) == %{
"error" =>
"The metric `baa` is not recognized. Find valid metrics from the documentation: https://plausible.io/docs/stats-api#get-apiv1statsbreakdown"
"The metric `baa` is not recognized. Find valid metrics from the documentation: https://plausible.io/docs/stats-api#metrics"
}
end

View File

@ -797,4 +797,129 @@ defmodule PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do
assert second == %{"date" => "2021-01-02", "visitors" => 1}
end
end
describe "metrics" do
test "shows pageviews,visits,pages_per_visit for last 7d", %{conn: conn, site: site} do
populate_stats([
build(:pageview,
user_id: @user_id,
domain: site.domain,
timestamp: ~N[2021-01-01 00:00:00]
),
build(:pageview,
user_id: @user_id,
domain: site.domain,
timestamp: ~N[2021-01-01 00:05:00]
),
build(:pageview, domain: site.domain, timestamp: ~N[2021-01-01 00:00:00]),
build(:pageview, domain: site.domain, timestamp: ~N[2021-01-07 23:59:00])
])
conn =
get(conn, "/api/v1/stats/timeseries", %{
"site_id" => site.domain,
"period" => "7d",
"metrics" => "pageviews,visits,pages_per_visit",
"date" => "2021-01-07"
})
assert json_response(conn, 200) == %{
"results" => [
%{
"date" => "2021-01-01",
"pageviews" => 3,
"visits" => 2,
"pages_per_visit" => 1.5
},
%{
"date" => "2021-01-02",
"pageviews" => 0,
"visits" => 0,
"pages_per_visit" => 0.0
},
%{
"date" => "2021-01-03",
"pageviews" => 0,
"visits" => 0,
"pages_per_visit" => 0.0
},
%{
"date" => "2021-01-04",
"pageviews" => 0,
"visits" => 0,
"pages_per_visit" => 0.0
},
%{
"date" => "2021-01-05",
"pageviews" => 0,
"visits" => 0,
"pages_per_visit" => 0.0
},
%{
"date" => "2021-01-06",
"pageviews" => 0,
"visits" => 0,
"pages_per_visit" => 0.0
},
%{
"date" => "2021-01-07",
"pageviews" => 1,
"visits" => 1,
"pages_per_visit" => 1.0
}
]
}
end
test "rounds pages_per_visit to two decimal places", %{
conn: conn,
site: site
} do
populate_stats([
build(:pageview,
user_id: @user_id,
domain: site.domain,
timestamp: ~N[2021-01-01 00:00:00]
),
build(:pageview,
user_id: @user_id,
domain: site.domain,
timestamp: ~N[2021-01-01 00:05:00]
),
build(:pageview,
user_id: @user_id,
domain: site.domain,
timestamp: ~N[2021-01-03 00:00:00]
),
build(:pageview,
user_id: @user_id,
domain: site.domain,
timestamp: ~N[2021-01-03 00:01:00]
),
build(:pageview, domain: site.domain, timestamp: ~N[2021-01-03 00:00:00]),
build(:pageview, domain: site.domain, timestamp: ~N[2021-01-03 00:00:00]),
build(:pageview, domain: site.domain, timestamp: ~N[2021-01-07 23:59:00])
])
conn =
get(conn, "/api/v1/stats/timeseries", %{
"site_id" => site.domain,
"period" => "7d",
"metrics" => "pages_per_visit",
"date" => "2021-01-07"
})
assert json_response(conn, 200) == %{
"results" => [
%{"date" => "2021-01-01", "pages_per_visit" => 2.0},
%{"date" => "2021-01-02", "pages_per_visit" => 0.0},
%{"date" => "2021-01-03", "pages_per_visit" => 1.33},
%{"date" => "2021-01-04", "pages_per_visit" => 0.0},
%{"date" => "2021-01-05", "pages_per_visit" => 0.0},
%{"date" => "2021-01-06", "pages_per_visit" => 0.0},
%{"date" => "2021-01-07", "pages_per_visit" => 1.0}
]
}
end
end
end