Stats API: allow escaping | literal character with \| when filtering (#2266)

* add separate module for filter parsing

* add tests for filter parser

* allow escaping pipe character in filter value

* add documentation and doctests

* do not remove escape chars from wildcard values

* changelog update

* change the parse_filters/1 function argument
This commit is contained in:
RobertJoonas 2022-09-26 16:20:08 +03:00 committed by GitHub
parent 620e29ab33
commit 155e274150
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 189 additions and 49 deletions

View File

@ -5,6 +5,7 @@ All notable changes to this project will be documented in this file.
## Unreleased
### Added
- The ability to escape `|` characters with `\` in Stats API filter values
- An upper bound of 1000 to the `limit` parameter in Stats API
- The `exclusions` script extension now also takes a `data-include` attribute tag
- A `file-downloads` script extension for automatically tracking file downloads as custom events

View File

@ -0,0 +1,80 @@
defmodule Plausible.Stats.FilterParser do
  @moduledoc """
  A module for parsing filters used in stat queries.
  """

  # Matches a `|` that is NOT immediately preceded by a backslash, i.e. a pipe
  # that separates list members rather than an escaped literal pipe (`\|`).
  @non_escaped_pipe_regex ~r/(?<!\\)\|/

  @doc """
  Parses different filter formats.

  Depending on the format and type of the `filters` argument, returns:

    * a decoded map, when `filters` is an encoded JSON object
    * a parsed filter map, when `filters` is a filter expression string
    * the same map, when `filters` is a map

  Returns an empty map when argument type is unexpected (e.g. `nil`).

  A filter expression is a `;`-separated list of `key==value` / `key!=value`
  pairs. Inside a value, `|` separates list members and can be escaped as
  `\\|` to be taken literally. Segments that are not a well-formed
  `key<op>value` pair (for example an empty string or a trailing `;`) are
  skipped instead of raising.

  ### Examples:

      iex> FilterParser.parse_filters("{\\"page\\":\\"/blog/**\\"}")
      %{"page" => "/blog/**"}

      iex> FilterParser.parse_filters("visit:browser!=Chrome")
      %{"visit:browser" => {:is_not, "Chrome"}}

      iex> FilterParser.parse_filters("visit:country==FR|GB")
      %{"visit:country" => {:member, ["FR", "GB"]}}

      iex> FilterParser.parse_filters("")
      %{}

      iex> FilterParser.parse_filters(nil)
      %{}
  """
  @spec parse_filters(term()) :: map()
  def parse_filters(filters) when is_binary(filters) do
    case Jason.decode(filters) do
      # Only a JSON object is a usable filter map. Other valid JSON documents
      # (numbers, arrays, `true`, ...) must not leak out as non-map filters.
      {:ok, parsed} when is_map(parsed) -> parsed
      _not_a_json_object -> parse_filter_expression(filters)
    end
  end

  def parse_filters(filters) when is_map(filters), do: filters
  def parse_filters(_), do: %{}

  # Splits the expression on ";" and collects every well-formed filter into a
  # map keyed by filter name. Malformed segments are dropped.
  defp parse_filter_expression(str) do
    str
    |> String.split(";")
    |> Enum.flat_map(fn segment ->
      case parse_single_filter(segment) do
        {:ok, filter} -> [filter]
        :error -> []
      end
    end)
    |> Map.new()
  end

  # Parses one `key==value` / `key!=value` segment.
  # Returns `{:ok, {key, filter}}`, or `:error` when the segment does not split
  # into exactly a key and a value (previously this raised a MatchError).
  defp parse_single_filter(str) do
    parts =
      str
      |> String.trim()
      |> String.split(["==", "!="], trim: true)
      |> Enum.map(&String.trim/1)

    case parts do
      [key, raw_value] -> {:ok, build_filter(key, raw_value, String.contains?(str, "!="))}
      _malformed -> :error
    end
  end

  # Chooses the filter tuple for an already separated key/value pair.
  # Clause order in the `cond` matters: earlier checks take precedence.
  defp build_filter(key, raw_value, negated?) do
    list? = Regex.match?(@non_escaped_pipe_regex, raw_value)
    wildcard? = String.contains?(raw_value, "*")
    final_value = remove_escape_chars(raw_value)

    cond do
      # Goal filters are always `:is`; negation and wildcards are not consulted.
      key == "event:goal" -> {key, parse_goal_filter(final_value)}
      # Wildcard values are passed through untouched, so escape characters
      # and pipes stay in the value.
      wildcard? && negated? -> {key, {:does_not_match, raw_value}}
      wildcard? -> {key, {:matches, raw_value}}
      # A list takes precedence over negation: `!=` with a list is `:member`.
      list? -> {key, {:member, parse_member_list(raw_value)}}
      negated? -> {key, {:is_not, final_value}}
      true -> {key, {:is, final_value}}
    end
  end

  # A goal value starting with "Visit " refers to a page, anything else is a
  # custom event name.
  defp parse_goal_filter("Visit " <> page), do: {:is, :page, page}
  defp parse_goal_filter(event), do: {:is, :event, event}

  # Turns every escaped pipe (`\|`) into a plain `|`.
  defp remove_escape_chars(value) do
    String.replace(value, "\\|", "|")
  end

  # Splits on unescaped pipes only, then unescapes each member.
  defp parse_member_list(raw_value) do
    raw_value
    |> String.split(@non_escaped_pipe_regex)
    |> Enum.map(&remove_escape_chars/1)
  end
end

View File

@ -7,6 +7,7 @@ defmodule Plausible.Stats.Query do
include_imported: false
@default_sample_threshold 20_000_000
alias Plausible.Stats.FilterParser
def shift_back(%__MODULE__{period: "year"} = query, site) do
# Querying current year to date
@ -68,7 +69,7 @@ defmodule Plausible.Stats.Query do
period: "realtime",
interval: "minute",
date_range: Date.range(date, date),
filters: parse_filters(params),
filters: FilterParser.parse_filters(params["filters"]),
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold),
include_imported: false
}
@ -81,7 +82,7 @@ defmodule Plausible.Stats.Query do
period: "day",
date_range: Date.range(date, date),
interval: "hour",
filters: parse_filters(params),
filters: FilterParser.parse_filters(params["filters"]),
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold)
}
|> maybe_include_imported(site, params)
@ -95,7 +96,7 @@ defmodule Plausible.Stats.Query do
period: "7d",
date_range: Date.range(start_date, end_date),
interval: "date",
filters: parse_filters(params),
filters: FilterParser.parse_filters(params["filters"]),
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold)
}
|> maybe_include_imported(site, params)
@ -109,7 +110,7 @@ defmodule Plausible.Stats.Query do
period: "30d",
date_range: Date.range(start_date, end_date),
interval: "date",
filters: parse_filters(params),
filters: FilterParser.parse_filters(params["filters"]),
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold)
}
|> maybe_include_imported(site, params)
@ -125,7 +126,7 @@ defmodule Plausible.Stats.Query do
period: "month",
date_range: Date.range(start_date, end_date),
interval: "date",
filters: parse_filters(params),
filters: FilterParser.parse_filters(params["filters"]),
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold)
}
|> maybe_include_imported(site, params)
@ -144,7 +145,7 @@ defmodule Plausible.Stats.Query do
period: "6mo",
date_range: Date.range(start_date, end_date),
interval: Map.get(params, "interval", "month"),
filters: parse_filters(params),
filters: FilterParser.parse_filters(params["filters"]),
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold)
}
|> maybe_include_imported(site, params)
@ -163,7 +164,7 @@ defmodule Plausible.Stats.Query do
period: "12mo",
date_range: Date.range(start_date, end_date),
interval: Map.get(params, "interval", "month"),
filters: parse_filters(params),
filters: FilterParser.parse_filters(params["filters"]),
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold)
}
|> maybe_include_imported(site, params)
@ -180,7 +181,7 @@ defmodule Plausible.Stats.Query do
period: "year",
date_range: Date.range(start_date, end_date),
interval: Map.get(params, "interval", "month"),
filters: parse_filters(params),
filters: FilterParser.parse_filters(params["filters"]),
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold)
}
|> maybe_include_imported(site, params)
@ -245,7 +246,7 @@ defmodule Plausible.Stats.Query do
period: "custom",
date_range: Date.range(from_date, to_date),
interval: Map.get(params, "interval", "date"),
filters: parse_filters(params),
filters: FilterParser.parse_filters(params["filters"]),
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold)
}
|> maybe_include_imported(site, params)
@ -326,46 +327,6 @@ defmodule Plausible.Stats.Query do
end
end
# Pre-refactor copy of the filter parsing entry point, taking the whole params
# map and reading its "filters" entry.
# A binary value is first tried as JSON; when decoding fails the value is
# parsed as a filter expression instead.
defp parse_filters(%{"filters" => filters}) when is_binary(filters) do
case Jason.decode(filters) do
{:ok, parsed} -> parsed
# NOTE(review): err.data is presumably the original undecodable input string
# carried by the Jason decode error; confirm against Jason's documentation.
{:error, err} -> parse_filter_expression(err.data)
end
end
# A "filters" value that is already a map is returned unchanged.
defp parse_filters(%{"filters" => filters}) when is_map(filters), do: filters
# Any other params (missing or differently typed "filters") yield no filters.
defp parse_filters(_), do: %{}
# Splits a filter expression on ";", parses every segment with
# parse_single_filter/1 and collects the resulting {key, filter} tuples
# into a map.
defp parse_filter_expression(str) do
filters = String.split(str, ";")
Enum.map(filters, &parse_single_filter/1)
|> Enum.into(%{})
end
# Parses one "key==value" / "key!=value" segment into a {key, filter} tuple.
# The match below requires the split to produce exactly two parts; any other
# shape raises a MatchError.
defp parse_single_filter(str) do
[key, val] =
String.trim(str)
|> String.split(["==", "!="], trim: true)
|> Enum.map(&String.trim/1)
# Negation is detected on the whole segment, not only on the operator position.
is_negated = String.contains?(str, "!=")
# Any "|" in the value marks it as a list; this version has no escaping.
is_list = String.contains?(val, "|")
is_wildcard = String.contains?(val, "*")
# Clause order matters: goal filters first, then wildcards (which keep the
# value whole even if it contains "|"), then lists, then plain (in)equality.
cond do
key == "event:goal" -> {key, parse_goal_filter(val)}
is_wildcard && is_negated -> {key, {:does_not_match, val}}
is_wildcard -> {key, {:matches, val}}
is_list -> {key, {:member, String.split(val, "|")}}
is_negated -> {key, {:is_not, val}}
true -> {key, {:is, val}}
end
end
# A goal value prefixed with "Visit " is a page goal; the prefix is stripped
# and the remainder is used as the page.
defp parse_goal_filter("Visit " <> page), do: {:is, :page, page}
# Every other goal value is treated as a custom event name.
defp parse_goal_filter(event), do: {:is, :event, event}
defp maybe_include_imported(query, site, params) do
imported_data_requested = params["with_imported"] == "true"
has_imported_data = site.imported_data && site.imported_data.status == "ok"

View File

@ -0,0 +1,80 @@
defmodule Plausible.Stats.FilterParserTest do
  use ExUnit.Case, async: true
  alias Plausible.Stats.FilterParser

  doctest Plausible.Stats.FilterParser

  # Shared assertion: parsing `input` must yield exactly `expected_output`.
  def assert_parsed(input, expected_output) do
    parsed = FilterParser.parse_filters(input)
    assert parsed == expected_output
  end

  describe "parses filter expression" do
    test "simple positive" do
      assert_parsed("event:name==pageview", %{"event:name" => {:is, "pageview"}})
    end

    test "simple negative" do
      assert_parsed("event:name!=pageview", %{"event:name" => {:is_not, "pageview"}})
    end

    test "whitespace is trimmed" do
      assert_parsed(" event:name == pageview ", %{"event:name" => {:is, "pageview"}})
    end

    test "wildcard" do
      assert_parsed("event:page==/blog/post-*", %{"event:page" => {:matches, "/blog/post-*"}})
    end

    test "negative wildcard" do
      expected = %{"event:page" => {:does_not_match, "/blog/post-*"}}
      assert_parsed("event:page!=/blog/post-*", expected)
    end

    test "custom event goal" do
      assert_parsed("event:goal==Signup", %{"event:goal" => {:is, :event, "Signup"}})
    end

    test "pageview goal" do
      assert_parsed("event:goal==Visit /blog", %{"event:goal" => {:is, :page, "/blog"}})
    end

    test "member" do
      expected = %{"visit:country" => {:member, ["FR", "GB", "DE"]}}
      assert_parsed("visit:country==FR|GB|DE", expected)
    end

    test "member + wildcard" do
      expected = %{"event:page" => {:matches, "/blog**|/newsletter|/*/"}}
      assert_parsed("event:page==/blog**|/newsletter|/*/", expected)
    end

    test "combined with \";\"" do
      expected = %{
        "event:page" => {:matches, "/blog**|/newsletter|/*/"},
        "visit:country" => {:member, ["FR", "GB", "DE"]}
      }

      assert_parsed("event:page==/blog**|/newsletter|/*/ ; visit:country==FR|GB|DE", expected)
    end

    test "escaping pipe character" do
      assert_parsed("utm_campaign==campaign \\| 1", %{"utm_campaign" => {:is, "campaign | 1"}})
    end

    test "escaping pipe character in member filter" do
      expected = %{"utm_campaign" => {:member, ["campaign | 1", "campaign | 2"]}}
      assert_parsed("utm_campaign==campaign \\| 1|campaign \\| 2", expected)
    end

    test "keeps escape characters in member + wildcard filter" do
      expected = %{"event:page" => {:matches, "/**\\|page|/other/page"}}
      assert_parsed("event:page==/**\\|page|/other/page", expected)
    end
  end
end

View File

@ -781,5 +781,23 @@ defmodule PlausibleWeb.Api.ExternalStatsController.AggregateTest do
assert json_response(conn, 200)["results"] == %{"visitors" => %{"value" => 3}}
end
test "can escape pipe character in member + wildcard filter", %{conn: conn, site: site} do
  # Three of the four paths match the filter: two end in "post|1" (escaped
  # pipe, wildcard prefix) and one is exactly "/something-else".
  pageviews =
    for pathname <- ["/blog/post|1", "/otherpost|1", "/blog/post|2", "/something-else"] do
      build(:pageview, pathname: pathname)
    end

  populate_stats(site, pageviews)

  params = %{
    "site_id" => site.domain,
    "metrics" => "visitors",
    "filters" => "event:page==**post\\|1|/something-else"
  }

  response =
    conn
    |> get("/api/v1/stats/aggregate", params)
    |> json_response(200)

  assert response["results"] == %{"visitors" => %{"value" => 3}}
end
end
end