mirror of
https://github.com/plausible/analytics.git
synced 2024-11-26 23:27:54 +03:00
Stats API: allow escaping the literal `|` character with `\|` when filtering (#2266)
* add separate module for filter parsing
* add tests for filter parser
* allow escaping pipe character in filter value
* add documentation and doctests
* do not remove escape chars from wildcard values
* changelog update
* change the parse_filters/1 function argument
This commit is contained in:
parent
620e29ab33
commit
155e274150
@ -5,6 +5,7 @@ All notable changes to this project will be documented in this file.
|
||||
## Unreleased
|
||||
|
||||
### Added
|
||||
- The ability to escape `|` characters with `\` in Stats API filter values
|
||||
- An upper bound of 1000 to the `limit` parameter in Stats API
|
||||
- The `exclusions` script extension now also takes a `data-include` attribute tag
|
||||
- A `file-downloads` script extension for automatically tracking file downloads as custom events
|
||||
|
80
lib/plausible/stats/filter_parser.ex
Normal file
80
lib/plausible/stats/filter_parser.ex
Normal file
@ -0,0 +1,80 @@
|
||||
defmodule Plausible.Stats.FilterParser do
  @moduledoc """
  A module for parsing filters used in stat queries.
  """

  # Matches a `|` that is not immediately preceded by a backslash, i.e. a pipe
  # that separates list members rather than an escaped literal pipe (`\|`).
  @non_escaped_pipe_regex ~r/(?<!\\)\|/

  # Comparison operators recognised between a filter key and its value.
  @operators ["==", "!="]

  @doc """
  Parses different filter formats.

  Depending on the format and type of the `filters` argument, returns:

    * a decoded map, when `filters` is encoded JSON
    * a parsed filter map, when `filters` is a filter expression string
    * the same map, when `filters` is a map

  Returns an empty map when argument type is unexpected (e.g. `nil`).

  A filter expression is a `;`-separated list of `key==value` or `key!=value`
  pairs. Only the first operator of each pair separates the key from the
  value, so the value itself may contain `==` or `!=`.

  Raises `MatchError` when a pair contains no operator at all.

  ### Examples:

      iex> FilterParser.parse_filters("{\\"page\\":\\"/blog/**\\"}")
      %{"page" => "/blog/**"}

      iex> FilterParser.parse_filters("visit:browser!=Chrome")
      %{"visit:browser" => {:is_not, "Chrome"}}

      iex> FilterParser.parse_filters("event:page==/search?q=a==b")
      %{"event:page" => {:is, "/search?q=a==b"}}

      iex> FilterParser.parse_filters(nil)
      %{}
  """
  def parse_filters(filters) when is_binary(filters) do
    case Jason.decode(filters) do
      # Valid JSON: the decoded value is returned as-is.
      {:ok, parsed} -> parsed
      # Not JSON: treat the original string as a filter expression.
      {:error, _reason} -> parse_filter_expression(filters)
    end
  end

  def parse_filters(filters) when is_map(filters), do: filters
  def parse_filters(_), do: %{}

  # Splits the expression on ";" and parses every part into a `{key, filter}`
  # pair. When a key occurs more than once, the last occurrence wins.
  defp parse_filter_expression(str) do
    str
    |> String.split(";")
    |> Map.new(&parse_single_filter/1)
  end

  # Parses one `key<op>value` pair into `{key, filter}`.
  #
  # The branch order below is significant:
  #   * `event:goal` always yields a goal filter, whatever the operator;
  #   * wildcard values keep their escape characters and are never split
  #     into a member list;
  #   * a list value yields `:member` even when the operator is `!=`
  #     (NOTE(review): negated lists are not distinguished here - confirm
  #     this is intended).
  defp parse_single_filter(str) do
    {key, operator, raw_value} = split_on_operator(str)

    negated? = operator == "!="
    list? = Regex.match?(@non_escaped_pipe_regex, raw_value)
    wildcard? = String.contains?(raw_value, "*")

    final_value = remove_escape_chars(raw_value)

    cond do
      key == "event:goal" -> {key, parse_goal_filter(final_value)}
      wildcard? && negated? -> {key, {:does_not_match, raw_value}}
      wildcard? -> {key, {:matches, raw_value}}
      list? -> {key, {:member, parse_member_list(raw_value)}}
      negated? -> {key, {:is_not, final_value}}
      true -> {key, {:is, final_value}}
    end
  end

  # Splits a single filter at its FIRST operator and returns
  # `{key, operator, value}` with surrounding whitespace removed from the key
  # and the value.
  #
  # Splitting only once keeps values that contain `==` or `!=` intact, and the
  # operator is taken from the split position instead of being searched for
  # anywhere in the string. An empty key or value is passed through as "".
  # Raises `MatchError` when the string contains no operator.
  defp split_on_operator(str) do
    trimmed = String.trim(str)
    {pos, len} = :binary.match(trimmed, @operators)
    <<key::binary-size(pos), operator::binary-size(len), value::binary>> = trimmed

    {String.trim(key), operator, String.trim(value)}
  end

  # A goal value starting with "Visit " denotes a page goal; anything else is
  # a custom event goal.
  defp parse_goal_filter("Visit " <> page), do: {:is, :page, page}
  defp parse_goal_filter(event), do: {:is, :event, event}

  # Turns every escaped pipe (`\|`) into a literal pipe.
  defp remove_escape_chars(value) do
    String.replace(value, "\\|", "|")
  end

  # Splits on unescaped pipes only, then unescapes each member.
  defp parse_member_list(raw_value) do
    raw_value
    |> String.split(@non_escaped_pipe_regex)
    |> Enum.map(&remove_escape_chars/1)
  end
end
|
@ -7,6 +7,7 @@ defmodule Plausible.Stats.Query do
|
||||
include_imported: false
|
||||
|
||||
@default_sample_threshold 20_000_000
|
||||
alias Plausible.Stats.FilterParser
|
||||
|
||||
def shift_back(%__MODULE__{period: "year"} = query, site) do
|
||||
# Querying current year to date
|
||||
@ -68,7 +69,7 @@ defmodule Plausible.Stats.Query do
|
||||
period: "realtime",
|
||||
interval: "minute",
|
||||
date_range: Date.range(date, date),
|
||||
filters: parse_filters(params),
|
||||
filters: FilterParser.parse_filters(params["filters"]),
|
||||
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold),
|
||||
include_imported: false
|
||||
}
|
||||
@ -81,7 +82,7 @@ defmodule Plausible.Stats.Query do
|
||||
period: "day",
|
||||
date_range: Date.range(date, date),
|
||||
interval: "hour",
|
||||
filters: parse_filters(params),
|
||||
filters: FilterParser.parse_filters(params["filters"]),
|
||||
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold)
|
||||
}
|
||||
|> maybe_include_imported(site, params)
|
||||
@ -95,7 +96,7 @@ defmodule Plausible.Stats.Query do
|
||||
period: "7d",
|
||||
date_range: Date.range(start_date, end_date),
|
||||
interval: "date",
|
||||
filters: parse_filters(params),
|
||||
filters: FilterParser.parse_filters(params["filters"]),
|
||||
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold)
|
||||
}
|
||||
|> maybe_include_imported(site, params)
|
||||
@ -109,7 +110,7 @@ defmodule Plausible.Stats.Query do
|
||||
period: "30d",
|
||||
date_range: Date.range(start_date, end_date),
|
||||
interval: "date",
|
||||
filters: parse_filters(params),
|
||||
filters: FilterParser.parse_filters(params["filters"]),
|
||||
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold)
|
||||
}
|
||||
|> maybe_include_imported(site, params)
|
||||
@ -125,7 +126,7 @@ defmodule Plausible.Stats.Query do
|
||||
period: "month",
|
||||
date_range: Date.range(start_date, end_date),
|
||||
interval: "date",
|
||||
filters: parse_filters(params),
|
||||
filters: FilterParser.parse_filters(params["filters"]),
|
||||
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold)
|
||||
}
|
||||
|> maybe_include_imported(site, params)
|
||||
@ -144,7 +145,7 @@ defmodule Plausible.Stats.Query do
|
||||
period: "6mo",
|
||||
date_range: Date.range(start_date, end_date),
|
||||
interval: Map.get(params, "interval", "month"),
|
||||
filters: parse_filters(params),
|
||||
filters: FilterParser.parse_filters(params["filters"]),
|
||||
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold)
|
||||
}
|
||||
|> maybe_include_imported(site, params)
|
||||
@ -163,7 +164,7 @@ defmodule Plausible.Stats.Query do
|
||||
period: "12mo",
|
||||
date_range: Date.range(start_date, end_date),
|
||||
interval: Map.get(params, "interval", "month"),
|
||||
filters: parse_filters(params),
|
||||
filters: FilterParser.parse_filters(params["filters"]),
|
||||
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold)
|
||||
}
|
||||
|> maybe_include_imported(site, params)
|
||||
@ -180,7 +181,7 @@ defmodule Plausible.Stats.Query do
|
||||
period: "year",
|
||||
date_range: Date.range(start_date, end_date),
|
||||
interval: Map.get(params, "interval", "month"),
|
||||
filters: parse_filters(params),
|
||||
filters: FilterParser.parse_filters(params["filters"]),
|
||||
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold)
|
||||
}
|
||||
|> maybe_include_imported(site, params)
|
||||
@ -245,7 +246,7 @@ defmodule Plausible.Stats.Query do
|
||||
period: "custom",
|
||||
date_range: Date.range(from_date, to_date),
|
||||
interval: Map.get(params, "interval", "date"),
|
||||
filters: parse_filters(params),
|
||||
filters: FilterParser.parse_filters(params["filters"]),
|
||||
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold)
|
||||
}
|
||||
|> maybe_include_imported(site, params)
|
||||
@ -326,46 +327,6 @@ defmodule Plausible.Stats.Query do
|
||||
end
|
||||
end
|
||||
|
||||
defp parse_filters(%{"filters" => filters}) when is_binary(filters) do
|
||||
case Jason.decode(filters) do
|
||||
{:ok, parsed} -> parsed
|
||||
{:error, err} -> parse_filter_expression(err.data)
|
||||
end
|
||||
end
|
||||
|
||||
defp parse_filters(%{"filters" => filters}) when is_map(filters), do: filters
|
||||
defp parse_filters(_), do: %{}
|
||||
|
||||
defp parse_filter_expression(str) do
|
||||
filters = String.split(str, ";")
|
||||
|
||||
Enum.map(filters, &parse_single_filter/1)
|
||||
|> Enum.into(%{})
|
||||
end
|
||||
|
||||
defp parse_single_filter(str) do
|
||||
[key, val] =
|
||||
String.trim(str)
|
||||
|> String.split(["==", "!="], trim: true)
|
||||
|> Enum.map(&String.trim/1)
|
||||
|
||||
is_negated = String.contains?(str, "!=")
|
||||
is_list = String.contains?(val, "|")
|
||||
is_wildcard = String.contains?(val, "*")
|
||||
|
||||
cond do
|
||||
key == "event:goal" -> {key, parse_goal_filter(val)}
|
||||
is_wildcard && is_negated -> {key, {:does_not_match, val}}
|
||||
is_wildcard -> {key, {:matches, val}}
|
||||
is_list -> {key, {:member, String.split(val, "|")}}
|
||||
is_negated -> {key, {:is_not, val}}
|
||||
true -> {key, {:is, val}}
|
||||
end
|
||||
end
|
||||
|
||||
defp parse_goal_filter("Visit " <> page), do: {:is, :page, page}
|
||||
defp parse_goal_filter(event), do: {:is, :event, event}
|
||||
|
||||
defp maybe_include_imported(query, site, params) do
|
||||
imported_data_requested = params["with_imported"] == "true"
|
||||
has_imported_data = site.imported_data && site.imported_data.status == "ok"
|
||||
|
80
test/plausible/stats/filter_parser_test.exs
Normal file
80
test/plausible/stats/filter_parser_test.exs
Normal file
@ -0,0 +1,80 @@
|
||||
defmodule Plausible.Stats.FilterParserTest do
  use ExUnit.Case, async: true
  alias Plausible.Stats.FilterParser

  doctest Plausible.Stats.FilterParser

  # Shared assertion: parsing `input` must produce exactly `expected_output`.
  def assert_parsed(input, expected_output) do
    parsed = FilterParser.parse_filters(input)
    assert parsed == expected_output
  end

  describe "parses filter expression" do
    test "simple positive" do
      assert_parsed("event:name==pageview", %{"event:name" => {:is, "pageview"}})
    end

    test "simple negative" do
      assert_parsed("event:name!=pageview", %{"event:name" => {:is_not, "pageview"}})
    end

    test "whitespace is trimmed" do
      assert_parsed(" event:name == pageview ", %{"event:name" => {:is, "pageview"}})
    end

    test "wildcard" do
      assert_parsed("event:page==/blog/post-*", %{"event:page" => {:matches, "/blog/post-*"}})
    end

    test "negative wildcard" do
      expected = %{"event:page" => {:does_not_match, "/blog/post-*"}}
      assert_parsed("event:page!=/blog/post-*", expected)
    end

    test "custom event goal" do
      assert_parsed("event:goal==Signup", %{"event:goal" => {:is, :event, "Signup"}})
    end

    test "pageview goal" do
      assert_parsed("event:goal==Visit /blog", %{"event:goal" => {:is, :page, "/blog"}})
    end

    test "member" do
      assert_parsed("visit:country==FR|GB|DE", %{"visit:country" => {:member, ["FR", "GB", "DE"]}})
    end

    test "member + wildcard" do
      # A wildcard value is kept whole instead of being split into members.
      expected = %{"event:page" => {:matches, "/blog**|/newsletter|/*/"}}
      assert_parsed("event:page==/blog**|/newsletter|/*/", expected)
    end

    test "combined with \";\"" do
      expected = %{
        "event:page" => {:matches, "/blog**|/newsletter|/*/"},
        "visit:country" => {:member, ["FR", "GB", "DE"]}
      }

      assert_parsed("event:page==/blog**|/newsletter|/*/ ; visit:country==FR|GB|DE", expected)
    end

    test "escaping pipe character" do
      assert_parsed("utm_campaign==campaign \\| 1", %{"utm_campaign" => {:is, "campaign | 1"}})
    end

    test "escaping pipe character in member filter" do
      expected = %{"utm_campaign" => {:member, ["campaign | 1", "campaign | 2"]}}
      assert_parsed("utm_campaign==campaign \\| 1|campaign \\| 2", expected)
    end

    test "keeps escape characters in member + wildcard filter" do
      expected = %{"event:page" => {:matches, "/**\\|page|/other/page"}}
      assert_parsed("event:page==/**\\|page|/other/page", expected)
    end
  end
end
|
@ -781,5 +781,23 @@ defmodule PlausibleWeb.Api.ExternalStatsController.AggregateTest do
|
||||
|
||||
assert json_response(conn, 200)["results"] == %{"visitors" => %{"value" => 3}}
|
||||
end
|
||||
|
||||
test "can escape pipe character in member + wildcard filter", %{conn: conn, site: site} do
  # Four pageviews are recorded; the assertion below expects the filter to
  # count three of them.
  pageviews = [
    build(:pageview, pathname: "/blog/post|1"),
    build(:pageview, pathname: "/otherpost|1"),
    build(:pageview, pathname: "/blog/post|2"),
    build(:pageview, pathname: "/something-else")
  ]

  populate_stats(site, pageviews)

  # The first pipe is escaped (`\|`), the second one is left unescaped.
  params = %{
    "site_id" => site.domain,
    "metrics" => "visitors",
    "filters" => "event:page==**post\\|1|/something-else"
  }

  body =
    conn
    |> get("/api/v1/stats/aggregate", params)
    |> json_response(200)

  assert body["results"] == %{"visitors" => %{"value" => 3}}
end
|
||||
end
|
||||
end
|
||||
|
Loading…
Reference in New Issue
Block a user