Make subsequent query boundaries idempotent (#3723)

* Make subsequent query boundaries idempotent

This should prevent time-sensitive race conditions
on subsequent `Plausible.Stats.*` calls for the same query.
Not sure if there are more places where now() is
dynamic relative to query life cycle, but it's a start.

* Format

* Update lib/plausible/stats/query.ex

Co-authored-by: Wojtek Mach <wojtekmach@users.noreply.github.com>

* Match on `Query.now` in a test

---------

Co-authored-by: Wojtek Mach <wojtekmach@users.noreply.github.com>
This commit is contained in:
hq1 2024-01-25 08:59:03 +01:00 committed by GitHub
parent 822483c37c
commit 0fa7f3c1e1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 74 additions and 64 deletions

View File

@ -484,28 +484,28 @@ defmodule Plausible.Stats.Base do
end
end
def utc_boundaries(%Query{period: "realtime"}, site) do
def utc_boundaries(%Query{period: "realtime", now: now}, site) do
last_datetime =
NaiveDateTime.utc_now()
now
|> Timex.shift(seconds: 5)
|> beginning_of_time(site.native_stats_start_at)
|> NaiveDateTime.truncate(:second)
first_datetime =
NaiveDateTime.utc_now() |> Timex.shift(minutes: -5) |> NaiveDateTime.truncate(:second)
now |> Timex.shift(minutes: -5) |> NaiveDateTime.truncate(:second)
{first_datetime, last_datetime}
end
def utc_boundaries(%Query{period: "30m"}, site) do
def utc_boundaries(%Query{period: "30m", now: now}, site) do
last_datetime =
NaiveDateTime.utc_now()
now
|> Timex.shift(seconds: 5)
|> beginning_of_time(site.native_stats_start_at)
|> NaiveDateTime.truncate(:second)
first_datetime =
NaiveDateTime.utc_now() |> Timex.shift(minutes: -30) |> NaiveDateTime.truncate(:second)
now |> Timex.shift(minutes: -30) |> NaiveDateTime.truncate(:second)
{first_datetime, last_datetime}
end

View File

@ -555,8 +555,8 @@ defmodule Plausible.Stats.Clickhouse do
base_query_bare(site, query) |> include_goal_conversions(query)
end
defp utc_boundaries(%Query{period: "30m"}, site) do
last_datetime = NaiveDateTime.utc_now() |> NaiveDateTime.truncate(:second)
defp utc_boundaries(%Query{now: now, period: "30m"}, site) do
last_datetime = now |> NaiveDateTime.truncate(:second)
first_datetime =
last_datetime
@ -567,8 +567,8 @@ defmodule Plausible.Stats.Clickhouse do
{first_datetime, last_datetime}
end
defp utc_boundaries(%Query{period: "realtime"}, site) do
last_datetime = NaiveDateTime.utc_now() |> NaiveDateTime.truncate(:second)
defp utc_boundaries(%Query{now: now, period: "realtime"}, site) do
last_datetime = now |> NaiveDateTime.truncate(:second)
first_datetime =
last_datetime

View File

@ -7,7 +7,8 @@ defmodule Plausible.Stats.Query do
filters: %{},
sample_threshold: 20_000_000,
imported_data_requested: false,
include_imported: false
include_imported: false,
now: nil
require OpenTelemetry.Tracer, as: Tracer
alias Plausible.Stats.{FilterParser, Interval}
@ -15,8 +16,12 @@ defmodule Plausible.Stats.Query do
@type t :: %__MODULE__{}
def from(site, params) do
now = NaiveDateTime.utc_now(:second)
query =
query_by_period(site, params)
__MODULE__
|> struct!(now: now)
|> put_period(site, params)
|> put_interval(params)
|> put_parsed_filters(params)
|> put_imported_opts(site, params)
@ -29,57 +34,49 @@ defmodule Plausible.Stats.Query do
query
end
defp query_by_period(site, %{"period" => "realtime"}) do
defp put_period(query, site, %{"period" => "realtime"}) do
date = today(site.timezone)
%__MODULE__{
period: "realtime",
date_range: Date.range(date, date)
}
struct!(query, period: "realtime", date_range: Date.range(date, date))
end
defp query_by_period(site, %{"period" => "day"} = params) do
defp put_period(query, site, %{"period" => "day"} = params) do
date = parse_single_date(site.timezone, params)
%__MODULE__{
period: "day",
date_range: Date.range(date, date)
}
struct!(query, period: "day", date_range: Date.range(date, date))
end
defp query_by_period(site, %{"period" => "7d"} = params) do
defp put_period(query, site, %{"period" => "7d"} = params) do
end_date = parse_single_date(site.timezone, params)
start_date = end_date |> Timex.shift(days: -6)
%__MODULE__{
struct!(
query,
period: "7d",
date_range: Date.range(start_date, end_date)
}
)
end
defp query_by_period(site, %{"period" => "30d"} = params) do
defp put_period(query, site, %{"period" => "30d"} = params) do
end_date = parse_single_date(site.timezone, params)
start_date = end_date |> Timex.shift(days: -30)
%__MODULE__{
period: "30d",
date_range: Date.range(start_date, end_date)
}
struct!(query, period: "30d", date_range: Date.range(start_date, end_date))
end
defp query_by_period(site, %{"period" => "month"} = params) do
defp put_period(query, site, %{"period" => "month"} = params) do
date = parse_single_date(site.timezone, params)
start_date = Timex.beginning_of_month(date)
end_date = Timex.end_of_month(date)
%__MODULE__{
struct!(query,
period: "month",
date_range: Date.range(start_date, end_date)
}
)
end
defp query_by_period(site, %{"period" => "6mo"} = params) do
defp put_period(query, site, %{"period" => "6mo"} = params) do
end_date =
parse_single_date(site.timezone, params)
|> Timex.end_of_month()
@ -88,13 +85,13 @@ defmodule Plausible.Stats.Query do
Timex.shift(end_date, months: -5)
|> Timex.beginning_of_month()
%__MODULE__{
struct!(query,
period: "6mo",
date_range: Date.range(start_date, end_date)
}
)
end
defp query_by_period(site, %{"period" => "12mo"} = params) do
defp put_period(query, site, %{"period" => "12mo"} = params) do
end_date =
parse_single_date(site.timezone, params)
|> Timex.end_of_month()
@ -103,78 +100,77 @@ defmodule Plausible.Stats.Query do
Timex.shift(end_date, months: -11)
|> Timex.beginning_of_month()
%__MODULE__{
struct!(query,
period: "12mo",
date_range: Date.range(start_date, end_date)
}
)
end
defp query_by_period(site, %{"period" => "year"} = params) do
defp put_period(query, site, %{"period" => "year"} = params) do
end_date =
parse_single_date(site.timezone, params)
|> Timex.end_of_year()
start_date = Timex.beginning_of_year(end_date)
%__MODULE__{
struct!(query,
period: "year",
date_range: Date.range(start_date, end_date)
}
)
end
defp query_by_period(site, %{"period" => "all"}) do
defp put_period(query, site, %{"period" => "all"}) do
now = today(site.timezone)
start_date = Plausible.Site.local_start_date(site) || now
%__MODULE__{
struct!(query,
period: "all",
date_range: Date.range(start_date, now)
}
)
end
defp query_by_period(site, %{"period" => "custom", "from" => from, "to" => to} = params) do
defp put_period(query, site, %{"period" => "custom", "from" => from, "to" => to} = params) do
new_params =
params
|> Map.drop(["from", "to"])
|> Map.put("date", Enum.join([from, to], ","))
query_by_period(site, new_params)
put_period(query, site, new_params)
end
defp query_by_period(_site, %{"period" => "custom", "date" => date}) do
defp put_period(query, _site, %{"period" => "custom", "date" => date}) do
[from, to] = String.split(date, ",")
from_date = Date.from_iso8601!(String.trim(from))
to_date = Date.from_iso8601!(String.trim(to))
%__MODULE__{
struct!(query,
period: "custom",
date_range: Date.range(from_date, to_date)
}
)
end
defp query_by_period(site, params) do
query_by_period(site, Map.merge(params, %{"period" => "30d"}))
defp put_period(query, site, params) do
put_period(query, site, Map.merge(params, %{"period" => "30d"}))
end
defp put_interval(%{:period => "all"} = query, params) do
interval = Map.get(params, "interval", Interval.default_for_date_range(query.date_range))
Map.put(query, :interval, interval)
struct!(query, interval: interval)
end
defp put_interval(query, params) do
interval = Map.get(params, "interval", Interval.default_for_period(query.period))
Map.put(query, :interval, interval)
struct!(query, interval: interval)
end
defp put_parsed_filters(query, params) do
Map.put(query, :filters, FilterParser.parse_filters(params["filters"]))
struct!(query, filters: FilterParser.parse_filters(params["filters"]))
end
def put_filter(query, key, val) do
%__MODULE__{
query
| filters: Map.put(query.filters, key, val)
}
struct!(query,
filters: Map.put(query.filters, key, val)
)
end
def remove_event_filters(query, opts) do
@ -189,7 +185,7 @@ defmodule Plausible.Stats.Query do
end)
|> Enum.into(%{})
%__MODULE__{query | filters: new_filters}
struct!(query, filters: new_filters)
end
def has_event_filters?(query) do
@ -220,9 +216,10 @@ defmodule Plausible.Stats.Query do
defp put_imported_opts(query, site, params) do
requested? = params["with_imported"] == "true"
query
|> Map.put(:imported_data_requested, requested?)
|> Map.put(:include_imported, include_imported?(query, site, requested?))
struct!(query,
imported_data_requested: requested?,
include_imported: include_imported?(query, site, requested?)
)
end
defp maybe_drop_prop_filter(query, site) do
@ -234,7 +231,7 @@ defmodule Plausible.Stats.Query do
end
if prop_filter? && !props_available?.(),
do: %__MODULE__{query | filters: Map.drop(query.filters, ["props"])},
do: struct!(query, filters: Map.drop(query.filters, ["props"])),
else: query
end

View File

@ -15,6 +15,19 @@ defmodule Plausible.Stats.QueryTest do
{:ok, site: site, user: user}
end
@tag :slow
test "keeps current timestamp so that utc_boundaries don't depend on time passing by", %{
site: site
} do
q1 = %{now: %NaiveDateTime{}} = Query.from(site, %{"period" => "realtime"})
q2 = %{now: %NaiveDateTime{}} = Query.from(site, %{"period" => "30m"})
boundaries1 = Plausible.Stats.Base.utc_boundaries(q1, site)
boundaries2 = Plausible.Stats.Base.utc_boundaries(q2, site)
:timer.sleep(1500)
assert ^boundaries1 = Plausible.Stats.Base.utc_boundaries(q1, site)
assert ^boundaries2 = Plausible.Stats.Base.utc_boundaries(q2, site)
end
test "parses day format", %{site: site} do
q = Query.from(site, %{"period" => "day", "date" => "2019-01-01"})