Hard-code sample rate based on fractional_hardcoded_sample_rate flag (#4762)

* Hard-code sample rate based on fractional_hardcoded_sample_rate flag

We found cases where using a numeric sample rate would cause issues when
joining two tables due to different _sample_factor. Ref: https://3.basecamp.com/5308029/buckets/26383192/card_tables/cards/7973456592#__recording_7978780711

The proper fix is to use fractional sample rates everywhere, but that is
a project of its own because we do not want to sample small sites. For now,
hard-code the sample rate for the specific sites that have the issue while
we work on the larger fix.

* is_number
This commit is contained in:
Karl-Aksel Puulmann 2024-10-31 12:25:22 +02:00 committed by GitHub
parent 7ff1a16ae3
commit b16bd91600
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 29 additions and 8 deletions

View File

@ -17,8 +17,8 @@ defmodule Plausible.Stats.Sampling do
end
end
@spec add_query_hint(Ecto.Query.t(), pos_integer()) :: Ecto.Query.t()
def add_query_hint(%Ecto.Query{} = query, threshold) when is_integer(threshold) do
@spec add_query_hint(Ecto.Query.t(), pos_integer() | float()) :: Ecto.Query.t()
def add_query_hint(%Ecto.Query{} = query, threshold) when is_number(threshold) do
from(x in query, hints: unsafe_fragment(^"SAMPLE #{threshold}"))
end
@ -27,15 +27,32 @@ defmodule Plausible.Stats.Sampling do
add_query_hint(query, @default_sample_threshold)
end
@spec put_threshold(Plausible.Stats.Query.t(), map()) :: Plausible.Stats.Query.t()
def put_threshold(query, params) do
@spec put_threshold(Plausible.Stats.Query.t(), Plausible.Site.t(), map()) ::
Plausible.Stats.Query.t()
def put_threshold(query, site, params) do
sample_threshold =
case params["sample_threshold"] do
nil -> @default_sample_threshold
"infinite" -> :infinite
value -> String.to_integer(value)
nil ->
site_default_threshold(site)
"infinite" ->
:infinite
value_string ->
{value, _} = Float.parse(value_string)
value
end
Map.put(query, :sample_threshold, sample_threshold)
end
# Returns the sample threshold to fall back on for `site`: the fractional
# value 0.1 when the :fractional_hardcoded_sample_rate feature flag is
# enabled for that site, otherwise @default_sample_threshold.
defp site_default_threshold(site) do
if FunWithFlags.enabled?(:fractional_hardcoded_sample_rate, for: site) do
# Hard-coded sample rate to temporarily fix an issue for a client.
# To be solved as part of https://3.basecamp.com/5308029/buckets/39750953/messages/7978775089
0.1
else
@default_sample_threshold
end
end
end

View File

@ -26,7 +26,7 @@ defmodule Plausible.Stats.Legacy.QueryBuilder do
|> Query.put_imported_opts(site, params)
on_ee do
query = Plausible.Stats.Sampling.put_threshold(query, params)
query = Plausible.Stats.Sampling.put_threshold(query, site, params)
end
query

View File

@ -37,6 +37,10 @@ defmodule Plausible.Stats.Query do
|> put_experimental_reduced_joins(site, params)
|> struct!(v2: true, now: DateTime.utc_now(:second), debug_metadata: debug_metadata)
on_ee do
query = Plausible.Stats.Sampling.put_threshold(query, site, params)
end
{:ok, query}
end
end