Prevent overwriting with_imported when comparing (#2871)

This commit fixes a bug where comparisons would not work with imported data. This is because comparisons copies the source query and modifies the dates only, where it should reevaluate if there is imported data for the new comparison date range.

Closes #2870

Co-authored-by: Adam <hq@mtod.org>
This commit is contained in:
Vini Brasil 2023-04-26 10:06:40 +01:00 committed by GitHub
parent cc5c795fd6
commit 8ec3dd402f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 275 additions and 44 deletions

View File

@ -53,14 +53,21 @@ defmodule Plausible.Stats.Comparisons do
it will be shifted to start on Sunday, January 2nd, 2022 instead of it will be shifted to start on Sunday, January 2nd, 2022 instead of
January 1st. Defaults to false. January 1st. Defaults to false.
* `:include_imported?` - includes imported data in the generated comparison
query if there is imported data. Defaults to true.
""" """
def compare(%Plausible.Site{} = site, %Stats.Query{} = source_query, mode, opts \\ []) do def compare(%Plausible.Site{} = site, %Stats.Query{} = source_query, mode, opts \\ []) do
if valid_mode?(source_query, mode) do opts =
opts = Keyword.put_new(opts, :now, Timex.now(site.timezone)) opts
do_compare(source_query, mode, opts) |> Keyword.put_new(:now, Timex.now(site.timezone))
else |> Keyword.put_new(:match_day_of_week?, false)
{:error, :not_supported} |> Keyword.put_new(:include_imported?, true)
end
with :ok <- validate_mode(source_query, mode),
{:ok, comparison_query} <- do_compare(source_query, mode, opts),
comparison_query <- maybe_include_imported(comparison_query, site, opts),
do: {:ok, comparison_query}
end end
defp do_compare(source_query, "year_over_year", opts) do defp do_compare(source_query, "year_over_year", opts) do
@ -113,7 +120,7 @@ defmodule Plausible.Stats.Comparisons do
end end
defp maybe_match_day_of_week(comparison_query, source_query, opts) do defp maybe_match_day_of_week(comparison_query, source_query, opts) do
if Keyword.get(opts, :match_day_of_week?, false) do if Keyword.fetch!(opts, :match_day_of_week?) do
day_to_match = Date.day_of_week(source_query.date_range.first) day_to_match = Date.day_of_week(source_query.date_range.first)
new_first = new_first =
@ -158,14 +165,16 @@ defmodule Plausible.Stats.Comparisons do
Date.add(date, -days_to_subtract) Date.add(date, -days_to_subtract)
end end
@spec valid_mode?(Stats.Query.t(), mode()) :: boolean() defp maybe_include_imported(query, site, opts) do
@doc """ include_imported? = Stats.Query.include_imported?(query, site, opts[:include_imported?])
Returns whether the source query and the selected mode support comparisons. %Stats.Query{query | include_imported: include_imported?}
end
For example, the realtime view doesn't support comparisons. Additionally, only defp validate_mode(%Stats.Query{period: period}, mode) do
#{inspect(@modes)} are supported. if mode in @modes && period not in @disallowed_periods do
""" :ok
def valid_mode?(%Stats.Query{period: period}, mode) do else
mode in @modes && period not in @disallowed_periods {:error, :not_supported}
end
end end
end end

View File

@ -249,18 +249,19 @@ defmodule Plausible.Stats.Query do
end end
defp maybe_include_imported(query, site, params) do defp maybe_include_imported(query, site, params) do
imported_data_requested = params["with_imported"] == "true" requested? = params["with_imported"] == "true"
has_imported_data = site.imported_data && site.imported_data.status == "ok" %{query | include_imported: include_imported?(query, site, requested?)}
end
date_range_overlaps = @spec include_imported?(t(), Plausible.Site.t(), boolean()) :: boolean()
has_imported_data && !Timex.after?(query.date_range.first, site.imported_data.end_date) def include_imported?(query, site, requested?) do
cond do
no_filters_applied = Enum.empty?(query.filters) is_nil(site.imported_data) -> false
site.imported_data.status != "ok" -> false
include_imported = Timex.after?(query.date_range.first, site.imported_data.end_date) -> false
imported_data_requested && has_imported_data && date_range_overlaps && no_filters_applied Enum.any?(query.filters) -> false
true -> requested?
%{query | include_imported: !!include_imported} end
end end
@spec trace(%__MODULE__{}) :: %__MODULE__{} @spec trace(%__MODULE__{}) :: %__MODULE__{}

View File

@ -113,10 +113,13 @@ defmodule PlausibleWeb.Api.StatsController do
comparison_opts = parse_comparison_opts(params) comparison_opts = parse_comparison_opts(params)
comparison_result = {comparison_query, comparison_result} =
case Comparisons.compare(site, query, params["comparison"], comparison_opts) do case Comparisons.compare(site, query, params["comparison"], comparison_opts) do
{:ok, comparison_query} -> Stats.timeseries(site, comparison_query, [selected_metric]) {:ok, comparison_query} ->
{:error, :not_supported} -> nil {comparison_query, Stats.timeseries(site, comparison_query, [selected_metric])}
{:error, :not_supported} ->
{nil, nil}
end end
labels = label_timeseries(timeseries_result, comparison_result) labels = label_timeseries(timeseries_result, comparison_result)
@ -130,7 +133,7 @@ defmodule PlausibleWeb.Api.StatsController do
comparison_labels: comparison_result && label_timeseries(comparison_result, nil), comparison_labels: comparison_result && label_timeseries(comparison_result, nil),
present_index: present_index, present_index: present_index,
interval: query.interval, interval: query.interval,
with_imported: query.include_imported, with_imported: with_imported?(query, comparison_query),
imported_source: site.imported_data && site.imported_data.source, imported_source: site.imported_data && site.imported_data.source,
full_intervals: full_intervals full_intervals: full_intervals
}) })
@ -206,7 +209,7 @@ defmodule PlausibleWeb.Api.StatsController do
top_stats: top_stats, top_stats: top_stats,
interval: query.interval, interval: query.interval,
sample_percent: sample_percent, sample_percent: sample_percent,
with_imported: query.include_imported, with_imported: with_imported?(query, comparison_query),
imported_source: site.imported_data && site.imported_data.source, imported_source: site.imported_data && site.imported_data.source,
comparing_from: comparison_query && comparison_query.date_range.first, comparing_from: comparison_query && comparison_query.date_range.first,
comparing_to: comparison_query && comparison_query.date_range.last, comparing_to: comparison_query && comparison_query.date_range.last,
@ -1320,7 +1323,16 @@ defmodule PlausibleWeb.Api.StatsController do
[ [
from: params["compare_from"], from: params["compare_from"],
to: params["compare_to"], to: params["compare_to"],
match_day_of_week?: params["match_day_of_week"] == "true" match_day_of_week?: params["match_day_of_week"] == "true",
include_imported?: params["with_imported"] == "true"
] ]
end end
defp with_imported?(source_query, comparison_query) do
cond do
source_query.include_imported -> true
comparison_query && comparison_query.include_imported -> true
true -> false
end
end
end end

View File

@ -10,12 +10,28 @@
# We recommend using the bang functions (`insert!`, `update!` # We recommend using the bang functions (`insert!`, `update!`
# and so on) as they will fail if something goes wrong. # and so on) as they will fail if something goes wrong.
FunWithFlags.enable(:comparisons)
user = Plausible.Factory.insert(:user, email: "user@plausible.test", password: "plausible") user = Plausible.Factory.insert(:user, email: "user@plausible.test", password: "plausible")
beginning_of_time = NaiveDateTime.add(NaiveDateTime.utc_now(), -721, :day) native_stats_range =
Date.range(
Date.add(Date.utc_today(), -720),
Date.utc_today()
)
imported_stats_range =
Date.range(
Date.add(native_stats_range.first, -360),
Date.add(native_stats_range.first, -1)
)
site = site =
Plausible.Factory.insert(:site, domain: "dummy.site", native_stats_start_at: beginning_of_time) Plausible.Factory.insert(:site,
domain: "dummy.site",
native_stats_start_at: NaiveDateTime.new!(native_stats_range.first, ~T[00:00:00]),
stats_start_date: NaiveDateTime.new!(imported_stats_range.first, ~T[00:00:00])
)
_membership = Plausible.Factory.insert(:site_membership, user: user, site: site, role: :owner) _membership = Plausible.Factory.insert(:site_membership, user: user, site: site, role: :owner)
@ -77,17 +93,16 @@ geolocations = [
[] []
] ]
Enum.flat_map(-720..0, fn day_index -> native_stats_range
date = Date.add(Date.utc_today(), day_index) |> Enum.with_index()
number_of_events = 0..:rand.uniform(500) |> Enum.flat_map(fn {date, index} ->
Enum.map(0..:rand.uniform(500), fn _ ->
Enum.map(number_of_events, fn _ ->
geolocation = Enum.random(geolocations) geolocation = Enum.random(geolocations)
[ [
site_id: site.id, site_id: site.id,
hostname: site.domain, hostname: site.domain,
timestamp: put_random_time.(date, day_index), timestamp: put_random_time.(date, index),
referrer_source: Enum.random(["", "Facebook", "Twitter", "DuckDuckGo", "Google"]), referrer_source: Enum.random(["", "Facebook", "Twitter", "DuckDuckGo", "Google"]),
browser: Enum.random(["Edge", "Chrome", "Safari", "Firefox", "Vivaldi"]), browser: Enum.random(["Edge", "Chrome", "Safari", "Firefox", "Vivaldi"]),
browser_version: to_string(Enum.random(0..50)), browser_version: to_string(Enum.random(0..50)),
@ -100,3 +115,48 @@ Enum.flat_map(-720..0, fn day_index ->
end) end)
end) end)
|> Plausible.TestUtils.populate_stats() |> Plausible.TestUtils.populate_stats()
site =
site
|> Plausible.Site.start_import(
imported_stats_range.first,
imported_stats_range.last,
"Google Analytics"
)
|> Plausible.Repo.update!()
imported_stats_range
|> Enum.flat_map(fn date ->
Enum.flat_map(0..:rand.uniform(500), fn _ ->
[
Plausible.Factory.build(:imported_visitors,
date: date,
pageviews: Enum.random(1..20),
visitors: Enum.random(1..20),
bounces: Enum.random(1..20),
visits: Enum.random(1..200),
visit_duration: Enum.random(1000..10000)
),
Plausible.Factory.build(:imported_sources,
date: date,
source: Enum.random(["", "Facebook", "Twitter", "DuckDuckGo", "Google"]),
visitors: Enum.random(1..20),
visits: Enum.random(1..200),
bounces: Enum.random(1..20),
visit_duration: Enum.random(1000..10000)
),
Plausible.Factory.build(:imported_pages,
date: date,
visitors: Enum.random(1..20),
pageviews: Enum.random(1..20),
exits: Enum.random(1..20),
time_on_page: Enum.random(1000..10000)
)
]
end)
end)
|> then(&Plausible.TestUtils.populate_stats(site, &1))
site
|> Plausible.Site.import_success()
|> Plausible.Repo.update!()

View File

@ -53,7 +53,8 @@ defmodule PlausibleWeb.Api.StatsController.MainGraphTest do
"/api/stats/#{site.domain}/main-graph?period=day&date=2021-01-01&with_imported=true" "/api/stats/#{site.domain}/main-graph?period=day&date=2021-01-01&with_imported=true"
) )
assert %{"plot" => plot, "imported_source" => "Google Analytics"} = json_response(conn, 200) assert %{"plot" => plot, "imported_source" => "Google Analytics", "with_imported" => true} =
json_response(conn, 200)
assert plot == [2] ++ List.duplicate(0, 23) assert plot == [2] ++ List.duplicate(0, 23)
end end
@ -119,7 +120,8 @@ defmodule PlausibleWeb.Api.StatsController.MainGraphTest do
"/api/stats/#{site.domain}/main-graph?period=month&date=2021-01-01&with_imported=true" "/api/stats/#{site.domain}/main-graph?period=month&date=2021-01-01&with_imported=true"
) )
assert %{"plot" => plot, "imported_source" => "Google Analytics"} = json_response(conn, 200) assert %{"plot" => plot, "imported_source" => "Google Analytics", "with_imported" => true} =
json_response(conn, 200)
assert Enum.count(plot) == 31 assert Enum.count(plot) == 31
assert List.first(plot) == 2 assert List.first(plot) == 2
@ -139,7 +141,8 @@ defmodule PlausibleWeb.Api.StatsController.MainGraphTest do
"/api/stats/#{site.domain}/main-graph?period=month&date=2021-01-01&with_imported=true" "/api/stats/#{site.domain}/main-graph?period=month&date=2021-01-01&with_imported=true"
) )
assert %{"plot" => plot, "imported_source" => "Google Analytics"} = json_response(conn, 200) assert %{"plot" => plot, "imported_source" => "Google Analytics", "with_imported" => true} =
json_response(conn, 200)
assert Enum.count(plot) == 31 assert Enum.count(plot) == 31
assert List.first(plot) == 1 assert List.first(plot) == 1
@ -829,5 +832,84 @@ defmodule PlausibleWeb.Api.StatsController.MainGraphTest do
assert length(labels) == length(comparison_labels) assert length(labels) == length(comparison_labels)
assert "__blank__" == List.last(labels) assert "__blank__" == List.last(labels)
end end
test "compares imported data and native data together", %{conn: conn, site: site} do
populate_stats(site, [
build(:imported_visitors, date: ~D[2020-01-02]),
build(:imported_visitors, date: ~D[2020-01-02]),
build(:pageview, timestamp: ~N[2021-01-01 00:00:00]),
build(:pageview, timestamp: ~N[2021-01-01 00:00:00]),
build(:pageview, timestamp: ~N[2021-01-01 00:00:00]),
build(:pageview, timestamp: ~N[2021-01-01 00:00:00])
])
site
|> Ecto.Changeset.change(
imported_data: %{
start_date: ~D[2005-01-01],
end_date: ~D[2020-01-31],
source: "Google Analytics",
status: "ok"
}
)
|> Plausible.Repo.update!()
conn =
get(
conn,
"/api/stats/#{site.domain}/main-graph?period=year&date=2021-01-01&with_imported=true&comparison=year_over_year&interval=month"
)
assert %{
"plot" => plot,
"comparison_plot" => comparison_plot,
"imported_source" => "Google Analytics",
"with_imported" => true
} = json_response(conn, 200)
assert 4 == Enum.sum(plot)
assert 2 == Enum.sum(comparison_plot)
end
test "does not return imported data when with_imported is set to false when comparing", %{
conn: conn,
site: site
} do
populate_stats(site, [
build(:imported_visitors, date: ~D[2020-01-02]),
build(:imported_visitors, date: ~D[2020-01-02]),
build(:pageview, timestamp: ~N[2021-01-01 00:00:00]),
build(:pageview, timestamp: ~N[2021-01-01 00:00:00]),
build(:pageview, timestamp: ~N[2021-01-01 00:00:00]),
build(:pageview, timestamp: ~N[2021-01-01 00:00:00])
])
site
|> Ecto.Changeset.change(
imported_data: %{
start_date: ~D[2005-01-01],
end_date: ~D[2020-01-31],
source: "Google Analytics",
status: "ok"
}
)
|> Plausible.Repo.update!()
conn =
get(
conn,
"/api/stats/#{site.domain}/main-graph?period=year&date=2021-01-01&with_imported=false&comparison=year_over_year&interval=month"
)
assert %{
"plot" => plot,
"comparison_plot" => comparison_plot,
"imported_source" => "Google Analytics",
"with_imported" => false
} = json_response(conn, 200)
assert 4 == Enum.sum(plot)
assert 0 == Enum.sum(comparison_plot)
end
end end
end end

View File

@ -779,7 +779,7 @@ defmodule PlausibleWeb.Api.StatsController.TopStatsTest do
end end
describe "GET /api/stats/top-stats - with comparisons" do describe "GET /api/stats/top-stats - with comparisons" do
setup [:create_user, :log_in, :create_new_site] setup [:create_user, :log_in, :create_new_site, :add_imported_data]
test "defaults to previous period when comparison is not set", %{site: site, conn: conn} do test "defaults to previous period when comparison is not set", %{site: site, conn: conn} do
populate_stats(site, [ populate_stats(site, [
@ -847,5 +847,72 @@ defmodule PlausibleWeb.Api.StatsController.TopStatsTest do
"to" => "2021-01-31" "to" => "2021-01-31"
} = json_response(conn, 200) } = json_response(conn, 200)
end end
test "compares native and imported data", %{site: site, conn: conn} do
populate_stats(site, [
build(:imported_visitors, date: ~D[2020-01-02]),
build(:imported_visitors, date: ~D[2020-01-02]),
build(:pageview, timestamp: ~N[2021-01-01 00:00:00]),
build(:pageview, timestamp: ~N[2021-01-01 00:00:00]),
build(:pageview, timestamp: ~N[2021-01-01 00:00:00]),
build(:pageview, timestamp: ~N[2021-01-01 00:00:00])
])
site
|> Ecto.Changeset.change(
imported_data: %{
start_date: ~D[2005-01-01],
end_date: ~D[2020-01-31],
source: "Google Analytics",
status: "ok"
}
)
|> Plausible.Repo.update!()
conn =
get(
conn,
"/api/stats/#{site.domain}/top-stats?period=month&date=2021-01-01&with_imported=true&comparison=year_over_year"
)
assert %{"top_stats" => top_stats, "with_imported" => true} = json_response(conn, 200)
assert %{"change" => 100, "comparison_value" => 2, "name" => "Total visits", "value" => 4} in top_stats
end
test "does not compare imported data when with_imported is set to false", %{
site: site,
conn: conn
} do
populate_stats(site, [
build(:imported_visitors, date: ~D[2020-01-02]),
build(:imported_visitors, date: ~D[2020-01-02]),
build(:pageview, timestamp: ~N[2021-01-01 00:00:00]),
build(:pageview, timestamp: ~N[2021-01-01 00:00:00]),
build(:pageview, timestamp: ~N[2021-01-01 00:00:00]),
build(:pageview, timestamp: ~N[2021-01-01 00:00:00])
])
site
|> Ecto.Changeset.change(
imported_data: %{
start_date: ~D[2005-01-01],
end_date: ~D[2020-01-31],
source: "Google Analytics",
status: "ok"
}
)
|> Plausible.Repo.update!()
conn =
get(
conn,
"/api/stats/#{site.domain}/top-stats?period=month&date=2021-01-01&with_imported=false&comparison=year_over_year"
)
assert %{"top_stats" => top_stats, "with_imported" => false} = json_response(conn, 200)
assert %{"change" => 100, "comparison_value" => 0, "name" => "Total visits", "value" => 4} in top_stats
end
end end
end end