Prevent overwriting with_imported when comparing (#2871)

This commit fixes a bug where comparisons would not work with imported data. This is because comparisons copies the source query and modifies the dates only, where it should reevaluate if there is imported data for the new comparison date range.

Closes #2870

Co-authored-by: Adam <hq@mtod.org>
This commit is contained in:
Vini Brasil 2023-04-26 10:06:40 +01:00 committed by GitHub
parent cc5c795fd6
commit 8ec3dd402f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 275 additions and 44 deletions

View File

@ -53,14 +53,21 @@ defmodule Plausible.Stats.Comparisons do
it will be shifted to start on Sunday, January 2nd, 2022 instead of
January 1st. Defaults to false.
* `:include_imported?` - includes imported data in the generated comparison
query if there is imported data. Defaults to true.
"""
def compare(%Plausible.Site{} = site, %Stats.Query{} = source_query, mode, opts \\ []) do
if valid_mode?(source_query, mode) do
opts = Keyword.put_new(opts, :now, Timex.now(site.timezone))
do_compare(source_query, mode, opts)
else
{:error, :not_supported}
end
opts =
opts
|> Keyword.put_new(:now, Timex.now(site.timezone))
|> Keyword.put_new(:match_day_of_week?, false)
|> Keyword.put_new(:include_imported?, true)
with :ok <- validate_mode(source_query, mode),
{:ok, comparison_query} <- do_compare(source_query, mode, opts),
comparison_query <- maybe_include_imported(comparison_query, site, opts),
do: {:ok, comparison_query}
end
defp do_compare(source_query, "year_over_year", opts) do
@ -113,7 +120,7 @@ defmodule Plausible.Stats.Comparisons do
end
defp maybe_match_day_of_week(comparison_query, source_query, opts) do
if Keyword.get(opts, :match_day_of_week?, false) do
if Keyword.fetch!(opts, :match_day_of_week?) do
day_to_match = Date.day_of_week(source_query.date_range.first)
new_first =
@ -158,14 +165,16 @@ defmodule Plausible.Stats.Comparisons do
Date.add(date, -days_to_subtract)
end
@spec valid_mode?(Stats.Query.t(), mode()) :: boolean()
@doc """
Returns whether the source query and the selected mode support comparisons.
defp maybe_include_imported(query, site, opts) do
include_imported? = Stats.Query.include_imported?(query, site, opts[:include_imported?])
%Stats.Query{query | include_imported: include_imported?}
end
For example, the realtime view doesn't support comparisons. Additionally, only
#{inspect(@modes)} are supported.
"""
def valid_mode?(%Stats.Query{period: period}, mode) do
mode in @modes && period not in @disallowed_periods
defp validate_mode(%Stats.Query{period: period}, mode) do
if mode in @modes && period not in @disallowed_periods do
:ok
else
{:error, :not_supported}
end
end
end

View File

@ -249,18 +249,19 @@ defmodule Plausible.Stats.Query do
end
defp maybe_include_imported(query, site, params) do
imported_data_requested = params["with_imported"] == "true"
has_imported_data = site.imported_data && site.imported_data.status == "ok"
requested? = params["with_imported"] == "true"
%{query | include_imported: include_imported?(query, site, requested?)}
end
date_range_overlaps =
has_imported_data && !Timex.after?(query.date_range.first, site.imported_data.end_date)
no_filters_applied = Enum.empty?(query.filters)
include_imported =
imported_data_requested && has_imported_data && date_range_overlaps && no_filters_applied
%{query | include_imported: !!include_imported}
@spec include_imported?(t(), Plausible.Site.t(), boolean()) :: boolean()
def include_imported?(query, site, requested?) do
cond do
is_nil(site.imported_data) -> false
site.imported_data.status != "ok" -> false
Timex.after?(query.date_range.first, site.imported_data.end_date) -> false
Enum.any?(query.filters) -> false
true -> requested?
end
end
@spec trace(%__MODULE__{}) :: %__MODULE__{}

View File

@ -113,10 +113,13 @@ defmodule PlausibleWeb.Api.StatsController do
comparison_opts = parse_comparison_opts(params)
comparison_result =
{comparison_query, comparison_result} =
case Comparisons.compare(site, query, params["comparison"], comparison_opts) do
{:ok, comparison_query} -> Stats.timeseries(site, comparison_query, [selected_metric])
{:error, :not_supported} -> nil
{:ok, comparison_query} ->
{comparison_query, Stats.timeseries(site, comparison_query, [selected_metric])}
{:error, :not_supported} ->
{nil, nil}
end
labels = label_timeseries(timeseries_result, comparison_result)
@ -130,7 +133,7 @@ defmodule PlausibleWeb.Api.StatsController do
comparison_labels: comparison_result && label_timeseries(comparison_result, nil),
present_index: present_index,
interval: query.interval,
with_imported: query.include_imported,
with_imported: with_imported?(query, comparison_query),
imported_source: site.imported_data && site.imported_data.source,
full_intervals: full_intervals
})
@ -206,7 +209,7 @@ defmodule PlausibleWeb.Api.StatsController do
top_stats: top_stats,
interval: query.interval,
sample_percent: sample_percent,
with_imported: query.include_imported,
with_imported: with_imported?(query, comparison_query),
imported_source: site.imported_data && site.imported_data.source,
comparing_from: comparison_query && comparison_query.date_range.first,
comparing_to: comparison_query && comparison_query.date_range.last,
@ -1320,7 +1323,16 @@ defmodule PlausibleWeb.Api.StatsController do
[
from: params["compare_from"],
to: params["compare_to"],
match_day_of_week?: params["match_day_of_week"] == "true"
match_day_of_week?: params["match_day_of_week"] == "true",
include_imported?: params["with_imported"] == "true"
]
end
defp with_imported?(source_query, comparison_query) do
cond do
source_query.include_imported -> true
comparison_query && comparison_query.include_imported -> true
true -> false
end
end
end

View File

@ -10,12 +10,28 @@
# We recommend using the bang functions (`insert!`, `update!`
# and so on) as they will fail if something goes wrong.
FunWithFlags.enable(:comparisons)
user = Plausible.Factory.insert(:user, email: "user@plausible.test", password: "plausible")
beginning_of_time = NaiveDateTime.add(NaiveDateTime.utc_now(), -721, :day)
native_stats_range =
Date.range(
Date.add(Date.utc_today(), -720),
Date.utc_today()
)
imported_stats_range =
Date.range(
Date.add(native_stats_range.first, -360),
Date.add(native_stats_range.first, -1)
)
site =
Plausible.Factory.insert(:site, domain: "dummy.site", native_stats_start_at: beginning_of_time)
Plausible.Factory.insert(:site,
domain: "dummy.site",
native_stats_start_at: NaiveDateTime.new!(native_stats_range.first, ~T[00:00:00]),
stats_start_date: NaiveDateTime.new!(imported_stats_range.first, ~T[00:00:00])
)
_membership = Plausible.Factory.insert(:site_membership, user: user, site: site, role: :owner)
@ -77,17 +93,16 @@ geolocations = [
[]
]
Enum.flat_map(-720..0, fn day_index ->
date = Date.add(Date.utc_today(), day_index)
number_of_events = 0..:rand.uniform(500)
Enum.map(number_of_events, fn _ ->
native_stats_range
|> Enum.with_index()
|> Enum.flat_map(fn {date, index} ->
Enum.map(0..:rand.uniform(500), fn _ ->
geolocation = Enum.random(geolocations)
[
site_id: site.id,
hostname: site.domain,
timestamp: put_random_time.(date, day_index),
timestamp: put_random_time.(date, index),
referrer_source: Enum.random(["", "Facebook", "Twitter", "DuckDuckGo", "Google"]),
browser: Enum.random(["Edge", "Chrome", "Safari", "Firefox", "Vivaldi"]),
browser_version: to_string(Enum.random(0..50)),
@ -100,3 +115,48 @@ Enum.flat_map(-720..0, fn day_index ->
end)
end)
|> Plausible.TestUtils.populate_stats()
site =
site
|> Plausible.Site.start_import(
imported_stats_range.first,
imported_stats_range.last,
"Google Analytics"
)
|> Plausible.Repo.update!()
imported_stats_range
|> Enum.flat_map(fn date ->
Enum.flat_map(0..:rand.uniform(500), fn _ ->
[
Plausible.Factory.build(:imported_visitors,
date: date,
pageviews: Enum.random(1..20),
visitors: Enum.random(1..20),
bounces: Enum.random(1..20),
visits: Enum.random(1..200),
visit_duration: Enum.random(1000..10000)
),
Plausible.Factory.build(:imported_sources,
date: date,
source: Enum.random(["", "Facebook", "Twitter", "DuckDuckGo", "Google"]),
visitors: Enum.random(1..20),
visits: Enum.random(1..200),
bounces: Enum.random(1..20),
visit_duration: Enum.random(1000..10000)
),
Plausible.Factory.build(:imported_pages,
date: date,
visitors: Enum.random(1..20),
pageviews: Enum.random(1..20),
exits: Enum.random(1..20),
time_on_page: Enum.random(1000..10000)
)
]
end)
end)
|> then(&Plausible.TestUtils.populate_stats(site, &1))
site
|> Plausible.Site.import_success()
|> Plausible.Repo.update!()

View File

@ -53,7 +53,8 @@ defmodule PlausibleWeb.Api.StatsController.MainGraphTest do
"/api/stats/#{site.domain}/main-graph?period=day&date=2021-01-01&with_imported=true"
)
assert %{"plot" => plot, "imported_source" => "Google Analytics"} = json_response(conn, 200)
assert %{"plot" => plot, "imported_source" => "Google Analytics", "with_imported" => true} =
json_response(conn, 200)
assert plot == [2] ++ List.duplicate(0, 23)
end
@ -119,7 +120,8 @@ defmodule PlausibleWeb.Api.StatsController.MainGraphTest do
"/api/stats/#{site.domain}/main-graph?period=month&date=2021-01-01&with_imported=true"
)
assert %{"plot" => plot, "imported_source" => "Google Analytics"} = json_response(conn, 200)
assert %{"plot" => plot, "imported_source" => "Google Analytics", "with_imported" => true} =
json_response(conn, 200)
assert Enum.count(plot) == 31
assert List.first(plot) == 2
@ -139,7 +141,8 @@ defmodule PlausibleWeb.Api.StatsController.MainGraphTest do
"/api/stats/#{site.domain}/main-graph?period=month&date=2021-01-01&with_imported=true"
)
assert %{"plot" => plot, "imported_source" => "Google Analytics"} = json_response(conn, 200)
assert %{"plot" => plot, "imported_source" => "Google Analytics", "with_imported" => true} =
json_response(conn, 200)
assert Enum.count(plot) == 31
assert List.first(plot) == 1
@ -829,5 +832,84 @@ defmodule PlausibleWeb.Api.StatsController.MainGraphTest do
assert length(labels) == length(comparison_labels)
assert "__blank__" == List.last(labels)
end
test "compares imported data and native data together", %{conn: conn, site: site} do
populate_stats(site, [
build(:imported_visitors, date: ~D[2020-01-02]),
build(:imported_visitors, date: ~D[2020-01-02]),
build(:pageview, timestamp: ~N[2021-01-01 00:00:00]),
build(:pageview, timestamp: ~N[2021-01-01 00:00:00]),
build(:pageview, timestamp: ~N[2021-01-01 00:00:00]),
build(:pageview, timestamp: ~N[2021-01-01 00:00:00])
])
site
|> Ecto.Changeset.change(
imported_data: %{
start_date: ~D[2005-01-01],
end_date: ~D[2020-01-31],
source: "Google Analytics",
status: "ok"
}
)
|> Plausible.Repo.update!()
conn =
get(
conn,
"/api/stats/#{site.domain}/main-graph?period=year&date=2021-01-01&with_imported=true&comparison=year_over_year&interval=month"
)
assert %{
"plot" => plot,
"comparison_plot" => comparison_plot,
"imported_source" => "Google Analytics",
"with_imported" => true
} = json_response(conn, 200)
assert 4 == Enum.sum(plot)
assert 2 == Enum.sum(comparison_plot)
end
test "does not return imported data when with_imported is set to false when comparing", %{
conn: conn,
site: site
} do
populate_stats(site, [
build(:imported_visitors, date: ~D[2020-01-02]),
build(:imported_visitors, date: ~D[2020-01-02]),
build(:pageview, timestamp: ~N[2021-01-01 00:00:00]),
build(:pageview, timestamp: ~N[2021-01-01 00:00:00]),
build(:pageview, timestamp: ~N[2021-01-01 00:00:00]),
build(:pageview, timestamp: ~N[2021-01-01 00:00:00])
])
site
|> Ecto.Changeset.change(
imported_data: %{
start_date: ~D[2005-01-01],
end_date: ~D[2020-01-31],
source: "Google Analytics",
status: "ok"
}
)
|> Plausible.Repo.update!()
conn =
get(
conn,
"/api/stats/#{site.domain}/main-graph?period=year&date=2021-01-01&with_imported=false&comparison=year_over_year&interval=month"
)
assert %{
"plot" => plot,
"comparison_plot" => comparison_plot,
"imported_source" => "Google Analytics",
"with_imported" => false
} = json_response(conn, 200)
assert 4 == Enum.sum(plot)
assert 0 == Enum.sum(comparison_plot)
end
end
end

View File

@ -779,7 +779,7 @@ defmodule PlausibleWeb.Api.StatsController.TopStatsTest do
end
describe "GET /api/stats/top-stats - with comparisons" do
setup [:create_user, :log_in, :create_new_site]
setup [:create_user, :log_in, :create_new_site, :add_imported_data]
test "defaults to previous period when comparison is not set", %{site: site, conn: conn} do
populate_stats(site, [
@ -847,5 +847,72 @@ defmodule PlausibleWeb.Api.StatsController.TopStatsTest do
"to" => "2021-01-31"
} = json_response(conn, 200)
end
test "compares native and imported data", %{site: site, conn: conn} do
populate_stats(site, [
build(:imported_visitors, date: ~D[2020-01-02]),
build(:imported_visitors, date: ~D[2020-01-02]),
build(:pageview, timestamp: ~N[2021-01-01 00:00:00]),
build(:pageview, timestamp: ~N[2021-01-01 00:00:00]),
build(:pageview, timestamp: ~N[2021-01-01 00:00:00]),
build(:pageview, timestamp: ~N[2021-01-01 00:00:00])
])
site
|> Ecto.Changeset.change(
imported_data: %{
start_date: ~D[2005-01-01],
end_date: ~D[2020-01-31],
source: "Google Analytics",
status: "ok"
}
)
|> Plausible.Repo.update!()
conn =
get(
conn,
"/api/stats/#{site.domain}/top-stats?period=month&date=2021-01-01&with_imported=true&comparison=year_over_year"
)
assert %{"top_stats" => top_stats, "with_imported" => true} = json_response(conn, 200)
assert %{"change" => 100, "comparison_value" => 2, "name" => "Total visits", "value" => 4} in top_stats
end
test "does not compare imported data when with_imported is set to false", %{
site: site,
conn: conn
} do
populate_stats(site, [
build(:imported_visitors, date: ~D[2020-01-02]),
build(:imported_visitors, date: ~D[2020-01-02]),
build(:pageview, timestamp: ~N[2021-01-01 00:00:00]),
build(:pageview, timestamp: ~N[2021-01-01 00:00:00]),
build(:pageview, timestamp: ~N[2021-01-01 00:00:00]),
build(:pageview, timestamp: ~N[2021-01-01 00:00:00])
])
site
|> Ecto.Changeset.change(
imported_data: %{
start_date: ~D[2005-01-01],
end_date: ~D[2020-01-31],
source: "Google Analytics",
status: "ok"
}
)
|> Plausible.Repo.update!()
conn =
get(
conn,
"/api/stats/#{site.domain}/top-stats?period=month&date=2021-01-01&with_imported=false&comparison=year_over_year"
)
assert %{"top_stats" => top_stats, "with_imported" => false} = json_response(conn, 200)
assert %{"change" => 100, "comparison_value" => 0, "name" => "Total visits", "value" => 4} in top_stats
end
end
end