mirror of
https://github.com/plausible/analytics.git
synced 2024-12-22 17:11:36 +03:00
add csv fixture for e2e export/import test (#4037)
* add inline csv fixture * use new csvs * cleanup csv reading and site_id replacing * perform comparisons between native and imported queries * help help help * help help * help * eh * fin * exclude export/import e2e test when experimental_reduced_joins flag is enabled * adapt to new pageviews * adapt to experimental_reduced_joins * credo is formatter * cleanup * assert bounce rates equal in city breakdown * fix rebase against master * clean-up dataset * update comment * fix typo * apply csv changes to the files * use sessions timestamp for exports' dates --------- Co-authored-by: RobertJoonas <56999674+RobertJoonas@users.noreply.github.com>
This commit is contained in:
parent
62138e0dad
commit
02d4709be7
Binary file not shown.
Can't render this file because it is too large.
|
File diff suppressed because it is too large
Load Diff
@ -1,8 +1,8 @@
|
||||
defmodule Plausible.Imported.CSVImporterTest do
|
||||
use Plausible
|
||||
use Plausible.DataCase
|
||||
use Plausible.Repo
|
||||
use PlausibleWeb.ConnCase
|
||||
use Bamboo.Test
|
||||
|
||||
alias Plausible.Imported.{CSVImporter, SiteImport}
|
||||
require SiteImport
|
||||
|
||||
@ -414,78 +414,44 @@ defmodule Plausible.Imported.CSVImporterTest do
|
||||
end
|
||||
|
||||
describe "export -> import" do
|
||||
setup [:create_user, :create_new_site, :clean_buckets]
|
||||
setup [:create_user, :log_in, :create_api_key, :use_api_key, :clean_buckets]
|
||||
|
||||
@tag :tmp_dir
|
||||
test "it works", %{site: site, user: user, tmp_dir: tmp_dir} do
|
||||
populate_stats(site, [
|
||||
build(:pageview,
|
||||
user_id: 123,
|
||||
pathname: "/",
|
||||
timestamp:
|
||||
Timex.shift(~N[2021-10-20 12:00:00], minutes: -1) |> NaiveDateTime.truncate(:second),
|
||||
country_code: "EE",
|
||||
subdivision1_code: "EE-37",
|
||||
city_geoname_id: 588_409,
|
||||
referrer_source: "Google"
|
||||
),
|
||||
build(:pageview,
|
||||
user_id: 123,
|
||||
pathname: "/some-other-page",
|
||||
timestamp:
|
||||
Timex.shift(~N[2021-10-20 12:00:00], minutes: -2) |> NaiveDateTime.truncate(:second),
|
||||
country_code: "EE",
|
||||
subdivision1_code: "EE-37",
|
||||
city_geoname_id: 588_409,
|
||||
referrer_source: "Google"
|
||||
),
|
||||
build(:pageview,
|
||||
pathname: "/",
|
||||
timestamp:
|
||||
Timex.shift(~N[2021-10-20 12:00:00], days: -1) |> NaiveDateTime.truncate(:second),
|
||||
utm_medium: "search",
|
||||
utm_campaign: "ads",
|
||||
utm_source: "google",
|
||||
utm_content: "content",
|
||||
utm_term: "term",
|
||||
browser: "Firefox",
|
||||
browser_version: "120",
|
||||
operating_system: "Mac",
|
||||
operating_system_version: "14"
|
||||
),
|
||||
build(:pageview,
|
||||
timestamp:
|
||||
Timex.shift(~N[2021-10-20 12:00:00], months: -1) |> NaiveDateTime.truncate(:second),
|
||||
country_code: "EE",
|
||||
browser: "Firefox",
|
||||
browser_version: "120",
|
||||
operating_system: "Mac",
|
||||
operating_system_version: "14"
|
||||
),
|
||||
build(:pageview,
|
||||
timestamp:
|
||||
Timex.shift(~N[2021-10-20 12:00:00], months: -5) |> NaiveDateTime.truncate(:second),
|
||||
utm_campaign: "ads",
|
||||
country_code: "EE",
|
||||
referrer_source: "Google",
|
||||
browser: "FirefoxNoVersion",
|
||||
operating_system: "MacNoVersion"
|
||||
),
|
||||
build(:event,
|
||||
timestamp:
|
||||
Timex.shift(~N[2021-10-20 12:00:00], days: -1) |> NaiveDateTime.truncate(:second),
|
||||
name: "Signup",
|
||||
"meta.key": ["variant"],
|
||||
"meta.value": ["A"]
|
||||
)
|
||||
test "it works", %{conn: conn, user: user, tmp_dir: tmp_dir} do
|
||||
exported_site = insert(:site, members: [user])
|
||||
imported_site = insert(:site, members: [user])
|
||||
|
||||
process_csv = fn path ->
|
||||
[header | rows] = NimbleCSV.RFC4180.parse_string(File.read!(path), skip_headers: false)
|
||||
|
||||
site_id_column_index =
|
||||
Enum.find_index(header, &(&1 == "site_id")) ||
|
||||
raise "couldn't find site_id column in CSV header #{inspect(header)}"
|
||||
|
||||
rows =
|
||||
Enum.map(rows, fn row ->
|
||||
List.replace_at(row, site_id_column_index, exported_site.id)
|
||||
end)
|
||||
|
||||
NimbleCSV.RFC4180.dump_to_iodata([header | rows])
|
||||
end
|
||||
|
||||
Plausible.IngestRepo.query!([
|
||||
"insert into events_v2 format CSVWithNames\n",
|
||||
process_csv.("fixture/plausible_io_events_v2_2024_03_01_2024_03_31_500users_dump.csv")
|
||||
])
|
||||
|
||||
Plausible.IngestRepo.query!([
|
||||
"insert into sessions_v2 format CSVWithNames\n",
|
||||
process_csv.("fixture/plausible_io_sessions_v2_2024_03_01_2024_03_31_500users_dump.csv")
|
||||
])
|
||||
|
||||
# export archive to s3
|
||||
on_ee do
|
||||
assert {:ok, _job} = Plausible.Exports.schedule_s3_export(site.id, user.email)
|
||||
assert {:ok, _job} = Plausible.Exports.schedule_s3_export(exported_site.id, user.email)
|
||||
else
|
||||
assert {:ok, %{args: %{"local_path" => local_path}}} =
|
||||
Plausible.Exports.schedule_local_export(site.id, user.email)
|
||||
Plausible.Exports.schedule_local_export(exported_site.id, user.email)
|
||||
end
|
||||
|
||||
assert %{success: 1} = Oban.drain_queue(queue: :analytics_exports, with_safety: false)
|
||||
@ -498,14 +464,14 @@ defmodule Plausible.Imported.CSVImporterTest do
|
||||
assert email.to == [{user.name, user.email}]
|
||||
|
||||
assert email.html_body =~
|
||||
~s[Please click <a href="http://localhost:8000/#{URI.encode_www_form(site.domain)}/download/export">here</a> to start the download process.]
|
||||
~s[Please click <a href="http://localhost:8000/#{URI.encode_www_form(exported_site.domain)}/download/export">here</a> to start the download process.]
|
||||
|
||||
# download archive
|
||||
on_ee do
|
||||
ExAws.request!(
|
||||
ExAws.S3.download_file(
|
||||
Plausible.S3.exports_bucket(),
|
||||
to_string(site.id),
|
||||
to_string(exported_site.id),
|
||||
Path.join(tmp_dir, "plausible-export.zip")
|
||||
)
|
||||
)
|
||||
@ -521,7 +487,7 @@ defmodule Plausible.Imported.CSVImporterTest do
|
||||
uploads =
|
||||
Enum.map(files, fn file ->
|
||||
on_ee do
|
||||
%{s3_url: s3_url} = Plausible.S3.import_presign_upload(site.id, file)
|
||||
%{s3_url: s3_url} = Plausible.S3.import_presign_upload(imported_site.id, file)
|
||||
[bucket, key] = String.split(URI.parse(s3_url).path, "/", parts: 2)
|
||||
ExAws.request!(ExAws.S3.put_object(bucket, key, File.read!(file)))
|
||||
%{"filename" => Path.basename(file), "s3_url" => s3_url}
|
||||
@ -534,7 +500,7 @@ defmodule Plausible.Imported.CSVImporterTest do
|
||||
date_range = CSVImporter.date_range(uploads)
|
||||
|
||||
{:ok, _job} =
|
||||
CSVImporter.new_import(site, user,
|
||||
CSVImporter.new_import(imported_site, user,
|
||||
start_date: date_range.first,
|
||||
end_date: date_range.last,
|
||||
uploads: uploads,
|
||||
@ -545,13 +511,347 @@ defmodule Plausible.Imported.CSVImporterTest do
|
||||
|
||||
# validate import
|
||||
assert %SiteImport{
|
||||
start_date: ~D[2021-05-20],
|
||||
end_date: ~D[2021-10-20],
|
||||
start_date: ~D[2024-03-28],
|
||||
end_date: ~D[2024-03-31],
|
||||
source: :csv,
|
||||
status: :completed
|
||||
} = Repo.get_by!(SiteImport, site_id: site.id)
|
||||
} = Repo.get_by!(SiteImport, site_id: imported_site.id)
|
||||
|
||||
assert Plausible.Stats.Clickhouse.imported_pageview_count(site) == 5
|
||||
assert Plausible.Stats.Clickhouse.imported_pageview_count(exported_site) == 0
|
||||
assert Plausible.Stats.Clickhouse.imported_pageview_count(imported_site) == 6298
|
||||
|
||||
# compare original and imported data via stats api requests
|
||||
results = fn path, params ->
|
||||
get(conn, path, params)
|
||||
|> json_response(200)
|
||||
|> Map.fetch!("results")
|
||||
end
|
||||
|
||||
timeseries = fn params ->
|
||||
results.("/api/v1/stats/timeseries", params)
|
||||
end
|
||||
|
||||
common_params = fn site ->
|
||||
%{
|
||||
"site_id" => site.domain,
|
||||
"period" => "custom",
|
||||
"date" => "2024-03-28,2024-03-31",
|
||||
"with_imported" => true
|
||||
}
|
||||
end
|
||||
|
||||
breakdown = fn params_or_site, by ->
|
||||
params =
|
||||
case params_or_site do
|
||||
%Plausible.Site{} = site ->
|
||||
common_params.(site)
|
||||
|> Map.put("metrics", "visitors,visits,pageviews,visit_duration,bounce_rate")
|
||||
|> Map.put("limit", 1000)
|
||||
|> Map.put("property", "visit:#{by}")
|
||||
|
||||
params ->
|
||||
params
|
||||
end
|
||||
|
||||
Enum.sort_by(results.("/api/v1/stats/breakdown", params), &Map.fetch!(&1, by))
|
||||
end
|
||||
|
||||
# timeseries
|
||||
timeseries_params = fn site ->
|
||||
Map.put(
|
||||
common_params.(site),
|
||||
"metrics",
|
||||
"visitors,visits,pageviews,views_per_visit,visit_duration,bounce_rate"
|
||||
)
|
||||
end
|
||||
|
||||
exported_timeseries = timeseries.(timeseries_params.(exported_site))
|
||||
imported_timeseries = timeseries.(timeseries_params.(imported_site))
|
||||
|
||||
pairwise(exported_timeseries, imported_timeseries, fn exported, imported ->
|
||||
assert exported["date"] == imported["date"]
|
||||
assert exported["pageviews"] == imported["pageviews"]
|
||||
assert exported["bounce_rate"] == imported["bounce_rate"]
|
||||
assert exported["visitors"] == imported["visitors"]
|
||||
assert exported["visits"] == imported["visits"]
|
||||
assert_in_delta exported["visit_duration"], imported["visit_duration"], 1
|
||||
end)
|
||||
|
||||
# timeseries' views per visit difference is within 3%
|
||||
assert summary(field(exported_timeseries, "views_per_visit")) == [
|
||||
2.96,
|
||||
2.99,
|
||||
3.065,
|
||||
3.135,
|
||||
3.15
|
||||
]
|
||||
|
||||
assert summary(field(imported_timeseries, "views_per_visit")) == [
|
||||
2.95,
|
||||
3.04,
|
||||
3.075,
|
||||
3.1025,
|
||||
3.17
|
||||
]
|
||||
|
||||
assert summary(
|
||||
pairwise(exported_timeseries, imported_timeseries, fn exported, imported ->
|
||||
abs(1 - imported["views_per_visit"] / exported["views_per_visit"])
|
||||
end)
|
||||
) == [
|
||||
0.0033783783783782884,
|
||||
0.005606499356499317,
|
||||
0.011161823621887501,
|
||||
0.017814164004259808,
|
||||
0.023333333333333206
|
||||
]
|
||||
|
||||
# pages
|
||||
pages_params = fn site ->
|
||||
common_params.(site)
|
||||
|> Map.put("metrics", "visitors,visits,pageviews,time_on_page,visit_duration,bounce_rate")
|
||||
|> Map.put("limit", 1000)
|
||||
|> Map.put("property", "event:page")
|
||||
end
|
||||
|
||||
exported_pages = breakdown.(pages_params.(exported_site), "page")
|
||||
imported_pages = breakdown.(pages_params.(imported_site), "page")
|
||||
|
||||
pairwise(exported_pages, imported_pages, fn exported, imported ->
|
||||
assert exported["page"] == imported["page"]
|
||||
assert exported["pageviews"] == imported["pageviews"]
|
||||
assert exported["bounce_rate"] == imported["bounce_rate"]
|
||||
|
||||
# time on page is not being exported/imported right now
|
||||
assert imported["time_on_page"] == 0
|
||||
end)
|
||||
|
||||
# page breakdown's visit_duration difference is within 1%
|
||||
assert summary(field(exported_pages, "visit_duration")) == [0, 0, 25, 217.5, 743]
|
||||
assert summary(field(imported_pages, "visit_duration")) == [0, 0, 25, 217.55, 742.8]
|
||||
|
||||
assert summary(
|
||||
pairwise(exported_pages, imported_pages, fn exported, imported ->
|
||||
e = exported["visit_duration"]
|
||||
i = imported["visit_duration"]
|
||||
|
||||
if is_number(e) and is_number(i) and i > 0 do
|
||||
abs(1 - e / i)
|
||||
else
|
||||
# both nil or both zero
|
||||
assert e == i
|
||||
_no_diff = 0
|
||||
end
|
||||
end)
|
||||
) == [0, 0, 0, 0, 0.002375296912114022]
|
||||
|
||||
# NOTE: page breakdown's visitors difference is up to almost 37%
|
||||
assert summary(field(exported_pages, "visitors")) == [1, 1, 2, 2.5, 393]
|
||||
assert summary(field(imported_pages, "visitors")) == [1, 1, 2, 2.5, 617]
|
||||
|
||||
assert summary(
|
||||
pairwise(exported_pages, imported_pages, fn exported, imported ->
|
||||
e = exported["visitors"]
|
||||
i = imported["visitors"]
|
||||
|
||||
# only consider non tiny readings
|
||||
if e > 5, do: abs(1 - e / i), else: 0
|
||||
end)
|
||||
) == [0, 0, 0, 0, 0.36304700162074555]
|
||||
|
||||
# page breakdown's visits difference is within 2% for non-tiny values
|
||||
assert summary(field(exported_pages, "visits")) == [1, 1, 2, 3, 1774]
|
||||
assert summary(field(imported_pages, "visits")) == [1, 1, 2, 2.5, 1777]
|
||||
|
||||
assert summary(
|
||||
pairwise(exported_pages, imported_pages, fn exported, imported ->
|
||||
e = exported["visits"]
|
||||
i = imported["visits"]
|
||||
|
||||
# only consider non tiny readings
|
||||
if e > 4, do: abs(1 - e / i), else: 0
|
||||
end)
|
||||
) == [0, 0, 0, 0, 0.01666666666666672]
|
||||
|
||||
# sources
|
||||
exported_sources = breakdown.(exported_site, "source")
|
||||
imported_sources = breakdown.(imported_site, "source")
|
||||
|
||||
pairwise(exported_sources, imported_sources, fn exported, imported ->
|
||||
assert exported["source"] == imported["source"]
|
||||
assert exported["bounce_rate"] == imported["bounce_rate"]
|
||||
assert exported["visits"] == imported["visits"]
|
||||
assert exported["pageviews"] == imported["pageviews"]
|
||||
assert_in_delta exported["visit_duration"], imported["visit_duration"], 1
|
||||
end)
|
||||
|
||||
# NOTE: source breakdown's visitors difference is up to almost 40%
|
||||
assert summary(field(exported_sources, "visitors")) == [1, 1, 1, 2, 451]
|
||||
assert summary(field(imported_sources, "visitors")) == [1, 1, 1, 2, 711]
|
||||
|
||||
assert summary(
|
||||
pairwise(exported_sources, imported_sources, fn exported, imported ->
|
||||
abs(1 - exported["visitors"] / imported["visitors"])
|
||||
end)
|
||||
) == [0, 0, 0, 0, 0.3656821378340366]
|
||||
|
||||
# utm mediums
|
||||
assert breakdown.(exported_site, "utm_medium") == breakdown.(imported_site, "utm_medium")
|
||||
|
||||
# entry pages
|
||||
exported_entry_pages = breakdown.(exported_site, "entry_page")
|
||||
imported_entry_pages = breakdown.(imported_site, "entry_page")
|
||||
|
||||
pairwise(exported_entry_pages, imported_entry_pages, fn exported, imported ->
|
||||
assert exported["entry_page"] == imported["entry_page"]
|
||||
assert exported["bounce_rate"] == imported["bounce_rate"]
|
||||
assert exported["visits"] == imported["visits"]
|
||||
assert exported["pageviews"] == imported["pageviews"]
|
||||
assert_in_delta exported["visit_duration"], imported["visit_duration"], 1
|
||||
end)
|
||||
|
||||
# NOTE: entry page breakdown's visitors difference is up to almost 50%
|
||||
assert summary(field(exported_entry_pages, "visitors")) == [1, 1, 1, 2, 310]
|
||||
assert summary(field(imported_entry_pages, "visitors")) == [1, 1, 1, 2, 475]
|
||||
|
||||
assert summary(
|
||||
pairwise(exported_entry_pages, imported_entry_pages, fn exported, imported ->
|
||||
abs(1 - exported["visitors"] / imported["visitors"])
|
||||
end)
|
||||
) == [0, 0, 0, 0, 0.5]
|
||||
|
||||
# cities
|
||||
exported_cities = breakdown.(exported_site, "city")
|
||||
imported_cities = breakdown.(imported_site, "city")
|
||||
|
||||
pairwise(exported_cities, imported_cities, fn exported, imported ->
|
||||
assert exported["city"] == imported["city"]
|
||||
assert exported["bounce_rate"] == imported["bounce_rate"]
|
||||
assert exported["pageviews"] == imported["pageviews"]
|
||||
assert_in_delta exported["visit_duration"], imported["visit_duration"], 1
|
||||
assert_in_delta exported["visits"], imported["visits"], 1
|
||||
end)
|
||||
|
||||
# NOTE: city breakdown's visitors relative difference is up to 60%,
|
||||
# but the absolute difference is small
|
||||
assert summary(field(exported_cities, "visitors")) == [1, 1, 1, 1, 7]
|
||||
assert summary(field(imported_cities, "visitors")) == [1, 1, 1, 3, 13]
|
||||
|
||||
assert summary(
|
||||
pairwise(exported_cities, imported_cities, fn exported, imported ->
|
||||
e = exported["visitors"]
|
||||
i = imported["visitors"]
|
||||
|
||||
# only consider non tiny readings
|
||||
if e > 3, do: abs(1 - e / i), else: 0
|
||||
end)
|
||||
) == [0, 0, 0, 0, 0.6]
|
||||
|
||||
# devices
|
||||
exported_devices = breakdown.(exported_site, "device")
|
||||
imported_devices = breakdown.(imported_site, "device")
|
||||
|
||||
pairwise(exported_devices, imported_devices, fn exported, imported ->
|
||||
assert exported["device"] == imported["device"]
|
||||
assert exported["bounce_rate"] == imported["bounce_rate"]
|
||||
assert exported["visits"] == imported["visits"]
|
||||
assert exported["pageviews"] == imported["pageviews"]
|
||||
assert_in_delta exported["visit_duration"], imported["visit_duration"], 1
|
||||
end)
|
||||
|
||||
# NOTE: device breakdown's visitors difference is between 30% and 40%
|
||||
assert summary(field(exported_devices, "visitors")) == [216, 232.25, 248.5, 264.75, 281]
|
||||
assert summary(field(imported_devices, "visitors")) == [304, 341.5, 379, 416.5, 454]
|
||||
|
||||
assert summary(
|
||||
pairwise(exported_devices, imported_devices, fn exported, imported ->
|
||||
abs(1 - exported["visitors"] / imported["visitors"])
|
||||
end)
|
||||
) == [
|
||||
0.2894736842105263,
|
||||
0.3123695803385115,
|
||||
0.3352654764664966,
|
||||
0.3581613725944818,
|
||||
0.3810572687224669
|
||||
]
|
||||
|
||||
# browsers
|
||||
exported_browsers = breakdown.(exported_site, "browser")
|
||||
imported_browsers = breakdown.(imported_site, "browser")
|
||||
|
||||
pairwise(exported_browsers, imported_browsers, fn exported, imported ->
|
||||
assert exported["browser"] == imported["browser"]
|
||||
assert exported["bounce_rate"] == imported["bounce_rate"]
|
||||
assert exported["visits"] == imported["visits"]
|
||||
assert exported["pageviews"] == imported["pageviews"]
|
||||
assert_in_delta exported["visit_duration"], imported["visit_duration"], 1
|
||||
end)
|
||||
|
||||
# NOTE: browser breakdown's visitors difference is up to almost 70%
|
||||
assert summary(field(exported_browsers, "visitors")) == [1, 1, 10, 105, 274]
|
||||
assert summary(field(imported_browsers, "visitors")) == [1, 2, 18, 156.5, 422]
|
||||
|
||||
assert summary(
|
||||
pairwise(exported_browsers, imported_browsers, fn exported, imported ->
|
||||
abs(1 - exported["visitors"] / imported["visitors"])
|
||||
end)
|
||||
) == [
|
||||
0,
|
||||
0.1422018348623853,
|
||||
0.3507109004739336,
|
||||
0.43801169590643274,
|
||||
0.6666666666666667
|
||||
]
|
||||
|
||||
# os
|
||||
exported_os = breakdown.(exported_site, "os")
|
||||
imported_os = breakdown.(imported_site, "os")
|
||||
|
||||
pairwise(exported_os, imported_os, fn exported, imported ->
|
||||
assert exported["os"] == imported["os"]
|
||||
assert exported["bounce_rate"] == imported["bounce_rate"]
|
||||
assert exported["visits"] == imported["visits"]
|
||||
assert exported["pageviews"] == imported["pageviews"]
|
||||
assert_in_delta exported["visit_duration"], imported["visit_duration"], 1
|
||||
end)
|
||||
|
||||
# NOTE: os breakdown's visitors difference is between 20% and 60%
|
||||
assert summary(field(exported_os, "visitors")) == [2, 9.5, 51, 130, 165]
|
||||
assert summary(field(imported_os, "visitors")) == [5, 12.5, 70, 200, 258]
|
||||
|
||||
assert summary(
|
||||
pairwise(exported_os, imported_os, fn exported, imported ->
|
||||
abs(1 - exported["visitors"] / imported["visitors"])
|
||||
end)
|
||||
) == [
|
||||
0.1578947368421053,
|
||||
0.28315018315018314,
|
||||
0.36046511627906974,
|
||||
0.463855421686747,
|
||||
0.6
|
||||
]
|
||||
|
||||
# os versions
|
||||
exported_os_versions = breakdown.(exported_site, "os_version")
|
||||
imported_os_versions = breakdown.(imported_site, "os_version")
|
||||
|
||||
pairwise(exported_os_versions, imported_os_versions, fn exported, imported ->
|
||||
assert exported["os_version"] == imported["os_version"]
|
||||
assert exported["bounce_rate"] == imported["bounce_rate"]
|
||||
assert exported["visits"] == imported["visits"]
|
||||
assert exported["pageviews"] == imported["pageviews"]
|
||||
assert_in_delta exported["visit_duration"], imported["visit_duration"], 1
|
||||
end)
|
||||
|
||||
# NOTE: os version breakdown's visitors difference is up to almost 80%
|
||||
assert summary(field(exported_os_versions, "visitors")) == [1, 1, 3, 10.75, 165]
|
||||
assert summary(field(imported_os_versions, "visitors")) == [1, 1.75, 4.5, 14.5, 258]
|
||||
|
||||
assert summary(
|
||||
pairwise(exported_os_versions, imported_os_versions, fn exported, imported ->
|
||||
abs(1 - exported["visitors"] / imported["visitors"])
|
||||
end)
|
||||
) == [0, 0, 0.16985645933014354, 0.3401162790697675, 0.75]
|
||||
end
|
||||
end
|
||||
|
||||
@ -578,4 +878,36 @@ defmodule Plausible.Imported.CSVImporterTest do
|
||||
:ok
|
||||
end
|
||||
end
|
||||
|
||||
# Applies `f` to each positional pair taken from `left` and `right` and
# returns the list of results, in order.
#
# Fails the current test first if the two lists differ in length, so a
# missing or extra row is reported instead of being silently dropped by zip.
defp pairwise(left, right, f) do
  assert length(left) == length(right)

  for {left_item, right_item} <- Enum.zip(left, right) do
    f.(left_item, right_item)
  end
end
|
||||
|
||||
# Collects the values stored under `field` in every result map, keeping only
# the numeric ones (e.g. `nil` readings are discarded).
#
# Raises `KeyError` if any result map lacks `field`.
defp field(results, field) do
  Enum.flat_map(results, fn result ->
    case Map.fetch!(result, field) do
      value when is_number(value) -> [value]
      _non_numeric -> []
    end
  end)
end
|
||||
|
||||
# Five-number summary of a list of numbers:
# `[min, 25th percentile, median, 75th percentile, max]`.
#
# Percentiles are linearly interpolated between the two nearest ranks of the
# sorted input. A single-element list yields that element in every position.
# An empty list is not supported and raises `ArithmeticError`.
defp summary(values) do
  sorted = Enum.sort(values)
  # Computed once; the percentile closure below is called three times.
  last_index = length(sorted) - 1

  percentile = fn n ->
    rank = n / 100.0 * last_index
    lower_index = trunc(rank)
    # Clamp the upper neighbour to the last element. Without this a
    # single-element list would read past the end, get `nil`, and crash
    # in the subtraction below.
    upper_index = min(lower_index + 1, last_index)

    lower = Enum.at(sorted, lower_index)
    upper = Enum.at(sorted, upper_index)

    lower + (upper - lower) * (rank - lower_index)
  end

  [
    List.first(sorted),
    percentile.(25),
    percentile.(50),
    percentile.(75),
    List.last(sorted)
  ]
end
|
||||
end
|
||||
|
Loading…
Reference in New Issue
Block a user