Delete imports which have no stats (#3972)

This commit is contained in:
Adrian Gruntkowski 2024-04-04 18:55:14 +02:00 committed by GitHub
parent e5b7f1afd0
commit 33eed9d7db
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 56 additions and 23 deletions

View File

@ -72,35 +72,36 @@ defmodule Plausible.DataMigration.SiteImports do
end_date = imported_stats_end_date(site.id, import_ids) end_date = imported_stats_end_date(site.id, import_ids)
end_date = if !end_date do
if !end_date do IO.puts(
IO.puts( "Site import #{site_import.id} (site ID #{site.id}) does not have any recorded stats. Removing it."
"Site import #{site_import.id} (site ID #{site.id}) does not have any recorded stats. Setting end date to minimum." )
)
Date.add(site_import.start_date, 2) if site_import.legacy do
else clear_imported_data(site, dry_run?)
end_date
end end
end_date = delete!(site_import, dry_run?)
if Date.compare(end_date, site_import.end_date) in [:lt, :eq] do else
end_date end_date =
else if Date.compare(end_date, site_import.end_date) in [:lt, :eq] do
IO.puts( end_date
"Site import #{site_import.id} (site ID #{site.id}) computed end date is later than the current one. Skipping." else
) IO.puts(
"Site import #{site_import.id} (site ID #{site.id}) computed end date is later than the current one. Skipping."
)
site_import.end_date site_import.end_date
end end
site_import site_import
|> Ecto.Changeset.change(end_date: end_date) |> Ecto.Changeset.change(end_date: end_date)
|> update!(dry_run?) |> update!(dry_run?)
IO.puts( IO.puts(
"End date of site import #{site_import.id} (site ID #{site.id}) adjusted to #{end_date}" "End date of site import #{site_import.id} (site ID #{site.id}) adjusted to #{end_date}"
) )
end
end end
IO.puts("Done processing site ID #{site.id}") IO.puts("Done processing site ID #{site.id}")
@ -149,6 +150,14 @@ defmodule Plausible.DataMigration.SiteImports do
end end
end end
# Resets the `imported_data` field of `site` to `nil`.
#
# With `dry_run?` set to `false` the change is written through a single
# `Repo.update_all/2` call scoped to the row whose id equals `site.id`.
# With `dry_run?` set to `true` nothing is persisted; a copy of `site` with
# the field cleared is returned instead.
defp clear_imported_data(site, false = _dry_run?) do
  site_query = from(s in Site, where: s.id == ^site.id)

  Repo.update_all(site_query, set: [imported_data: nil])
end

defp clear_imported_data(site, true = _dry_run?) do
  %{site | imported_data: nil}
end
defp update!(changeset, false = _dry_run?) do defp update!(changeset, false = _dry_run?) do
Repo.update!(changeset) Repo.update!(changeset)
end end
@ -161,6 +170,14 @@ defmodule Plausible.DataMigration.SiteImports do
end end
end end
# Deletes `entity` via `Repo.delete!/1` unless running in dry-run mode,
# in which case the entity is handed back untouched.
defp delete!(entity, false = _dry_run?), do: Repo.delete!(entity)
defp delete!(entity, true = _dry_run?), do: entity
defp max_date_query(schema, site_id, import_ids) do defp max_date_query(schema, site_id, import_ids) do
from(q in schema, from(q in schema,
where: q.site_id == ^site_id, where: q.site_id == ^site_id,

View File

@ -91,6 +91,22 @@ defmodule Plausible.DataMigration.SiteImportsTest do
assert site_import.source == :universal_analytics assert site_import.source == :universal_analytics
end end
test "removes site import when there are no stats" do
  # A site with an import started through `Site.start_import/5`, plus one
  # extra site import record. No imported stats are inserted for either.
  site =
    :site
    |> insert()
    |> Site.start_import(~D[2021-01-02], ~D[2020-02-02], "Google Analytics", "ok")
    |> Repo.update!()

  _another_site_import = insert(:site_import, site: site)

  output = capture_io(fn -> assert :ok = SiteImports.run(dry_run?: false) end)
  assert output =~ "Processing 1 site"

  # After the migration has run, no imports should be listed for the site.
  assert [] = site |> Repo.reload!() |> Imported.list_all_imports()
end
test "leaves site and imports unchanged if everything fits" do test "leaves site and imports unchanged if everything fits" do
site = site =
insert(:site) insert(:site)