From d79678871557cce46772b07940145b48372ce88e Mon Sep 17 00:00:00 2001 From: Adrian Gruntkowski Date: Tue, 9 Apr 2024 09:04:51 +0200 Subject: [PATCH] Keep `sites.imported_data` in sync with backfilled `SiteImport` when migrating (#3979) * Keep `sites.imported_data` in sync with backfilled `SiteImport` when migrating * Consider only completed site imports in data migration --- lib/plausible/data_migration/site_imports.ex | 54 +++++++++++++------ .../data_migration/site_imports_test.exs | 6 +++ 2 files changed, 44 insertions(+), 16 deletions(-) diff --git a/lib/plausible/data_migration/site_imports.ex b/lib/plausible/data_migration/site_imports.ex index fc4c91b2d8..53f06947b4 100644 --- a/lib/plausible/data_migration/site_imports.ex +++ b/lib/plausible/data_migration/site_imports.ex @@ -2,15 +2,20 @@ defmodule Plausible.DataMigration.SiteImports do @moduledoc """ Site imports migration backfilling SiteImport entries for old imports and alters import end dates to match actual end date of respective import stats. + + """ import Ecto.Query alias Plausible.ClickhouseRepo alias Plausible.Imported + alias Plausible.Imported.SiteImport alias Plausible.Repo alias Plausible.Site + require Plausible.Imported.SiteImport + def run(opts \\ []) do dry_run? = Keyword.get(opts, :dry_run?, true) @@ -27,7 +32,9 @@ defmodule Plausible.DataMigration.SiteImports do for {site, idx} <- Enum.with_index(sites_with_imports) do site_imports = - from(i in Imported.SiteImport, where: i.site_id == ^site.id) + from(i in Imported.SiteImport, + where: i.site_id == ^site.id and i.status == ^SiteImport.completed() + ) |> Repo.all(log: false) IO.puts( @@ -78,29 +85,44 @@ defmodule Plausible.DataMigration.SiteImports do ) if site_import.legacy do + # sanity check that data is correct + "ok" = site.imported_data.status + clear_imported_data(site, dry_run?) end delete!(site_import, dry_run?) else - end_date = - if Date.compare(end_date, site_import.end_date) in [:lt, :eq] do - end_date - else + case Date.compare(end_date, site_import.end_date) do + :lt -> + IO.puts( + "End date of site import #{site_import.id} (site ID #{site.id}) is adjusted from #{site_import.end_date} to #{end_date}." + ) + + site_import + |> Ecto.Changeset.change(end_date: end_date) + |> update!(dry_run?) + + # credo:disable-for-next-line Credo.Check.Refactor.Nesting + if site_import.legacy do + # sanity check that data is correct + "ok" = site.imported_data.status + + site + |> Ecto.Changeset.change(imported_data: %{end_date: end_date}) + |> update!(dry_run?) + end + + :eq -> + IO.puts( + "End date of site import #{site_import.id} (site ID #{site.id}) is left unadjusted." + ) + + :gt -> IO.puts( "Site import #{site_import.id} (site ID #{site.id}) computed end date is later than the current one. Skipping." ) - - site_import.end_date - end - - site_import - |> Ecto.Changeset.change(end_date: end_date) - |> update!(dry_run?) - - IO.puts( - "End date of site import #{site_import.id} (site ID #{site.id}) adjusted to #{end_date}" - ) + end end end diff --git a/test/plausible/data_migration/site_imports_test.exs b/test/plausible/data_migration/site_imports_test.exs index a99ded0976..e1bf730f96 100644 --- a/test/plausible/data_migration/site_imports_test.exs +++ b/test/plausible/data_migration/site_imports_test.exs @@ -47,6 +47,7 @@ defmodule Plausible.DataMigration.SiteImportsTest do assert id > 0 assert site_import.start_date == site.imported_data.start_date assert site_import.end_date == ~D[2021-01-07] + assert site.imported_data.end_date == ~D[2021-01-07] assert site_import.source == :universal_analytics end @@ -64,8 +65,11 @@ defmodule Plausible.DataMigration.SiteImportsTest do assert :ok = SiteImports.run() end) =~ "Processing 1 sites" + site = Repo.reload!(site) + assert [%{id: id, legacy: true}] = Imported.list_all_imports(site) assert id == 0 + assert site.imported_data.end_date == ~D[2021-01-08] end test "does not set end date to latter than the current one" do @@ -88,6 +92,7 @@ defmodule Plausible.DataMigration.SiteImportsTest do assert id > 0 assert site_import.start_date == site.imported_data.start_date assert site_import.end_date == ~D[2021-01-08] + assert site.imported_data.end_date == ~D[2021-01-08] assert site_import.source == :universal_analytics end @@ -105,6 +110,7 @@ defmodule Plausible.DataMigration.SiteImportsTest do site = Repo.reload!(site) assert [] = Imported.list_all_imports(site) + assert site.imported_data == nil end test "leaves site and imports unchanged if everything fits" do