Introduce active_visitors to imported_pages and start populating it with activeUsers from GA4 imports (#4027)

* Import `activeUsers` into `imported_pages.active_visitors` for GA4

* Add test for active visitors

* Simplify assertion in active visitors test

* Improve assertion for active visitors further
This commit is contained in:
Adrian Gruntkowski 2024-04-22 10:18:16 +02:00 committed by GitHub
parent 069170eb1d
commit 3023cb12fd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 10063 additions and 1 deletions

File diff suppressed because it is too large Load Diff

View File

@ -60,7 +60,13 @@ defmodule Plausible.Google.GA4.ReportRequest do
dataset: "imported_pages",
dimensions: ["date", "hostName", "pagePath"],
# NOTE: no exits as GA4 DATA API does not provide that metric
metrics: ["totalUsers", "screenPageViews", "sessions", "userEngagementDuration"]
metrics: [
"totalUsers",
"activeUsers",
"screenPageViews",
"sessions",
"userEngagementDuration"
]
},
%__MODULE__{
dataset: "imported_entry_pages",

View File

@ -124,6 +124,7 @@ defmodule Plausible.Imported.GoogleAnalytics4 do
hostname: row.dimensions |> Map.fetch!("hostName") |> String.replace_prefix("www.", ""),
page: row.dimensions |> Map.fetch!("pagePath") |> URI.parse() |> Map.get(:path),
visitors: row.metrics |> Map.fetch!("totalUsers") |> parse_number(),
active_visitors: row.metrics |> Map.fetch!("activeUsers") |> parse_number(),
visits: row.metrics |> Map.fetch!("sessions") |> parse_number(),
pageviews: row.metrics |> Map.fetch!("screenPageViews") |> parse_number(),
# NOTE: no exits metric in GA4 API currently

View File

@ -11,6 +11,7 @@ defmodule Plausible.Imported.Page do
field :page, :string
field :visits, Ch, type: "UInt64"
field :visitors, Ch, type: "UInt64"
field :active_visitors, Ch, type: "UInt64"
field :pageviews, Ch, type: "UInt64"
field :exits, Ch, type: "UInt64"
field :time_on_page, Ch, type: "UInt64"

View File

@ -4,6 +4,7 @@ defmodule Plausible.Imported.GoogleAnalytics4Test do
import Mox
import Ecto.Query, only: [from: 2]
alias Plausible.ClickhouseRepo
alias Plausible.Repo
alias Plausible.Imported.GoogleAnalytics4
@ -131,9 +132,38 @@ defmodule Plausible.Imported.GoogleAnalytics4Test do
assert_browsers(conn, breakdown_params)
assert_os(conn, breakdown_params)
assert_os_versions(conn, breakdown_params)
assert_active_visitors(site_import)
end
end
# Verifies the `active_visitors` column of `imported_pages` for the given import.
#
# Two checks are run against the rows belonging to `site_import`:
#
#   1. Per-date sums: exactly 31 dates are expected (presumably the span of the
#      test fixture - confirm against the fixture data), each with more than 100
#      visitors and more than 100 active visitors, and with active visitors
#      never exceeding visitors.
#   2. Every row with zero active visitors must also have zero time on page.
defp assert_active_visitors(site_import) do
  daily_query_result =
    ClickhouseRepo.query!(
      "SELECT date, sum(visitors) AS all_visitors, sum(active_visitors) AS all_active_visitors " <>
        "FROM imported_pages WHERE site_id = #{site_import.site_id} AND import_id = #{site_import.id} GROUP BY date"
    )

  daily_totals =
    daily_query_result
    |> Map.fetch!(:rows)
    |> Enum.map(fn [date, all_visitors, all_active_visitors] ->
      %{date: date, visitors: all_visitors, active_visitors: all_active_visitors}
    end)

  assert length(daily_totals) == 31

  for daily <- daily_totals do
    assert daily.visitors > 100 and daily.active_visitors > 100
    assert daily.active_visitors <= daily.visitors
  end

  inactive_query_result =
    ClickhouseRepo.query!(
      "SELECT time_on_page FROM imported_pages WHERE active_visitors = 0 AND " <>
        "site_id = #{site_import.site_id} AND import_id = #{site_import.id}"
    )

  inactive_rows = Map.fetch!(inactive_query_result, :rows)

  # Rows without any active visitor should not carry engagement time.
  Enum.each(inactive_rows, fn [time_on_page] ->
    assert time_on_page == 0
  end)
end
defp assert_timeseries(conn, params) do
params =
Map.put(