Google Analytics Import Refactor (#2046)

* Create separate module for GA HTTP requests

* Fetch GA data for the entire date range instead of in monthly batches

* Add buffering to GA imports

* Change positional args to maps when serializing from GA

* Create Google Analytics VCR tests
Vinicius Brasil 2022-08-03 06:25:50 -03:00, committed by GitHub
parent 2d7dee7067
commit 4b9032d822
16 changed files with 22421 additions and 863 deletions
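
For orientation, the refactored importer keeps the same public entry point. A minimal invocation sketch (site, view_id and access_token stand in for a loaded site struct, a GA view id and an OAuth token):

    date_range = Date.range(~D[2021-01-01], ~D[2021-12-31])
    :ok = Plausible.Google.Api.import_analytics(site, date_range, view_id, access_token)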

View File

@@ -242,7 +242,8 @@ config :plausible, :paddle,
config :plausible, :google,
client_id: google_cid,
client_secret: google_secret
client_secret: google_secret,
max_buffer_size: get_int_from_path_or_env(config_dir, "GOOGLE_MAX_BUFFER_SIZE", 10_000)
config :plausible, Plausible.ClickhouseRepo,
loggers: [Ecto.LogEntry],

fixture/ga_batch_report.json (new file, 20736 lines)

File diff suppressed because it is too large.

File diff suppressed because one or more lines are too long.

View File

@@ -1,5 +1,5 @@
defmodule Plausible.Google.Api do
alias Plausible.Imported
alias Plausible.Google.{ReportRequest, HTTP}
use Timex
require Logger
@@ -27,250 +27,54 @@ defmodule Plausible.Google.Api do
end
end
def fetch_access_token(code) do
res =
HTTPoison.post!(
"https://www.googleapis.com/oauth2/v4/token",
"client_id=#{client_id()}&client_secret=#{client_secret()}&code=#{code}&grant_type=authorization_code&redirect_uri=#{redirect_uri()}",
"Content-Type": "application/x-www-form-urlencoded"
)
Jason.decode!(res.body)
end
def fetch_verified_properties(auth) do
with {:ok, auth} <- refresh_if_needed(auth) do
res =
HTTPoison.get!("https://www.googleapis.com/webmasters/v3/sites",
"Content-Type": "application/json",
Authorization: "Bearer #{auth.access_token}"
)
domains =
Jason.decode!(res.body)
|> Map.get("siteEntry", [])
|> Enum.filter(fn site -> site["permissionLevel"] in @verified_permission_levels end)
|> Enum.map(fn site -> site["siteUrl"] end)
|> Enum.map(fn url -> String.trim_trailing(url, "/") end)
{:ok, domains}
else
err -> err
with {:ok, auth} <- refresh_if_needed(auth),
{:ok, sites} <- Plausible.Google.HTTP.list_sites(auth.access_token) do
sites
|> Map.get("siteEntry", [])
|> Enum.filter(fn site -> site["permissionLevel"] in @verified_permission_levels end)
|> Enum.map(fn site -> site["siteUrl"] end)
|> Enum.map(fn url -> String.trim_trailing(url, "/") end)
|> then(&{:ok, &1})
end
end
defp property_base_url(property) do
case property do
"sc-domain:" <> domain -> "https://" <> domain
url -> url
def fetch_stats(site, %{date_range: date_range, filters: %{"page" => page}}, limit) do
with {:ok, %{access_token: access_token, property: property}} <-
refresh_if_needed(site.google_auth),
{:ok, stats} <- HTTP.list_stats(access_token, property, date_range, limit, page) do
stats
|> Map.get("rows", [])
|> Enum.filter(fn row -> row["clicks"] > 0 end)
|> Enum.map(fn row -> %{name: row["keys"], visitors: round(row["clicks"])} end)
end
end
def fetch_stats(site, query, limit) do
with {:ok, auth} <- refresh_if_needed(site.google_auth) do
do_fetch_stats(auth, query, limit)
else
err -> err
def get_analytics_view_ids(access_token) do
case HTTP.list_views_for_user(access_token) do
{:ok, %{"items" => views}} ->
view_ids = for view <- views, do: build_view_ids(view), into: %{}
{:ok, view_ids}
error ->
error
end
end
defp do_fetch_stats(auth, query, limit) do
property = URI.encode_www_form(auth.property)
base_url = property_base_url(auth.property)
defp build_view_ids(view) do
uri = URI.parse(Map.get(view, "websiteUrl", ""))
filter_groups =
if query.filters["page"] do
[
%{
filters: [
%{
dimension: "page",
expression: "https://#{base_url}#{query.filters["page"]}"
}
]
}
]
end
res =
HTTPoison.post!(
"https://www.googleapis.com/webmasters/v3/sites/#{property}/searchAnalytics/query",
Jason.encode!(%{
startDate: Date.to_iso8601(query.date_range.first),
endDate: Date.to_iso8601(query.date_range.last),
dimensions: ["query"],
rowLimit: limit,
dimensionFilterGroups: filter_groups || %{}
}),
"Content-Type": "application/json",
Authorization: "Bearer #{auth.access_token}"
)
case res.status_code do
200 ->
terms =
(Jason.decode!(res.body)["rows"] || [])
|> Enum.filter(fn row -> row["clicks"] > 0 end)
|> Enum.map(fn row -> %{name: row["keys"], visitors: round(row["clicks"])} end)
{:ok, terms}
401 ->
Sentry.capture_message("Error fetching Google queries", extra: Jason.decode!(res.body))
{:error, :invalid_credentials}
403 ->
Sentry.capture_message("Error fetching Google queries", extra: Jason.decode!(res.body))
msg = Jason.decode!(res.body)["error"]["message"]
{:error, msg}
_ ->
Sentry.capture_message("Error fetching Google queries", extra: Jason.decode!(res.body))
{:error, :unknown}
if !uri.host do
Sentry.capture_message("No URI for view ID", extra: view)
end
host = uri.host || Map.get(view, "id", "")
name = Map.get(view, "name")
{"#{host} - #{name}", Map.get(view, "id")}
end
def get_analytics_view_ids(token) do
res =
HTTPoison.get!(
"https://www.googleapis.com/analytics/v3/management/accounts/~all/webproperties/~all/profiles",
Authorization: "Bearer #{token}"
)
case res.status_code do
200 ->
profiles =
Jason.decode!(res.body)
|> Map.get("items")
|> Enum.map(fn item ->
uri = URI.parse(Map.get(item, "websiteUrl", ""))
if !uri.host do
Sentry.capture_message("No URI for view ID", extra: Jason.decode!(res.body))
end
host = uri.host || Map.get(item, "id", "")
name = Map.get(item, "name")
{"#{host} - #{name}", Map.get(item, "id")}
end)
|> Map.new()
{:ok, profiles}
_ ->
Sentry.capture_message("Error fetching Google view ID", extra: Jason.decode!(res.body))
{:error, res.body}
end
end
def get_analytics_start_date(view_id, token) do
report = %{
viewId: view_id,
dateRanges: [
%{
# The earliest valid date
startDate: "2005-01-01",
endDate: Timex.today() |> Date.to_iso8601()
}
],
dimensions: [%{name: "ga:date", histogramBuckets: []}],
metrics: [%{expression: "ga:pageviews"}],
hideTotals: true,
hideValueRanges: true,
orderBys: [
%{
fieldName: "ga:date",
sortOrder: "ASCENDING"
}
],
pageSize: 1
}
res =
HTTPoison.post!(
"https://analyticsreporting.googleapis.com/v4/reports:batchGet",
Jason.encode!(%{reportRequests: [report]}),
[Authorization: "Bearer #{token}"],
timeout: 15_000,
recv_timeout: 15_000
)
case res.status_code do
200 ->
report = List.first(Jason.decode!(res.body)["reports"])
date =
case report["data"]["rows"] do
[%{"dimensions" => [date_str]}] ->
Timex.parse!(date_str, "%Y%m%d", :strftime) |> NaiveDateTime.to_date()
_ ->
nil
end
{:ok, date}
_ ->
Sentry.capture_message("Error fetching Google view ID", extra: Jason.decode!(res.body))
{:error, res.body}
end
end
# Each element is: {dataset, dimensions, metrics}
@request_data [
{
"imported_visitors",
["ga:date"],
[
"ga:users",
"ga:pageviews",
"ga:bounces",
"ga:sessions",
"ga:sessionDuration"
]
},
{
"imported_sources",
["ga:date", "ga:source", "ga:medium", "ga:campaign", "ga:adContent", "ga:keyword"],
["ga:users", "ga:sessions", "ga:bounces", "ga:sessionDuration"]
},
{
"imported_pages",
["ga:date", "ga:hostname", "ga:pagePath"],
["ga:users", "ga:pageviews", "ga:exits", "ga:timeOnPage"]
},
{
"imported_entry_pages",
["ga:date", "ga:landingPagePath"],
["ga:users", "ga:entrances", "ga:sessionDuration", "ga:bounces"]
},
{
"imported_exit_pages",
["ga:date", "ga:exitPagePath"],
["ga:users", "ga:exits"]
},
{
"imported_locations",
["ga:date", "ga:countryIsoCode", "ga:regionIsoCode"],
["ga:users", "ga:sessions", "ga:bounces", "ga:sessionDuration"]
},
{
"imported_devices",
["ga:date", "ga:deviceCategory"],
["ga:users", "ga:sessions", "ga:bounces", "ga:sessionDuration"]
},
{
"imported_browsers",
["ga:date", "ga:browser"],
["ga:users", "ga:sessions", "ga:bounces", "ga:sessionDuration"]
},
{
"imported_operating_systems",
["ga:date", "ga:operatingSystem"],
["ga:users", "ga:sessions", "ga:bounces", "ga:sessionDuration"]
}
]
@per_page 10_000
@one_day_in_ms 86_400_000
@doc """
API reference:
https://developers.google.com/analytics/devguides/reporting/core/v4/rest/v4/reports/batchGet#ReportRequest
@@ -278,78 +82,56 @@ defmodule Plausible.Google.Api do
Dimensions reference: https://ga-dev-tools.web.app/dimensions-metrics-explorer
"""
def import_analytics(site, date_range, view_id, access_token) do
for month_batch <- prepare_batches(date_range, view_id, access_token) do
tasks =
for batch_request <- month_batch do
Task.async(fn -> fetch_and_persist(site, batch_request) end)
end
{:ok, buffer} = Plausible.Google.Buffer.start_link()
# 1 hour max to get 1 month's worth of data
Task.await_many(tasks, 3_600_000)
end
ReportRequest.full_report()
|> Task.async_stream(
fn %ReportRequest{} = report_request ->
report_request = %ReportRequest{
report_request
| date_range: date_range,
view_id: view_id,
access_token: access_token,
page_token: nil,
page_size: @per_page
}
fetch_and_persist(site, report_request, buffer: buffer)
end,
ordered: false,
max_concurrency: 3,
timeout: @one_day_in_ms
)
|> Stream.run()
Plausible.Google.Buffer.flush(buffer)
Plausible.Google.Buffer.stop(buffer)
:ok
end
defp prepare_batches(import_date_range, view_id, access_token) do
total_months = Timex.diff(import_date_range.last, import_date_range.first, :months)
monthly_batches =
for month <- 0..total_months do
batch_start_date = Timex.shift(import_date_range.first, months: month)
batch_end_date = Timex.shift(batch_start_date, months: 1, days: -1)
batch_end_date =
if Timex.before?(import_date_range.last, batch_end_date),
do: import_date_range.last,
else: batch_end_date
Date.range(batch_start_date, batch_end_date)
end
for date_range <- monthly_batches do
for {dataset, dimensions, metrics} <- @request_data do
%{
dataset: dataset,
dimensions: dimensions,
metrics: metrics,
date_range: date_range,
view_id: view_id,
access_token: access_token,
page_token: nil
}
end
end
end
@max_attempts 5
def fetch_and_persist(site, request, opts \\ []) do
report_request = build_import_report_request(request)
http_client = Keyword.get(opts, :http_client, HTTPoison)
def fetch_and_persist(site, %ReportRequest{} = report_request, opts \\ []) do
buffer_pid = Keyword.get(opts, :buffer)
attempt = Keyword.get(opts, :attempt, 1)
sleep_time = Keyword.get(opts, :sleep_time, 1000)
http_client = Keyword.get(opts, :http_client, Finch)
res =
http_client.post(
"https://analyticsreporting.googleapis.com/v4/reports:batchGet",
Jason.encode!(%{reportRequests: [report_request]}),
[Authorization: "Bearer #{request.access_token}"],
timeout: 30_000,
recv_timeout: 30_000
)
case HTTP.get_report(http_client, report_request) do
{:ok, {rows, next_page_token}} ->
records = Plausible.Imported.from_google_analytics(rows, site.id, report_request.dataset)
:ok = Plausible.Google.Buffer.insert_many(buffer_pid, report_request.dataset, records)
with {:ok, %HTTPoison.Response{status_code: 200, body: raw_body}} <- res,
{:ok, body} <- Jason.decode(raw_body),
report <- List.first(body["reports"]),
{:ok, data} <- get_non_empty_rows(report) do
Imported.from_google_analytics(data, site.id, request.dataset)
if next_page_token do
fetch_and_persist(
site,
%ReportRequest{report_request | page_token: next_page_token},
opts
)
else
:ok
end
if report["nextPageToken"] do
fetch_and_persist(site, %{request | page_token: report["nextPageToken"]})
else
:ok
end
else
error ->
context_key = "request:#{attempt}"
Sentry.Context.set_extra_context(%{context_key => error})
@@ -358,68 +140,30 @@ defmodule Plausible.Google.Api do
raise "Google API request failed too many times"
else
Process.sleep(sleep_time)
fetch_and_persist(site, request, Keyword.merge(opts, attempt: attempt + 1))
fetch_and_persist(site, report_request, Keyword.merge(opts, attempt: attempt + 1))
end
end
end
defp get_non_empty_rows(report) do
case get_in(report, ["data", "rows"]) do
[] -> {:error, :empty_response_rows}
rows -> {:ok, rows}
end
end
defp build_import_report_request(request) do
%{
viewId: request.view_id,
dateRanges: [
%{
startDate: request.date_range.first,
endDate: request.date_range.last
}
],
dimensions: Enum.map(request.dimensions, &%{name: &1, histogramBuckets: []}),
metrics: Enum.map(request.metrics, &%{expression: &1}),
hideTotals: true,
hideValueRanges: true,
orderBys: [
%{
fieldName: "ga:date",
sortOrder: "DESCENDING"
}
],
pageSize: 10_000,
pageToken: request.page_token
}
end
defp refresh_if_needed(auth) do
if Timex.before?(auth.expires, Timex.now() |> Timex.shift(seconds: 30)) do
refresh_token(auth)
do_refresh_token(auth)
else
{:ok, auth}
end
end
defp refresh_token(auth) do
res =
HTTPoison.post!(
"https://www.googleapis.com/oauth2/v4/token",
"client_id=#{client_id()}&client_secret=#{client_secret()}&refresh_token=#{auth.refresh_token}&grant_type=refresh_token&redirect_uri=#{redirect_uri()}",
"Content-Type": "application/x-www-form-urlencoded"
)
defp do_refresh_token(auth) do
case HTTP.refresh_auth_token(auth.refresh_token) do
{:ok, %{"access_token" => access_token, "expires_in" => expires_in}} ->
expires_in = NaiveDateTime.add(NaiveDateTime.utc_now(), expires_in)
body = Jason.decode!(res.body)
auth
|> Plausible.Site.GoogleAuth.changeset(%{access_token: access_token, expires: expires_in})
|> Plausible.Repo.update()
if res.status_code == 200 do
Plausible.Site.GoogleAuth.changeset(auth, %{
access_token: body["access_token"],
expires: NaiveDateTime.utc_now() |> NaiveDateTime.add(body["expires_in"])
})
|> Plausible.Repo.update()
else
{:error, body["error"]}
error ->
error
end
end
@@ -427,10 +171,6 @@ defmodule Plausible.Google.Api do
Keyword.fetch!(Application.get_env(:plausible, :google), :client_id)
end
defp client_secret() do
Keyword.fetch!(Application.get_env(:plausible, :google), :client_secret)
end
defp redirect_uri() do
PlausibleWeb.Endpoint.url() <> "/auth/google/callback"
end

View File

@@ -0,0 +1,96 @@
defmodule Plausible.Google.Buffer do
@moduledoc """
This GenServer buffers records for the Clickhouse `imported_*` tables. A separate buffer is
created automatically for each table, and a table's records are flushed once its buffer
reaches the maximum size defined by `max_buffer_size/0`.
"""
use GenServer
require Logger
def start_link do
GenServer.start_link(__MODULE__, nil)
end
def init(_opts) do
{:ok, %{buffers: %{}}}
end
@spec insert_many(pid(), term(), [map()]) :: :ok
@doc """
Puts the given records into the table buffer.
"""
def insert_many(pid, table_name, records) do
GenServer.call(pid, {:insert_many, table_name, records})
end
@spec size(pid(), term()) :: non_neg_integer()
@doc """
Returns the total count of items in the given table buffer.
"""
def size(pid, table_name) do
GenServer.call(pid, {:get_size, table_name})
end
@spec flush(pid()) :: :ok
@doc """
Flushes all table buffers to Clickhouse.
"""
def flush(pid, timeout \\ :infinity) do
GenServer.call(pid, :flush_all_buffers, timeout)
end
def stop(pid) do
GenServer.stop(pid)
end
def handle_call({:get_size, table_name}, _from, %{buffers: buffers} = state) do
size =
buffers
|> Map.get(table_name, [])
|> length()
{:reply, size, state}
end
def handle_call({:insert_many, table_name, records}, _from, %{buffers: buffers} = state) do
Logger.info("Import: Adding #{length(records)} to #{table_name} buffer")
new_buffer = Map.get(buffers, table_name, []) ++ records
new_state = put_in(state.buffers[table_name], new_buffer)
if length(new_buffer) >= max_buffer_size() do
{:reply, :ok, new_state, {:continue, {:flush, table_name}}}
else
{:reply, :ok, new_state}
end
end
def handle_call(:flush_all_buffers, _from, state) do
Enum.each(state.buffers, fn {table_name, records} ->
flush_buffer(records, table_name)
end)
{:reply, :ok, put_in(state.buffers, %{})}
end
def handle_continue({:flush, table_name}, state) do
flush_buffer(state.buffers[table_name], table_name)
{:noreply, put_in(state.buffers[table_name], [])}
end
defp max_buffer_size do
:plausible
|> Application.get_env(:google)
|> Keyword.fetch!(:max_buffer_size)
end
defp flush_buffer(records, table_name) do
# Clickhouse does not recommend sending more than 1 INSERT operation per second;
# this sleep call throttles the flush rate accordingly
Process.sleep(1000)
Logger.info("Import: Flushing #{length(records)} from #{table_name} buffer")
Plausible.ClickhouseRepo.insert_all(table_name, records)
end
end
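
A sketch of the buffer lifecycle as the importer drives it (records being whatever Plausible.Imported.from_google_analytics/3 returns; the table name is illustrative):

    {:ok, buffer} = Plausible.Google.Buffer.start_link()
    :ok = Plausible.Google.Buffer.insert_many(buffer, "imported_visitors", records)
    # Full buffers flush automatically past max_buffer_size/0; flush/1 forces the rest out
    Plausible.Google.Buffer.flush(buffer)
    Plausible.Google.Buffer.stop(buffer)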

View File

@@ -0,0 +1,208 @@
defmodule Plausible.Google.HTTP do
@spec get_report(module(), Plausible.Google.ReportRequest.t()) ::
{:ok, {[map()], String.t() | nil}} | {:error, any()}
def get_report(http_client, %Plausible.Google.ReportRequest{} = report_request) do
params =
Jason.encode!(%{
reportRequests: [
%{
viewId: report_request.view_id,
dateRanges: [
%{
startDate: report_request.date_range.first,
endDate: report_request.date_range.last
}
],
dimensions: Enum.map(report_request.dimensions, &%{name: &1, histogramBuckets: []}),
metrics: Enum.map(report_request.metrics, &%{expression: &1}),
hideTotals: true,
hideValueRanges: true,
orderBys: [%{fieldName: "ga:date", sortOrder: "DESCENDING"}],
pageSize: report_request.page_size,
pageToken: report_request.page_token
}
]
})
response =
:post
|> Finch.build(
"https://analyticsreporting.googleapis.com/v4/reports:batchGet",
[{"Authorization", "Bearer #{report_request.access_token}"}],
params
)
|> http_client.request(Plausible.Finch)
with {:ok, %{status: 200, body: body}} <- response,
{:ok, %{"reports" => [report | _]}} <- Jason.decode(body),
token <- Map.get(report, "nextPageToken"),
report <- convert_to_maps(report) do
{:ok, {report, token}}
end
end
defp convert_to_maps(%{
"data" => %{"rows" => rows},
"columnHeader" => %{
"dimensions" => dimension_headers,
"metricHeader" => %{"metricHeaderEntries" => metric_headers}
}
}) do
metric_headers = Enum.map(metric_headers, & &1["name"])
Enum.map(rows, fn %{"dimensions" => dimensions, "metrics" => [%{"values" => metrics}]} ->
metrics = Enum.zip(metric_headers, metrics)
dimensions = Enum.zip(dimension_headers, dimensions)
%{metrics: Map.new(metrics), dimensions: Map.new(dimensions)}
end)
end
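
To make the positional-args-to-maps change concrete: the Reporting API returns dimension and metric values positionally, and convert_to_maps/1 rekeys them by column header. A sketch with illustrative values:

    # Raw API row (positional), assuming headers ga:date/ga:source and ga:users/ga:sessions:
    %{"dimensions" => ["20210101", "duckduckgo.com"], "metrics" => [%{"values" => ["1", "2"]}]}

    # After convert_to_maps/1:
    %{
      dimensions: %{"ga:date" => "20210101", "ga:source" => "duckduckgo.com"},
      metrics: %{"ga:users" => "1", "ga:sessions" => "2"}
    }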
def list_sites(access_token) do
"https://www.googleapis.com/webmasters/v3/sites"
|> HTTPoison.get!("Content-Type": "application/json", Authorization: "Bearer #{access_token}")
|> Map.get(:body)
|> Jason.decode!()
|> then(&{:ok, &1})
end
def fetch_access_token(code) do
"https://www.googleapis.com/oauth2/v4/token"
|> HTTPoison.post!(
"client_id=#{client_id()}&client_secret=#{client_secret()}&code=#{code}&grant_type=authorization_code&redirect_uri=#{redirect_uri()}",
"Content-Type": "application/x-www-form-urlencoded"
)
|> Map.get(:body)
|> Jason.decode!()
end
def list_views_for_user(access_token) do
"https://www.googleapis.com/analytics/v3/management/accounts/~all/webproperties/~all/profiles"
|> HTTPoison.get!(Authorization: "Bearer #{access_token}")
|> case do
%{body: body, status_code: 200} ->
{:ok, Jason.decode!(body)}
%{body: body} ->
Sentry.capture_message("Error fetching Google view ID", extra: Jason.decode!(body))
{:error, body}
end
end
def list_stats(access_token, property, date_range, limit, page \\ nil) do
# Build the page filter from the raw property before URL-encoding it for the request path
filter_groups =
if page do
url = property_base_url(property)
[%{filters: [%{dimension: "page", expression: "https://#{url}#{page}"}]}]
else
%{}
end
property = URI.encode_www_form(property)
params =
Jason.encode!(%{
startDate: Date.to_iso8601(date_range.first),
endDate: Date.to_iso8601(date_range.last),
dimensions: ["query"],
rowLimit: limit,
dimensionFilterGroups: filter_groups
})
"https://www.googleapis.com/webmasters/v3/sites/#{property}/searchAnalytics/query"
|> HTTPoison.post!(params,
"Content-Type": "application/json",
Authorization: "Bearer #{access_token}"
)
|> case do
%{status_code: 200, body: body} ->
{:ok, Jason.decode!(body)}
%{status_code: 401, body: body} ->
Sentry.capture_message("Error fetching Google queries", extra: Jason.decode!(body))
{:error, :invalid_credentials}
%{status_code: 403, body: body} ->
body = Jason.decode!(body)
Sentry.capture_message("Error fetching Google queries", extra: body)
{:error, get_in(body, ["error", "message"])}
%{body: body} ->
Sentry.capture_message("Error fetching Google queries", extra: Jason.decode!(body))
{:error, :unknown}
end
end
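
For reference, Api.fetch_stats/3 consumes this response; a short sketch (the fifth argument defaults to nil when no page filter is needed):

    {:ok, stats} = Plausible.Google.HTTP.list_stats(access_token, property, date_range, 10)
    # stats["rows"] entries carry "keys" and "clicks"; fetch_stats/3 filters and reshapes them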
defp property_base_url("sc-domain:" <> domain), do: "https://" <> domain
defp property_base_url(url), do: url
def refresh_auth_token(refresh_token) do
"https://www.googleapis.com/oauth2/v4/token"
|> HTTPoison.post!(
"client_id=#{client_id()}&client_secret=#{client_secret()}&refresh_token=#{refresh_token}&grant_type=refresh_token&redirect_uri=#{redirect_uri()}",
"Content-Type": "application/x-www-form-urlencoded"
)
|> case do
%{body: body, status_code: 200} ->
{:ok, Jason.decode!(body)}
%{body: body} ->
body
|> Jason.decode!()
|> Map.get("error")
|> then(&{:error, &1})
end
end
@earliest_valid_date "2005-01-01"
def get_analytics_start_date(view_id, access_token) do
params =
Jason.encode!(%{
reportRequests: [
%{
viewId: view_id,
dateRanges: [
%{startDate: @earliest_valid_date, endDate: Date.to_iso8601(Timex.today())}
],
dimensions: [%{name: "ga:date", histogramBuckets: []}],
metrics: [%{expression: "ga:pageviews"}],
hideTotals: true,
hideValueRanges: true,
orderBys: [%{fieldName: "ga:date", sortOrder: "ASCENDING"}],
pageSize: 1
}
]
})
"https://analyticsreporting.googleapis.com/v4/reports:batchGet"
|> HTTPoison.post!(
params,
[Authorization: "Bearer #{access_token}"],
timeout: 15_000,
recv_timeout: 15_000
)
|> case do
%{status_code: 200, body: body} ->
report = List.first(Jason.decode!(body)["reports"])
date =
case report["data"]["rows"] do
[%{"dimensions" => [date_str]}] ->
Timex.parse!(date_str, "%Y%m%d", :strftime) |> NaiveDateTime.to_date()
_ ->
nil
end
{:ok, date}
%{body: body} ->
Sentry.capture_message("Error fetching Google view ID", extra: Jason.decode!(body))
{:error, body}
end
end
defp config, do: Application.get_env(:plausible, :google)
defp client_id, do: Keyword.fetch!(config(), :client_id)
defp client_secret, do: Keyword.fetch!(config(), :client_secret)
defp redirect_uri, do: PlausibleWeb.Endpoint.url() <> "/auth/google/callback"
end
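
A single-page fetch through the new module, mirroring the test setup (token and view id are illustrative; next_page_token comes back nil on the final page):

    alias Plausible.Google.ReportRequest

    request = %ReportRequest{
      dataset: "imported_visitors",
      dimensions: ["ga:date"],
      metrics: ["ga:users"],
      date_range: Date.range(~D[2021-01-01], ~D[2021-01-31]),
      view_id: "123",
      access_token: "fake-token",
      page_token: nil,
      page_size: 10_000
    }

    {:ok, {rows, next_page_token}} = Plausible.Google.HTTP.get_report(Finch, request)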

View File

@@ -0,0 +1,80 @@
defmodule Plausible.Google.ReportRequest do
defstruct [
:dataset,
:dimensions,
:metrics,
:date_range,
:view_id,
:access_token,
:page_token,
:page_size
]
@type t() :: %__MODULE__{
dataset: String.t(),
dimensions: [String.t()],
metrics: [String.t()],
date_range: Date.Range.t(),
view_id: term(),
access_token: String.t(),
page_token: String.t() | nil,
page_size: non_neg_integer()
}
def full_report do
[
%__MODULE__{
dataset: "imported_visitors",
dimensions: ["ga:date"],
metrics: ["ga:users", "ga:pageviews", "ga:bounces", "ga:sessions", "ga:sessionDuration"]
},
%__MODULE__{
dataset: "imported_sources",
dimensions: [
"ga:date",
"ga:source",
"ga:medium",
"ga:campaign",
"ga:adContent",
"ga:keyword"
],
metrics: ["ga:users", "ga:sessions", "ga:bounces", "ga:sessionDuration"]
},
%__MODULE__{
dataset: "imported_pages",
dimensions: ["ga:date", "ga:hostname", "ga:pagePath"],
metrics: ["ga:users", "ga:pageviews", "ga:exits", "ga:timeOnPage"]
},
%__MODULE__{
dataset: "imported_entry_pages",
dimensions: ["ga:date", "ga:landingPagePath"],
metrics: ["ga:users", "ga:entrances", "ga:sessionDuration", "ga:bounces"]
},
%__MODULE__{
dataset: "imported_exit_pages",
dimensions: ["ga:date", "ga:exitPagePath"],
metrics: ["ga:users", "ga:exits"]
},
%__MODULE__{
dataset: "imported_locations",
dimensions: ["ga:date", "ga:countryIsoCode", "ga:regionIsoCode"],
metrics: ["ga:users", "ga:sessions", "ga:bounces", "ga:sessionDuration"]
},
%__MODULE__{
dataset: "imported_devices",
dimensions: ["ga:date", "ga:deviceCategory"],
metrics: ["ga:users", "ga:sessions", "ga:bounces", "ga:sessionDuration"]
},
%__MODULE__{
dataset: "imported_browsers",
dimensions: ["ga:date", "ga:browser"],
metrics: ["ga:users", "ga:sessions", "ga:bounces", "ga:sessionDuration"]
},
%__MODULE__{
dataset: "imported_operating_systems",
dimensions: ["ga:date", "ga:operatingSystem"],
metrics: ["ga:users", "ga:sessions", "ga:bounces", "ga:sessionDuration"]
}
]
end
end
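
These are static report definitions; import_analytics/4 stamps the runtime fields onto each one before fetching, roughly:

    alias Plausible.Google.ReportRequest

    for %ReportRequest{} = request <- ReportRequest.full_report() do
      %ReportRequest{
        request
        | date_range: date_range,
          view_id: view_id,
          access_token: access_token,
          page_token: nil,
          page_size: 10_000
      }
    end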

View File

@@ -7,18 +7,10 @@ defmodule Plausible.Imported do
Plausible.ClickhouseRepo.clear_imported_stats_for(site.id)
end
def from_google_analytics(nil, _site_id, _metric), do: {:ok, nil}
def from_google_analytics(nil, _site_id, _metric), do: nil
def from_google_analytics(data, site_id, table) do
data =
Enum.map(data, fn row ->
new_from_google_analytics(site_id, table, row)
end)
case ClickhouseRepo.insert_all(table, data) do
{n_rows, _} when n_rows > 0 -> :ok
error -> error
end
Enum.map(data, fn row -> new_from_google_analytics(site_id, table, row) end)
end
defp parse_number(nr) do
@@ -26,152 +18,92 @@ defmodule Plausible.Imported do
float
end
defp new_from_google_analytics(site_id, "imported_visitors", %{
"dimensions" => [date],
"metrics" => [%{"values" => values}]
}) do
[visitors, pageviews, bounces, visits, visit_duration] = values |> Enum.map(&parse_number/1)
defp new_from_google_analytics(site_id, "imported_visitors", row) do
%{
site_id: site_id,
date: format_date(date),
visitors: visitors,
pageviews: pageviews,
bounces: bounces,
visits: visits,
visit_duration: visit_duration
date: get_date(row),
visitors: row.metrics |> Map.fetch!("ga:users") |> parse_number(),
pageviews: row.metrics |> Map.fetch!("ga:pageviews") |> parse_number(),
bounces: row.metrics |> Map.fetch!("ga:bounces") |> parse_number(),
visits: row.metrics |> Map.fetch!("ga:sessions") |> parse_number(),
visit_duration: row.metrics |> Map.fetch!("ga:sessionDuration") |> parse_number()
}
end
# Credit: https://github.com/kvesteri/validators
@domain ~r/^(([a-zA-Z]{1})|([a-zA-Z]{1}[a-zA-Z]{1})|([a-zA-Z]{1}[0-9]{1})|([0-9]{1}[a-zA-Z]{1})|([a-zA-Z0-9][-_.a-zA-Z0-9]{0,61}[a-zA-Z0-9]))\.([a-zA-Z]{2,13}|[a-zA-Z0-9-]{2,30}.[a-zA-Z]{2,3})$/
defp new_from_google_analytics(site_id, "imported_sources", %{
"dimensions" => [date, source, medium, campaign, content, term],
"metrics" => [%{"values" => [visitors, visits, bounces, visit_duration]}]
}) do
{visitors, ""} = Integer.parse(visitors)
{visits, ""} = Integer.parse(visits)
{bounces, ""} = Integer.parse(bounces)
{visit_duration, _} = Integer.parse(visit_duration)
source = if source == "(direct)", do: nil, else: source
source = if source && String.match?(source, @domain), do: parse_referrer(source), else: source
defp new_from_google_analytics(site_id, "imported_sources", row) do
%{
site_id: site_id,
date: format_date(date),
source: parse_referrer(source),
utm_medium: nil_if_missing(medium),
utm_campaign: nil_if_missing(campaign),
utm_content: nil_if_missing(content),
utm_term: nil_if_missing(term),
visitors: visitors,
visits: visits,
bounces: bounces,
visit_duration: visit_duration
date: get_date(row),
source: row.dimensions |> Map.fetch!("ga:source") |> parse_referrer(),
utm_medium: row.dimensions |> Map.fetch!("ga:medium") |> default_if_missing(),
utm_campaign: row.dimensions |> Map.fetch!("ga:campaign") |> default_if_missing(),
utm_content: row.dimensions |> Map.fetch!("ga:adContent") |> default_if_missing(),
utm_term: row.dimensions |> Map.fetch!("ga:keyword") |> default_if_missing(),
visitors: row.metrics |> Map.fetch!("ga:users") |> parse_number(),
visits: row.metrics |> Map.fetch!("ga:sessions") |> parse_number(),
bounces: row.metrics |> Map.fetch!("ga:bounces") |> parse_number(),
visit_duration: row.metrics |> Map.fetch!("ga:sessionDuration") |> parse_number()
}
end
defp new_from_google_analytics(site_id, "imported_pages", %{
"dimensions" => [date, hostname, page],
"metrics" => [%{"values" => [visitors, pageviews, exits, time_on_page]}]
}) do
page = URI.parse(page).path
{visitors, ""} = Integer.parse(visitors)
{pageviews, ""} = Integer.parse(pageviews)
{exits, ""} = Integer.parse(exits)
{time_on_page, _} = Integer.parse(time_on_page)
defp new_from_google_analytics(site_id, "imported_pages", row) do
%{
site_id: site_id,
date: format_date(date),
hostname: String.replace_prefix(hostname, "www.", ""),
page: page,
visitors: visitors,
pageviews: pageviews,
exits: exits,
time_on_page: time_on_page
date: get_date(row),
hostname: row.dimensions |> Map.fetch!("ga:hostname") |> String.replace_prefix("www.", ""),
page: row.dimensions |> Map.fetch!("ga:pagePath") |> URI.parse() |> Map.get(:path),
visitors: row.metrics |> Map.fetch!("ga:users") |> parse_number(),
pageviews: row.metrics |> Map.fetch!("ga:pageviews") |> parse_number(),
exits: row.metrics |> Map.fetch!("ga:exits") |> parse_number(),
time_on_page: row.metrics |> Map.fetch!("ga:timeOnPage") |> parse_number()
}
end
defp new_from_google_analytics(site_id, "imported_entry_pages", %{
"dimensions" => [date, entry_page],
"metrics" => [%{"values" => [visitors, entrances, visit_duration, bounces]}]
}) do
{visitors, ""} = Integer.parse(visitors)
{entrances, ""} = Integer.parse(entrances)
{bounces, ""} = Integer.parse(bounces)
{visit_duration, _} = Integer.parse(visit_duration)
defp new_from_google_analytics(site_id, "imported_entry_pages", row) do
%{
site_id: site_id,
date: format_date(date),
entry_page: entry_page,
visitors: visitors,
entrances: entrances,
visit_duration: visit_duration,
bounces: bounces
date: get_date(row),
entry_page: row.dimensions |> Map.fetch!("ga:landingPagePath"),
visitors: row.metrics |> Map.fetch!("ga:users") |> parse_number(),
entrances: row.metrics |> Map.fetch!("ga:entrances") |> parse_number(),
visit_duration: row.metrics |> Map.fetch!("ga:sessionDuration") |> parse_number(),
bounces: row.metrics |> Map.fetch!("ga:bounces") |> parse_number()
}
end
defp new_from_google_analytics(site_id, "imported_exit_pages", %{
"dimensions" => [date, exit_page],
"metrics" => [%{"values" => [visitors, exits]}]
}) do
{visitors, ""} = Integer.parse(visitors)
{exits, ""} = Integer.parse(exits)
defp new_from_google_analytics(site_id, "imported_exit_pages", row) do
%{
site_id: site_id,
date: format_date(date),
exit_page: exit_page,
visitors: visitors,
exits: exits
date: get_date(row),
exit_page: Map.fetch!(row.dimensions, "ga:exitPagePath"),
visitors: row.metrics |> Map.fetch!("ga:users") |> parse_number(),
exits: row.metrics |> Map.fetch!("ga:exits") |> parse_number()
}
end
defp new_from_google_analytics(site_id, "imported_locations", %{
"dimensions" => [date, country, region],
"metrics" => [%{"values" => [visitors, visits, bounces, visit_duration]}]
}) do
country = if country == "(not set)", do: "", else: country
region = if region == "(not set)", do: "", else: region
{visitors, ""} = Integer.parse(visitors)
{visits, ""} = Integer.parse(visits)
{bounces, ""} = Integer.parse(bounces)
{visit_duration, _} = Integer.parse(visit_duration)
defp new_from_google_analytics(site_id, "imported_locations", row) do
%{
site_id: site_id,
date: format_date(date),
country: country,
region: region,
date: get_date(row),
country: row.dimensions |> Map.fetch!("ga:countryIsoCode") |> default_if_missing(""),
region: row.dimensions |> Map.fetch!("ga:regionIsoCode") |> default_if_missing(""),
city: 0,
visitors: visitors,
visits: visits,
bounces: bounces,
visit_duration: visit_duration
visitors: row.metrics |> Map.fetch!("ga:users") |> parse_number(),
visits: row.metrics |> Map.fetch!("ga:sessions") |> parse_number(),
bounces: row.metrics |> Map.fetch!("ga:bounces") |> parse_number(),
visit_duration: row.metrics |> Map.fetch!("ga:sessionDuration") |> parse_number()
}
end
defp new_from_google_analytics(site_id, "imported_devices", %{
"dimensions" => [date, device],
"metrics" => [%{"values" => [visitors, visits, bounces, visit_duration]}]
}) do
{visitors, ""} = Integer.parse(visitors)
{visits, ""} = Integer.parse(visits)
{bounces, ""} = Integer.parse(bounces)
{visit_duration, _} = Integer.parse(visit_duration)
defp new_from_google_analytics(site_id, "imported_devices", row) do
%{
site_id: site_id,
date: format_date(date),
device: String.capitalize(device),
visitors: visitors,
visits: visits,
bounces: bounces,
visit_duration: visit_duration
date: get_date(row),
device: row.dimensions |> Map.fetch!("ga:deviceCategory") |> String.capitalize(),
visitors: row.metrics |> Map.fetch!("ga:users") |> parse_number(),
visits: row.metrics |> Map.fetch!("ga:sessions") |> parse_number(),
bounces: row.metrics |> Map.fetch!("ga:bounces") |> parse_number(),
visit_duration: row.metrics |> Map.fetch!("ga:sessionDuration") |> parse_number()
}
end
@@ -185,23 +117,17 @@ defmodule Plausible.Imported do
"(not set)" => ""
}
defp new_from_google_analytics(site_id, "imported_browsers", %{
"dimensions" => [date, browser],
"metrics" => [%{"values" => [visitors, visits, bounces, visit_duration]}]
}) do
{visitors, ""} = Integer.parse(visitors)
{visits, ""} = Integer.parse(visits)
{bounces, ""} = Integer.parse(bounces)
{visit_duration, _} = Integer.parse(visit_duration)
defp new_from_google_analytics(site_id, "imported_browsers", row) do
browser = Map.fetch!(row.dimensions, "ga:browser")
%{
site_id: site_id,
date: format_date(date),
date: get_date(row),
browser: Map.get(@browser_google_to_plausible, browser, browser),
visitors: visitors,
visits: visits,
bounces: bounces,
visit_duration: visit_duration
visitors: row.metrics |> Map.fetch!("ga:users") |> parse_number(),
visits: row.metrics |> Map.fetch!("ga:sessions") |> parse_number(),
bounces: row.metrics |> Map.fetch!("ga:bounces") |> parse_number(),
visit_duration: row.metrics |> Map.fetch!("ga:sessionDuration") |> parse_number()
}
end
@@ -211,47 +137,38 @@ defmodule Plausible.Imported do
"(not set)" => ""
}
defp new_from_google_analytics(site_id, "imported_operating_systems", %{
"dimensions" => [date, operating_system],
"metrics" => [%{"values" => [visitors, visits, bounces, visit_duration]}]
}) do
{visitors, ""} = Integer.parse(visitors)
{visits, ""} = Integer.parse(visits)
{bounces, ""} = Integer.parse(bounces)
{visit_duration, _} = Integer.parse(visit_duration)
defp new_from_google_analytics(site_id, "imported_operating_systems", row) do
os = Map.fetch!(row.dimensions, "ga:operatingSystem")
%{
site_id: site_id,
date: format_date(date),
operating_system: Map.get(@os_google_to_plausible, operating_system, operating_system),
visitors: visitors,
visits: visits,
bounces: bounces,
visit_duration: visit_duration
date: get_date(row),
operating_system: Map.get(@os_google_to_plausible, os, os),
visitors: row.metrics |> Map.fetch!("ga:users") |> parse_number(),
visits: row.metrics |> Map.fetch!("ga:sessions") |> parse_number(),
bounces: row.metrics |> Map.fetch!("ga:bounces") |> parse_number(),
visit_duration: row.metrics |> Map.fetch!("ga:sessionDuration") |> parse_number()
}
end
defp format_date(date) do
case Timex.parse("#{date}", "%Y%m%d", :strftime) do
{:ok, datetime} ->
NaiveDateTime.to_date(datetime)
{:error, e} ->
Logger.error(e)
raise e
end
defp get_date(%{dimensions: %{"ga:date" => date}}) do
date
|> Timex.parse!("%Y%m%d", :strftime)
|> NaiveDateTime.to_date()
end
@missing_values ["(none)", "(not set)", "(not provided)"]
def nil_if_missing(value) when value in @missing_values, do: nil
def nil_if_missing(value), do: value
defp default_if_missing(value, default \\ nil)
defp default_if_missing(value, default) when value in @missing_values, do: default
defp default_if_missing(value, _default), do: value
def parse_referrer(nil), do: nil
def parse_referrer("google"), do: "Google"
def parse_referrer("bing"), do: "Bing"
def parse_referrer("duckduckgo"), do: "DuckDuckGo"
defp parse_referrer(nil), do: nil
defp parse_referrer("(direct)"), do: nil
defp parse_referrer("google"), do: "Google"
defp parse_referrer("bing"), do: "Bing"
defp parse_referrer("duckduckgo"), do: "DuckDuckGo"
def parse_referrer(ref) do
defp parse_referrer(ref) do
RefInspector.parse("https://" <> ref)
|> PlausibleWeb.RefInspector.parse()
end
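
With map-keyed rows, a single imported_visitors row converts as below (values illustrative; during an import the resulting records go through the Buffer rather than straight into Clickhouse):

    row = %{
      dimensions: %{"ga:date" => "20210101"},
      metrics: %{
        "ga:users" => "1",
        "ga:pageviews" => "2",
        "ga:bounces" => "0",
        "ga:sessions" => "1",
        "ga:sessionDuration" => "60"
      }
    }

    [record] = Plausible.Imported.from_google_analytics([row], site.id, "imported_visitors")
    # record is a plain map: site_id, date (~D[2021-01-01]) and the parsed numeric metrics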

View File

@@ -537,7 +537,7 @@ defmodule PlausibleWeb.AuthController do
end
def google_auth_callback(conn, %{"code" => code, "state" => state}) do
res = Plausible.Google.Api.fetch_access_token(code)
res = Plausible.Google.HTTP.fetch_access_token(code)
[site_id, redirect_to] = Jason.decode!(state)
site = Repo.get(Plausible.Site, site_id)

View File

@@ -670,7 +670,7 @@ defmodule PlausibleWeb.SiteController do
@google_analytics_new_user_metric_date ~D[2016-08-24]
def import_from_google_view_id(conn, %{"view_id" => view_id, "access_token" => access_token}) do
site = conn.assigns[:site]
start_date = Plausible.Google.Api.get_analytics_start_date(view_id, access_token)
start_date = Plausible.Google.HTTP.get_analytics_start_date(view_id, access_token)
case start_date do
{:ok, nil} ->
@@ -711,7 +711,7 @@ defmodule PlausibleWeb.SiteController do
def import_from_google_confirm(conn, %{"access_token" => access_token, "view_id" => view_id}) do
site = conn.assigns[:site]
start_date = Plausible.Google.Api.get_analytics_start_date(view_id, access_token)
start_date = Plausible.Google.HTTP.get_analytics_start_date(view_id, access_token)
end_date =
Plausible.Stats.Clickhouse.pageview_start_date_local(site) || Timex.today(site.timezone)

View File

@@ -109,7 +109,8 @@ defmodule Plausible.MixProject do
{:opentelemetry_ecto, "~> 1.0.0"},
{:observer_cli, "~> 1.7"},
{:mimic, "~> 1.7", only: :test},
{:prom_ex, "~> 1.7.1"}
{:prom_ex, "~> 1.7.1"},
{:exvcr, "~> 0.11", only: :test}
]
end

View File

@@ -35,7 +35,10 @@
"erlex": {:hex, :erlex, "0.2.6", "c7987d15e899c7a2f34f5420d2a2ea0d659682c06ac607572df55a43753aa12e", [:mix], [], "hexpm", "2ed2e25711feb44d52b17d2780eabf998452f6efda104877a3881c2f8c0c0c75"},
"eternal": {:hex, :eternal, "1.2.2", "d1641c86368de99375b98d183042dd6c2b234262b8d08dfd72b9eeaafc2a1abd", [:mix], [], "hexpm", "2c9fe32b9c3726703ba5e1d43a1d255a4f3f2d8f8f9bc19f094c7cb1a7a9e782"},
"ex_machina": {:hex, :ex_machina, "2.7.0", "b792cc3127fd0680fecdb6299235b4727a4944a09ff0fa904cc639272cd92dc7", [:mix], [{:ecto, "~> 2.2 or ~> 3.0", [hex: :ecto, repo: "hexpm", optional: true]}, {:ecto_sql, "~> 3.0", [hex: :ecto_sql, repo: "hexpm", optional: true]}], "hexpm", "419aa7a39bde11894c87a615c4ecaa52d8f107bbdd81d810465186f783245bf8"},
"exactor": {:hex, :exactor, "2.2.4", "5efb4ddeb2c48d9a1d7c9b465a6fffdd82300eb9618ece5d34c3334d5d7245b1", [:mix], [], "hexpm", "1222419f706e01bfa1095aec9acf6421367dcfab798a6f67c54cf784733cd6b5"},
"excoveralls": {:hex, :excoveralls, "0.14.4", "295498f1ae47bdc6dce59af9a585c381e1aefc63298d48172efaaa90c3d251db", [:mix], [{:hackney, "~> 1.16", [hex: :hackney, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "e3ab02f2df4c1c7a519728a6f0a747e71d7d6e846020aae338173619217931c1"},
"exjsx": {:hex, :exjsx, "4.0.0", "60548841e0212df401e38e63c0078ec57b33e7ea49b032c796ccad8cde794b5c", [:mix], [{:jsx, "~> 2.8.0", [hex: :jsx, repo: "hexpm", optional: false]}], "hexpm", "32e95820a97cffea67830e91514a2ad53b888850442d6d395f53a1ac60c82e07"},
"exvcr": {:hex, :exvcr, "0.13.3", "fcd5f54ea0ebd41db7fe16701f3c67871d1b51c3c104ab88f11135a173d47134", [:mix], [{:exactor, "~> 2.2", [hex: :exactor, repo: "hexpm", optional: false]}, {:exjsx, "~> 4.0", [hex: :exjsx, repo: "hexpm", optional: false]}, {:finch, "~> 0.8", [hex: :finch, repo: "hexpm", optional: true]}, {:httpoison, "~> 1.0", [hex: :httpoison, repo: "hexpm", optional: true]}, {:httpotion, "~> 3.1", [hex: :httpotion, repo: "hexpm", optional: true]}, {:ibrowse, "4.4.0", [hex: :ibrowse, repo: "hexpm", optional: true]}, {:meck, "~> 0.8", [hex: :meck, repo: "hexpm", optional: false]}], "hexpm", "db61057447388b7adc4443a55047d11d09acc75eeb5548507c775a8402e02689"},
"file_system": {:hex, :file_system, "0.2.10", "fb082005a9cd1711c05b5248710f8826b02d7d1784e7c3451f9c1231d4fc162d", [:mix], [], "hexpm", "41195edbfb562a593726eda3b3e8b103a309b733ad25f3d642ba49696bf715dc"},
"finch": {:hex, :finch, "0.12.0", "6bbb3e0bb62dd91cd1217d9682a30f5bfc9b0b74950bf10a0b4d4399c2076892", [:mix], [{:castore, "~> 0.1", [hex: :castore, repo: "hexpm", optional: false]}, {:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.3", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 0.2.6", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "320da3f32459e7dcb77f4271b4f2445ba6c5d32cc3c7cca8e2cff599e24be5a6"},
"floki": {:hex, :floki, "0.32.1", "dfe3b8db3b793939c264e6f785bca01753d17318d144bd44b407fb3493acaa87", [:mix], [{:html_entities, "~> 0.5.0", [hex: :html_entities, repo: "hexpm", optional: false]}], "hexpm", "d4b91c713e4a784a3f7b1e3cc016eefc619f6b1c3898464222867cafd3c681a3"},
@@ -56,9 +59,11 @@
"hut": {:hex, :hut, "1.3.0", "71f2f054e657c03f959cf1acc43f436ea87580696528ca2a55c8afb1b06c85e7", [:"erlang.mk", :rebar, :rebar3], [], "hexpm", "7e15d28555d8a1f2b5a3a931ec120af0753e4853a4c66053db354f35bf9ab563"},
"idna": {:hex, :idna, "6.1.1", "8a63070e9f7d0c62eb9d9fcb360a7de382448200fbbd1b106cc96d3d8099df8d", [:rebar3], [{:unicode_util_compat, "~>0.7.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "92376eb7894412ed19ac475e4a86f7b413c1b9fbb5bd16dccd57934157944cea"},
"jason": {:hex, :jason, "1.3.0", "fa6b82a934feb176263ad2df0dbd91bf633d4a46ebfdffea0c8ae82953714946", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "53fc1f51255390e0ec7e50f9cb41e751c260d065dcba2bf0d08dc51a4002c2ac"},
"jsx": {:hex, :jsx, "2.8.3", "a05252d381885240744d955fbe3cf810504eb2567164824e19303ea59eef62cf", [:mix, :rebar3], [], "hexpm", "fc3499fed7a726995aa659143a248534adc754ebd16ccd437cd93b649a95091f"},
"jumper": {:hex, :jumper, "1.0.1", "3c00542ef1a83532b72269fab9f0f0c82bf23a35e27d278bfd9ed0865cecabff", [:mix], [], "hexpm", "318c59078ac220e966d27af3646026db9b5a5e6703cb2aa3e26bcfaba65b7433"},
"kaffy": {:hex, :kaffy, "0.9.0", "bef34c9729f6a3af4d0dea8eede8bcb9e11371a83ac9a8b393991bce81839517", [:mix], [{:ecto, "~> 3.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:phoenix, "~> 1.4", [hex: :phoenix, repo: "hexpm", optional: false]}, {:phoenix_html, "~> 2.11", [hex: :phoenix_html, repo: "hexpm", optional: false]}], "hexpm", "d18ff57b8e68feb433aed11e71510cd357abc7034e75358af5deff7d0d4c6ed3"},
"location": {:git, "https://github.com/plausible/location.git", "8faf4f08b06905adde43554dc1d9d35675654816", []},
"meck": {:hex, :meck, "0.9.2", "85ccbab053f1db86c7ca240e9fc718170ee5bda03810a6292b5306bf31bae5f5", [:rebar3], [], "hexpm", "81344f561357dc40a8344afa53767c32669153355b626ea9fcbc8da6b3045826"},
"metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm", "69b09adddc4f74a40716ae54d140f93beb0fb8978d8636eaded0c31b6f099f16"},
"mime": {:hex, :mime, "1.6.0", "dabde576a497cef4bbdd60aceee8160e02a6c89250d6c0b29e56c0dfb00db3d2", [:mix], [], "hexpm", "31a1a8613f8321143dde1dafc36006a17d28d02bdfecb9e95a880fa7aabd19a7"},
"mimerl": {:hex, :mimerl, "1.2.0", "67e2d3f571088d5cfd3e550c383094b47159f3eee8ffa08e64106cdf5e981be3", [:rebar3], [], "hexpm", "f278585650aa581986264638ebf698f8bb19df297f66ad91b18910dfc6e19323"},

View File

@@ -4,123 +4,84 @@ defmodule Plausible.Google.ApiTest do
import Plausible.TestUtils
import Double
@ok_response Jason.encode!(%{
"reports" => [
%{
"data" => %{
"rows" => [
%{
"dimensions" => ["20220101"],
"metrics" => [%{"values" => ["1", "1", "1", "1", "1"]}]
}
]
}
}
]
})
@ok_response File.read!("fixture/ga_batch_report.json")
@empty_response Jason.encode!(%{
"reports" => [%{"data" => %{"rows" => []}}]
})
def start_buffer(_setup_args) do
{:ok, pid} = Plausible.Google.Buffer.start_link()
{:ok, buffer: pid}
end
describe "fetch_and_persist/4" do
setup [:create_user, :create_new_site]
setup [:create_user, :create_new_site, :start_buffer]
test "will fetch and persist import data from Google Analytics", %{site: site} do
httpoison =
HTTPoison
|> stub(:post, fn _url, _body, _headers, _opts ->
{:ok, %HTTPoison.Response{status_code: 200, body: @ok_response}}
test "will fetch and persist import data from Google Analytics", %{site: site, buffer: buffer} do
finch_double =
Finch
|> stub(:request, fn _, _ ->
{:ok, %Finch.Response{status: 200, body: @ok_response}}
end)
request = %{
dataset: "imported_visitors",
request = %Plausible.Google.ReportRequest{
dataset: "imported_exit_pages",
view_id: "123",
date_range: Date.range(~D[2022-01-01], ~D[2022-02-01]),
dimensions: ["ga:date"],
metrics: ["ga:users"],
dimensions: ["ga:date", "ga:exitPagePath"],
metrics: ["ga:users", "ga:exits"],
access_token: "fake-token",
page_token: nil
page_token: nil,
page_size: 10_000
}
Api.fetch_and_persist(site, request, http_client: httpoison, sleep_time: 0)
Api.fetch_and_persist(site, request,
http_client: finch_double,
sleep_time: 0,
buffer: buffer
)
assert imported_visitor_count(site) == 1
Plausible.Google.Buffer.flush(buffer)
assert 1479 ==
Plausible.ClickhouseRepo.aggregate(
from(iex in "imported_exit_pages", where: iex.site_id == ^site.id),
:count
)
end
test "retries HTTP request up to 5 times before raising the last error", %{site: site} do
httpoison =
HTTPoison
|> stub(:post, fn _url, _body, _headers, _opts ->
{:error, %HTTPoison.Error{reason: :nxdomain}}
end)
|> stub(:post, fn _url, _body, _headers, _opts ->
{:error, %HTTPoison.Error{reason: :timeout}}
end)
|> stub(:post, fn _url, _body, _headers, _opts ->
{:error, %HTTPoison.Error{reason: :closed}}
end)
|> stub(:post, fn _url, _body, _headers, _opts ->
{:ok, %HTTPoison.Response{status_code: 503}}
end)
|> stub(:post, fn _url, _body, _headers, _opts ->
{:ok, %HTTPoison.Response{status_code: 502}}
end)
test "retries HTTP request up to 5 times before raising the last error", %{
site: site,
buffer: buffer
} do
finch_double =
Finch
|> stub(:request, fn _, _ -> {:error, :timeout} end)
|> stub(:request, fn _, _ -> {:error, :nx_domain} end)
|> stub(:request, fn _, _ -> {:error, :closed} end)
|> stub(:request, fn _, _ -> {:ok, %Finch.Response{status: 503}} end)
|> stub(:request, fn _, _ -> {:ok, %Finch.Response{status: 502}} end)
request = %{
request = %Plausible.Google.ReportRequest{
view_id: "123",
date_range: Date.range(~D[2022-01-01], ~D[2022-02-01]),
dimensions: ["ga:date"],
metrics: ["ga:users"],
access_token: "fake-token",
page_token: nil
page_token: nil,
page_size: 10_000
}
assert_raise RuntimeError, "Google API request failed too many times", fn ->
Api.fetch_and_persist(site, request, http_client: httpoison, sleep_time: 0)
Api.fetch_and_persist(site, request,
http_client: finch_double,
sleep_time: 0,
buffer: buffer
)
end
assert_receive({HTTPoison, :post, [_, _, _, _]})
assert_receive({HTTPoison, :post, [_, _, _, _]})
assert_receive({HTTPoison, :post, [_, _, _, _]})
assert_receive({HTTPoison, :post, [_, _, _, _]})
assert_receive({HTTPoison, :post, [_, _, _, _]})
assert_receive({Finch, :request, [_, _]})
assert_receive({Finch, :request, [_, _]})
assert_receive({Finch, :request, [_, _]})
assert_receive({Finch, :request, [_, _]})
assert_receive({Finch, :request, [_, _]})
end
test "retries HTTP request if the rows are empty", %{site: site} do
httpoison =
HTTPoison
|> stub(:post, fn _url, _body, _headers, _opts ->
{:ok, %HTTPoison.Response{status_code: 200, body: @empty_response}}
end)
|> stub(:post, fn _url, _body, _headers, _opts ->
{:ok, %HTTPoison.Response{status_code: 200, body: @ok_response}}
end)
request = %{
dataset: "imported_visitors",
view_id: "123",
date_range: Date.range(~D[2022-01-01], ~D[2022-02-01]),
dimensions: ["ga:date"],
metrics: ["ga:users"],
access_token: "fake-token",
page_token: nil
}
Api.fetch_and_persist(site, request, http_client: httpoison, sleep_time: 0)
assert_receive({HTTPoison, :post, [_, _, _, _]})
assert_receive({HTTPoison, :post, [_, _, _, _]})
assert imported_visitor_count(site) == 1
end
end
defp imported_visitor_count(site) do
Plausible.ClickhouseRepo.one(
from iv in "imported_visitors",
where: iv.site_id == ^site.id,
select: sum(iv.visitors)
)
end
end

View File

@@ -0,0 +1,96 @@
defmodule Plausible.Google.BufferTest do
use Plausible.DataCase, async: true
import Plausible.TestUtils
import Ecto.Query
alias Plausible.Google.Buffer
setup [:create_user, :create_new_site, :set_buffer_size]
defp set_buffer_size(_setup_args) do
Application.put_env(:plausible, :google, max_buffer_size: 10)
:ok
end
defp imported_count(%{id: site_id}, table_name) do
table_name
|> from()
|> where([record], record.site_id == ^site_id)
|> Plausible.ClickhouseRepo.aggregate(:count)
end
defp build_records(count, factory_name, site) do
count
|> build_list(factory_name, site_id: site.id)
|> Enum.map(&Map.drop(&1, [:table]))
end
test "insert_many/3 flushes when buffer reaches limit", %{site: site} do
{:ok, pid} = Buffer.start_link()
imported_visitors = build_records(9, :imported_visitors, site)
assert :ok == Buffer.insert_many(pid, "imported_visitors", imported_visitors)
assert Buffer.size(pid, "imported_visitors") == 9
assert imported_count(site, "imported_visitors") == 0, "expected not to have flushed"
imported_visitors = build_records(1, :imported_visitors, site)
assert :ok == Buffer.insert_many(pid, "imported_visitors", imported_visitors)
assert Buffer.size(pid, "imported_visitors") == 0
assert imported_count(site, "imported_visitors") == 10, "expected to have flushed"
end
test "insert_many/3 uses separate buffers for each table", %{site: site} do
{:ok, pid} = Buffer.start_link()
imported_visitors = build_records(9, :imported_visitors, site)
assert :ok == Buffer.insert_many(pid, "imported_visitors", imported_visitors)
assert Buffer.size(pid, "imported_visitors") == 9
assert imported_count(site, "imported_visitors") == 0, "expected not to have flushed"
imported_sources = build_records(1, :imported_sources, site)
assert :ok == Buffer.insert_many(pid, "imported_sources", imported_sources)
assert Buffer.size(pid, "imported_sources") == 1
assert imported_count(site, "imported_visitors") == 0, "expected not to have flushed"
imported_visitors = build_records(1, :imported_visitors, site)
assert :ok == Buffer.insert_many(pid, "imported_visitors", imported_visitors)
assert Buffer.size(pid, "imported_visitors") == 0
assert imported_count(site, "imported_visitors") == 10, "expected to have flushed"
imported_sources = build_records(9, :imported_sources, site)
assert :ok == Buffer.insert_many(pid, "imported_sources", imported_sources)
assert Buffer.size(pid, "imported_sources") == 0
assert imported_count(site, "imported_sources") == 10, "expected to have flushed"
end
test "insert_many/3 flushes buffer automatically with many records", %{site: site} do
{:ok, pid} = Buffer.start_link()
imported_visitors = build_records(50, :imported_visitors, site)
assert :ok == Buffer.insert_many(pid, "imported_visitors", imported_visitors)
assert Buffer.size(pid, "imported_visitors") == 0
assert imported_count(site, "imported_visitors") == 50, "expected to have flushed"
end
test "flush/2 flushes all buffers", %{site: site} do
{:ok, pid} = Buffer.start_link()
imported_sources = build_records(1, :imported_sources, site)
Buffer.insert_many(pid, "imported_sources", imported_sources)
imported_visitors = build_records(1, :imported_visitors, site)
Buffer.insert_many(pid, "imported_visitors", imported_visitors)
imported_operating_systems = build_records(2, :imported_operating_systems, site)
Buffer.insert_many(pid, "imported_operating_systems", imported_operating_systems)
assert :ok == Buffer.flush(pid, :timer.seconds(4))
assert Buffer.size(pid, "imported_sources") == 0
assert Buffer.size(pid, "imported_visitors") == 0
assert Buffer.size(pid, "imported_operating_systems") == 0
assert imported_count(site, "imported_sources") == 1
assert imported_count(site, "imported_visitors") == 1
assert imported_count(site, "imported_operating_systems") == 2
end
end

View File

@@ -0,0 +1,39 @@
defmodule Plausible.Google.Api.VCRTest do
use Plausible.DataCase, async: false
use ExVCR.Mock, adapter: ExVCR.Adapter.Finch
require Ecto.Query
import Plausible.TestUtils
setup [:create_user, :create_site]
defp get_insert_count do
Plausible.ClickhouseRepo.aggregate(
from(ql in "query_log",
prefix: "system",
where: ql.query_kind == "Insert" and ql.is_initial_query == true
),
:count
)
end
test "imports page views from Google Analytics", %{site: site} do
use_cassette "google_analytics_import#1", match_requests_on: [:request_body] do
inserts_before_importing = get_insert_count()
before_importing_timestamp = DateTime.utc_now()
access_token = "***"
view_id = "54297898"
date_range = Date.range(~D[2011-01-01], ~D[2022-07-19])
assert :ok == Plausible.Google.Api.import_analytics(site, date_range, view_id, access_token)
total_seconds = DateTime.diff(DateTime.utc_now(), before_importing_timestamp, :second)
total_inserts = get_insert_count() - inserts_before_importing
assert total_inserts / total_seconds <= 1.0,
"should not call Clickhouse more than 1 time per second"
assert 1_495_150 == Plausible.Stats.Clickhouse.imported_pageview_count(site)
end
end
end

View File

@@ -5,6 +5,12 @@ defmodule Plausible.ImportedTest do
@user_id 123
defp import_data(ga_data, site_id, table_name) do
ga_data
|> Plausible.Imported.from_google_analytics(site_id, table_name)
|> then(&Plausible.ClickhouseRepo.insert_all(table_name, &1))
end
describe "Parse and import third party data fetched from Google Analytics" do
setup [:create_user, :log_in, :create_new_site, :add_imported_data]
@@ -14,21 +20,32 @@
build(:pageview, timestamp: ~N[2021-01-31 00:00:00])
])
assert :ok =
Plausible.Imported.from_google_analytics(
[
%{
"dimensions" => ["20210101"],
"metrics" => [%{"values" => ["1", "1", "0", "1", "60"]}]
},
%{
"dimensions" => ["20210131"],
"metrics" => [%{"values" => ["1", "1", "1", "1", "60"]}]
}
],
site.id,
"imported_visitors"
)
import_data(
[
%{
dimensions: %{"ga:date" => "20210101"},
metrics: %{
"ga:users" => "1",
"ga:pageviews" => "1",
"ga:bounces" => "0",
"ga:sessions" => "1",
"ga:sessionDuration" => "60"
}
},
%{
dimensions: %{"ga:date" => "20210131"},
metrics: %{
"ga:users" => "1",
"ga:pageviews" => "1",
"ga:bounces" => "0",
"ga:sessions" => "1",
"ga:sessionDuration" => "60"
}
}
],
site.id,
"imported_visitors"
)
conn =
get(
@@ -63,44 +80,108 @@
)
])
assert :ok =
Plausible.Imported.from_google_analytics(
[
%{
"dimensions" => ["20210101", "duckduckgo.com", "organic", "", "", ""],
"metrics" => [%{"values" => ["1", "1", "0", "60"]}]
},
%{
"dimensions" => ["20210131", "google.com", "organic", "", "", ""],
"metrics" => [%{"values" => ["1", "1", "1", "60"]}]
},
%{
"dimensions" => ["20210101", "google.com", "paid", "", "", ""],
"metrics" => [%{"values" => ["1", "1", "1", "60"]}]
},
%{
"dimensions" => ["20210101", "Twitter", "social", "", "", ""],
"metrics" => [%{"values" => ["1", "1", "1", "60"]}]
},
%{
"dimensions" => [
"20210131",
"A Nice Newsletter",
"email",
"newsletter",
"",
""
],
"metrics" => [%{"values" => ["1", "1", "1", "60"]}]
},
%{
"dimensions" => ["20210101", "(direct)", "(none)", "", "", ""],
"metrics" => [%{"values" => ["1", "1", "1", "60"]}]
}
],
site.id,
"imported_sources"
)
import_data(
[
%{
dimensions: %{
"ga:adContent" => "",
"ga:campaign" => "",
"ga:date" => "20210101",
"ga:keyword" => "",
"ga:medium" => "organic",
"ga:source" => "duckduckgo.com"
},
metrics: %{
"ga:bounces" => "0",
"ga:sessionDuration" => "60",
"ga:sessions" => "1",
"ga:users" => "1"
}
},
%{
dimensions: %{
"ga:adContent" => "",
"ga:campaign" => "",
"ga:date" => "20210131",
"ga:keyword" => "",
"ga:medium" => "organic",
"ga:source" => "google.com"
},
metrics: %{
"ga:bounces" => "1",
"ga:sessionDuration" => "60",
"ga:sessions" => "1",
"ga:users" => "1"
}
},
%{
dimensions: %{
"ga:adContent" => "",
"ga:campaign" => "",
"ga:date" => "20210101",
"ga:keyword" => "",
"ga:medium" => "paid",
"ga:source" => "google.com"
},
metrics: %{
"ga:bounces" => "1",
"ga:sessionDuration" => "60",
"ga:sessions" => "1",
"ga:users" => "1"
}
},
%{
dimensions: %{
"ga:adContent" => "",
"ga:campaign" => "",
"ga:date" => "20210101",
"ga:keyword" => "",
"ga:medium" => "social",
"ga:source" => "Twitter"
},
metrics: %{
"ga:bounces" => "1",
"ga:sessionDuration" => "60",
"ga:sessions" => "1",
"ga:users" => "1"
}
},
%{
dimensions: %{
"ga:adContent" => "",
"ga:campaign" => "newsletter",
"ga:date" => "20210131",
"ga:keyword" => "",
"ga:medium" => "email",
"ga:source" => "A Nice Newsletter"
},
metrics: %{
"ga:bounces" => "1",
"ga:sessionDuration" => "60",
"ga:sessions" => "1",
"ga:users" => "1"
}
},
%{
dimensions: %{
"ga:adContent" => "",
"ga:campaign" => "",
"ga:date" => "20210101",
"ga:keyword" => "",
"ga:medium" => "(none)",
"ga:source" => "(direct)"
},
metrics: %{
"ga:bounces" => "1",
"ga:sessionDuration" => "60",
"ga:sessions" => "1",
"ga:users" => "1"
}
}
],
site.id,
"imported_sources"
)
conn =
get(
@ -108,10 +189,10 @@ defmodule Plausible.ImportedTest do
"/api/stats/#{site.domain}/sources?period=month&date=2021-01-01&with_imported=true"
)
assert json_response(conn, 200) == [
%{"name" => "Google", "visitors" => 4},
%{"name" => "DuckDuckGo", "visitors" => 2},
assert conn |> json_response(200) |> Enum.sort() == [
%{"name" => "A Nice Newsletter", "visitors" => 1},
%{"name" => "DuckDuckGo", "visitors" => 2},
%{"name" => "Google", "visitors" => 4},
%{"name" => "Twitter", "visitors" => 1}
]
end
@@ -128,21 +209,44 @@
)
])
assert :ok =
Plausible.Imported.from_google_analytics(
[
%{
"dimensions" => ["20210101", "Twitter", "social", "", "", ""],
"metrics" => [%{"values" => ["1", "1", "1", "60"]}]
},
%{
"dimensions" => ["20210101", "(direct)", "(none)", "", "", ""],
"metrics" => [%{"values" => ["1", "1", "1", "60"]}]
}
],
site.id,
"imported_sources"
)
import_data(
[
%{
dimensions: %{
"ga:adContent" => "",
"ga:campaign" => "",
"ga:date" => "20210101",
"ga:keyword" => "",
"ga:medium" => "social",
"ga:source" => "Twitter"
},
metrics: %{
"ga:bounces" => "1",
"ga:sessionDuration" => "60",
"ga:sessions" => "1",
"ga:users" => "1"
}
},
%{
dimensions: %{
"ga:adContent" => "",
"ga:campaign" => "",
"ga:date" => "20210101",
"ga:keyword" => "",
"ga:medium" => "(none)",
"ga:source" => "(direct)"
},
metrics: %{
"ga:bounces" => "1",
"ga:sessionDuration" => "60",
"ga:sessions" => "1",
"ga:users" => "1"
}
}
],
site.id,
"imported_sources"
)
conn =
get(
@@ -166,25 +270,60 @@ defmodule Plausible.ImportedTest do
build(:pageview, utm_campaign: "august", timestamp: ~N[2021-01-01 00:00:00])
])
assert :ok =
Plausible.Imported.from_google_analytics(
[
%{
"dimensions" => ["20210101", "Twitter", "social", "profile", "", ""],
"metrics" => [%{"values" => ["1", "1", "1", "100"]}]
},
%{
"dimensions" => ["20210101", "Gmail", "email", "august", "", ""],
"metrics" => [%{"values" => ["1", "1", "0", "100"]}]
},
%{
"dimensions" => ["20210101", "Gmail", "email", "(not set)", "", ""],
"metrics" => [%{"values" => ["1", "1", "0", "100"]}]
}
],
site.id,
"imported_sources"
)
import_data(
[
%{
dimensions: %{
"ga:adContent" => "",
"ga:campaign" => "profile",
"ga:date" => "20210101",
"ga:keyword" => "",
"ga:medium" => "social",
"ga:source" => "Twitter"
},
metrics: %{
"ga:bounces" => "1",
"ga:sessionDuration" => "100",
"ga:sessions" => "1",
"ga:users" => "1"
}
},
%{
dimensions: %{
"ga:adContent" => "",
"ga:campaign" => "august",
"ga:date" => "20210101",
"ga:keyword" => "",
"ga:medium" => "email",
"ga:source" => "Gmail"
},
metrics: %{
"ga:bounces" => "0",
"ga:sessionDuration" => "100",
"ga:sessions" => "1",
"ga:users" => "1"
}
},
%{
dimensions: %{
"ga:adContent" => "",
"ga:campaign" => "(not set)",
"ga:date" => "20210101",
"ga:keyword" => "",
"ga:medium" => "email",
"ga:source" => "Gmail"
},
metrics: %{
"ga:bounces" => "0",
"ga:sessionDuration" => "100",
"ga:sessions" => "1",
"ga:users" => "1"
}
}
],
site.id,
"imported_sources"
)
conn =
get(
@@ -215,25 +354,60 @@ defmodule Plausible.ImportedTest do
build(:pageview, utm_term: "Sweden", timestamp: ~N[2021-01-01 00:00:00])
])
assert :ok =
Plausible.Imported.from_google_analytics(
[
%{
"dimensions" => ["20210101", "Google", "paid", "", "", "oat milk"],
"metrics" => [%{"values" => ["1", "1", "1", "100"]}]
},
%{
"dimensions" => ["20210101", "Google", "paid", "", "", "Sweden"],
"metrics" => [%{"values" => ["1", "1", "0", "100"]}]
},
%{
"dimensions" => ["20210101", "Google", "paid", "", "", "(not set)"],
"metrics" => [%{"values" => ["1", "1", "0", "100"]}]
}
],
site.id,
"imported_sources"
)
import_data(
[
%{
dimensions: %{
"ga:adContent" => "",
"ga:campaign" => "",
"ga:date" => "20210101",
"ga:keyword" => "oat milk",
"ga:medium" => "paid",
"ga:source" => "Google"
},
metrics: %{
"ga:bounces" => "1",
"ga:sessionDuration" => "100",
"ga:sessions" => "1",
"ga:users" => "1"
}
},
%{
dimensions: %{
"ga:adContent" => "",
"ga:campaign" => "",
"ga:date" => "20210101",
"ga:keyword" => "Sweden",
"ga:medium" => "paid",
"ga:source" => "Google"
},
metrics: %{
"ga:bounces" => "0",
"ga:sessionDuration" => "100",
"ga:sessions" => "1",
"ga:users" => "1"
}
},
%{
dimensions: %{
"ga:adContent" => "",
"ga:campaign" => "",
"ga:date" => "20210101",
"ga:keyword" => "(not set)",
"ga:medium" => "paid",
"ga:source" => "Google"
},
metrics: %{
"ga:bounces" => "0",
"ga:sessionDuration" => "100",
"ga:sessions" => "1",
"ga:users" => "1"
}
}
],
site.id,
"imported_sources"
)
conn =
get(
@@ -263,25 +437,60 @@ defmodule Plausible.ImportedTest do
build(:pageview, utm_content: "blog", timestamp: ~N[2021-01-01 00:00:00])
])
assert :ok =
Plausible.Imported.from_google_analytics(
[
%{
"dimensions" => ["20210101", "Google", "paid", "", "ad", ""],
"metrics" => [%{"values" => ["1", "1", "1", "100"]}]
},
%{
"dimensions" => ["20210101", "Google", "paid", "", "blog", ""],
"metrics" => [%{"values" => ["1", "1", "0", "100"]}]
},
%{
"dimensions" => ["20210101", "Google", "paid", "", "(not set)", ""],
"metrics" => [%{"values" => ["1", "1", "0", "100"]}]
}
],
site.id,
"imported_sources"
)
import_data(
[
%{
dimensions: %{
"ga:adContent" => "ad",
"ga:campaign" => "",
"ga:date" => "20210101",
"ga:keyword" => "",
"ga:medium" => "paid",
"ga:source" => "Google"
},
metrics: %{
"ga:bounces" => "1",
"ga:sessionDuration" => "100",
"ga:sessions" => "1",
"ga:users" => "1"
}
},
%{
dimensions: %{
"ga:adContent" => "blog",
"ga:campaign" => "",
"ga:date" => "20210101",
"ga:keyword" => "",
"ga:medium" => "paid",
"ga:source" => "Google"
},
metrics: %{
"ga:bounces" => "0",
"ga:sessionDuration" => "100",
"ga:sessions" => "1",
"ga:users" => "1"
}
},
%{
dimensions: %{
"ga:adContent" => "(not set)",
"ga:campaign" => "",
"ga:date" => "20210101",
"ga:keyword" => "",
"ga:medium" => "paid",
"ga:source" => "Google"
},
metrics: %{
"ga:bounces" => "0",
"ga:sessionDuration" => "100",
"ga:sessions" => "1",
"ga:users" => "1"
}
}
],
site.id,
"imported_sources"
)
conn =
get(
@@ -321,37 +530,67 @@ defmodule Plausible.ImportedTest do
)
])
assert :ok =
Plausible.Imported.from_google_analytics(
[
%{
"dimensions" => ["20210101", "host-a.com", "/"],
"metrics" => [%{"values" => ["1", "1", "0", "700"]}]
},
%{
"dimensions" => ["20210101", "host-b.com", "/some-other-page"],
"metrics" => [%{"values" => ["1", "2", "1", "60"]}]
},
%{
"dimensions" => ["20210101", "host-b.com", "/some-other-page?wat=wot"],
"metrics" => [%{"values" => ["1", "1", "0", "60"]}]
}
],
site.id,
"imported_pages"
)
import_data(
[
%{
dimensions: %{
"ga:date" => "20210101",
"ga:hostname" => "host-a.com",
"ga:pagePath" => "/"
},
metrics: %{
"ga:exits" => "0",
"ga:pageviews" => "1",
"ga:timeOnPage" => "700",
"ga:users" => "1"
}
},
%{
dimensions: %{
"ga:date" => "20210101",
"ga:hostname" => "host-b.com",
"ga:pagePath" => "/some-other-page"
},
metrics: %{
"ga:exits" => "1",
"ga:pageviews" => "2",
"ga:timeOnPage" => "60",
"ga:users" => "1"
}
},
%{
dimensions: %{
"ga:date" => "20210101",
"ga:hostname" => "host-b.com",
"ga:pagePath" => "/some-other-page?wat=wot"
},
metrics: %{
"ga:exits" => "0",
"ga:pageviews" => "1",
"ga:timeOnPage" => "60",
"ga:users" => "1"
}
}
],
site.id,
"imported_pages"
)
assert :ok =
Plausible.Imported.from_google_analytics(
[
%{
"dimensions" => ["20210101", "/"],
"metrics" => [%{"values" => ["1", "3", "10", "1"]}]
}
],
site.id,
"imported_entry_pages"
)
import_data(
[
%{
dimensions: %{"ga:date" => "20210101", "ga:landingPagePath" => "/"},
metrics: %{
"ga:bounces" => "1",
"ga:entrances" => "3",
"ga:sessionDuration" => "10",
"ga:users" => "1"
}
}
],
site.id,
"imported_entry_pages"
)
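# Each table is fed from a differently-shaped GA report: `imported_pages`
# rows carry ga:exits / ga:pageviews / ga:timeOnPage / ga:users, while
# `imported_entry_pages` rows carry ga:bounces / ga:entrances /
# ga:sessionDuration / ga:users.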
conn =
get(
@@ -399,29 +638,36 @@ defmodule Plausible.ImportedTest do
)
])
assert :ok =
Plausible.Imported.from_google_analytics(
[
%{
"dimensions" => ["20210101", "host-a.com", "/page2"],
"metrics" => [%{"values" => ["2", "4", "0", "10"]}]
}
],
site.id,
"imported_pages"
)
import_data(
[
%{
dimensions: %{
"ga:date" => "20210101",
"ga:hostname" => "host-a.com",
"ga:pagePath" => "/page2"
},
metrics: %{
"ga:exits" => "0",
"ga:pageviews" => "4",
"ga:timeOnPage" => "10",
"ga:users" => "2"
}
}
],
site.id,
"imported_pages"
)
assert :ok =
Plausible.Imported.from_google_analytics(
[
%{
"dimensions" => ["20210101", "/page2"],
"metrics" => [%{"values" => ["2", "3"]}]
}
],
site.id,
"imported_exit_pages"
)
import_data(
[
%{
dimensions: %{"ga:date" => "20210101", "ga:exitPagePath" => "/page2"},
metrics: %{"ga:exits" => "3", "ga:users" => "2"}
}
],
site.id,
"imported_exit_pages"
)
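# Exit pages are the leanest table: only ga:exits and ga:users per row, with
# ga:exits presumably becoming the exit-page visit count in the serialized
# record (the mapping itself lives in the serializer, outside this hunk).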
conn =
get(
@@ -456,21 +702,38 @@ defmodule Plausible.ImportedTest do
)
])
assert :ok =
Plausible.Imported.from_google_analytics(
[
%{
"dimensions" => ["20210101", "EE", "Tartumaa"],
"metrics" => [%{"values" => ["1", "1", "0", "10"]}]
},
%{
"dimensions" => ["20210101", "GB", "Midlothian"],
"metrics" => [%{"values" => ["1", "1", "0", "10"]}]
}
],
site.id,
"imported_locations"
)
import_data(
[
%{
dimensions: %{
"ga:countryIsoCode" => "EE",
"ga:date" => "20210101",
"ga:regionIsoCode" => "Tartumaa"
},
metrics: %{
"ga:bounces" => "0",
"ga:sessionDuration" => "10",
"ga:sessions" => "1",
"ga:users" => "1"
}
},
%{
dimensions: %{
"ga:countryIsoCode" => "GB",
"ga:date" => "20210101",
"ga:regionIsoCode" => "Midlothian"
},
metrics: %{
"ga:bounces" => "0",
"ga:sessionDuration" => "10",
"ga:sessions" => "1",
"ga:users" => "1"
}
}
],
site.id,
"imported_locations"
)
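# Locations arrive keyed by ISO codes (ga:countryIsoCode / ga:regionIsoCode)
# rather than display names, which presumably lets them merge directly with
# the country/region codes Plausible records natively.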
conn =
get(
@@ -505,21 +768,30 @@ defmodule Plausible.ImportedTest do
build(:pageview, screen_size: "Laptop", timestamp: ~N[2021-01-01 00:15:00])
])
assert :ok =
Plausible.Imported.from_google_analytics(
[
%{
"dimensions" => ["20210101", "mobile"],
"metrics" => [%{"values" => ["1", "1", "0", "10"]}]
},
%{
"dimensions" => ["20210101", "Laptop"],
"metrics" => [%{"values" => ["1", "1", "0", "10"]}]
}
],
site.id,
"imported_devices"
)
import_data(
[
%{
dimensions: %{"ga:date" => "20210101", "ga:deviceCategory" => "mobile"},
metrics: %{
"ga:bounces" => "0",
"ga:sessionDuration" => "10",
"ga:sessions" => "1",
"ga:users" => "1"
}
},
%{
dimensions: %{"ga:date" => "20210101", "ga:deviceCategory" => "Laptop"},
metrics: %{
"ga:bounces" => "0",
"ga:sessionDuration" => "10",
"ga:sessions" => "1",
"ga:users" => "1"
}
}
],
site.id,
"imported_devices"
)
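# GA device categories are passed through verbatim ("mobile", "Laptop"); the
# serializer presumably capitalizes them into Plausible's screen-size buckets
# ("Mobile", "Laptop") so they can merge with native data.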
conn =
get(
@@ -540,21 +812,33 @@ defmodule Plausible.ImportedTest do
build(:pageview, browser: "Firefox", timestamp: ~N[2021-01-01 00:15:00])
])
assert :ok =
Plausible.Imported.from_google_analytics(
[
%{
"dimensions" => ["20210101", "User-Agent: Mozilla"],
"metrics" => [%{"values" => ["1", "1", "0", "10"]}]
},
%{
"dimensions" => ["20210101", "Android Browser"],
"metrics" => [%{"values" => ["1", "1", "0", "10"]}]
}
],
site.id,
"imported_browsers"
)
import_data(
[
%{
dimensions: %{
"ga:browser" => "User-Agent: Mozilla",
"ga:date" => "20210101"
},
metrics: %{
"ga:bounces" => "0",
"ga:sessionDuration" => "10",
"ga:sessions" => "1",
"ga:users" => "1"
}
},
%{
dimensions: %{"ga:browser" => "Android Browser", "ga:date" => "20210101"},
metrics: %{
"ga:bounces" => "0",
"ga:sessionDuration" => "10",
"ga:sessions" => "1",
"ga:users" => "1"
}
}
],
site.id,
"imported_browsers"
)
conn =
get(
@@ -576,21 +860,30 @@ defmodule Plausible.ImportedTest do
build(:pageview, operating_system: "GNU/Linux", timestamp: ~N[2021-01-01 00:15:00])
])
assert :ok =
Plausible.Imported.from_google_analytics(
[
%{
"dimensions" => ["20210101", "Macintosh"],
"metrics" => [%{"values" => ["1", "1", "0", "10"]}]
},
%{
"dimensions" => ["20210101", "Linux"],
"metrics" => [%{"values" => ["1", "1", "0", "10"]}]
}
],
site.id,
"imported_operating_systems"
)
import_data(
[
%{
dimensions: %{"ga:date" => "20210101", "ga:operatingSystem" => "Macintosh"},
metrics: %{
"ga:bounces" => "0",
"ga:sessionDuration" => "10",
"ga:sessions" => "1",
"ga:users" => "1"
}
},
%{
dimensions: %{"ga:date" => "20210101", "ga:operatingSystem" => "Linux"},
metrics: %{
"ga:bounces" => "0",
"ga:sessionDuration" => "10",
"ga:sessions" => "1",
"ga:users" => "1"
}
}
],
site.id,
"imported_operating_systems"
)
conn =
get(
@@ -610,21 +903,32 @@ defmodule Plausible.ImportedTest do
build(:pageview, timestamp: ~N[2021-01-31 00:00:00])
])
assert :ok =
Plausible.Imported.from_google_analytics(
[
%{
"dimensions" => ["20210101"],
"metrics" => [%{"values" => ["1", "1", "0", "1", "1.391607E7"]}]
},
%{
"dimensions" => ["20210131"],
"metrics" => [%{"values" => ["1", "1", "1", "1", "60"]}]
}
],
site.id,
"imported_visitors"
)
import_data(
[
%{
dimensions: %{"ga:date" => "20210101"},
metrics: %{
"ga:bounces" => "0",
"ga:pageviews" => "1",
"ga:sessionDuration" => "1.391607E7",
"ga:sessions" => "1",
"ga:users" => "1"
}
},
%{
dimensions: %{"ga:date" => "20210131"},
metrics: %{
"ga:bounces" => "1",
"ga:pageviews" => "1",
"ga:sessionDuration" => "60",
"ga:sessions" => "1",
"ga:users" => "1"
}
}
],
site.id,
"imported_visitors"
)
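# "1.391607E7" deliberately uses scientific notation: GA reports large
# session-duration totals as floats, so the serializer presumably parses and
# rounds this to 13_916_070 seconds rather than failing on integer parsing.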
conn =
get(