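# Allows plain strings (e.g. a user identifier) to act as FunWithFlags
# actors, so feature flags can be toggled for individual values.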
defimpl FunWithFlags.Actor, for: BitString do
  def id(str) do
    str
  end
end

defmodule PlausibleWeb.Api.ExternalController do
  use PlausibleWeb, :controller
  require Logger

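  # Ingestion endpoint for the tracker script. Replies 202 when the event is
  # accepted (or deliberately dropped) and 400 with an error map otherwise.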
  def event(conn, _params) do
    with {:ok, params} <- parse_body(conn),
         _ <- Sentry.Context.set_extra_context(%{request: params}),
         :ok <- create_event(conn, params) do
      conn |> put_status(202) |> text("ok")
    else
      {:error, :invalid_json} ->
        conn
        |> put_status(400)
        |> json(%{errors: %{request: "Unable to parse request body as json"}})

      {:error, errors} ->
        conn |> put_status(400) |> json(%{errors: errors})
    end
  end

  def error(conn, _params) do
    Sentry.capture_message("JS snippet error")
    send_resp(conn, 200, "")
  end

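  # Liveness probe: checks both data stores. A healthy reply is a 200 with a
  # body shaped roughly like {"postgres": "ok", "clickhouse": "ok"} (sketch of
  # the shape; on failure the value embeds the inspected error instead).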
  def health(conn, _params) do
    postgres_health =
      case Ecto.Adapters.SQL.query(Plausible.Repo, "SELECT 1", []) do
        {:ok, _} -> "ok"
        e -> "error: #{inspect(e)}"
      end

    clickhouse_health =
      case Ecto.Adapters.SQL.query(Plausible.ClickhouseRepo, "SELECT 1", []) do
        {:ok, _} -> "ok"
        e -> "error: #{inspect(e)}"
      end

    status =
      case {postgres_health, clickhouse_health} do
        {"ok", "ok"} -> 200
        _ -> 500
      end

    put_status(conn, status)
    |> json(%{
      postgres: postgres_health,
      clickhouse: clickhouse_health
    })
  end

  def info(conn, _params) do
    build_metadata = System.get_env("BUILD_METADATA", "{}") |> Jason.decode!()

    geo_database =
      case Geolix.metadata([:geolocation]) do
        %{geolocation: %{database_type: type}} ->
          type

        _ ->
          "(not configured)"
      end

    info = %{
      geo_database: geo_database,
      build: %{
        version: get_in(build_metadata, ["labels", "org.opencontainers.image.version"]),
        commit: get_in(build_metadata, ["labels", "org.opencontainers.image.revision"]),
        created: get_in(build_metadata, ["labels", "org.opencontainers.image.created"]),
        tags: get_in(build_metadata, ["tags"])
      }
    }

    json(conn, info)
  end

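  # UA parsing is expensive, so results are cached per user-agent string.
  # Cachex.fetch returns {:ok, value} on a cache hit and {:commit, value}
  # when the fallback just computed and stored it; both carry the parsed UA.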
  defp parse_user_agent(conn) do
    user_agent = Plug.Conn.get_req_header(conn, "user-agent") |> List.first()

    if user_agent do
      res =
        Cachex.fetch(:user_agents, user_agent, fn ua ->
          UAInspector.parse(ua)
        end)

      case res do
        {:ok, user_agent} -> user_agent
        {:commit, user_agent} -> user_agent
        _ -> nil
      end
    end
  end

  @no_domain_error {:error, %{domain: ["can't be blank"]}}

  require OpenTelemetry.Tracer, as: Tracer

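  # Core ingestion logic. Normalises the tracker's short param aliases
  # (n/u/r/d/w/h), enriches the event (UA, geolocation, referrer, UTM tags)
  # and writes one event per domain named in the payload.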
  defp create_event(conn, params) do
    params = %{
      "name" => params["n"] || params["name"],
      "url" => params["u"] || params["url"],
      "referrer" => params["r"] || params["referrer"],
      "domain" => params["d"] || params["domain"],
      "screen_width" => params["w"] || params["screen_width"],
      "hash_mode" => params["h"] || params["hashMode"],
      "meta" => parse_meta(params)
    }

    ua =
      Tracer.with_span "parse_user_agent" do
        parse_user_agent(conn)
      end

    blacklist_domain = params["domain"] in Application.get_env(:plausible, :domain_blacklist)
    referrer_spam = is_spammer?(params["referrer"])

    if is_bot?(ua) || blacklist_domain || referrer_spam do
      :ok
    else
      uri = params["url"] && URI.parse(params["url"])
      host = if uri && uri.host == "", do: "(none)", else: uri && uri.host
      query = decode_query_params(uri)
      ref = parse_referrer(uri, params["referrer"])

      location_details =
        Tracer.with_span "parse_visitor_location" do
          visitor_location_details(conn)
        end

      salts = Plausible.Session.Salts.fetch()

      event_attrs = %{
        timestamp: NaiveDateTime.utc_now() |> NaiveDateTime.truncate(:second),
        name: params["name"],
        hostname: strip_www(host),
        pathname: get_pathname(uri, params["hash_mode"]),
        referrer_source: get_referrer_source(query, ref),
        referrer: clean_referrer(ref),
        utm_medium: query["utm_medium"],
        utm_source: query["utm_source"],
        utm_campaign: query["utm_campaign"],
        utm_content: query["utm_content"],
        utm_term: query["utm_term"],
        country_code: location_details[:country_code],
        country_geoname_id: location_details[:country_geoname_id],
        subdivision1_code: location_details[:subdivision1_code],
        subdivision2_code: location_details[:subdivision2_code],
        city_geoname_id: location_details[:city_geoname_id],
        operating_system: ua && os_name(ua),
        operating_system_version: ua && os_version(ua),
        browser: ua && browser_name(ua),
        browser_version: ua && browser_version(ua),
        screen_size: calculate_screen_size(params["screen_width"]),
        "meta.key": Map.keys(params["meta"]),
        "meta.value": Map.values(params["meta"]) |> Enum.map(&Kernel.to_string/1)
      }

      Enum.reduce_while(get_domains(params, uri), @no_domain_error, fn domain, _res ->
        user_id = generate_user_id(conn, domain, event_attrs[:hostname], salts[:current])

        previous_user_id =
          salts[:previous] &&
            generate_user_id(conn, domain, event_attrs[:hostname], salts[:previous])

        changeset =
          event_attrs
          |> Map.merge(%{domain: domain, user_id: user_id})
          |> Plausible.ClickhouseEvent.new()

        if changeset.valid? do
          event = Ecto.Changeset.apply_changes(changeset)

          session_id =
            Tracer.with_span "cache_store_event" do
              Plausible.Session.CacheStore.on_event(event, previous_user_id)
            end

          event
          |> Map.put(:session_id, session_id)
          |> Plausible.Event.WriteBuffer.insert()

          {:cont, :ok}
        else
          errors = Ecto.Changeset.traverse_errors(changeset, &encode_error/1)
          {:halt, {:error, errors}}
        end
      end)
    end
  end

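  # Interpolates changeset error details into the message, e.g. (hypothetical)
  # encode_error({"should be at most %{count} characters", count: 10})
  # #=> "should be at most 10 characters"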
  # https://hexdocs.pm/ecto/Ecto.Changeset.html#traverse_errors/2-examples
  defp encode_error({msg, opts}) do
    Regex.replace(~r"%{(\w+)}", msg, fn _, key ->
      opts |> Keyword.get(String.to_existing_atom(key), key) |> to_string()
    end)
  end

  defp is_bot?(%UAInspector.Result.Bot{}), do: true

  defp is_bot?(%UAInspector.Result{client: %UAInspector.Result.Client{name: "Headless Chrome"}}),
    do: true

  defp is_bot?(_), do: false

  defp is_spammer?(nil), do: false

  defp is_spammer?(referrer_str) do
    uri = URI.parse(referrer_str)
    ReferrerBlocklist.is_spammer?(strip_www(uri.host))
  end

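  # Custom properties can arrive under m/meta/p/props, as a map or a JSON
  # string. Scalar values are kept; empty, nil, list and map values are
  # dropped. A sketch with hypothetical input:
  #
  #   parse_meta(%{"props" => ~s({"author": "jane", "tags": ["a"], "x": ""})})
  #   #=> %{"author" => "jane"}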
  defp parse_meta(params) do
    raw_meta = params["m"] || params["meta"] || params["p"] || params["props"]

    case decode_raw_props(raw_meta) do
      {:ok, parsed_json} ->
        Enum.filter(parsed_json, fn
          {_, ""} -> false
          {_, nil} -> false
          {_, val} when is_list(val) -> false
          {_, val} when is_map(val) -> false
          _ -> true
        end)
        |> Map.new()

      _ ->
        %{}
    end
  end

  defp decode_raw_props(props) when is_map(props), do: {:ok, props}

  defp decode_raw_props(raw_json) when is_binary(raw_json) do
    case Jason.decode(raw_json) do
      {:ok, parsed_props} when is_map(parsed_props) ->
        {:ok, parsed_props}

      _ ->
        :not_a_map
    end
  end

  defp decode_raw_props(_), do: :bad_format

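  # A single payload may target several sites: "foo.com, www.bar.com"
  # becomes ["foo.com", "bar.com"]. Falls back to the event URL's host.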
  defp get_domains(params, uri) do
    if params["domain"] do
      String.split(params["domain"], ",")
      |> Enum.map(&String.trim/1)
      |> Enum.map(&strip_www/1)
    else
      List.wrap(strip_www(uri && uri.host))
    end
  end

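  # With hash-based routing enabled the fragment is part of the page path,
  # e.g. (hypothetical) https://app.example.com/#/settings is recorded
  # as "/#/settings" rather than "/".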
  defp get_pathname(nil, _), do: "/"

  defp get_pathname(uri, hash_mode) do
    pathname =
      (uri.path || "/")
      |> URI.decode()
      |> String.trim_trailing()

    if hash_mode && uri.fragment do
      pathname <> "#" <> URI.decode(uri.fragment)
    else
      pathname
    end
  end

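  # Maps GeoNames IDs of districts/boroughs to the GeoNames ID of their
  # parent city, so visits are attributed to e.g. Vienna rather than to
  # "Gemeindebezirk Floridsdorf".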
  @city_overrides %{
    # Austria
    # Gemeindebezirk Floridsdorf -> Vienna
    2_779_467 => 2_761_369,
    # Gemeindebezirk Leopoldstadt -> Vienna
    2_772_614 => 2_761_369,
    # Gemeindebezirk Landstrasse -> Vienna
    2_773_040 => 2_761_369,
    # Gemeindebezirk Donaustadt -> Vienna
    2_780_851 => 2_761_369,
    # Gemeindebezirk Favoriten -> Vienna
    2_779_776 => 2_761_369,
    # Gemeindebezirk Währing -> Vienna
    2_762_091 => 2_761_369,
    # Gemeindebezirk Wieden -> Vienna
    2_761_393 => 2_761_369,
    # Gemeindebezirk Innere Stadt -> Vienna
    2_775_259 => 2_761_369,
    # Gemeindebezirk Alsergrund -> Vienna
    2_782_729 => 2_761_369,
    # Gemeindebezirk Liesing -> Vienna
    2_772_484 => 2_761_369,
    # Urfahr -> Linz
    2_762_518 => 2_772_400,

    # Canada
    # Old Toronto -> Toronto
    8_436_019 => 6_167_865,
    # Etobicoke -> Toronto
    5_950_267 => 6_167_865,
    # East York -> Toronto
    5_946_235 => 6_167_865,
    # Scarborough -> Toronto
    6_948_711 => 6_167_865,
    # North York -> Toronto
    6_091_104 => 6_167_865,

    # Czech Republic
    # Praha 5 -> Prague
    11_951_220 => 3_067_696,
    # Praha 4 -> Prague
    11_951_218 => 3_067_696,
    # Praha 11 -> Prague
    11_951_232 => 3_067_696,
    # Praha 10 -> Prague
    11_951_210 => 3_067_696,
    # Praha 4 -> Prague
    8_378_772 => 3_067_696,

    # Denmark
    # København SV -> Copenhagen
    11_747_123 => 2_618_425,
    # København NV -> Copenhagen
    11_746_894 => 2_618_425,
    # Odense S -> Odense
    11_746_825 => 2_615_876,
    # Odense M -> Odense
    11_746_974 => 2_615_876,
    # Odense SØ -> Odense
    11_746_888 => 2_615_876,
    # Aarhus C -> Aarhus
    11_746_746 => 2_624_652,
    # Aarhus N -> Aarhus
    11_746_890 => 2_624_652,

    # Estonia
    # Kristiine linnaosa -> Tallinn
    11_050_530 => 588_409,
    # Kesklinna linnaosa -> Tallinn
    11_053_706 => 588_409,
    # Lasnamäe linnaosa -> Tallinn
    11_050_526 => 588_409,
    # Põhja-Tallinna linnaosa -> Tallinn
    11_049_594 => 588_409,
    # Mustamäe linnaosa -> Tallinn
    11_050_531 => 588_409,
    # Haabersti linnaosa -> Tallinn
    11_053_707 => 588_409,
    # Viimsi -> Tallinn
    587_629 => 588_409,

    # Germany
    # Bezirk Tempelhof-Schöneberg -> Berlin
    3_336_297 => 2_950_159,
    # Bezirk Mitte -> Berlin
    2_870_912 => 2_950_159,
    # Bezirk Charlottenburg-Wilmersdorf -> Berlin
    3_336_294 => 2_950_159,
    # Bezirk Friedrichshain-Kreuzberg -> Berlin
    3_336_295 => 2_950_159,
    # Moosach -> Munich
    8_351_447 => 2_867_714,
    # Schwabing-Freimann -> Munich
    8_351_448 => 2_867_714,
    # Stadtbezirk 06 -> Düsseldorf
    6_947_276 => 2_934_246,
    # Stadtbezirk 04 -> Düsseldorf
    6_947_274 => 2_934_246,
    # Köln-Ehrenfeld -> Köln
    6_947_479 => 2_886_242,
    # Köln-Lindenthal -> Köln
    6_947_481 => 2_886_242,
    # Beuel -> Bonn
    2_949_619 => 2_946_447,
    # Innenstadt I -> Frankfurt am Main
    6_946_225 => 2_925_533,

    # India
    # Navi Mumbai -> Mumbai
    6_619_347 => 1_275_339,

    # Mexico
    # Miguel Hidalgo Villa Olímpica -> Mexico City
    11_561_026 => 3_530_597,
    # Zedec Santa Fe -> Mexico City
    3_517_471 => 3_530_597,
    # Fuentes del Pedregal -> Mexico City
    11_562_596 => 3_530_597,
    # Centro -> Mexico City
    9_179_691 => 3_530_597,
    # Cuauhtémoc -> Mexico City
    12_266_959 => 3_530_597,

    # Netherlands
    # Schiphol-Rijk -> Amsterdam
    10_173_838 => 2_759_794,
    # Westpoort -> Amsterdam
    11_525_047 => 2_759_794,
    # Amsterdam-Zuidoost -> Amsterdam
    6_544_881 => 2_759_794,
    # Loosduinen -> The Hague
    11_525_037 => 2_747_373,
    # Laak -> The Hague
    11_525_042 => 2_747_373,

    # Norway
    # Nordre Aker District -> Oslo
    6_940_981 => 3_143_244,

    # Romania
    # Sector 1 -> Bucharest
    11_055_041 => 683_506,
    # Sector 2 -> Bucharest
    11_055_040 => 683_506,
    # Sector 3 -> Bucharest
    11_055_044 => 683_506,
    # Sector 4 -> Bucharest
    11_055_042 => 683_506,
    # Sector 5 -> Bucharest
    11_055_043 => 683_506,
    # Sector 6 -> Bucharest
    11_055_039 => 683_506,
    # Bucuresti -> Bucharest
    6_691_781 => 683_506,

    # Slovakia
    # Bratislava -> Bratislava
    3_343_955 => 3_060_972,

    # Sweden
    # Södermalm -> Stockholm
    2_676_209 => 2_673_730,

    # Switzerland
    # Vorstädte -> Basel
    11_789_440 => 2_661_604,
    # Zürich (Kreis 11) / Oerlikon -> Zürich
    2_659_310 => 2_657_896,
    # Zürich (Kreis 3) / Alt-Wiedikon -> Zürich
    2_658_007 => 2_657_896,
    # Zürich (Kreis 5) -> Zürich
    6_295_521 => 2_657_896,
    # Zürich (Kreis 1) / Hochschulen -> Zürich
    6_295_489 => 2_657_896,

    # UK
    # Shadwell -> London
    6_690_595 => 2_643_743,
    # City of London -> London
    2_643_741 => 2_643_743,
    # South Bank -> London
    6_545_251 => 2_643_743,
    # Soho -> London
    6_545_173 => 2_643_743,
    # Whitechapel -> London
    2_634_112 => 2_643_743,
    # King's Cross -> London
    6_690_589 => 2_643_743,
    # Poplar -> London
    2_640_091 => 2_643_743,
    # Hackney -> London
    2_647_694 => 2_643_743
  }

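  # Resolves the visitor's IP via Geolix (MaxMind-style database). The
  # subdivision codes are ISO 3166-2 style, e.g. "US-CA" (hypothetical);
  # city IDs are normalised through @city_overrides above.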
  defp visitor_location_details(conn) do
    result =
      PlausibleWeb.RemoteIp.get(conn)
      |> Geolix.lookup()

    country_code =
      get_in(result, [:geolocation, :country, :iso_code])
      |> ignore_unknown_country()

    city_geoname_id = get_in(result, [:geolocation, :city, :geoname_id])

    subdivision1_code =
      case result do
        %{geolocation: %{subdivisions: [%{iso_code: iso_code} | _rest]}} ->
          country_code <> "-" <> iso_code

        _ ->
          ""
      end

    subdivision2_code =
      case result do
        %{geolocation: %{subdivisions: [_first, %{iso_code: iso_code} | _rest]}} ->
          country_code <> "-" <> iso_code

        _ ->
          ""
      end

    %{
      country_code: country_code,
      subdivision1_code: subdivision1_code,
      subdivision2_code: subdivision2_code,
      city_geoname_id: Map.get(@city_overrides, city_geoname_id, city_geoname_id)
    }
  end

  defp ignore_unknown_country("ZZ"), do: nil
  defp ignore_unknown_country(country), do: country

  defp parse_referrer(_, nil), do: nil

  defp parse_referrer(uri, referrer_str) do
    referrer_uri = URI.parse(referrer_str)

    if strip_www(referrer_uri.host) !== strip_www(uri.host) && referrer_uri.host !== "localhost" do
      RefInspector.parse(referrer_str)
    end
  end

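  # Anonymous visitor fingerprint: a SipHash of user-agent <> IP <> domain
  # <> root domain, keyed with a rotating salt. The previous salt is kept
  # around so sessions spanning a rotation can still be matched.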
  defp generate_user_id(conn, domain, hostname, salt) do
    user_agent = List.first(Plug.Conn.get_req_header(conn, "user-agent")) || ""
    ip_address = PlausibleWeb.RemoteIp.get(conn)
    root_domain = get_root_domain(hostname)

    if domain && root_domain do
      SipHash.hash!(salt, user_agent <> ip_address <> domain <> root_domain)
    end
  end

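  # e.g. PublicSuffix.registrable_domain("blog.example.co.uk") returns
  # "example.co.uk"; non-registrable hostnames fall back to themselves.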
  defp get_root_domain(nil), do: "(none)"

  defp get_root_domain(hostname) do
    case PublicSuffix.registrable_domain(hostname) do
      domain when is_binary(domain) -> domain
      _ -> hostname
    end
  end

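  # Screen-width buckets (breakpoints in CSS pixels):
  #
  #   calculate_screen_size(375)   #=> "Mobile"
  #   calculate_screen_size(768)   #=> "Tablet"
  #   calculate_screen_size(1024)  #=> "Laptop"
  #   calculate_screen_size(1920)  #=> "Desktop"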
  defp calculate_screen_size(nil), do: nil
  defp calculate_screen_size(width) when width < 576, do: "Mobile"
  defp calculate_screen_size(width) when width < 992, do: "Tablet"
  defp calculate_screen_size(width) when width < 1440, do: "Laptop"
  defp calculate_screen_size(width) when width >= 1440, do: "Desktop"

  defp clean_referrer(nil), do: nil

  defp clean_referrer(ref) do
    uri = URI.parse(ref.referer)

    if PlausibleWeb.RefInspector.right_uri?(uri) do
      host = String.replace_prefix(uri.host, "www.", "")
      path = uri.path || ""
      host <> String.trim_trailing(path, "/")
    end
  end

  defp parse_body(conn) do
    case conn.body_params do
      %Plug.Conn.Unfetched{} ->
        {:ok, body, _conn} = Plug.Conn.read_body(conn)

        case Jason.decode(body) do
          {:ok, params} -> {:ok, params}
          _ -> {:error, :invalid_json}
        end

      params ->
        {:ok, params}
    end
  end

  defp strip_www(nil), do: nil

  defp strip_www(hostname) do
    String.replace_prefix(hostname, "www.", "")
  end

  defp browser_name(ua) do
    case ua.client do
      :unknown -> ""
      %UAInspector.Result.Client{name: "Mobile Safari"} -> "Safari"
      %UAInspector.Result.Client{name: "Chrome Mobile"} -> "Chrome"
      %UAInspector.Result.Client{name: "Chrome Mobile iOS"} -> "Chrome"
      %UAInspector.Result.Client{name: "Firefox Mobile"} -> "Firefox"
      %UAInspector.Result.Client{name: "Firefox Mobile iOS"} -> "Firefox"
      %UAInspector.Result.Client{name: "Opera Mobile"} -> "Opera"
      %UAInspector.Result.Client{name: "Opera Mini"} -> "Opera"
      %UAInspector.Result.Client{name: "Opera Mini iOS"} -> "Opera"
      %UAInspector.Result.Client{name: "Yandex Browser Lite"} -> "Yandex Browser"
      %UAInspector.Result.Client{name: "Chrome Webview"} -> "Mobile App"
      %UAInspector.Result.Client{type: "mobile app"} -> "Mobile App"
      client -> client.name
    end
  end

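  # Keeps only the first two version components, e.g. "101.0.4951.41" -> "101.0".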
  defp major_minor(:unknown), do: ""

  defp major_minor(version) do
    version
    |> String.split(".")
    |> Enum.take(2)
    |> Enum.join(".")
  end

  defp browser_version(ua) do
    case ua.client do
      :unknown -> ""
      %UAInspector.Result.Client{type: "mobile app"} -> ""
      client -> major_minor(client.version)
    end
  end

  defp os_name(ua) do
    case ua.os do
      :unknown -> ""
      os -> os.name
    end
  end

  defp os_version(ua) do
    case ua.os do
      :unknown -> ""
      os -> major_minor(os.version)
    end
  end

  defp get_referrer_source(query, ref) do
    source = query["utm_source"] || query["source"] || query["ref"]

    source || PlausibleWeb.RefInspector.parse(ref)
  end

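  # e.g. a URI with query "utm_source=newsletter&ref=producthunt" decodes to
  # %{"utm_source" => "newsletter", "ref" => "producthunt"}; malformed query
  # strings are ignored rather than failing the whole event.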
  defp decode_query_params(nil), do: nil
  defp decode_query_params(%URI{query: nil}), do: nil

  defp decode_query_params(%URI{query: query_part}) do
    try do
      URI.decode_query(query_part)
    rescue
      _ -> nil
    end
  end
end