mirror of
https://github.com/plausible/analytics.git
synced 2024-12-22 09:01:40 +03:00
Channel and source data updates (#4599)
* Channel and source data updates * Update source mappings for migration * Fix codespell Co-authored-by: Karl-Aksel Puulmann <macobo@users.noreply.github.com> * Update lib/plausible/ingestion/acquisition.ex Co-authored-by: Karl-Aksel Puulmann <macobo@users.noreply.github.com> * Standardize access to utm params * Add wikipedia as "known" source * Move custom sources to json file * Add some advertising utm_sources * Move source mapping logic to refinspector file * Rename PlausibleWeb.RefInspector -> Plausible.Ingestion.Source * Move mapping overrides to custom_sources.json * More robust detection of paid sources * Add missing utm_sources to migration * Codespell * Add moduledoc for Plausible.Ingestion.Source * Fix dialyzer * Remove migration * Add more custom favicons * Re-generate referrer favicons file * Add doctest for sources --------- Co-authored-by: Karl-Aksel Puulmann <macobo@users.noreply.github.com>
This commit is contained in:
parent
62fb285b71
commit
c3a06caa97
@ -5,4 +5,4 @@ Taht
|
||||
taht
|
||||
referer
|
||||
referers
|
||||
|
||||
statics
|
||||
|
@ -108,7 +108,7 @@ defmodule Plausible.Application do
|
||||
|
||||
setup_geolocation()
|
||||
Location.load_all()
|
||||
Plausible.Ingestion.Acquisition.init()
|
||||
Plausible.Ingestion.Source.init()
|
||||
Plausible.Geo.await_loader()
|
||||
|
||||
Supervisor.start_link(List.flatten(children), opts)
|
||||
|
@ -174,7 +174,7 @@ defmodule Plausible.Imported.GoogleAnalytics4 do
|
||||
site_id: site_id,
|
||||
import_id: import_id,
|
||||
date: get_date(row),
|
||||
source: row.dimensions |> Map.fetch!("sessionSource") |> parse_referrer(),
|
||||
source: row.dimensions |> Map.fetch!("sessionSource") |> parse_source(),
|
||||
referrer: nil,
|
||||
# Only `source` exists in GA4 API
|
||||
utm_source: nil,
|
||||
@ -343,14 +343,13 @@ defmodule Plausible.Imported.GoogleAnalytics4 do
|
||||
defp default_if_missing(value, default) when value in @missing_values, do: default
|
||||
defp default_if_missing(value, _default), do: value
|
||||
|
||||
defp parse_referrer(nil), do: nil
|
||||
defp parse_referrer("(direct)"), do: nil
|
||||
defp parse_referrer("google"), do: "Google"
|
||||
defp parse_referrer("bing"), do: "Bing"
|
||||
defp parse_referrer("duckduckgo"), do: "DuckDuckGo"
|
||||
defp parse_source(nil), do: nil
|
||||
defp parse_source("(direct)"), do: nil
|
||||
defp parse_source("google"), do: "Google"
|
||||
defp parse_source("bing"), do: "Bing"
|
||||
defp parse_source("duckduckgo"), do: "DuckDuckGo"
|
||||
|
||||
defp parse_referrer(ref) do
|
||||
RefInspector.parse("https://" <> ref)
|
||||
|> PlausibleWeb.RefInspector.parse()
|
||||
defp parse_source(ref) do
|
||||
Plausible.Ingestion.Source.parse("https://" <> ref)
|
||||
end
|
||||
end
|
||||
|
@ -1,37 +1,53 @@
|
||||
defmodule Plausible.Ingestion.Acquisition do
|
||||
@moduledoc false
|
||||
@moduledoc """
|
||||
This module is responsible for figuring out acquisition channel from event referrer_source.
|
||||
|
||||
Acquisition channel is the marketing channel where people come from and convert. It helps
|
||||
users to understand and improve their marketing flow.
|
||||
|
||||
Note it uses priv/ga4-source-categories.csv as a source, which comes from https://support.google.com/analytics/answer/9756891?hl=en.
|
||||
|
||||
Notable differences from GA4 that have been implemented just for Plausible:
|
||||
1. The @custom_source_categories module attribute contains a list of custom source categories that we have manually
|
||||
added based on our own judgement and user feedback. For example we treat AI tools (ChatGPT, Perplexity) as search engines.
|
||||
2. Google is in a privileged position to analyze paid traffic from within their own network. The biggest use-case is auto-tagged adwords campaigns.
|
||||
We do our best by categorizing as paid search when source is Google and the url has `gclid` parameter. Same for source Bing and `msclkid` url parameter.
|
||||
3. The @paid_sources module attribute in Plausible.Ingestion.Source contains a list of utm_sources that we will automatically categorize as paid traffic
|
||||
regardless of the medium. Examples are `yt-ads`, `facebook_ad`, `adwords`, etc. See also: Plausible.Ingestion.Source.paid_source?/1
|
||||
"""
|
||||
|
||||
@external_resource "priv/ga4-source-categories.csv"
|
||||
@custom_source_categories [
|
||||
{"hacker news", "SOURCE_CATEGORY_SOCIAL"},
|
||||
{"yahoo!", "SOURCE_CATEGORY_SEARCH"},
|
||||
{"gmail", "SOURCE_CATEGORY_EMAIL"},
|
||||
{"telegram", "SOURCE_CATEGORY_SOCIAL"},
|
||||
{"slack", "SOURCE_CATEGORY_SOCIAL"},
|
||||
{"producthunt", "SOURCE_CATEGORY_SOCIAL"},
|
||||
{"github", "SOURCE_CATEGORY_SOCIAL"},
|
||||
{"steamcommunity.com", "SOURCE_CATEGORY_SOCIAL"},
|
||||
{"statics.teams.cdn.office.net", "SOURCE_CATEGORY_SOCIAL"},
|
||||
{"vkontakte", "SOURCE_CATEGORY_SOCIAL"},
|
||||
{"threads", "SOURCE_CATEGORY_SOCIAL"},
|
||||
{"ecosia", "SOURCE_CATEGORY_SEARCH"},
|
||||
{"perplexity", "SOURCE_CATEGORY_SEARCH"},
|
||||
{"brave", "SOURCE_CATEGORY_SEARCH"},
|
||||
{"chatgpt.com", "SOURCE_CATEGORY_SEARCH"},
|
||||
{"temu.com", "SOURCE_CATEGORY_SHOPPING"},
|
||||
{"discord", "SOURCE_CATEGORY_SOCIAL"},
|
||||
{"sogou", "SOURCE_CATEGORY_SEARCH"},
|
||||
{"microsoft teams", "SOURCE_CATEGORY_SOCIAL"}
|
||||
]
|
||||
@source_categories Application.app_dir(:plausible, "priv/ga4-source-categories.csv")
|
||||
|> File.read!()
|
||||
|> NimbleCSV.RFC4180.parse_string(skip_headers: false)
|
||||
|> Enum.map(fn [source, category] -> {source, category} end)
|
||||
|> then(&(@custom_source_categories ++ &1))
|
||||
|> Enum.into(%{})
|
||||
|
||||
def init() do
|
||||
:ets.new(__MODULE__, [
|
||||
:named_table,
|
||||
:set,
|
||||
:public,
|
||||
{:read_concurrency, true}
|
||||
])
|
||||
|
||||
[{"referers.yml", map}] = RefInspector.Database.list(:default)
|
||||
|
||||
Enum.flat_map(map, fn {_, entries} ->
|
||||
Enum.map(entries, fn {_, _, _, _, _, _, name} ->
|
||||
:ets.insert(__MODULE__, {String.downcase(name), name})
|
||||
end)
|
||||
end)
|
||||
end
|
||||
|
||||
def find_mapping(source) do
|
||||
case :ets.lookup(__MODULE__, source) do
|
||||
[{_, name}] -> name
|
||||
_ -> source
|
||||
end
|
||||
end
|
||||
|
||||
def get_channel(request, source) do
|
||||
source = source && String.downcase(source)
|
||||
|
||||
cond do
|
||||
cross_network?(request) -> "Cross-network"
|
||||
paid_shopping?(request, source) -> "Paid Shopping"
|
||||
@ -44,7 +60,7 @@ defmodule Plausible.Ingestion.Acquisition do
|
||||
organic_social?(request, source) -> "Organic Social"
|
||||
organic_video?(request, source) -> "Organic Video"
|
||||
search_source?(source) -> "Organic Search"
|
||||
email?(request) -> "Email"
|
||||
email?(request, source) -> "Email"
|
||||
affiliates?(request) -> "Affiliates"
|
||||
audio?(request) -> "Audio"
|
||||
sms?(request) -> "SMS"
|
||||
@ -55,30 +71,32 @@ defmodule Plausible.Ingestion.Acquisition do
|
||||
end
|
||||
|
||||
defp cross_network?(request) do
|
||||
String.contains?(request.query_params["utm_campaign"] || "", "cross-network")
|
||||
String.contains?(query_param(request, "utm_campaign"), "cross-network")
|
||||
end
|
||||
|
||||
defp paid_shopping?(request, source) do
|
||||
(shopping_source?(source) or shopping_campaign?(request.query_params["utm_campaign"])) and
|
||||
paid_medium?(request.query_params["utm_medium"])
|
||||
(shopping_source?(source) or shopping_campaign?(request)) and paid_medium?(request)
|
||||
end
|
||||
|
||||
defp paid_search?(request, source) do
|
||||
(search_source?(source) and paid_medium?(request.query_params["utm_medium"])) or
|
||||
(source == "Google" and !!request.query_params["gclid"]) or
|
||||
(source == "Bing" and !!request.query_params["msclkid"])
|
||||
(search_source?(source) and paid_medium?(request)) or
|
||||
(search_source?(source) and paid_source?(request)) or
|
||||
(source == "google" and !!request.query_params["gclid"]) or
|
||||
(source == "bing" and !!request.query_params["msclkid"])
|
||||
end
|
||||
|
||||
defp paid_social?(request, source) do
|
||||
social_source?(source) and paid_medium?(request.query_params["utm_medium"])
|
||||
(social_source?(source) and paid_medium?(request)) or
|
||||
(social_source?(source) and paid_source?(request))
|
||||
end
|
||||
|
||||
defp paid_video?(request, source) do
|
||||
video_source?(source) and paid_medium?(request.query_params["utm_medium"])
|
||||
(video_source?(source) and paid_medium?(request)) or
|
||||
(video_source?(source) and paid_source?(request))
|
||||
end
|
||||
|
||||
defp display?(request) do
|
||||
request.query_params["utm_medium"] in [
|
||||
query_param(request, "utm_medium") in [
|
||||
"display",
|
||||
"banner",
|
||||
"expandable",
|
||||
@ -88,16 +106,16 @@ defmodule Plausible.Ingestion.Acquisition do
|
||||
end
|
||||
|
||||
defp paid_other?(request) do
|
||||
paid_medium?(request.query_params["utm_medium"])
|
||||
paid_medium?(request)
|
||||
end
|
||||
|
||||
defp organic_shopping?(request, source) do
|
||||
shopping_source?(source) or shopping_campaign?(request.query_params["utm_campaign"])
|
||||
shopping_source?(source) or shopping_campaign?(request)
|
||||
end
|
||||
|
||||
defp organic_social?(request, source) do
|
||||
social_source?(source) or
|
||||
request.query_params["utm_medium"] in [
|
||||
query_param(request, "utm_medium") in [
|
||||
"social",
|
||||
"social-network",
|
||||
"social-media",
|
||||
@ -108,71 +126,88 @@ defmodule Plausible.Ingestion.Acquisition do
|
||||
end
|
||||
|
||||
defp organic_video?(request, source) do
|
||||
video_source?(source) or String.contains?(request.query_params["utm_medium"] || "", "video")
|
||||
video_source?(source) or String.contains?(query_param(request, "utm_medium"), "video")
|
||||
end
|
||||
|
||||
defp referral?(request, source) do
|
||||
request.query_params["utm_medium"] in ["referral", "app", "link"] or
|
||||
query_param(request, "utm_medium") in ["referral", "app", "link"] or
|
||||
!!source
|
||||
end
|
||||
|
||||
@email_tags ["email", "e-mail", "e_mail", "e mail"]
|
||||
defp email?(request) do
|
||||
String.contains?(request.query_params["utm_source"] || "", @email_tags) or
|
||||
String.contains?(request.query_params["utm_medium"] || "", @email_tags)
|
||||
@email_tags ["email", "e-mail", "e_mail", "e mail", "newsletter"]
|
||||
defp email?(request, source) do
|
||||
email_source?(source) or
|
||||
String.contains?(query_param(request, "utm_source"), @email_tags) or
|
||||
String.contains?(query_param(request, "utm_medium"), @email_tags)
|
||||
end
|
||||
|
||||
defp affiliates?(request) do
|
||||
request.query_params["utm_medium"] == "affiliate"
|
||||
query_param(request, "utm_medium") == "affiliate"
|
||||
end
|
||||
|
||||
defp audio?(request) do
|
||||
request.query_params["utm_medium"] == "audio"
|
||||
query_param(request, "utm_medium") == "audio"
|
||||
end
|
||||
|
||||
defp sms?(request) do
|
||||
request.query_params["utm_source"] == "sms" or
|
||||
request.query_params["utm_medium"] == "sms"
|
||||
query_param(request, "utm_source") == "sms" or
|
||||
query_param(request, "utm_medium") == "sms"
|
||||
end
|
||||
|
||||
defp mobile_push_notifications?(request, source) do
|
||||
medium = request.query_params["utm_medium"] || ""
|
||||
medium = query_param(request, "utm_medium")
|
||||
|
||||
String.ends_with?(medium, "push") or
|
||||
String.contains?(medium, ["mobile", "notification"]) or
|
||||
source == "firebase"
|
||||
end
|
||||
|
||||
# # Helper functions for source and medium checks
|
||||
defp shopping_source?(nil), do: false
|
||||
|
||||
defp shopping_source?(source) do
|
||||
@source_categories[String.downcase(source)] == "SOURCE_CATEGORY_SHOPPING"
|
||||
end
|
||||
|
||||
defp shopping_campaign?(campaign_name) do
|
||||
Regex.match?(~r/^(.*(([^a-df-z]|^)shop|shopping).*)$/, campaign_name || "")
|
||||
@source_categories[source] == "SOURCE_CATEGORY_SHOPPING"
|
||||
end
|
||||
|
||||
defp search_source?(nil), do: false
|
||||
|
||||
defp search_source?(source) do
|
||||
@source_categories[String.downcase(source)] == "SOURCE_CATEGORY_SEARCH"
|
||||
@source_categories[source] == "SOURCE_CATEGORY_SEARCH"
|
||||
end
|
||||
|
||||
defp social_source?(nil), do: false
|
||||
|
||||
defp social_source?(source) do
|
||||
@source_categories[String.downcase(source)] == "SOURCE_CATEGORY_SOCIAL"
|
||||
@source_categories[source] == "SOURCE_CATEGORY_SOCIAL"
|
||||
end
|
||||
|
||||
defp video_source?(nil), do: false
|
||||
|
||||
defp video_source?(source) do
|
||||
@source_categories[String.downcase(source)] == "SOURCE_CATEGORY_VIDEO"
|
||||
@source_categories[source] == "SOURCE_CATEGORY_VIDEO"
|
||||
end
|
||||
|
||||
defp paid_medium?(medium) do
|
||||
Regex.match?(~r/^(.*cp.*|ppc|retargeting|paid.*)$/, medium || "")
|
||||
defp email_source?(nil), do: false
|
||||
|
||||
defp email_source?(source) do
|
||||
@source_categories[source] == "SOURCE_CATEGORY_EMAIL"
|
||||
end
|
||||
|
||||
defp shopping_campaign?(request) do
|
||||
campaign_name = query_param(request, "utm_campaign")
|
||||
Regex.match?(~r/^(.*(([^a-df-z]|^)shop|shopping).*)$/, campaign_name)
|
||||
end
|
||||
|
||||
defp paid_medium?(request) do
|
||||
medium = query_param(request, "utm_medium")
|
||||
Regex.match?(~r/^(.*cp.*|ppc|retargeting|paid.*)$/, medium)
|
||||
end
|
||||
|
||||
defp paid_source?(request) do
|
||||
query_param(request, "utm_source")
|
||||
|> Plausible.Ingestion.Source.paid_source?()
|
||||
end
|
||||
|
||||
defp query_param(request, name) do
|
||||
String.downcase(request.query_params[name] || "")
|
||||
end
|
||||
end
|
||||
|
@ -251,14 +251,13 @@ defmodule Plausible.Ingestion.Event do
|
||||
end
|
||||
|
||||
defp put_referrer(%__MODULE__{} = event, _context) do
|
||||
ref = parse_referrer(event.request.uri, event.request.referrer)
|
||||
source = get_referrer_source(event.request, ref)
|
||||
source = Plausible.Ingestion.Source.resolve(event.request)
|
||||
channel = Plausible.Ingestion.Acquisition.get_channel(event.request, source)
|
||||
|
||||
update_session_attrs(event, %{
|
||||
channel: channel,
|
||||
referrer_source: source,
|
||||
referrer: clean_referrer(ref)
|
||||
referrer: Plausible.Ingestion.Source.format_referrer(event.request.referrer)
|
||||
})
|
||||
end
|
||||
|
||||
@ -392,40 +391,6 @@ defmodule Plausible.Ingestion.Event do
|
||||
event
|
||||
end
|
||||
|
||||
defp parse_referrer(_uri, _referrer_str = nil), do: nil
|
||||
|
||||
defp parse_referrer(uri, referrer_str) do
|
||||
referrer_uri = URI.parse(referrer_str)
|
||||
|
||||
if Request.sanitize_hostname(referrer_uri.host) !== Request.sanitize_hostname(uri.host) &&
|
||||
referrer_uri.host !== "localhost" do
|
||||
RefInspector.parse(referrer_str)
|
||||
end
|
||||
end
|
||||
|
||||
defp get_referrer_source(request, ref) do
|
||||
tagged_source =
|
||||
request.query_params["utm_source"] ||
|
||||
request.query_params["source"] ||
|
||||
request.query_params["ref"]
|
||||
|
||||
if tagged_source do
|
||||
Plausible.Ingestion.Acquisition.find_mapping(tagged_source)
|
||||
else
|
||||
PlausibleWeb.RefInspector.parse(ref)
|
||||
end
|
||||
end
|
||||
|
||||
defp clean_referrer(nil), do: nil
|
||||
|
||||
defp clean_referrer(ref) do
|
||||
uri = URI.parse(ref.referer)
|
||||
|
||||
if PlausibleWeb.RefInspector.right_uri?(uri) do
|
||||
PlausibleWeb.RefInspector.format_referrer(uri)
|
||||
end
|
||||
end
|
||||
|
||||
defp parse_user_agent(%Request{user_agent: user_agent}) when is_binary(user_agent) do
|
||||
Plausible.Cache.Adapter.get(:user_agents, user_agent, fn ->
|
||||
UAInspector.parse(user_agent)
|
||||
|
147
lib/plausible/ingestion/source.ex
Normal file
147
lib/plausible/ingestion/source.ex
Normal file
@ -0,0 +1,147 @@
|
||||
defmodule Plausible.Ingestion.Source do
  @moduledoc """
  Resolves the `source` dimension from a combination of `referer` header and either `utm_source`, `source`, or `ref` query parameter.

  Known source names are kept in a named ETS table that is filled by `init/0` from the
  RefInspector referer database and from `priv/custom_sources.json`. `init/0` has to run
  before `find_mapping/1` or `resolve/1` are used.
  """
  alias Plausible.Ingestion.Request

  @external_resource "priv/custom_sources.json"
  @custom_sources Application.app_dir(:plausible, "priv/custom_sources.json")
                  |> File.read!()
                  |> Jason.decode!()

  # Every custom source key ending in "ad"/"ads", plus "adwords" (which does not
  # match either suffix), is treated as a paid utm_source.
  @paid_sources Map.keys(@custom_sources)
                |> Enum.filter(&String.ends_with?(&1, ["ads", "ad"]))
                |> then(&["adwords" | &1])
                |> MapSet.new()

  @doc """
  Creates the ETS lookup table named after this module and fills it with
  `{downcased_name, canonical_name}` entries.

  Entries come from the RefInspector `:default` database first and from
  `priv/custom_sources.json` second. The table is a `:set`, so a custom source
  overwrites a RefInspector entry stored under the same key.
  """
  def init() do
    :ets.new(__MODULE__, [
      :named_table,
      :set,
      :public,
      {:read_concurrency, true}
    ])

    [{"referers.yml", map}] = RefInspector.Database.list(:default)

    Enum.each(map, fn {_, entries} ->
      Enum.each(entries, fn {_, _, _, _, _, _, name} ->
        :ets.insert(__MODULE__, {String.downcase(name), name})
      end)
    end)

    Enum.each(@custom_sources, fn {key, val} ->
      # Both the shorthand/domain key and the downcased canonical name resolve
      # to the canonical name (e.g. "ig" and "instagram" -> "Instagram").
      :ets.insert(__MODULE__, {key, val})
      :ets.insert(__MODULE__, {String.downcase(val), val})
    end)
  end

  @doc """
  Returns `true` when `source` is one of the utm_source values categorized as paid traffic.

  The comparison is an exact, case-sensitive set lookup, so callers are expected to pass
  an already downcased value.
  """
  def paid_source?(source) do
    MapSet.member?(@paid_sources, source)
  end

  @doc """
  Resolves the source of a session based on query params and the `Referer` header.

  When a query parameter like `utm_source` is present, it will be prioritized over the `Referer` header. When the URL does not contain a source tag, we fall
  back to using `Referer` to determine the source. This module also takes care of certain transformations to make the data more useful for the user:
  1. The RefInspector library is used to categorize referrers into "known" sources. For example, when the referrer is google.com or google.co.uk,
  it will always be stored as "Google" which is more useful for marketers.
  2. On top of the standard RefInspector behaviour, we also keep a list of `custom_sources.json` which extends it with referrers that we have seen in the wild.
  For example, Wikipedia has many domains that need to be combined into a single known source. These could all in theory be [upstreamed](https://github.com/snowplow-referer-parser/referer-parser).
  3. When a known source is supplied in utm_source (or source, ref) query parameter, we merge it with our known sources in a case-insensitive manner.
  4. Our list of `custom_sources.json` also contains some commonly used utm_source shorthands for certain sources. URL tagging is a mess, and we can never do it
  perfectly, but at least we're making an effort for the most commonly used ones. For example, `ig -> Instagram` and `adwords -> Google`.

  ### Examples:

      iex> alias Plausible.Ingestion.{Source, Request}
      iex> base_request = %Request{uri: URI.parse("https://plausible.io")}
      iex> Source.resolve(%{base_request | referrer: "https://google.com"}) # Known referrer from RefInspector
      "Google"
      iex> Source.resolve(%{base_request | query_params: %{"utm_source" => "google"}}) # Known source from RefInspector supplied as downcased utm_source by user
      "Google"
      iex> Source.resolve(%{base_request | query_params: %{"utm_source" => "GOOGLE"}}) # Known source from RefInspector supplied as uppercased utm_source by user
      "Google"
      iex> Source.resolve(%{base_request | referrer: "https://en.m.wikipedia.org"}) # Known referrer from custom_sources.json
      "Wikipedia"
      iex> Source.resolve(%{base_request | query_params: %{"utm_source" => "wikipedia"}}) # Known source from custom_sources.json supplied as downcased utm_source by user
      "Wikipedia"
      iex> Source.resolve(%{base_request | query_params: %{"utm_source" => "ig"}}) # Known utm_source from custom_sources.json
      "Instagram"
      iex> Source.resolve(%{base_request | referrer: "https://www.markosaric.com"}) # Unknown source, it is just stored as the domain name
      "markosaric.com"
  """
  def resolve(request) do
    tagged_source =
      request.query_params["utm_source"] ||
        request.query_params["source"] ||
        request.query_params["ref"]

    source =
      cond do
        tagged_source -> tagged_source
        has_referral?(request) -> parse(request.referrer)
        true -> nil
      end

    find_mapping(source)
  end

  @doc """
  Turns a referrer URL string into a source name.

  Returns the RefInspector source when the referrer is known to it. Otherwise returns the
  referrer host (without a leading `www.`), or `nil` when the referrer is not a valid
  `http`, `https` or `android-app` URL. A `nil` referrer yields `nil`.
  """
  def parse(nil), do: nil

  def parse(ref) do
    case RefInspector.parse(ref).source do
      :unknown ->
        uri = URI.parse(String.trim(ref))

        if valid_referrer?(uri) do
          format_referrer_host(uri)
        end

      source ->
        source
    end
  end

  @doc """
  Maps `source` to its canonical known name using a case-insensitive lookup in the table
  built by `init/0`. Unknown sources are returned unchanged; `nil` stays `nil`.
  """
  def find_mapping(nil), do: nil

  def find_mapping(source) do
    case :ets.lookup(__MODULE__, String.downcase(source)) do
      [{_, name}] -> name
      _ -> source
    end
  end

  @doc """
  Formats a raw referrer string as `host/path`, dropping a leading `www.` and any trailing
  slashes from the path. Returns `nil` for a `nil` or invalid referrer.
  """
  # NOTE(review): unlike `resolve/1`, this does not go through `has_referral?/1`, so a
  # referrer on the site's own host (or localhost) is still formatted and returned.
  # Confirm that callers want same-site referrers to be kept.
  def format_referrer(nil), do: nil

  def format_referrer(referrer) do
    referrer_uri = URI.parse(referrer)

    if valid_referrer?(referrer_uri) do
      path = String.trim_trailing(referrer_uri.path || "", "/")
      format_referrer_host(referrer_uri) <> path
    end
  end

  # A `nil` host makes `byte_size/1` fail inside the guard, which simply skips this clause.
  defp valid_referrer?(%URI{host: host, scheme: scheme})
       when scheme in ["http", "https", "android-app"] and byte_size(host) > 0,
       do: true

  defp valid_referrer?(_), do: false

  # True when the request carries a referrer whose host is neither the visited site's own
  # host nor localhost. Always returns a boolean.
  defp has_referral?(%Request{referrer: nil}), do: false

  defp has_referral?(%Request{referrer: referrer, uri: uri}) do
    referrer_uri = URI.parse(referrer)

    Request.sanitize_hostname(referrer_uri.host) !== Request.sanitize_hostname(uri.host) and
      referrer_uri.host !== "localhost"
  end

  # Android app referrers keep their scheme so they stay distinguishable from web hosts.
  defp format_referrer_host(uri) do
    protocol = if uri.scheme == "android-app", do: "android-app://", else: ""
    host = String.replace_prefix(uri.host, "www.", "")

    protocol <> host
  end
end
|
@ -31,11 +31,20 @@ defmodule PlausibleWeb.Favicon do
|
||||
|
||||
@placeholder_icon_location "priv/placeholder_favicon.ico"
|
||||
@placeholder_icon File.read!(@placeholder_icon_location)
|
||||
@custom_icons %{
|
||||
"Brave" => "search.brave.com",
|
||||
"Sogou" => "sogou.com",
|
||||
"Wikipedia" => "en.wikipedia.org",
|
||||
"Discord" => "discord.com",
|
||||
"Perplexity" => "perplexity.ai",
|
||||
"Microsoft Teams" => "microsoft.com"
|
||||
}
|
||||
|
||||
def init(_) do
|
||||
domains =
|
||||
File.read!(Application.app_dir(:plausible, @referer_domains_file))
|
||||
|> Jason.decode!()
|
||||
|> Map.merge(@custom_icons)
|
||||
|
||||
[favicon_domains: domains]
|
||||
end
|
||||
|
@ -1,37 +0,0 @@
|
||||
defmodule PlausibleWeb.RefInspector do
  @moduledoc """
  Helpers for turning a parsed referrer into the value stored as source or referrer.
  """

  @accepted_schemes ["http", "https", "android-app"]

  @doc """
  Returns the source name of a parsed referrer.

  A `nil` input gives `nil`. When the parsed referrer's `source` is anything other than
  `:unknown`, that source is returned as is. For `:unknown` sources the trimmed `referer`
  URL is inspected and its formatted host is returned, or `nil` if the URL is not acceptable.
  """
  def parse(nil), do: nil
  def parse(ref), do: source_or_host(ref.source, ref)

  @doc """
  Formats a referrer `URI` as host followed by its path without trailing slashes.
  """
  def format_referrer(uri) do
    trimmed_path =
      case uri.path do
        nil -> ""
        path -> path
      end
      |> String.trim_trailing("/")

    format_referrer_host(uri) <> trimmed_path
  end

  @doc """
  Tells whether a `URI` has a non-empty host and an `http`, `https` or `android-app` scheme.
  """
  # A nil host fails the `byte_size/1` guard and falls through to the catch-all clause.
  def right_uri?(%URI{host: host, scheme: scheme}) when byte_size(host) > 0,
    do: scheme in @accepted_schemes

  def right_uri?(_), do: false

  # Unknown sources fall back to the referrer's host; known sources pass through.
  defp source_or_host(:unknown, ref) do
    candidate = ref.referer |> String.trim() |> URI.parse()

    if right_uri?(candidate), do: format_referrer_host(candidate), else: nil
  end

  defp source_or_host(known_source, _ref), do: known_source

  # Keeps the scheme for Android app referrers and strips a leading "www." from the host.
  defp format_referrer_host(uri) do
    scheme_prefix =
      case uri.scheme do
        "android-app" -> "android-app://"
        _ -> ""
      end

    scheme_prefix <> String.replace_prefix(uri.host, "www.", "")
  end
end
|
215
priv/custom_sources.json
Normal file
215
priv/custom_sources.json
Normal file
@ -0,0 +1,215 @@
|
||||
{
|
||||
"android-app://com.reddit.frontpage":"Reddit",
|
||||
"baidu.com":"Baidu",
|
||||
"discord.com":"Discord",
|
||||
"discordapp.com":"Discord",
|
||||
"linktr.ee":"Linktree",
|
||||
"m.sogou.com":"Sogou",
|
||||
"ntp.msn.com":"Bing",
|
||||
"perplexity.ai":"Perplexity",
|
||||
"ptb.discord.com":"Discord",
|
||||
"search.brave.com":"Brave",
|
||||
"sogou.com":"Sogou",
|
||||
"statics.teams.cdn.office.net":"Microsoft Teams",
|
||||
"t.me":"Telegram",
|
||||
"wap.sogou.com":"Sogou",
|
||||
"ya.ru":"Yandex",
|
||||
"yandex.com.tr":"Yandex",
|
||||
"yandex.eu":"Yandex",
|
||||
"yandex.fr":"Yandex",
|
||||
"yandex.kz":"Yandex",
|
||||
"yandex.tm":"Yandex",
|
||||
"yandex.uz":"Yandex",
|
||||
"fb": "Facebook",
|
||||
"fb-ads": "Facebook",
|
||||
"fbads": "Facebook",
|
||||
"fbad": "Facebook",
|
||||
"facebook-ads": "Facebook",
|
||||
"facebook_ads": "Facebook",
|
||||
"fcb": "Facebook",
|
||||
"facebook_ad": "Facebook",
|
||||
"facebook_feed_ad": "Facebook",
|
||||
"ig": "Instagram",
|
||||
"yt": "Youtube",
|
||||
"yt-ads": "Youtube",
|
||||
"reddit-ads": "Reddit",
|
||||
"google_ads": "Google",
|
||||
"google-ads": "Google",
|
||||
"googleads": "Google",
|
||||
"gads": "Google",
|
||||
"google ads": "Google",
|
||||
"adwords": "Google",
|
||||
"twitter-ads": "Twitter",
|
||||
"tiktokads": "TikTok",
|
||||
"tik.tok": "TikTok",
|
||||
"perplexity": "Perplexity",
|
||||
"linktree": "Linktree",
|
||||
"fo.wikipedia.org":"Wikipedia",
|
||||
"ga.wikipedia.org":"Wikipedia",
|
||||
"el.m.wikipedia.org":"Wikipedia",
|
||||
"eo.m.wikipedia.org":"Wikipedia",
|
||||
"ms.m.wikipedia.org":"Wikipedia",
|
||||
"nl.wikipedia.org":"Wikipedia",
|
||||
"dga.m.wikipedia.org":"Wikipedia",
|
||||
"th.wikipedia.org":"Wikipedia",
|
||||
"oc.wikipedia.org":"Wikipedia",
|
||||
"da.wikipedia.org":"Wikipedia",
|
||||
"pt.m.wikipedia.org":"Wikipedia",
|
||||
"szl.m.wikipedia.org":"Wikipedia",
|
||||
"be-tarask.wikipedia.org":"Wikipedia",
|
||||
"ta.m.wikipedia.org":"Wikipedia",
|
||||
"pa.m.wikipedia.org":"Wikipedia",
|
||||
"mn.wikipedia.org":"Wikipedia",
|
||||
"sv.m.wikipedia.org":"Wikipedia",
|
||||
"sk.wikipedia.org":"Wikipedia",
|
||||
"it.wikipedia.org":"Wikipedia",
|
||||
"el.wikipedia.org":"Wikipedia",
|
||||
"olo.wikipedia.org":"Wikipedia",
|
||||
"hi.m.wikipedia.org":"Wikipedia",
|
||||
"bn.m.wikipedia.org":"Wikipedia",
|
||||
"uz.wikipedia.org":"Wikipedia",
|
||||
"fr.m.wikipedia.org":"Wikipedia",
|
||||
"fa.wikipedia.org":"Wikipedia",
|
||||
"fi.wikipedia.org":"Wikipedia",
|
||||
"arz.m.wikipedia.org":"Wikipedia",
|
||||
"si.m.wikipedia.org":"Wikipedia",
|
||||
"bjn.wikipedia.org":"Wikipedia",
|
||||
"kn.wikipedia.org":"Wikipedia",
|
||||
"is.m.wikipedia.org":"Wikipedia",
|
||||
"nostalgia.wikipedia.org":"Wikipedia",
|
||||
"en.wikipedia.org":"Wikipedia",
|
||||
"nl.m.wikipedia.org":"Wikipedia",
|
||||
"nn.m.wikipedia.org":"Wikipedia",
|
||||
"bs.wikipedia.org":"Wikipedia",
|
||||
"sh.m.wikipedia.org":"Wikipedia",
|
||||
"vi.m.wikipedia.org":"Wikipedia",
|
||||
"ru.wikipedia.org":"Wikipedia",
|
||||
"tr.m.wikipedia.org":"Wikipedia",
|
||||
"he.wikipedia.org":"Wikipedia",
|
||||
"ta.wikipedia.org":"Wikipedia",
|
||||
"es.wikipedia.org":"Wikipedia",
|
||||
"si.wikipedia.org":"Wikipedia",
|
||||
"pl.wikipedia.org":"Wikipedia",
|
||||
"hu.wikipedia.org":"Wikipedia",
|
||||
"lij.m.wikipedia.org":"Wikipedia",
|
||||
"nn.wikipedia.org":"Wikipedia",
|
||||
"ko.m.wikipedia.org":"Wikipedia",
|
||||
"da.m.wikipedia.org":"Wikipedia",
|
||||
"zh.m.wikipedia.org":"Wikipedia",
|
||||
"vec.wikipedia.org":"Wikipedia",
|
||||
"ar.wikipedia.org":"Wikipedia",
|
||||
"bcl.m.wikipedia.org":"Wikipedia",
|
||||
"en.m.wikipedia.org":"Wikipedia",
|
||||
"sw.wikipedia.org":"Wikipedia",
|
||||
"la.m.wikipedia.org":"Wikipedia",
|
||||
"ur.m.wikipedia.org":"Wikipedia",
|
||||
"id.m.wikipedia.org":"Wikipedia",
|
||||
"crh.wikipedia.org":"Wikipedia",
|
||||
"sr.wikipedia.org":"Wikipedia",
|
||||
"sw.m.wikipedia.org":"Wikipedia",
|
||||
"ka.m.wikipedia.org":"Wikipedia",
|
||||
"lt.m.wikipedia.org":"Wikipedia",
|
||||
"fy.wikipedia.org":"Wikipedia",
|
||||
"ro.m.wikipedia.org":"Wikipedia",
|
||||
"hr.wikipedia.org":"Wikipedia",
|
||||
"mn.m.wikipedia.org":"Wikipedia",
|
||||
"pt.wikipedia.org":"Wikipedia",
|
||||
"it.m.wikipedia.org":"Wikipedia",
|
||||
"lv.m.wikipedia.org":"Wikipedia",
|
||||
"fa.m.wikipedia.org":"Wikipedia",
|
||||
"ja.wikipedia.org":"Wikipedia",
|
||||
"lv.wikipedia.org":"Wikipedia",
|
||||
"hu.m.wikipedia.org":"Wikipedia",
|
||||
"de.wikipedia.org":"Wikipedia",
|
||||
"uk.wikipedia.org":"Wikipedia",
|
||||
"ml.wikipedia.org":"Wikipedia",
|
||||
"te.m.wikipedia.org":"Wikipedia",
|
||||
"bg.wikipedia.org":"Wikipedia",
|
||||
"eu.wikipedia.org":"Wikipedia",
|
||||
"arz.wikipedia.org":"Wikipedia",
|
||||
"id.wikipedia.org":"Wikipedia",
|
||||
"mg.m.wikipedia.org":"Wikipedia",
|
||||
"sq.m.wikipedia.org":"Wikipedia",
|
||||
"ca.wikipedia.org":"Wikipedia",
|
||||
"sk.m.wikipedia.org":"Wikipedia",
|
||||
"az.wikipedia.org":"Wikipedia",
|
||||
"ru.m.wikipedia.org":"Wikipedia",
|
||||
"uz.m.wikipedia.org":"Wikipedia",
|
||||
"wuu.wikipedia.org":"Wikipedia",
|
||||
"hy.wikipedia.org":"Wikipedia",
|
||||
"la.wikipedia.org":"Wikipedia",
|
||||
"ca.m.wikipedia.org":"Wikipedia",
|
||||
"ckb.m.wikipedia.org":"Wikipedia",
|
||||
"tt.wikipedia.org":"Wikipedia",
|
||||
"gu.m.wikipedia.org":"Wikipedia",
|
||||
"lrc.wikipedia.org":"Wikipedia",
|
||||
"be-tarask.m.wikipedia.org":"Wikipedia",
|
||||
"no.m.wikipedia.org":"Wikipedia",
|
||||
"simple.m.wikipedia.org":"Wikipedia",
|
||||
"eu.m.wikipedia.org":"Wikipedia",
|
||||
"ne.m.wikipedia.org":"Wikipedia",
|
||||
"sr.m.wikipedia.org":"Wikipedia",
|
||||
"vi.wikipedia.org":"Wikipedia",
|
||||
"lt.wikipedia.org":"Wikipedia",
|
||||
"cs.m.wikipedia.org":"Wikipedia",
|
||||
"hy.m.wikipedia.org":"Wikipedia",
|
||||
"mr.wikipedia.org":"Wikipedia",
|
||||
"sv.wikipedia.org":"Wikipedia",
|
||||
"eo.wikipedia.org":"Wikipedia",
|
||||
"as.m.wikipedia.org":"Wikipedia",
|
||||
"is.wikipedia.org":"Wikipedia",
|
||||
"sh.wikipedia.org":"Wikipedia",
|
||||
"zh-classical.wikipedia.org":"Wikipedia",
|
||||
"nds-nl.m.wikipedia.org":"Wikipedia",
|
||||
"tl.m.wikipedia.org":"Wikipedia",
|
||||
"tr.wikipedia.org":"Wikipedia",
|
||||
"cs.wikipedia.org":"Wikipedia",
|
||||
"uk.m.wikipedia.org":"Wikipedia",
|
||||
"sq.wikipedia.org":"Wikipedia",
|
||||
"et.m.wikipedia.org":"Wikipedia",
|
||||
"hr.m.wikipedia.org":"Wikipedia",
|
||||
"bn.wikipedia.org":"Wikipedia",
|
||||
"sl.wikipedia.org":"Wikipedia",
|
||||
"th.m.wikipedia.org":"Wikipedia",
|
||||
"hi.wikipedia.org":"Wikipedia",
|
||||
"he.m.wikipedia.org":"Wikipedia",
|
||||
"bat-smg.wikipedia.org":"Wikipedia",
|
||||
"ml.m.wikipedia.org":"Wikipedia",
|
||||
"zh.wikipedia.org":"Wikipedia",
|
||||
"fi.m.wikipedia.org":"Wikipedia",
|
||||
"de.m.wikipedia.org":"Wikipedia",
|
||||
"be.wikipedia.org":"Wikipedia",
|
||||
"pl.m.wikipedia.org":"Wikipedia",
|
||||
"simple.wikipedia.org":"Wikipedia",
|
||||
"rw.m.wikipedia.org":"Wikipedia",
|
||||
"no.wikipedia.org":"Wikipedia",
|
||||
"ja.m.wikipedia.org":"Wikipedia",
|
||||
"yi.m.wikipedia.org":"Wikipedia",
|
||||
"ga.m.wikipedia.org":"Wikipedia",
|
||||
"ar.m.wikipedia.org":"Wikipedia",
|
||||
"canary.discord.com":"Discord",
|
||||
"sa.m.wikipedia.org":"Wikipedia",
|
||||
"ky.wikipedia.org":"Wikipedia",
|
||||
"es.m.wikipedia.org":"Wikipedia",
|
||||
"new.wikipedia.org":"Wikipedia",
|
||||
"lij.wikipedia.org":"Wikipedia",
|
||||
"zh-yue.wikipedia.org":"Wikipedia",
|
||||
"bg.m.wikipedia.org":"Wikipedia",
|
||||
"bs.m.wikipedia.org":"Wikipedia",
|
||||
"dz.wikipedia.org":"Wikipedia",
|
||||
"kk.m.wikipedia.org":"Wikipedia",
|
||||
"fr.wikipedia.org":"Wikipedia",
|
||||
"qu.wikipedia.org":"Wikipedia",
|
||||
"ka.wikipedia.org":"Wikipedia",
|
||||
"webk.telegram.org":"Telegram",
|
||||
"et.wikipedia.org":"Wikipedia",
|
||||
"ms.wikipedia.org":"Wikipedia",
|
||||
"az.m.wikipedia.org":"Wikipedia",
|
||||
"cy.wikipedia.org":"Wikipedia",
|
||||
"ro.wikipedia.org":"Wikipedia",
|
||||
"mk.wikipedia.org":"Wikipedia",
|
||||
"tl.wikipedia.org":"Wikipedia",
|
||||
"am.wikipedia.org":"Wikipedia",
|
||||
"ko.wikipedia.org":"Wikipedia",
|
||||
"sl.m.wikipedia.org":"Wikipedia"
|
||||
}
|
File diff suppressed because one or more lines are too long
4
test/plausible/ingestion/source_test.exs
Normal file
4
test/plausible/ingestion/source_test.exs
Normal file
@ -0,0 +1,4 @@
|
||||
defmodule Plausible.Ingestion.SourceTest do
  @moduledoc false
  use ExUnit.Case, async: true

  alias Plausible.Ingestion.Source

  # Runs the `iex>` examples in the docs of Plausible.Ingestion.Source as tests.
  doctest Source
end
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user