From c3a06caa97873da892b9f767260365bd89c8e121 Mon Sep 17 00:00:00 2001 From: Uku Taht Date: Wed, 30 Oct 2024 15:41:51 +0200 Subject: [PATCH] Channel and source data updates (#4599) * Channel and source data updates * Update source mappings for migration * Fix codespell Co-authored-by: Karl-Aksel Puulmann * Update lib/plausible/ingestion/acquisition.ex Co-authored-by: Karl-Aksel Puulmann * Standardize access to utm params * Add wikipedia as "known" source * Move custom sources to json file * Add some advertising utm_sources * Move source mapping logic to refinspector file * Rename PlausibleWeb.RefInspector -> Plausible.Ingestion.Source * Move mapping overrides to custom_sources.json * More robust detection of paid sources * Add missing utm_sources to migration * Codespell * Add moduledoc for Plausible.Ingestion.Source * Fix dialyzer * Remove migration * Add more custom favicons * Re-generate referrer favicons file * Add doctest for sources --------- Co-authored-by: Karl-Aksel Puulmann --- .codespellignore | 2 +- lib/plausible/application.ex | 2 +- lib/plausible/imported/google_analytics4.ex | 17 +- lib/plausible/ingestion/acquisition.ex | 155 ++- lib/plausible/ingestion/event.ex | 39 +- lib/plausible/ingestion/source.ex | 147 ++ lib/plausible_web/plugs/favicon.ex | 9 + lib/plausible_web/refinspector.ex | 37 - priv/custom_sources.json | 215 +++ priv/referer_favicon_domains.json | 2 +- test/plausible/ingestion/source_test.exs | 4 + .../api/external_controller_test.exs | 1204 +++++++++++++++++ 12 files changed, 1687 insertions(+), 146 deletions(-) create mode 100644 lib/plausible/ingestion/source.ex delete mode 100644 lib/plausible_web/refinspector.ex create mode 100644 priv/custom_sources.json create mode 100644 test/plausible/ingestion/source_test.exs diff --git a/.codespellignore b/.codespellignore index da4c972da..cb20144e7 100644 --- a/.codespellignore +++ b/.codespellignore @@ -5,4 +5,4 @@ Taht taht referer referers - +statics diff --git a/lib/plausible/application.ex b/lib/plausible/application.ex index 04fea5be7..32bc19658 100644 --- a/lib/plausible/application.ex +++ b/lib/plausible/application.ex @@ -108,7 +108,7 @@ defmodule Plausible.Application do setup_geolocation() Location.load_all() - Plausible.Ingestion.Acquisition.init() + Plausible.Ingestion.Source.init() Plausible.Geo.await_loader() Supervisor.start_link(List.flatten(children), opts) diff --git a/lib/plausible/imported/google_analytics4.ex b/lib/plausible/imported/google_analytics4.ex index 2118d5797..4cc4de218 100644 --- a/lib/plausible/imported/google_analytics4.ex +++ b/lib/plausible/imported/google_analytics4.ex @@ -174,7 +174,7 @@ defmodule Plausible.Imported.GoogleAnalytics4 do site_id: site_id, import_id: import_id, date: get_date(row), - source: row.dimensions |> Map.fetch!("sessionSource") |> parse_referrer(), + source: row.dimensions |> Map.fetch!("sessionSource") |> parse_source(), referrer: nil, # Only `source` exists in GA4 API utm_source: nil, @@ -343,14 +343,13 @@ defmodule Plausible.Imported.GoogleAnalytics4 do defp default_if_missing(value, default) when value in @missing_values, do: default defp default_if_missing(value, _default), do: value - defp parse_referrer(nil), do: nil - defp parse_referrer("(direct)"), do: nil - defp parse_referrer("google"), do: "Google" - defp parse_referrer("bing"), do: "Bing" - defp parse_referrer("duckduckgo"), do: "DuckDuckGo" + defp parse_source(nil), do: nil + defp parse_source("(direct)"), do: nil + defp parse_source("google"), do: "Google" + defp parse_source("bing"), do: "Bing" + defp parse_source("duckduckgo"), do: "DuckDuckGo" - defp parse_referrer(ref) do - RefInspector.parse("https://" <> ref) - |> PlausibleWeb.RefInspector.parse() + defp parse_source(ref) do + Plausible.Ingestion.Source.parse("https://" <> ref) end end diff --git a/lib/plausible/ingestion/acquisition.ex b/lib/plausible/ingestion/acquisition.ex index d6024bf0e..d72dc11cb 100644 --- a/lib/plausible/ingestion/acquisition.ex +++ b/lib/plausible/ingestion/acquisition.ex @@ -1,37 +1,53 @@ defmodule Plausible.Ingestion.Acquisition do - @moduledoc false + @moduledoc """ + This module is responsible for figuring out acquisition channel from event referrer_source. + + Acquisition channel is the marketing channel where people come from and convert and help + users to understand and improve their marketing flow. + + Note it uses priv/ga4-source-categories.csv as a source, which comes from https://support.google.com/analytics/answer/9756891?hl=en. + + Notable differences from GA4 that have been implemented just for Plausible: + 1. The @custom_source_categories module attribute contains a list of custom source categories that we have manually + added based on our own judgement and user feedback. For example we treat AI tools (ChatGPT, Perplexity) as search engines. + 2. Google is in a privileged position to analyze paid traffic from within their own network. The biggest use-case is auto-tagged adwords campaigns. + We do our best by categorizing as paid search when source is Google and the url has `gclid` parameter. Same for source Bing and `msclkid` url parameter. + 3. The @paid_sources module attribute in Plausible.Ingestion.Source contains a list of utm_sources that we will automatically categorize as paid traffic + regardless of the medium. Examples are `yt-ads`, `facebook_ad`, `adwords`, etc. See also: Plausible.Ingestion.Source.paid_source?/1 + """ + @external_resource "priv/ga4-source-categories.csv" + @custom_source_categories [ + {"hacker news", "SOURCE_CATEGORY_SOCIAL"}, + {"yahoo!", "SOURCE_CATEGORY_SEARCH"}, + {"gmail", "SOURCE_CATEGORY_EMAIL"}, + {"telegram", "SOURCE_CATEGORY_SOCIAL"}, + {"slack", "SOURCE_CATEGORY_SOCIAL"}, + {"producthunt", "SOURCE_CATEGORY_SOCIAL"}, + {"github", "SOURCE_CATEGORY_SOCIAL"}, + {"steamcommunity.com", "SOURCE_CATEGORY_SOCIAL"}, + {"statics.teams.cdn.office.net", "SOURCE_CATEGORY_SOCIAL"}, + {"vkontakte", "SOURCE_CATEGORY_SOCIAL"}, + {"threads", "SOURCE_CATEGORY_SOCIAL"}, + {"ecosia", "SOURCE_CATEGORY_SEARCH"}, + {"perplexity", "SOURCE_CATEGORY_SEARCH"}, + {"brave", "SOURCE_CATEGORY_SEARCH"}, + {"chatgpt.com", "SOURCE_CATEGORY_SEARCH"}, + {"temu.com", "SOURCE_CATEGORY_SHOPPING"}, + {"discord", "SOURCE_CATEGORY_SOCIAL"}, + {"sogou", "SOURCE_CATEGORY_SEARCH"}, + {"microsoft teams", "SOURCE_CATEGORY_SOCIAL"} + ] @source_categories Application.app_dir(:plausible, "priv/ga4-source-categories.csv") |> File.read!() |> NimbleCSV.RFC4180.parse_string(skip_headers: false) |> Enum.map(fn [source, category] -> {source, category} end) + |> then(&(@custom_source_categories ++ &1)) |> Enum.into(%{}) - def init() do - :ets.new(__MODULE__, [ - :named_table, - :set, - :public, - {:read_concurrency, true} - ]) - - [{"referers.yml", map}] = RefInspector.Database.list(:default) - - Enum.flat_map(map, fn {_, entries} -> - Enum.map(entries, fn {_, _, _, _, _, _, name} -> - :ets.insert(__MODULE__, {String.downcase(name), name}) - end) - end) - end - - def find_mapping(source) do - case :ets.lookup(__MODULE__, source) do - [{_, name}] -> name - _ -> source - end - end - def get_channel(request, source) do + source = source && String.downcase(source) + cond do cross_network?(request) -> "Cross-network" paid_shopping?(request, source) -> "Paid Shopping" @@ -44,7 +60,7 @@ defmodule Plausible.Ingestion.Acquisition do organic_social?(request, source) -> "Organic Social" organic_video?(request, source) -> "Organic Video" search_source?(source) -> "Organic Search" - email?(request) -> "Email" + email?(request, source) -> "Email" affiliates?(request) -> "Affiliates" audio?(request) -> "Audio" sms?(request) -> "SMS" @@ -55,30 +71,32 @@ defmodule Plausible.Ingestion.Acquisition do end defp cross_network?(request) do - String.contains?(request.query_params["utm_campaign"] || "", "cross-network") + String.contains?(query_param(request, "utm_campaign"), "cross-network") end defp paid_shopping?(request, source) do - (shopping_source?(source) or shopping_campaign?(request.query_params["utm_campaign"])) and - paid_medium?(request.query_params["utm_medium"]) + (shopping_source?(source) or shopping_campaign?(request)) and paid_medium?(request) end defp paid_search?(request, source) do - (search_source?(source) and paid_medium?(request.query_params["utm_medium"])) or - (source == "Google" and !!request.query_params["gclid"]) or - (source == "Bing" and !!request.query_params["msclkid"]) + (search_source?(source) and paid_medium?(request)) or + (search_source?(source) and paid_source?(request)) or + (source == "google" and !!request.query_params["gclid"]) or + (source == "bing" and !!request.query_params["msclkid"]) end defp paid_social?(request, source) do - social_source?(source) and paid_medium?(request.query_params["utm_medium"]) + (social_source?(source) and paid_medium?(request)) or + (social_source?(source) and paid_source?(request)) end defp paid_video?(request, source) do - video_source?(source) and paid_medium?(request.query_params["utm_medium"]) + (video_source?(source) and paid_medium?(request)) or + (video_source?(source) and paid_source?(request)) end defp display?(request) do - request.query_params["utm_medium"] in [ + query_param(request, "utm_medium") in [ "display", "banner", "expandable", @@ -88,16 +106,16 @@ defmodule Plausible.Ingestion.Acquisition do end defp paid_other?(request) do - paid_medium?(request.query_params["utm_medium"]) + paid_medium?(request) end defp organic_shopping?(request, source) do - shopping_source?(source) or shopping_campaign?(request.query_params["utm_campaign"]) + shopping_source?(source) or shopping_campaign?(request) end defp organic_social?(request, source) do social_source?(source) or - request.query_params["utm_medium"] in [ + query_param(request, "utm_medium") in [ "social", "social-network", "social-media", @@ -108,71 +126,88 @@ defmodule Plausible.Ingestion.Acquisition do end defp organic_video?(request, source) do - video_source?(source) or String.contains?(request.query_params["utm_medium"] || "", "video") + video_source?(source) or String.contains?(query_param(request, "utm_medium"), "video") end defp referral?(request, source) do - request.query_params["utm_medium"] in ["referral", "app", "link"] or + query_param(request, "utm_medium") in ["referral", "app", "link"] or !!source end - @email_tags ["email", "e-mail", "e_mail", "e mail"] - defp email?(request) do - String.contains?(request.query_params["utm_source"] || "", @email_tags) or - String.contains?(request.query_params["utm_medium"] || "", @email_tags) + @email_tags ["email", "e-mail", "e_mail", "e mail", "newsletter"] + defp email?(request, source) do + email_source?(source) or + String.contains?(query_param(request, "utm_source"), @email_tags) or + String.contains?(query_param(request, "utm_medium"), @email_tags) end defp affiliates?(request) do - request.query_params["utm_medium"] == "affiliate" + query_param(request, "utm_medium") == "affiliate" end defp audio?(request) do - request.query_params["utm_medium"] == "audio" + query_param(request, "utm_medium") == "audio" end defp sms?(request) do - request.query_params["utm_source"] == "sms" or - request.query_params["utm_medium"] == "sms" + query_param(request, "utm_source") == "sms" or + query_param(request, "utm_medium") == "sms" end defp mobile_push_notifications?(request, source) do - medium = request.query_params["utm_medium"] || "" + medium = query_param(request, "utm_medium") String.ends_with?(medium, "push") or String.contains?(medium, ["mobile", "notification"]) or source == "firebase" end - # # Helper functions for source and medium checks defp shopping_source?(nil), do: false defp shopping_source?(source) do - @source_categories[String.downcase(source)] == "SOURCE_CATEGORY_SHOPPING" - end - - defp shopping_campaign?(campaign_name) do - Regex.match?(~r/^(.*(([^a-df-z]|^)shop|shopping).*)$/, campaign_name || "") + @source_categories[source] == "SOURCE_CATEGORY_SHOPPING" end defp search_source?(nil), do: false defp search_source?(source) do - @source_categories[String.downcase(source)] == "SOURCE_CATEGORY_SEARCH" + @source_categories[source] == "SOURCE_CATEGORY_SEARCH" end defp social_source?(nil), do: false defp social_source?(source) do - @source_categories[String.downcase(source)] == "SOURCE_CATEGORY_SOCIAL" + @source_categories[source] == "SOURCE_CATEGORY_SOCIAL" end defp video_source?(nil), do: false defp video_source?(source) do - @source_categories[String.downcase(source)] == "SOURCE_CATEGORY_VIDEO" + @source_categories[source] == "SOURCE_CATEGORY_VIDEO" end - defp paid_medium?(medium) do - Regex.match?(~r/^(.*cp.*|ppc|retargeting|paid.*)$/, medium || "") + defp email_source?(nil), do: false + + defp email_source?(source) do + @source_categories[source] == "SOURCE_CATEGORY_EMAIL" + end + + defp shopping_campaign?(request) do + campaign_name = query_param(request, "utm_campaign") + Regex.match?(~r/^(.*(([^a-df-z]|^)shop|shopping).*)$/, campaign_name) + end + + defp paid_medium?(request) do + medium = query_param(request, "utm_medium") + Regex.match?(~r/^(.*cp.*|ppc|retargeting|paid.*)$/, medium) + end + + defp paid_source?(request) do + query_param(request, "utm_source") + |> Plausible.Ingestion.Source.paid_source?() + end + + defp query_param(request, name) do + String.downcase(request.query_params[name] || "") end end diff --git a/lib/plausible/ingestion/event.ex b/lib/plausible/ingestion/event.ex index f177ee940..4c2c88769 100644 --- a/lib/plausible/ingestion/event.ex +++ b/lib/plausible/ingestion/event.ex @@ -251,14 +251,13 @@ defmodule Plausible.Ingestion.Event do end defp put_referrer(%__MODULE__{} = event, _context) do - ref = parse_referrer(event.request.uri, event.request.referrer) - source = get_referrer_source(event.request, ref) + source = Plausible.Ingestion.Source.resolve(event.request) channel = Plausible.Ingestion.Acquisition.get_channel(event.request, source) update_session_attrs(event, %{ channel: channel, referrer_source: source, - referrer: clean_referrer(ref) + referrer: Plausible.Ingestion.Source.format_referrer(event.request.referrer) }) end @@ -392,40 +391,6 @@ defmodule Plausible.Ingestion.Event do event end - defp parse_referrer(_uri, _referrer_str = nil), do: nil - - defp parse_referrer(uri, referrer_str) do - referrer_uri = URI.parse(referrer_str) - - if Request.sanitize_hostname(referrer_uri.host) !== Request.sanitize_hostname(uri.host) && - referrer_uri.host !== "localhost" do - RefInspector.parse(referrer_str) - end - end - - defp get_referrer_source(request, ref) do - tagged_source = - request.query_params["utm_source"] || - request.query_params["source"] || - request.query_params["ref"] - - if tagged_source do - Plausible.Ingestion.Acquisition.find_mapping(tagged_source) - else - PlausibleWeb.RefInspector.parse(ref) - end - end - - defp clean_referrer(nil), do: nil - - defp clean_referrer(ref) do - uri = URI.parse(ref.referer) - - if PlausibleWeb.RefInspector.right_uri?(uri) do - PlausibleWeb.RefInspector.format_referrer(uri) - end - end - defp parse_user_agent(%Request{user_agent: user_agent}) when is_binary(user_agent) do Plausible.Cache.Adapter.get(:user_agents, user_agent, fn -> UAInspector.parse(user_agent) diff --git a/lib/plausible/ingestion/source.ex b/lib/plausible/ingestion/source.ex new file mode 100644 index 000000000..2866db191 --- /dev/null +++ b/lib/plausible/ingestion/source.ex @@ -0,0 +1,147 @@ +defmodule Plausible.Ingestion.Source do + @moduledoc """ + Resolves the `source` dimension from a combination of `referer` header and either `utm_source`, `source`, or `ref` query parameter. + + """ + alias Plausible.Ingestion.Request + + @external_resource "priv/custom_sources.json" + @custom_sources Application.app_dir(:plausible, "priv/custom_sources.json") + |> File.read!() + |> Jason.decode!() + + @paid_sources Map.keys(@custom_sources) + |> Enum.filter(&String.ends_with?(&1, ["ads", "ad"])) + |> then(&["adwords" | &1]) + |> MapSet.new() + + def init() do + :ets.new(__MODULE__, [ + :named_table, + :set, + :public, + {:read_concurrency, true} + ]) + + [{"referers.yml", map}] = RefInspector.Database.list(:default) + + Enum.each(map, fn {_, entries} -> + Enum.each(entries, fn {_, _, _, _, _, _, name} -> + :ets.insert(__MODULE__, {String.downcase(name), name}) + end) + end) + + Enum.each(@custom_sources, fn {key, val} -> + :ets.insert(__MODULE__, {key, val}) + :ets.insert(__MODULE__, {String.downcase(val), val}) + end) + end + + def paid_source?(source) do + MapSet.member?(@paid_sources, source) + end + + @doc """ + Resolves the source of a session based on query params and the `Referer` header. + + When a query parameter like `utm_source` is present, it will be prioritized over the `Referer` header. When the URL does not contain a source tag, we fall + back to using `Referer` to determine the source. This module also takes care of certain transformations to make the data more useful for the user: + 1. The RefInspector library is used to categorize referrers into "known" sources. For example, when the referrer is google.com or google.co.uk, + it will always be stored as "Google" which is more useful for marketers. + 2. On top of the standard RefInspector behaviour, we also keep a list of `custom_sources.json` which extends it with referrers that we have seen in the wild. + For example, Wikipedia has many domains that need to be combined into a single known source. These could all in theory be [upstreamed](https://github.com/snowplow-referer-parser/referer-parser). + 3. When a known source is supplied in utm_source (or source, ref) query parameter, we merge it with our known sources in a case-insensitive manner. + 4. Our list of `custom_sources.json` also contains some commonly used utm_source shorthands for certain sources. URL tagging is a mess, and we can never do it + perfectly, but at least we're making an effort for the most commonly used ones. For example, `ig -> Instagram` and `adwords -> Google`. + + ### Examples: + + iex> alias Plausible.Ingestion.{Source, Request} + iex> base_request = %Request{uri: URI.parse("https://plausible.io")} + iex> Source.resolve(%{base_request | referrer: "https://google.com"}) # Known referrer from RefInspector + "Google" + iex> Source.resolve(%{base_request | query_params: %{"utm_source" => "google"}}) # Known source from RefInspector supplied as downcased utm_source by user + "Google" + iex> Source.resolve(%{base_request | query_params: %{"utm_source" => "GOOGLE"}}) # Known source from RefInspector supplied as uppercased utm_source by user + "Google" + iex> Source.resolve(%{base_request | referrer: "https://en.m.wikipedia.org"}) # Known referrer from custom_sources.json + "Wikipedia" + iex> Source.resolve(%{base_request | query_params: %{"utm_source" => "wikipedia"}}) # Known source from custom_sources.json supplied as downcased utm_source by user + "Wikipedia" + iex> Source.resolve(%{base_request | query_params: %{"utm_source" => "ig"}}) # Known utm_source from custom_sources.json + "Instagram" + iex> Source.resolve(%{base_request | referrer: "https://www.markosaric.com"}) # Unknown source, it is just stored as the domain name + "markosaric.com" + """ + def resolve(request) do + tagged_source = + request.query_params["utm_source"] || + request.query_params["source"] || + request.query_params["ref"] + + source = + cond do + tagged_source -> tagged_source + has_referral?(request) -> parse(request.referrer) + true -> nil + end + + find_mapping(source) + end + + def parse(ref) do + case RefInspector.parse(ref).source do + :unknown -> + uri = URI.parse(String.trim(ref)) + + if valid_referrer?(uri) do + format_referrer_host(uri) + end + + source -> + source + end + end + + def find_mapping(nil), do: nil + + def find_mapping(source) do + case :ets.lookup(__MODULE__, String.downcase(source)) do + [{_, name}] -> name + _ -> source + end + end + + def format_referrer(nil), do: nil + + def format_referrer(referrer) do + referrer_uri = URI.parse(referrer) + + if valid_referrer?(referrer_uri) do + path = String.trim_trailing(referrer_uri.path || "", "/") + format_referrer_host(referrer_uri) <> path + end + end + + defp valid_referrer?(%URI{host: host, scheme: scheme}) + when scheme in ["http", "https", "android-app"] and byte_size(host) > 0, + do: true + + defp valid_referrer?(_), do: false + + defp has_referral?(%Request{referrer: nil}), do: nil + + defp has_referral?(%Request{referrer: referrer, uri: uri}) do + referrer_uri = URI.parse(referrer) + + Request.sanitize_hostname(referrer_uri.host) !== Request.sanitize_hostname(uri.host) and + referrer_uri.host !== "localhost" + end + + defp format_referrer_host(uri) do + protocol = if uri.scheme == "android-app", do: "android-app://", else: "" + host = String.replace_prefix(uri.host, "www.", "") + + protocol <> host + end +end diff --git a/lib/plausible_web/plugs/favicon.ex b/lib/plausible_web/plugs/favicon.ex index e8dc9a5e2..da676dff0 100644 --- a/lib/plausible_web/plugs/favicon.ex +++ b/lib/plausible_web/plugs/favicon.ex @@ -31,11 +31,20 @@ defmodule PlausibleWeb.Favicon do @placeholder_icon_location "priv/placeholder_favicon.ico" @placeholder_icon File.read!(@placeholder_icon_location) + @custom_icons %{ + "Brave" => "search.brave.com", + "Sogou" => "sogou.com", + "Wikipedia" => "en.wikipedia.org", + "Discord" => "discord.com", + "Perplexity" => "perplexity.ai", + "Microsoft Teams" => "microsoft.com" + } def init(_) do domains = File.read!(Application.app_dir(:plausible, @referer_domains_file)) |> Jason.decode!() + |> Map.merge(@custom_icons) [favicon_domains: domains] end diff --git a/lib/plausible_web/refinspector.ex b/lib/plausible_web/refinspector.ex deleted file mode 100644 index 0bfbcb456..000000000 --- a/lib/plausible_web/refinspector.ex +++ /dev/null @@ -1,37 +0,0 @@ -defmodule PlausibleWeb.RefInspector do - def parse(nil), do: nil - - def parse(ref) do - case ref.source do - :unknown -> - uri = URI.parse(String.trim(ref.referer)) - - if right_uri?(uri) do - format_referrer_host(uri) - end - - source -> - source - end - end - - def format_referrer(uri) do - path = String.trim_trailing(uri.path || "", "/") - format_referrer_host(uri) <> path - end - - def right_uri?(%URI{host: nil}), do: false - - def right_uri?(%URI{host: host, scheme: scheme}) - when scheme in ["http", "https", "android-app"] and byte_size(host) > 0, - do: true - - def right_uri?(_), do: false - - defp format_referrer_host(uri) do - protocol = if uri.scheme == "android-app", do: "android-app://", else: "" - host = String.replace_prefix(uri.host, "www.", "") - - protocol <> host - end -end diff --git a/priv/custom_sources.json b/priv/custom_sources.json new file mode 100644 index 000000000..16bc9f0d4 --- /dev/null +++ b/priv/custom_sources.json @@ -0,0 +1,215 @@ +{ + "android-app://com.reddit.frontpage":"Reddit", + "baidu.com":"Baidu", + "discord.com":"Discord", + "discordapp.com":"Discord", + "linktr.ee":"Linktree", + "m.sogou.com":"Sogou", + "ntp.msn.com":"Bing", + "perplexity.ai":"Perplexity", + "ptb.discord.com":"Discord", + "search.brave.com":"Brave", + "sogou.com":"Sogou", + "statics.teams.cdn.office.net":"Microsoft Teams", + "t.me":"Telegram", + "wap.sogou.com":"Sogou", + "ya.ru":"Yandex", + "yandex.com.tr":"Yandex", + "yandex.eu":"Yandex", + "yandex.fr":"Yandex", + "yandex.kz":"Yandex", + "yandex.tm":"Yandex", + "yandex.uz":"Yandex", + "fb": "Facebook", + "fb-ads": "Facebook", + "fbads": "Facebook", + "fbad": "Facebook", + "facebook-ads": "Facebook", + "facebook_ads": "Facebook", + "fcb": "Facebook", + "facebook_ad": "Facebook", + "facebook_feed_ad": "Facebook", + "ig": "Instagram", + "yt": "Youtube", + "yt-ads": "Youtube", + "reddit-ads": "Reddit", + "google_ads": "Google", + "google-ads": "Google", + "googleads": "Google", + "gads": "Google", + "google ads": "Google", + "adwords": "Google", + "twitter-ads": "Twitter", + "tiktokads": "TikTok", + "tik.tok": "TikTok", + "perplexity": "Perplexity", + "linktree": "Linktree", + "fo.wikipedia.org":"Wikipedia", + "ga.wikipedia.org":"Wikipedia", + "el.m.wikipedia.org":"Wikipedia", + "eo.m.wikipedia.org":"Wikipedia", + "ms.m.wikipedia.org":"Wikipedia", + "nl.wikipedia.org":"Wikipedia", + "dga.m.wikipedia.org":"Wikipedia", + "th.wikipedia.org":"Wikipedia", + "oc.wikipedia.org":"Wikipedia", + "da.wikipedia.org":"Wikipedia", + "pt.m.wikipedia.org":"Wikipedia", + "szl.m.wikipedia.org":"Wikipedia", + "be-tarask.wikipedia.org":"Wikipedia", + "ta.m.wikipedia.org":"Wikipedia", + "pa.m.wikipedia.org":"Wikipedia", + "mn.wikipedia.org":"Wikipedia", + "sv.m.wikipedia.org":"Wikipedia", + "sk.wikipedia.org":"Wikipedia", + "it.wikipedia.org":"Wikipedia", + "el.wikipedia.org":"Wikipedia", + "olo.wikipedia.org":"Wikipedia", + "hi.m.wikipedia.org":"Wikipedia", + "bn.m.wikipedia.org":"Wikipedia", + "uz.wikipedia.org":"Wikipedia", + "fr.m.wikipedia.org":"Wikipedia", + "fa.wikipedia.org":"Wikipedia", + "fi.wikipedia.org":"Wikipedia", + "arz.m.wikipedia.org":"Wikipedia", + "si.m.wikipedia.org":"Wikipedia", + "bjn.wikipedia.org":"Wikipedia", + "kn.wikipedia.org":"Wikipedia", + "is.m.wikipedia.org":"Wikipedia", + "nostalgia.wikipedia.org":"Wikipedia", + "en.wikipedia.org":"Wikipedia", + "nl.m.wikipedia.org":"Wikipedia", + "nn.m.wikipedia.org":"Wikipedia", + "bs.wikipedia.org":"Wikipedia", + "sh.m.wikipedia.org":"Wikipedia", + "vi.m.wikipedia.org":"Wikipedia", + "ru.wikipedia.org":"Wikipedia", + "tr.m.wikipedia.org":"Wikipedia", + "he.wikipedia.org":"Wikipedia", + "ta.wikipedia.org":"Wikipedia", + "es.wikipedia.org":"Wikipedia", + "si.wikipedia.org":"Wikipedia", + "pl.wikipedia.org":"Wikipedia", + "hu.wikipedia.org":"Wikipedia", + "lij.m.wikipedia.org":"Wikipedia", + "nn.wikipedia.org":"Wikipedia", + "ko.m.wikipedia.org":"Wikipedia", + "da.m.wikipedia.org":"Wikipedia", + "zh.m.wikipedia.org":"Wikipedia", + "vec.wikipedia.org":"Wikipedia", + "ar.wikipedia.org":"Wikipedia", + "bcl.m.wikipedia.org":"Wikipedia", + "en.m.wikipedia.org":"Wikipedia", + "sw.wikipedia.org":"Wikipedia", + "la.m.wikipedia.org":"Wikipedia", + "ur.m.wikipedia.org":"Wikipedia", + "id.m.wikipedia.org":"Wikipedia", + "crh.wikipedia.org":"Wikipedia", + "sr.wikipedia.org":"Wikipedia", + "sw.m.wikipedia.org":"Wikipedia", + "ka.m.wikipedia.org":"Wikipedia", + "lt.m.wikipedia.org":"Wikipedia", + "fy.wikipedia.org":"Wikipedia", + "ro.m.wikipedia.org":"Wikipedia", + "hr.wikipedia.org":"Wikipedia", + "mn.m.wikipedia.org":"Wikipedia", + "pt.wikipedia.org":"Wikipedia", + "it.m.wikipedia.org":"Wikipedia", + "lv.m.wikipedia.org":"Wikipedia", + "fa.m.wikipedia.org":"Wikipedia", + "ja.wikipedia.org":"Wikipedia", + "lv.wikipedia.org":"Wikipedia", + "hu.m.wikipedia.org":"Wikipedia", + "de.wikipedia.org":"Wikipedia", + "uk.wikipedia.org":"Wikipedia", + "ml.wikipedia.org":"Wikipedia", + "te.m.wikipedia.org":"Wikipedia", + "bg.wikipedia.org":"Wikipedia", + "eu.wikipedia.org":"Wikipedia", + "arz.wikipedia.org":"Wikipedia", + "id.wikipedia.org":"Wikipedia", + "mg.m.wikipedia.org":"Wikipedia", + "sq.m.wikipedia.org":"Wikipedia", + "ca.wikipedia.org":"Wikipedia", + "sk.m.wikipedia.org":"Wikipedia", + "az.wikipedia.org":"Wikipedia", + "ru.m.wikipedia.org":"Wikipedia", + "uz.m.wikipedia.org":"Wikipedia", + "wuu.wikipedia.org":"Wikipedia", + "hy.wikipedia.org":"Wikipedia", + "la.wikipedia.org":"Wikipedia", + "ca.m.wikipedia.org":"Wikipedia", + "ckb.m.wikipedia.org":"Wikipedia", + "tt.wikipedia.org":"Wikipedia", + "gu.m.wikipedia.org":"Wikipedia", + "lrc.wikipedia.org":"Wikipedia", + "be-tarask.m.wikipedia.org":"Wikipedia", + "no.m.wikipedia.org":"Wikipedia", + "simple.m.wikipedia.org":"Wikipedia", + "eu.m.wikipedia.org":"Wikipedia", + "ne.m.wikipedia.org":"Wikipedia", + "sr.m.wikipedia.org":"Wikipedia", + "vi.wikipedia.org":"Wikipedia", + "lt.wikipedia.org":"Wikipedia", + "cs.m.wikipedia.org":"Wikipedia", + "hy.m.wikipedia.org":"Wikipedia", + "mr.wikipedia.org":"Wikipedia", + "sv.wikipedia.org":"Wikipedia", + "eo.wikipedia.org":"Wikipedia", + "as.m.wikipedia.org":"Wikipedia", + "is.wikipedia.org":"Wikipedia", + "sh.wikipedia.org":"Wikipedia", + "zh-classical.wikipedia.org":"Wikipedia", + "nds-nl.m.wikipedia.org":"Wikipedia", + "tl.m.wikipedia.org":"Wikipedia", + "tr.wikipedia.org":"Wikipedia", + "cs.wikipedia.org":"Wikipedia", + "uk.m.wikipedia.org":"Wikipedia", + "sq.wikipedia.org":"Wikipedia", + "et.m.wikipedia.org":"Wikipedia", + "hr.m.wikipedia.org":"Wikipedia", + "bn.wikipedia.org":"Wikipedia", + "sl.wikipedia.org":"Wikipedia", + "th.m.wikipedia.org":"Wikipedia", + "hi.wikipedia.org":"Wikipedia", + "he.m.wikipedia.org":"Wikipedia", + "bat-smg.wikipedia.org":"Wikipedia", + "ml.m.wikipedia.org":"Wikipedia", + "zh.wikipedia.org":"Wikipedia", + "fi.m.wikipedia.org":"Wikipedia", + "de.m.wikipedia.org":"Wikipedia", + "be.wikipedia.org":"Wikipedia", + "pl.m.wikipedia.org":"Wikipedia", + "simple.wikipedia.org":"Wikipedia", + "rw.m.wikipedia.org":"Wikipedia", + "no.wikipedia.org":"Wikipedia", + "ja.m.wikipedia.org":"Wikipedia", + "yi.m.wikipedia.org":"Wikipedia", + "ga.m.wikipedia.org":"Wikipedia", + "ar.m.wikipedia.org":"Wikipedia", + "canary.discord.com":"Discord", + "sa.m.wikipedia.org":"Wikipedia", + "ky.wikipedia.org":"Wikipedia", + "es.m.wikipedia.org":"Wikipedia", + "new.wikipedia.org":"Wikipedia", + "lij.wikipedia.org":"Wikipedia", + "zh-yue.wikipedia.org":"Wikipedia", + "bg.m.wikipedia.org":"Wikipedia", + "bs.m.wikipedia.org":"Wikipedia", + "dz.wikipedia.org":"Wikipedia", + "kk.m.wikipedia.org":"Wikipedia", + "fr.wikipedia.org":"Wikipedia", + "qu.wikipedia.org":"Wikipedia", + "ka.wikipedia.org":"Wikipedia", + "webk.telegram.org":"Telegram", + "et.wikipedia.org":"Wikipedia", + "ms.wikipedia.org":"Wikipedia", + "az.m.wikipedia.org":"Wikipedia", + "cy.wikipedia.org":"Wikipedia", + "ro.wikipedia.org":"Wikipedia", + "mk.wikipedia.org":"Wikipedia", + "tl.wikipedia.org":"Wikipedia", + "am.wikipedia.org":"Wikipedia", + "ko.wikipedia.org":"Wikipedia", + "sl.m.wikipedia.org":"Wikipedia" +} diff --git a/priv/referer_favicon_domains.json b/priv/referer_favicon_domains.json index 0d3786d53..6430d7f77 100644 --- a/priv/referer_favicon_domains.json +++ b/priv/referer_favicon_domains.json @@ -1 +1 @@ -{"White Pages":"www.whitepages.com.au","QQ Mail":"mail.qq.com","eo":"eo.st","Toolbarhome":"www.toolbarhome.com","YouGoo":"www.yougoo.fr","Walhello":"www.walhello.info","Tixuma":"www.tixuma.de","Hyves":"hyves.nl","PriceRunner":"www.pricerunner.co.uk","Euroseek":"www.euroseek.com","Web.nl":"www.web.nl","360.cn":"so.360.cn","Seznam":"search.seznam.cz","Nigma":"nigma.ru","Wirtualna Polska":"szukaj.wp.pl","Google Product Search":"google.ac/products","Picsearch":"www.picsearch.com","Suchnase":"www.suchnase.de","WebSearch":"www.websearch.com","Lycos":"search.lycos.com","Conduit":"search.conduit.com","StackOverflow":"stackoverflow.com","GitHub":"github.com","FriendFeed":"friendfeed.com","Flickr":"flickr.com","Google+":"url.google.com","APOLL07":"apollo7.de","Douban":"douban.com","Pocket":"getpocket.com","Doubleclick":"ad.doubleclick.net","Exalead":"www.exalead.fr","Sharelook":"www.sharelook.fr","Adam Internet":"webmail.adam.com.au","Friendster":"friendster.com","2degrees":"webmail.2degreesbroadband.co.nz","ZEDO":"zedo.com","Outlook.com":"mail.live.com","1&1":"search.1and1.com","URL.ORGanizier":"www.url.org","Google":"support.google.com","Snapdo":"search.snapdo.com","Bebo":"bebo.com","Biglobe":"cgi.search.biglobe.ne.jp","1und1":"search.1und1.de","BlackPlanet":"blackplanet.com","Findwide":"search.findwide.com","Digg":"digg.com","Eurip":"www.eurip.com","SearchCanvas":"www.searchcanvas.com","Sapo":"pesquisa.sapo.pt","Yieldmo":"yieldmo.com","ABCsøk":"abcsolk.no","Gaia Online":"gaiaonline.com","blekko":"blekko.com","Vodafone":"webmail.vodafone.co.nz","qip":"search.qip.ru","Hooseek.com":"www.hooseek.com","Tumblr":"tumblr.com","GAIS":"gais.cs.ccu.edu.tw","AllTheWeb":"www.alltheweb.com","Terra":"buscador.terra.es","Buzznet":"buzznet.com","Searchy":"www.searchy.co.uk","Commander":"webmail.commander.net.au","Twingly":"www.twingly.com","Yandex":"yandex.ru","Najdi":"www.najdi.si","Sociomantic Labs":"sociomantic.com","Netlog":"netlog.com","Volny":"web.volny.cz","Criteo":"cas.jp.as.criteo.com","dmoz":"dmoz.org","Bing":"bing.com","Altavista":"www.altavista.com","Kataweb":"www.kataweb.it","Charter":"www.charter.net","Startpagina":"startgoogle.startpagina.nl","Search.ch":"www.search.ch","Uludag Sozluk":"uludagsozluk.com","Orkut":"orkut.com","UKR.net":"search.ukr.net","Sonico.com":"sonico.com","Comcast":"serach.comcast.net","Yandex Images":"images.yandex.ru","Vindex":"www.vindex.nl","Zoho":"mail.zoho.com","SteelHouse":"steelhousemedia.com","Geona":"geona.net","Excite":"search.excite.it","1.cz":"1.cz","Vimeo":"vimeo.com","Rubicon Project":"optimized-by.rubiconproject.com","Yasni":"www.yasni.de","Badoo":"badoo.com","Tiscali":"search.tiscali.it","Inbox.com":"inbox.com","Orange Webmail":"orange.fr/webmail","Blogpulse":"www.blogpulse.com","Delfi latvia":"smart.delfi.lv","iPrimus":"webmail.iprimus.com.au","Mail.ru":"my.mail.ru","Google News":"news.google.ac","Delicious":"delicious.com","Sonobi":"sonobi.com","Road Runner Search":"search.rr.com","Fast Browser Search":"www.fastbrowsersearch.com","WAYN":"wayn.com","Weibo":"weibo.com","Windows Live Spaces":"login.live.com","Clix":"pesquisa.clix.pt","T-Online":"suche.t-online.de","Ask Toolbar":"search.tb.ask.com","Flyingbird":"inspsearch.com","Yahoo!":"finance.yahoo.com","Alexa":"alexa.com","Jungle Key":"junglekey.com","Gmail":"mail.google.com","Optus Zoo":"webmail.optuszoo.com.au","Web.de":"suche.web.de","Odnoklassniki":"odnoklassniki.ru","GMX":"suche.gmx.net","Freshweather":"www.fresh-weather.com","Quora":"quora.com","AppNexus":"ib.adnxs.com","Onet":"szukaj.onet.pl","Geni":"geni.com","Naver Images":"image.search.naver.com","Qzone":"qzone.qq.com","Mozbot":"www.mozbot.fr","Adform":"adform.net","Blogdigger":"www.blogdigger.com","Netspace":"webmail.netspace.net.au","Apontador":"apontador.com.br","Jungle Spider":"www.jungle-spider.de","Mozo":"mozo.com.au","Zapmeta":"www.zapmeta.com","MySearch":"www.mysearch.com","X-recherche":"www.x-recherche.com","Lo.st":"lo.st","TrovaRapido":"www.trovarapido.com","Dodo":"webmail.dodo.com.au","Flix":"www.flix.de","Flashtalking":"flashtalking.com","Nasza-klasa.pl":"nk.pl","AOL Mail":"mail.aol.com","Virgilio":"ricerca.virgilio.it","Rambler":"nova.rambler.ru","Atlas":"searchatlas.centrum.cz","Austronaut":"www2.austronaut.at","Xanga":"xanga.com","vKruguDruzei.ru":"vkrugudruzei.ru","Friends Reunited":"friendsreunited.com","Nifty":"search.nifty.com","Plaxo":"plaxo.com","Sizmek":"bs.serving-sys.com","ONE by AOL":"nexage.com","Gomeo":"www.gomeo.com","BidSwitch":"bidswitch.net","Yahoo! Images":"image.yahoo.cn","ITU Sozluk":"itusozluk.com","Instagram":"instagram.com","AOL":"search.aol.com","Compuserve":"websearch.cs.com","Free":"search.free.fr","Reddit":"reddit.com","Metager2":"metager2.de","Tuenti":"tuenti.com","Rakuten":"websearch.rakuten.co.jp","126 Mail":"mail.126.com","Centrum":"serach.centrum.cz","Dalesearch":"www.dalesearch.com","Freecause":"search.freecause.com","Viadeo":"viadeo.com","Bing Images":"bing.com/images/search","Softonic":"search.softonic.com","ICQ":"www.icq.com","Gule Sider":"www.gulesider.no","Winamp":"search.winamp.com","Paperball":"www.paperball.de","Gigablast":"www.gigablast.com","Inci Sozluk":"inci.sozlukspot.com","Outbrain":"paid.outbrain.com","Plista":"farm.plista.com","Neti":"www.neti.ee","LifeStreet":"lfstmedia.com","Finderoo":"www.finderoo.com","Virgin":"webmail.virginbroadband.com.au","Latne":"www.latne.lv","Meinestadt":"www.meinestadt.de","Google Video":"video.google.com","Babylon":"search.babylon.com","Mixi":"mixi.jp","Twitter":"twitter.com","earthlink":"search.earthlink.net","Pinterest":"pinterest.com","Online.no":"online.no","Foursquare":"foursquare.com","Skynet":"www.skynet.be","Amazon":"amazon.com","Crawler":"www.crawler.com","Voila":"search.ke.voila.fr","Orange":"busca.orange.es","Apollo Latvia":"apollo.lv/portal/search/","Zoeken":"www.zoeken.nl","Vinden":"www.vinden.nl","163 Mail":"mail.163.com","Google Images":"google.ac/imgres","Opplysningen 1881":"www.1881.no","Classmates":"classmates.com","Jivox":"jivox.com","Naver Mail":"mail.naver.com","Arianna":"arianna.libero.it","Skyrock":"skyrock.com","Eksi Sozluk":"Sozluk.com","goo":"search.goo.ne.jp","Hacker News":"news.ycombinator.com","Metager":"meta.rrzn.uni-hannover.de","Witch":"www.witch.de","suche.info":"suche.info","SoSoDesk":"sosodesktop.com","Fixsuche":"www.fixsuche.de","Everyclick":"www.everyclick.com","Weborama":"www.weborama.com","Freenet":"webmail.freenet.de","Icerockeet":"blogs.icerocket.com","Vkontakte":"vk.com","Firstfind":"www.firstsfind.com","SourceForge":"sourceforge.net","Donanimhaber":"donanimhaber.com","OpenX":"us-ads.openx.net","Qualigo":"www.qualigo.at","Zoohoo":"zoohoo.cz","Aport":"sm.aport.ru","Tribal Fusion":"cdnx.tribalfusion.com","Ecosia":"ecosia.org","Nate":"search.nate.com","Last.fm":"lastfm.ru","Jyxo":"jyxo.1188.cz","Flixster":"flixster.com","Youtube":"youtube.com","Eniro":"www.eniro.se","Needtofind":"ko.search.need2find.com","Disqus":"redirect.disqus.com","Eyeota":"eyeota.net","PubMatic":"sshowads.pubmatic.com","Holmes":"holmes.ge","Looksmart":"www.looksmart.com","Yatedo":"www.yatedo.com","Telstra":"search.media.telstra.com.au","El Mundo":"ariadna.elmundo.es","Baidu":"www.baidu.com","Trusted-Search":"www.trusted--search.com","WeeWorld":"weeworld.com","MetaCrawler.de":"s1.metacrawler.de","maailm":"www.maailm.com","RPMFind":"rpmfind.net","British Telecommunications":"search.bt.com","WWW":"search.www.ee","AdRoll":"adroll.com","Hit-Parade":"req.-hit-parade.com","Tagged":"login.tagged.com","Paper.li":"paper.li","AudienceScience":"wunderloop.net","Marktplaats":"www.marktplaats.nl","StickyADS.tv":"stickyadstv.com","MyLife":"mylife.ru","Yahoo! Mail":"mail.yahoo.net","Search This":"www.searchthis.com","XING":"xing.com","Monstercrawler":"www.monstercrawler.com","Habbo":"habbo.com","MyHeritage":"myheritage.com","La Toile Du Quebec Via Google":"www.toile.com","Gnadenmeer":"www.gnadenmeer.de","Ask":"ask.com","Yippy":"search.yippy.com","Bigpond":"webmail.bigpond.com","Seznam Mail":"email.seznam.cz","Plazoo":"www.plazoo.com","Goyellow.de":"www.goyellow.de","Fluct":"adingo.jp","LinkedIn":"linkedin.com","PeoplePC":"search.peoplepc.com","I.ua":"search.i.ua","Mixpo":"mixpo.com","Hotbot":"www.hotbot.com","Daum Mail":"mail2.daum.net","Cuil":"www.cuil.com","Francite":"recherche.francite.com","Maxwebsearch":"maxwebsearch.com","Sovrn":"lijit.com","Daum":"search.daum.net","Suchmaschine.com":"www.suchmaschine.com","Myspace":"myspace.com","Instela":"instela.com","Forestle":"forestle.org","Alice Adsl":"rechercher.aliceadsl.fr","Zoek":"www3.zoek.nl","Certified-Toolbar":"search.certified-toolbar.com","MicroAd":"microad.jp","IXquick":"ixquick.com","Trouvez.com":"www.trouvez.com","Globososo":"searches.globososo.com","Startsiden":"www.startsiden.no","LiveJournal":"livejournal.ru","Daemon search":"daemon-search.com","Hocam.com":"hocam.com","Sogou":"www.sougou.com","Renren":"renren.com","Mamma":"www.mamma.com","Fireball":"www.fireball.de","Neustar AdAdvisor":"adadvisor.net","Technorati":"technorati.com","myYearbook":"myyearbook.com","Poisk.ru":"poisk.ru","Mister Wong":"www.mister-wong.com","MoiKrug.ru":"moikrug.ru","Casale Media":"casalemedia.com","Google Blogsearch":"blogsearch.google.ac","The Smart Search":"thesmartsearch.net","all.by":"all.by","Multiply":"multiply.com","Ilse":"www.ilse.nl","DasOertliche":"www.dasoertliche.de","Genieo":"search.genieo.com","Zhongsou":"p.zhongsou.com","Kvasir":"www.kvasir.no","kununu":"kununu.com","StudiVZ":"studivz.net","I-play":"start.iplay.com","iiNet":"webmail.iinet.net.au","DasTelefonbuch":"www1.dastelefonbuch.de","Tut.by":"search.tut.by","Interia":"www.google.interia.pl","Naver":"search.naver.com","Facebook":"facebook.com","Yam":"search.yam.com","Acoon":"www.acoon.de","Searchalot":"searchalot.com","StumbleUpon":"stumbleupon.com","Taboola":"trc.taboola.com","Abacho":"www.abacho.de","Acuity Ads":"acuityplatform.com","Meta":"meta.ua","Cyworld":"global.cyworld.com","canoe.ca":"web.canoe.ca","DuckDuckGo":"duckduckgo.com","Identi.ca":"identi.ca","Bluewin":"search.bluewin.ch","Taringa!":"taringa.net","Teoma":"www.teoma.com","Mynet Mail":"mail.mynet.com","InfoSpace":"infospace.com","arama":"arama.com","Delfi":"otsing.delfi.ee","TalkTalk":"www.talktalk.co.uk","hi5":"hi5.com","HighBeam":"www.highbeam.com","uol.com.br":"busca.uol.com.br","Westnet":"webmail.westnet.com.au","Fotolog":"fotolog.com","Arcor":"www.arcor.de","Search.com":"www.search.com"} +{"Friends Reunited":"friendsreunited.com","I-play":"start.iplay.com","White Pages":"www.whitepages.com.au","Eniro":"www.eniro.se","Kvasir":"www.kvasir.no","Geona":"geona.net","Neustar AdAdvisor":"adadvisor.net","Wirtualna Polska":"szukaj.wp.pl","Tagged":"login.tagged.com","Liveinternet":"liveinternet.ru","Xanga":"xanga.com","Metager2":"metager2.de","Commander":"webmail.commander.net.au","QIP":"mail.qip.ru","Plazoo":"www.plazoo.com","Globososo":"searches.globososo.com","Zoohoo":"zoohoo.cz","Eurip":"www.eurip.com","Web.de":"suche.web.de","Mailchimp":"com.mailchimp.mailchimp","DuckDuckGo":"duckduckgo.com","Google Blogsearch":"blogsearch.google.ac","Searchy":"www.searchy.co.uk","MySearch":"mysearch.com","AppNexus":"ib.adnxs.com","Outbrain":"paid.outbrain.com","Metager":"meta.rrzn.uni-hannover.de","Search.com":"www.search.com","Westnet":"webmail.westnet.com.au","BlackPlanet":"blackplanet.com","Arcor":"www.arcor.de","Bing":"bing.com","Orange Webmail":"orange.fr/webmail","Nasza-klasa.pl":"nk.pl","URL.ORGanizier":"www.url.org","HighBeam":"www.highbeam.com","Instagram":"instagram.com","Rubicon Project":"optimized-by.rubiconproject.com","Mixi":"mixi.jp","Web.nl":"www.web.nl","X-recherche":"www.x-recherche.com","Freenet":"webmail.freenet.de","Altavista":"www.altavista.com","Teoma":"www.teoma.com","2gis":"2gis.ru","Google Video":"video.google.com","Viadeo":"viadeo.com","DasOertliche":"www.dasoertliche.de","canoe.ca":"web.canoe.ca","Sapo":"pesquisa.sapo.pt","TikTok":"tiktok.com","Voila":"search.ke.voila.fr","Sovrn":"lijit.com","Icerockeet":"blogs.icerocket.com","Babylon":"search.babylon.com","Yandex Images":"images.yandex.ru","Euroseek":"www.euroseek.com","Hacker News":"news.ycombinator.com","Vimeo":"vimeo.com","Shenma":"so.m.sm.cn","AOL":"search.aol.com","Tribal Fusion":"cdnx.tribalfusion.com","Flyingbird":"inspsearch.com","Austronaut":"www2.austronaut.at","Hocam.com":"hocam.com","Yahoo! Mail":"mail.yahoo.net","LinkedIn":"com.linkedin.android","SoSoDesk":"sosodesktop.com","Yippy":"search.yippy.com","PubMatic":"sshowads.pubmatic.com","Fireball":"www.fireball.de","Adition":"adition.com","arama":"arama.com","Hyves":"hyves.nl","Classmates":"classmates.com","Lo.st":"lo.st","Google+":"url.google.com","DasTelefonbuch":"www1.dastelefonbuch.de","E1.ru":"mail.e1.ru","Forestle":"forestle.org","StackOverflow":"stackoverflow.com","AdRoll":"adroll.com","Zoho":"mail.zoho.com","TrovaRapido":"www.trovarapido.com","Finderoo":"www.finderoo.com","Dodo":"webmail.dodo.com.au","Flixster":"flixster.com","GitHub":"github.com","Instela":"instela.com","BidSwitch":"bidswitch.net","ONE by AOL":"nexage.com","Cuil":"www.cuil.com","Donanimhaber":"donanimhaber.com","Threads":"threads.net","Opplysningen 1881":"www.1881.no","Virgin":"webmail.virginbroadband.com.au","Google News":"news.google.ac","Acuity Ads":"acuityplatform.com","myYearbook":"myyearbook.com","SearchCanvas":"www.searchcanvas.com","Goyellow.de":"www.goyellow.de","Taringa!":"taringa.net","Nifty":"search.nifty.com","T-Online":"suche.t-online.de","Startsiden":"www.startsiden.no","Alice Adsl":"rechercher.aliceadsl.fr","MyLife":"mylife.ru","Acoon":"www.acoon.de","Freshweather":"www.fresh-weather.com","Last.fm":"lastfm.ru","Telegram":"web.telegram.org","LiveJournal":"livejournal.ru","Plaxo":"plaxo.com","GAIS":"gais.cs.ccu.edu.tw","Sogou":"www.sougou.com","ZEDO":"zedo.com","WhatsApp":"web.whatsapp.com","Lycos":"search.lycos.com","Paper.li":"paper.li","Criteo":"cas.jp.as.criteo.com","Zhongsou":"p.zhongsou.com","Everyclick":"www.everyclick.com","Adform":"adform.net","Picsearch":"www.picsearch.com","iPrimus":"webmail.iprimus.com.au","SourceForge":"sourceforge.net","Kataweb":"www.kataweb.it","dmoz":"dmoz.org","WWW":"search.www.ee","Sonico.com":"sonico.com","OpenX":"us-ads.openx.net","Vindex":"www.vindex.nl","Alexa":"alexa.com","Arianna":"arianna.libero.it","Fluct":"adingo.jp","AllTheWeb":"www.alltheweb.com","Nigma":"nigma.ru","XING":"xing.com","PeoplePC":"search.peoplepc.com","Online.no":"online.no","Myspace":"myspace.com","Witch":"www.witch.de","Monstercrawler":"www.monstercrawler.com","Meta":"meta.ua","Findwide":"search.findwide.com","Foursquare":"foursquare.com","Cyworld":"global.cyworld.com","British Telecommunications":"search.bt.com","Yahoo!":"finance.yahoo.com","Paperball":"www.paperball.de","AudienceScience":"wunderloop.net","Looksmart":"www.looksmart.com","all.by":"all.by","PriceRunner":"www.pricerunner.co.uk","Mastermail":"mastermail.ru","Excite":"search.excite.it","Blogdigger":"www.blogdigger.com","Apollo Latvia":"apollo.lv/portal/search/","Rakuten":"websearch.rakuten.co.jp","Firstfind":"www.firstsfind.com","Zoek":"www3.zoek.nl","Bebo":"bebo.com","Amazon":"amazon.com","Mail.ru":"e.mail.ru","Quora":"quora.com","Tildes":"tildes.net","Certified-Toolbar":"search.certified-toolbar.com","AdNET":"adnet.de","Hotbot":"www.hotbot.com","Outlook.com":"mail.live.com","Vkontakte":"m.vk.com","Twingly":"www.twingly.com","IXquick":"ixquick.com","WebSearch":"www.websearch.com","Digg":"digg.com","vKruguDruzei.ru":"vkrugudruzei.ru","Identi.ca":"identi.ca","Seznam":"search.seznam.cz","Mixpo":"mixpo.com","Sharelook":"www.sharelook.fr","El Mundo":"ariadna.elmundo.es","Compuserve":"websearch.cs.com","TalkTalk":"www.talktalk.co.uk","Bing Images":"bing.com/images/search","Douban":"douban.com","StumbleUpon":"stumbleupon.com","Poisk.ru":"poisk.ru","Yieldmo":"yieldmo.com","Yandex Maps":"maps.yandex.ru","Beeline":"post.ru","Trouvez.com":"www.trouvez.com","Ukr.net":"mail.ukr.net","Mister Wong":"www.mister-wong.com","Daum":"search.daum.net","Exalead":"www.exalead.fr","Suchmaschine.com":"www.suchmaschine.com","AdSpirit":"adspirit.de","Conduit":"search.conduit.com","ABCsøk":"abcsolk.no","Indeed":"de.indeed.com","WAYN":"wayn.com","ADFOX":"adfox.ru","Search This":"www.searchthis.com","uol.com.br":"busca.uol.com.br","Atlas":"searchatlas.centrum.cz","Biglobe":"cgi.search.biglobe.ne.jp","Seznam Mail":"email.seznam.cz","Uludag Sozluk":"uludagsozluk.com","Optus Zoo":"webmail.optuszoo.com.au","blekko":"blekko.com","Qualigo":"www.qualigo.at","Fast Browser Search":"www.fastbrowsersearch.com","LowerMyBills":"lowermybills.com","Delfi":"otsing.delfi.ee","Weibo":"weibo.com","Gule Sider":"www.gulesider.no","Zoeken":"www.zoeken.nl","Taboola":"trc.taboola.com","Tut.by":"search.tut.by","InfoSpace":"infospace.com","Yam":"search.yam.com","Trusted-Search":"www.trusted--search.com","Needtofind":"ko.search.need2find.com","GMX":"suche.gmx.net","126 Mail":"mail.126.com","ITU Sozluk":"itusozluk.com","Qzone":"qzone.qq.com","MyHeritage":"myheritage.com","Maxwebsearch":"maxwebsearch.com","Torg.Mail.ru":"torg.mail.ru","Apontador":"apontador.com.br","APOLL07":"apollo7.de","Ecosia":"ecosia.org","Dalesearch":"www.dalesearch.com","suche.info":"suche.info","iiNet":"webmail.iinet.net.au","Mamma":"www.mamma.com","Buzznet":"buzznet.com","Road Runner Search":"search.rr.com","WeeWorld":"weeworld.com","Toolbarhome":"www.toolbarhome.com","Odnoklassniki":"odnoklassniki.ru","Inbox.com":"inbox.com","Vodafone":"webmail.vodafone.co.nz","Hooseek.com":"www.hooseek.com","Zapmeta":"www.zapmeta.com","Tuenti":"tuenti.com","Adam Internet":"webmail.adam.com.au","Snapdo":"search.snapdo.com","Startpagina":"startgoogle.startpagina.nl","Aport":"sm.aport.ru","Pocket":"getpocket.com","Jungle Key":"junglekey.com","I.ua":"search.i.ua","Eyeota":"eyeota.net","Eksi Sozluk":"Sozluk.com","Renren":"renren.com","Qwant":"www.qwant.com","Terra":"buscador.terra.es","Naver Mail":"mail.naver.com","Delfi latvia":"smart.delfi.lv","Netlog":"netlog.com","Skynet":"www.skynet.be","Daemon search":"daemon-search.com","Badoo":"badoo.com","StickyADS.tv":"stickyadstv.com","Yandex.Market":"market.yandex.ru","Interia":"www.google.interia.pl","Yasni":"www.yasni.de","Searchalot":"searchalot.com","Hit-Parade":"req.-hit-parade.com","Gnadenmeer":"www.gnadenmeer.de","Rambler":"mail.rambler.ru","Gomeo":"www.gomeo.com","Windows Live Spaces":"login.live.com","Sibmail":"sibmail.com","Disqus":"redirect.disqus.com","Mozo":"mozo.com.au","Snapchat":"com.snapchat.android","Whirlpool":"forums.whirlpool.net.au","AOL Mail":"mail.aol.com","Crawler":"www.crawler.com","Najdi":"www.najdi.si","Jyxo":"jyxo.1188.cz","Orange":"busca.orange.es","Tumblr":"tumblr.com","La Toile Du Quebec Via Google":"www.toile.com","Netspace":"webmail.netspace.net.au","Doubleclick":"ad.doubleclick.net","earthlink":"com.earthlink.myearthlink","Twitter":"twitter.com","Ilse":"www.ilse.nl","Jungle Spider":"www.jungle-spider.de","Softonic":"search.softonic.com","Youtube":"youtube.com","Volny":"web.volny.cz","Habbo":"habbo.com","Coccoc":"coccoc.com","Charter":"www.charter.net","1&1":"search.1and1.com","Gigablast":"www.gigablast.com","Flickr":"flickr.com","Google":"support.google.com","LifeStreet":"lfstmedia.com","Search.ch":"www.search.ch","QQ Mail":"mail.qq.com","Casale Media":"casalemedia.com","Weborama":"www.weborama.com","Neti":"www.neti.ee","Flashtalking":"flashtalking.com","Virgilio":"ricerca.virgilio.it","Yandex":"mail.yandex.ru","163 Mail":"mail.163.com","StepStone":"www.stepstone.de","Onet":"szukaj.onet.pl","Fixsuche":"www.fixsuche.de","Clix":"pesquisa.clix.pt","Baidu":"www.baidu.com","Telstra":"search.media.telstra.com.au","Ask":"ask.com","qip":"search.qip.ru","Vinden":"www.vinden.nl","Plista":"farm.plista.com","SteelHouse":"steelhousemedia.com","Winamp":"search.winamp.com","Sociomantic Labs":"sociomantic.com","Sonobi":"sonobi.com","Jivox":"jivox.com","Mynet Mail":"mail.mynet.com","Inci Sozluk":"inci.sozlukspot.com","YouGoo":"www.yougoo.fr","Centrum":"serach.centrum.cz","Reddit":"reddit.com","Francite":"recherche.francite.com","Google Product Search":"google.ac/products","2degrees":"webmail.2degreesbroadband.co.nz","Ask Toolbar":"search.tb.ask.com","Holmes":"holmes.ge","Gmail":"mail.google.com","eo":"eo.st","Yahoo! Images":"image.yahoo.cn","Fotolog":"fotolog.com","Naver":"search.naver.com","Pinterest":"pinterest.ca","Freecause":"search.freecause.com","Skyrock":"skyrock.com","Delicious":"delicious.com","Nate":"search.nate.com","Tiscali":"search.tiscali.it","Tixuma":"www.tixuma.de","MoiKrug.ru":"moikrug.ru","Sizmek":"bs.serving-sys.com","Multiply":"multiply.com","Orkut":"orkut.com","kununu":"kununu.com","UKR.net":"search.ukr.net","RPMFind":"rpmfind.net","Abacho":"www.abacho.de","Free":"search.free.fr","Friendster":"friendster.com","Meinestadt":"www.meinestadt.de","Lilo":"search.lilo.org","ICQ":"www.icq.com","FriendFeed":"friendfeed.com","Slack":"app.slack.com","Mozbot":"www.mozbot.fr","Genieo":"search.genieo.com","The Smart Search":"thesmartsearch.net","Yandex.Direct":"an.yandex.ru","Daum Mail":"mail2.daum.net","1und1":"search.1und1.de","goo":"search.goo.ne.jp","maailm":"www.maailm.com","1.cz":"1.cz","Suchnase":"www.suchnase.de","Technorati":"technorati.com","Naver Images":"image.search.naver.com","Gaia Online":"gaiaonline.com","MetaCrawler.de":"s1.metacrawler.de","Blogpulse":"www.blogpulse.com","MicroAd":"microad.jp","StudiVZ":"studivz.net","Facebook":"facebook.com","Monster":"www.monster.be","hi5":"hi5.com","Bluewin":"search.bluewin.ch","Yatedo":"www.yatedo.com","Walhello":"www.walhello.info","Flix":"www.flix.de","Google Images":"google.ac/imgres","360.cn":"so.360.cn","Comcast":"serach.comcast.net","Skype":"web.skype.com","Latne":"www.latne.lv","Bigpond":"webmail.bigpond.com","Marktplaats":"www.marktplaats.nl","Geni":"geni.com","Price.ru":"price.ru"} diff --git a/test/plausible/ingestion/source_test.exs b/test/plausible/ingestion/source_test.exs new file mode 100644 index 000000000..2037f1653 --- /dev/null +++ b/test/plausible/ingestion/source_test.exs @@ -0,0 +1,4 @@ +defmodule Plausible.Ingestion.SourceTest do + use ExUnit.Case, async: true + doctest Plausible.Ingestion.Source +end diff --git a/test/plausible_web/controllers/api/external_controller_test.exs b/test/plausible_web/controllers/api/external_controller_test.exs index 0e33cbc4e..fd84e99f3 100644 --- a/test/plausible_web/controllers/api/external_controller_test.exs +++ b/test/plausible_web/controllers/api/external_controller_test.exs @@ -1944,6 +1944,1210 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do end end + describe "custom source parsing rules" do + setup do + site = insert(:site) + {:ok, site: site} + end + + test "threads is Threads", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com?utm_source=threads", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Threads" + assert session.utm_source == "threads" + assert session.channel == "Organic Social" + end + + test "ig is Instagram", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com?utm_source=ig", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Instagram" + assert session.utm_source == "ig" + assert session.channel == "Organic Social" + end + + test "yt is Youtube", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com?utm_source=yt", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Youtube" + assert session.utm_source == "yt" + assert session.channel == "Organic Video" + end + + test "yt-ads is Youtube paid", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com?utm_source=yt-ads", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Youtube" + assert session.utm_source == "yt-ads" + assert session.channel == "Paid Video" + end + + test "fb is Facebook", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com?utm_source=fb", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Facebook" + assert session.utm_source == "fb" + assert session.channel == "Organic Social" + end + + test "fb-ads is Facebook", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com?utm_source=fb-ads", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Facebook" + assert session.utm_source == "fb-ads" + assert session.channel == "Paid Social" + end + + test "fbad is Facebook", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com?utm_source=fbad", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Facebook" + assert session.utm_source == "fbad" + assert session.channel == "Paid Social" + end + + test "facebook-ads is Facebook", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com?utm_source=facebook-ads", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Facebook" + assert session.utm_source == "facebook-ads" + assert session.channel == "Paid Social" + end + + test "Reddit-ads is Reddit", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com?utm_source=Reddit-ads", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Reddit" + assert session.utm_source == "Reddit-ads" + assert session.channel == "Paid Social" + end + + test "google_ads is Google", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com?utm_source=google_ads", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Google" + assert session.utm_source == "google_ads" + assert session.channel == "Paid Search" + end + + test "Google-ads is Google", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com?utm_source=Google-ads", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Google" + assert session.utm_source == "Google-ads" + assert session.channel == "Paid Search" + end + + test "utm_source=Adwords is Google paid search", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com?utm_source=Adwords", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Google" + assert session.utm_source == "Adwords" + assert session.channel == "Paid Search" + end + + test "twitter-ads is Twitter", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com?utm_source=twitter-ads", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Twitter" + assert session.utm_source == "twitter-ads" + assert session.channel == "Paid Social" + end + + test "android-app://com.reddit.frontpage is Reddit", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "android-app://com.reddit.frontpage", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Reddit" + assert session.channel == "Organic Social" + end + + test "perplexity.ai is Perplexity", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://perplexity.ai", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Perplexity" + assert session.channel == "Organic Search" + end + + test "utm_source=perplexity is Perplexity", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com?utm_source=perplexity", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Perplexity" + assert session.channel == "Organic Search" + end + + test "statics.teams.cdn.office.net is Microsoft Teams", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://statics.teams.cdn.office.net", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Microsoft Teams" + assert session.channel == "Organic Social" + end + + test "wikipedia domain is resolved as Wikipedia", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://en.wikipedia.org", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Wikipedia" + assert session.channel == "Referral" + end + + test "ntp.msn.com is Bing", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://ntp.msn.com", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Bing" + assert session.channel == "Organic Search" + end + + test "search.brave.com is Brave", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://search.brave.com", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Brave" + assert session.channel == "Organic Search" + end + + test "yandex.com.tr is Yandex", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://yandex.com.tr", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Yandex" + assert session.channel == "Organic Search" + end + + test "yandex.kz is Yandex", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://yandex.kz", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Yandex" + assert session.channel == "Organic Search" + end + + test "ya.ru is Yandex", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://ya.ru", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Yandex" + assert session.channel == "Organic Search" + end + + test "yandex.uz is Yandex", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://yandex.uz", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Yandex" + assert session.channel == "Organic Search" + end + + test "yandex.fr is Yandex", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://yandex.fr", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Yandex" + assert session.channel == "Organic Search" + end + + test "yandex.eu is Yandex", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://yandex.eu", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Yandex" + assert session.channel == "Organic Search" + end + + test "yandex.tm is Yandex", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://yandex.tm", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Yandex" + assert session.channel == "Organic Search" + end + + test "discord.com is Discord", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://discord.com", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Discord" + assert session.channel == "Organic Social" + end + + test "discordapp.com is Discord", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://discordapp.com", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Discord" + assert session.channel == "Organic Social" + end + + test "canary.discord.com is Discord", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://canary.discord.com", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Discord" + assert session.channel == "Organic Social" + end + + test "ptb.discord.com is Discord", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://ptb.discord.com", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Discord" + assert session.channel == "Organic Social" + end + + test "www.baidu.com is Baidu", %{conn: conn, site: site} do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://baidu.com", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Baidu" + assert session.channel == "Organic Search" + end + + test "t.me is Telegram", %{conn: conn, site: site} do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://t.me", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Telegram" + assert session.channel == "Organic Social" + end + + test "webk.telegram.org is Telegram", %{conn: conn, site: site} do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://webk.telegram.org", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Telegram" + assert session.channel == "Organic Social" + end + + test "sogou.com is Sogou", %{conn: conn, site: site} do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://sogou.com", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Sogou" + assert session.channel == "Organic Search" + end + + test "m.sogou.com is Sogou", %{conn: conn, site: site} do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://m.sogou.com", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Sogou" + assert session.channel == "Organic Search" + end + + test "wap.sogou.com is Sogou", %{conn: conn, site: site} do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://wap.sogou.com", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Sogou" + assert session.channel == "Organic Search" + end + + test "linktr.ee is Linktree", %{conn: conn, site: site} do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://linktr.ee", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Linktree" + assert session.channel == "Referral" + end + + test "linktree is Linktree", %{conn: conn, site: site} do + params = %{ + name: "pageview", + url: "http://example.com?utm_source=linktree", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Linktree" + assert session.channel == "Referral" + end + end + + describe "custom channel parsing rules" do + setup do + site = insert(:site) + {:ok, site: site} + end + + test "hacker news is social channel", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://news.ycombinator.com", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Hacker News" + assert session.channel == "Organic Social" + end + + test "yahoo is organic search", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://search.yahoo.com", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Yahoo!" + assert session.channel == "Organic Search" + end + + test "gmail is email channel", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://mail.google.com", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Gmail" + assert session.channel == "Email" + end + + test "utm_source=newsletter is email channel", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com?utm_source=Newsletter-UK", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Newsletter-UK" + assert session.channel == "Email" + end + + test "temu.com is shopping channel", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://temu.com", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "temu.com" + assert session.channel == "Organic Shopping" + end + + test "utm_source=Telegram is social channel", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com?utm_source=Telegram", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Telegram" + assert session.channel == "Organic Social" + end + + test "chatgpt.com is search channel", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://chatgpt.com", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "chatgpt.com" + assert session.channel == "Organic Search" + end + + test "Slack is social channel", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://app.slack.com", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Slack" + assert session.channel == "Organic Social" + end + + test "producthunt is social", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com?ref=producthunt", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "producthunt" + assert session.channel == "Organic Social" + end + + test "github is social", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://github.com", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "GitHub" + assert session.channel == "Organic Social" + end + + test "steamcommunity.com is social", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://steamcommunity.com", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "steamcommunity.com" + assert session.channel == "Organic Social" + end + + test "Vkontakte is social", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://vkontakte.ru", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Vkontakte" + assert session.channel == "Organic Social" + end + + test "Threads is social", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://threads.net", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Threads" + assert session.channel == "Organic Social" + end + + test "Ecosia is search", %{ + conn: conn, + site: site + } do + params = %{ + name: "pageview", + url: "http://example.com", + referrer: "https://ecosia.org", + domain: site.domain + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + session = get_created_session(site) + + assert response(conn, 202) == "ok" + assert session.referrer_source == "Ecosia" + assert session.channel == "Organic Search" + end + end + describe "user_id generation" do setup do site = insert(:site)