From b4992cedc16f693c5b781b505ae78cb12279eabf Mon Sep 17 00:00:00 2001 From: RobertJoonas <56999674+RobertJoonas@users.noreply.github.com> Date: Thu, 10 Mar 2022 21:58:30 +0200 Subject: [PATCH] Referrer spam blocklist (#1750) * integrating blocklist library * loads blocklist dependency from Github --- lib/plausible/application.ex | 1 + .../controllers/api/external_controller.ex | 12 +++++++++++- mix.exs | 3 ++- mix.lock | 1 + .../api/external_controller_test.exs | 18 ++++++++++++++++++ 5 files changed, 33 insertions(+), 2 deletions(-) diff --git a/lib/plausible/application.ex b/lib/plausible/application.ex index 5e7f31285..d46aa5997 100644 --- a/lib/plausible/application.ex +++ b/lib/plausible/application.ex @@ -15,6 +15,7 @@ defmodule Plausible.Application do Plausible.Session.WriteBuffer, Plausible.Session.Store, Plausible.Session.Salts, + ReferrerBlocklist, {Oban, Application.get_env(:plausible, Oban)}, {Cachex, Keyword.merge(Application.get_env(:plausible, :user_agent_cache), name: :user_agents)} diff --git a/lib/plausible_web/controllers/api/external_controller.ex b/lib/plausible_web/controllers/api/external_controller.ex index 643f32077..af30ec0f2 100644 --- a/lib/plausible_web/controllers/api/external_controller.ex +++ b/lib/plausible_web/controllers/api/external_controller.ex @@ -83,7 +83,10 @@ defmodule PlausibleWeb.Api.ExternalController do ua = parse_user_agent(conn) - if is_bot?(ua) || params["domain"] in Application.get_env(:plausible, :domain_blacklist) do + blacklist_domain = params["domain"] in Application.get_env(:plausible, :domain_blacklist) + referrer_spam = is_spammer?(params["referrer"]) + + if is_bot?(ua) || blacklist_domain || referrer_spam do :ok else uri = params["url"] && URI.parse(params["url"]) @@ -163,6 +166,13 @@ defmodule PlausibleWeb.Api.ExternalController do defp is_bot?(_), do: false + defp is_spammer?(nil), do: false + + defp is_spammer?(referrer_str) do + uri = URI.parse(referrer_str) + ReferrerBlocklist.is_spammer?(strip_www(uri.host)) + end + defp parse_meta(params) do raw_meta = params["m"] || params["meta"] || params["p"] || params["props"] diff --git a/mix.exs b/mix.exs index c038c50d4..19eaa6269 100644 --- a/mix.exs +++ b/mix.exs @@ -101,7 +101,8 @@ defmodule Plausible.MixProject do {:opentelemetry_phoenix, "1.0.0-rc.5"}, {:opentelemetry_ecto, "1.0.0-rc.3"}, {:opentelemetry_oban, "~> 0.2.0-rc.2"}, - {:floki, "~> 0.32.0", only: :test} + {:floki, "~> 0.32.0", only: :test}, + {:referrer_blocklist, git: "https://github.com/plausible/referrer-blocklist.git"} ] end diff --git a/mix.lock b/mix.lock index 835391726..8d04a0ea8 100644 --- a/mix.lock +++ b/mix.lock @@ -88,6 +88,7 @@ "public_suffix": {:git, "https://github.com/axelson/publicsuffix-elixir", "89372422ab8b433de508519ef474e39699fd11ca", []}, "ranch": {:hex, :ranch, "1.8.0", "8c7a100a139fd57f17327b6413e4167ac559fbc04ca7448e9be9057311597a1d", [:make, :rebar3], [], "hexpm", "49fbcfd3682fab1f5d109351b61257676da1a2fdbe295904176d5e521a2ddfe5"}, "ref_inspector": {:hex, :ref_inspector, "1.3.1", "bb0489a4c4299dcd633f2b7a60c41a01f5590789d0b28225a60be484e1fbe777", [:mix], [{:hackney, "~> 1.0", [hex: :hackney, repo: "hexpm", optional: false]}, {:yamerl, "~> 0.7", [hex: :yamerl, repo: "hexpm", optional: false]}], "hexpm", "3172eb1b08e5c69966f796e3fe0e691257546fa143a5eb0ecc18a6e39b233854"}, + "referrer_blocklist": {:git, "https://github.com/plausible/referrer-blocklist.git", "773c495d1b5cde79f324eac38f02de90a356bd17", []}, "sentry": {:hex, :sentry, "8.0.6", "c8de1bf0523bc120ec37d596c55260901029ecb0994e7075b0973328779ceef7", [:mix], [{:hackney, "~> 1.8", [hex: :hackney, repo: "hexpm", optional: true]}, {:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: true]}, {:plug, "~> 1.6", [hex: :plug, repo: "hexpm", optional: true]}, {:plug_cowboy, "~> 2.3", [hex: :plug_cowboy, repo: "hexpm", optional: true]}], "hexpm", "051a2d0472162f3137787c7c9d6e6e4ef239de9329c8c45b1f1bf1e9379e1883"}, "siphash": {:hex, :siphash, "3.2.0", "ec03fd4066259218c85e2a4b8eec4bb9663bc02b127ea8a0836db376ba73f2ed", [:make, :mix], [], "hexpm", "ba3810701c6e95637a745e186e8a4899087c3b079ba88fb8f33df054c3b0b7c3"}, "sleeplocks": {:hex, :sleeplocks, "1.1.1", "3d462a0639a6ef36cc75d6038b7393ae537ab394641beb59830a1b8271faeed3", [:rebar3], [], "hexpm", "84ee37aeff4d0d92b290fff986d6a95ac5eedf9b383fadfd1d88e9b84a1c02e1"}, diff --git a/test/plausible_web/controllers/api/external_controller_test.exs b/test/plausible_web/controllers/api/external_controller_test.exs index 24032e567..2cfad4850 100644 --- a/test/plausible_web/controllers/api/external_controller_test.exs +++ b/test/plausible_web/controllers/api/external_controller_test.exs @@ -232,6 +232,24 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do assert pageview.referrer_source == "Facebook" end + test "ignores event when referrer is a spammer", %{conn: conn} do + params = %{ + domain: "ignore-spammers-test.com", + name: "pageview", + url: "http://gigride.live/", + referrer: "https://www.1-best-seo.com", + screen_width: 1440 + } + + conn = + conn + |> put_req_header("user-agent", @user_agent) + |> post("/api/event", params) + + assert response(conn, 202) == "ok" + assert !get_event("ignore-spammers-test.com") + end + test "ignores when referrer is internal", %{conn: conn} do params = %{ name: "pageview",