From c130c2a751cb44bef0df7f99c70f265c583c6162 Mon Sep 17 00:00:00 2001 From: Uku Taht Date: Wed, 6 Nov 2024 13:12:41 +0200 Subject: [PATCH] Fixes for `?source` and `?ref` params (#4781) * Store `?source` and `?ref` params in `utm_source` field * Remove dead code --- lib/plausible/ingestion/acquisition.ex | 155 ++++++++---------- lib/plausible/ingestion/event.ex | 43 +++-- .../api/external_controller_test.exs | 35 +++- 3 files changed, 132 insertions(+), 101 deletions(-) diff --git a/lib/plausible/ingestion/acquisition.ex b/lib/plausible/ingestion/acquisition.ex index d72dc11cb..c65a09cbf 100644 --- a/lib/plausible/ingestion/acquisition.ex +++ b/lib/plausible/ingestion/acquisition.ex @@ -45,58 +45,66 @@ defmodule Plausible.Ingestion.Acquisition do |> then(&(@custom_source_categories ++ &1)) |> Enum.into(%{}) - def get_channel(request, source) do - source = source && String.downcase(source) + def get_channel(source, utm_medium, utm_campaign, utm_source, click_id_param) do + get_channel_lowered( + String.downcase(source || ""), + String.downcase(utm_medium || ""), + String.downcase(utm_campaign || ""), + String.downcase(utm_source || ""), + click_id_param + ) + end + defp get_channel_lowered(source, utm_medium, utm_campaign, utm_source, click_id_param) do cond do - cross_network?(request) -> "Cross-network" - paid_shopping?(request, source) -> "Paid Shopping" - paid_search?(request, source) -> "Paid Search" - paid_social?(request, source) -> "Paid Social" - paid_video?(request, source) -> "Paid Video" - display?(request) -> "Display" - paid_other?(request) -> "Paid Other" - organic_shopping?(request, source) -> "Organic Shopping" - organic_social?(request, source) -> "Organic Social" - organic_video?(request, source) -> "Organic Video" + cross_network?(utm_campaign) -> "Cross-network" + paid_shopping?(source, utm_campaign, utm_medium) -> "Paid Shopping" + paid_search?(source, utm_medium, utm_source, click_id_param) -> "Paid Search" + paid_social?(source, utm_medium, utm_source) -> "Paid Social" + paid_video?(source, utm_medium, utm_source) -> "Paid Video" + display?(utm_medium) -> "Display" + paid_other?(utm_medium) -> "Paid Other" + organic_shopping?(source, utm_campaign) -> "Organic Shopping" + organic_social?(source, utm_medium) -> "Organic Social" + organic_video?(source, utm_medium) -> "Organic Video" search_source?(source) -> "Organic Search" - email?(request, source) -> "Email" - affiliates?(request) -> "Affiliates" - audio?(request) -> "Audio" - sms?(request) -> "SMS" - mobile_push_notifications?(request, source) -> "Mobile Push Notifications" - referral?(request, source) -> "Referral" + email?(source, utm_source, utm_medium) -> "Email" + affiliates?(utm_medium) -> "Affiliates" + audio?(utm_medium) -> "Audio" + sms?(utm_source, utm_medium) -> "SMS" + mobile_push_notifications?(source, utm_medium) -> "Mobile Push Notifications" + referral?(source, utm_medium) -> "Referral" true -> "Direct" end end - defp cross_network?(request) do - String.contains?(query_param(request, "utm_campaign"), "cross-network") + defp cross_network?(utm_campaign) do + String.contains?(utm_campaign, "cross-network") end - defp paid_shopping?(request, source) do - (shopping_source?(source) or shopping_campaign?(request)) and paid_medium?(request) + defp paid_shopping?(source, utm_campaign, utm_medium) do + (shopping_source?(source) or shopping_campaign?(utm_campaign)) and paid_medium?(utm_medium) end - defp paid_search?(request, source) do - (search_source?(source) and paid_medium?(request)) or - (search_source?(source) and paid_source?(request)) or - (source == "google" and !!request.query_params["gclid"]) or - (source == "bing" and !!request.query_params["msclkid"]) + defp paid_search?(source, utm_medium, utm_source, click_id_param) do + (search_source?(source) and paid_medium?(utm_medium)) or + (search_source?(source) and paid_source?(utm_source)) or + (source == "google" and click_id_param == "gclid") or + (source == "bing" and click_id_param == "msclkid") end - defp paid_social?(request, source) do - (social_source?(source) and paid_medium?(request)) or - (social_source?(source) and paid_source?(request)) + defp paid_social?(source, utm_medium, utm_source) do + (social_source?(source) and paid_medium?(utm_medium)) or + (social_source?(source) and paid_source?(utm_source)) end - defp paid_video?(request, source) do - (video_source?(source) and paid_medium?(request)) or - (video_source?(source) and paid_source?(request)) + defp paid_video?(source, utm_medium, utm_source) do + (video_source?(source) and paid_medium?(utm_medium)) or + (video_source?(source) and paid_source?(utm_source)) end - defp display?(request) do - query_param(request, "utm_medium") in [ + defp display?(utm_medium) do + utm_medium in [ "display", "banner", "expandable", @@ -105,17 +113,17 @@ defmodule Plausible.Ingestion.Acquisition do ] end - defp paid_other?(request) do - paid_medium?(request) + defp paid_other?(utm_medium) do + paid_medium?(utm_medium) end - defp organic_shopping?(request, source) do - shopping_source?(source) or shopping_campaign?(request) + defp organic_shopping?(source, utm_campaign) do + shopping_source?(source) or shopping_campaign?(utm_campaign) end - defp organic_social?(request, source) do + defp organic_social?(source, utm_medium) do social_source?(source) or - query_param(request, "utm_medium") in [ + utm_medium in [ "social", "social-network", "social-media", @@ -125,89 +133,68 @@ defmodule Plausible.Ingestion.Acquisition do ] end - defp organic_video?(request, source) do - video_source?(source) or String.contains?(query_param(request, "utm_medium"), "video") + defp organic_video?(source, utm_medium) do + video_source?(source) or String.contains?(utm_medium, "video") end - defp referral?(request, source) do - query_param(request, "utm_medium") in ["referral", "app", "link"] or - !!source + defp referral?(source, utm_medium) do + utm_medium in ["referral", "app", "link"] or source !== "" end @email_tags ["email", "e-mail", "e_mail", "e mail", "newsletter"] - defp email?(request, source) do + defp email?(source, utm_source, utm_medium) do email_source?(source) or - String.contains?(query_param(request, "utm_source"), @email_tags) or - String.contains?(query_param(request, "utm_medium"), @email_tags) + String.contains?(utm_source, @email_tags) or + String.contains?(utm_medium, @email_tags) end - defp affiliates?(request) do - query_param(request, "utm_medium") == "affiliate" + defp affiliates?(utm_medium) do + utm_medium == "affiliate" end - defp audio?(request) do - query_param(request, "utm_medium") == "audio" + defp audio?(utm_medium) do + utm_medium == "audio" end - defp sms?(request) do - query_param(request, "utm_source") == "sms" or - query_param(request, "utm_medium") == "sms" + defp sms?(utm_source, utm_medium) do + utm_source == "sms" or utm_medium == "sms" end - defp mobile_push_notifications?(request, source) do - medium = query_param(request, "utm_medium") - - String.ends_with?(medium, "push") or - String.contains?(medium, ["mobile", "notification"]) or + defp mobile_push_notifications?(source, utm_medium) do + String.ends_with?(utm_medium, "push") or + String.contains?(utm_medium, ["mobile", "notification"]) or source == "firebase" end - defp shopping_source?(nil), do: false - defp shopping_source?(source) do @source_categories[source] == "SOURCE_CATEGORY_SHOPPING" end - defp search_source?(nil), do: false - defp search_source?(source) do @source_categories[source] == "SOURCE_CATEGORY_SEARCH" end - defp social_source?(nil), do: false - defp social_source?(source) do @source_categories[source] == "SOURCE_CATEGORY_SOCIAL" end - defp video_source?(nil), do: false - defp video_source?(source) do @source_categories[source] == "SOURCE_CATEGORY_VIDEO" end - defp email_source?(nil), do: false - defp email_source?(source) do @source_categories[source] == "SOURCE_CATEGORY_EMAIL" end - defp shopping_campaign?(request) do - campaign_name = query_param(request, "utm_campaign") - Regex.match?(~r/^(.*(([^a-df-z]|^)shop|shopping).*)$/, campaign_name) + defp shopping_campaign?(utm_campaign) do + Regex.match?(~r/^(.*(([^a-df-z]|^)shop|shopping).*)$/, utm_campaign) end - defp paid_medium?(request) do - medium = query_param(request, "utm_medium") - Regex.match?(~r/^(.*cp.*|ppc|retargeting|paid.*)$/, medium) + defp paid_medium?(utm_medium) do + Regex.match?(~r/^(.*cp.*|ppc|retargeting|paid.*)$/, utm_medium) end - defp paid_source?(request) do - query_param(request, "utm_source") - |> Plausible.Ingestion.Source.paid_source?() - end - - defp query_param(request, name) do - String.downcase(request.query_params[name] || "") + defp paid_source?(utm_source) do + Plausible.Ingestion.Source.paid_source?(utm_source) end end diff --git a/lib/plausible/ingestion/event.ex b/lib/plausible/ingestion/event.ex index 313bf25df..7aa4d4c3b 100644 --- a/lib/plausible/ingestion/event.ex +++ b/lib/plausible/ingestion/event.ex @@ -118,8 +118,8 @@ defmodule Plausible.Ingestion.Event do drop_shield_rule_country: &drop_shield_rule_country/2, put_user_agent: &put_user_agent/2, put_basic_info: &put_basic_info/2, - put_referrer: &put_referrer/2, - put_utm_tags: &put_utm_tags/2, + put_source_info: &put_source_info/2, + put_channel: &put_channel/2, put_props: &put_props/2, put_revenue: &put_revenue/2, put_salts: &put_salts/2, @@ -250,30 +250,41 @@ defmodule Plausible.Ingestion.Event do }) end - defp put_referrer(%__MODULE__{} = event, _context) do - source = Plausible.Ingestion.Source.resolve(event.request) - channel = Plausible.Ingestion.Acquisition.get_channel(event.request, source) - - update_session_attrs(event, %{ - channel: channel, - referrer_source: source, - referrer: Plausible.Ingestion.Source.format_referrer(event.request.referrer), - click_id_param: get_click_id_param(event.request.query_params) - }) - end - - defp put_utm_tags(%__MODULE__{} = event, _context) do + defp put_source_info(%__MODULE__{} = event, _context) do query_params = event.request.query_params + tagged_source = + query_params["utm_source"] || + query_params["source"] || + query_params["ref"] + update_session_attrs(event, %{ + referrer_source: Plausible.Ingestion.Source.resolve(event.request), + referrer: Plausible.Ingestion.Source.format_referrer(event.request.referrer), + click_id_param: get_click_id_param(event.request.query_params), + utm_source: tagged_source, utm_medium: query_params["utm_medium"], - utm_source: query_params["utm_source"], utm_campaign: query_params["utm_campaign"], utm_content: query_params["utm_content"], utm_term: query_params["utm_term"] }) end + defp put_channel(%__MODULE__{} = event, _context) do + session = event.clickhouse_session_attrs + + channel = + Plausible.Ingestion.Acquisition.get_channel( + session[:referrer_source], + session[:utm_medium], + session[:utm_campaign], + session[:utm_source], + session[:click_id_param] + ) + + update_session_attrs(event, %{channel: channel}) + end + defp put_geolocation(%__MODULE__{} = event, _context) do case event.request.ip_classification do "anonymous_vpn_ip" -> diff --git a/test/plausible_web/controllers/api/external_controller_test.exs b/test/plausible_web/controllers/api/external_controller_test.exs index 7578b5681..dfb6c46c1 100644 --- a/test/plausible_web/controllers/api/external_controller_test.exs +++ b/test/plausible_web/controllers/api/external_controller_test.exs @@ -406,6 +406,39 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do session = get_created_session(site) assert session.referrer_source == "betalist" + assert session.utm_source == "betalist" + end + + test "?ref param behaves like ?utm_source", %{conn: conn, site: site} do + params = %{ + name: "pageview", + url: "http://www.example.com/?ref=betalist", + domain: site.domain + } + + conn + |> post("/api/event", params) + + session = get_created_session(site) + + assert session.referrer_source == "betalist" + assert session.utm_source == "betalist" + end + + test "?source param behaves like ?utm_source", %{conn: conn, site: site} do + params = %{ + name: "pageview", + url: "http://www.example.com/?source=betalist", + domain: site.domain + } + + conn + |> post("/api/event", params) + + session = get_created_session(site) + + assert session.referrer_source == "betalist" + assert session.utm_source == "betalist" end test "if utm_source matches a capitalized form from ref_inspector, the capitalized form is recorded", @@ -2192,7 +2225,7 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do } do params = %{ name: "pageview", - url: "http://example.com?utm_source=Google-ads", + url: "http://example.com?source=Google-ads", domain: site.domain }