Fixes for ?source and ?ref params (#4781)

* Store `?source` and `?ref` params in `utm_source` field

* Remove dead code
This commit is contained in:
Uku Taht 2024-11-06 13:12:41 +02:00 committed by GitHub
parent c0a8aa025c
commit c130c2a751
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 132 additions and 101 deletions

View File

@ -45,58 +45,66 @@ defmodule Plausible.Ingestion.Acquisition do
|> then(&(@custom_source_categories ++ &1)) |> then(&(@custom_source_categories ++ &1))
|> Enum.into(%{}) |> Enum.into(%{})
def get_channel(request, source) do def get_channel(source, utm_medium, utm_campaign, utm_source, click_id_param) do
source = source && String.downcase(source) get_channel_lowered(
String.downcase(source || ""),
String.downcase(utm_medium || ""),
String.downcase(utm_campaign || ""),
String.downcase(utm_source || ""),
click_id_param
)
end
defp get_channel_lowered(source, utm_medium, utm_campaign, utm_source, click_id_param) do
cond do cond do
cross_network?(request) -> "Cross-network" cross_network?(utm_campaign) -> "Cross-network"
paid_shopping?(request, source) -> "Paid Shopping" paid_shopping?(source, utm_campaign, utm_medium) -> "Paid Shopping"
paid_search?(request, source) -> "Paid Search" paid_search?(source, utm_medium, utm_source, click_id_param) -> "Paid Search"
paid_social?(request, source) -> "Paid Social" paid_social?(source, utm_medium, utm_source) -> "Paid Social"
paid_video?(request, source) -> "Paid Video" paid_video?(source, utm_medium, utm_source) -> "Paid Video"
display?(request) -> "Display" display?(utm_medium) -> "Display"
paid_other?(request) -> "Paid Other" paid_other?(utm_medium) -> "Paid Other"
organic_shopping?(request, source) -> "Organic Shopping" organic_shopping?(source, utm_campaign) -> "Organic Shopping"
organic_social?(request, source) -> "Organic Social" organic_social?(source, utm_medium) -> "Organic Social"
organic_video?(request, source) -> "Organic Video" organic_video?(source, utm_medium) -> "Organic Video"
search_source?(source) -> "Organic Search" search_source?(source) -> "Organic Search"
email?(request, source) -> "Email" email?(source, utm_source, utm_medium) -> "Email"
affiliates?(request) -> "Affiliates" affiliates?(utm_medium) -> "Affiliates"
audio?(request) -> "Audio" audio?(utm_medium) -> "Audio"
sms?(request) -> "SMS" sms?(utm_source, utm_medium) -> "SMS"
mobile_push_notifications?(request, source) -> "Mobile Push Notifications" mobile_push_notifications?(source, utm_medium) -> "Mobile Push Notifications"
referral?(request, source) -> "Referral" referral?(source, utm_medium) -> "Referral"
true -> "Direct" true -> "Direct"
end end
end end
defp cross_network?(request) do defp cross_network?(utm_campaign) do
String.contains?(query_param(request, "utm_campaign"), "cross-network") String.contains?(utm_campaign, "cross-network")
end end
defp paid_shopping?(request, source) do defp paid_shopping?(source, utm_campaign, utm_medium) do
(shopping_source?(source) or shopping_campaign?(request)) and paid_medium?(request) (shopping_source?(source) or shopping_campaign?(utm_campaign)) and paid_medium?(utm_medium)
end end
defp paid_search?(request, source) do defp paid_search?(source, utm_medium, utm_source, click_id_param) do
(search_source?(source) and paid_medium?(request)) or (search_source?(source) and paid_medium?(utm_medium)) or
(search_source?(source) and paid_source?(request)) or (search_source?(source) and paid_source?(utm_source)) or
(source == "google" and !!request.query_params["gclid"]) or (source == "google" and click_id_param == "gclid") or
(source == "bing" and !!request.query_params["msclkid"]) (source == "bing" and click_id_param == "msclkid")
end end
defp paid_social?(request, source) do defp paid_social?(source, utm_medium, utm_source) do
(social_source?(source) and paid_medium?(request)) or (social_source?(source) and paid_medium?(utm_medium)) or
(social_source?(source) and paid_source?(request)) (social_source?(source) and paid_source?(utm_source))
end end
defp paid_video?(request, source) do defp paid_video?(source, utm_medium, utm_source) do
(video_source?(source) and paid_medium?(request)) or (video_source?(source) and paid_medium?(utm_medium)) or
(video_source?(source) and paid_source?(request)) (video_source?(source) and paid_source?(utm_source))
end end
defp display?(request) do defp display?(utm_medium) do
query_param(request, "utm_medium") in [ utm_medium in [
"display", "display",
"banner", "banner",
"expandable", "expandable",
@ -105,17 +113,17 @@ defmodule Plausible.Ingestion.Acquisition do
] ]
end end
defp paid_other?(request) do defp paid_other?(utm_medium) do
paid_medium?(request) paid_medium?(utm_medium)
end end
defp organic_shopping?(request, source) do defp organic_shopping?(source, utm_campaign) do
shopping_source?(source) or shopping_campaign?(request) shopping_source?(source) or shopping_campaign?(utm_campaign)
end end
defp organic_social?(request, source) do defp organic_social?(source, utm_medium) do
social_source?(source) or social_source?(source) or
query_param(request, "utm_medium") in [ utm_medium in [
"social", "social",
"social-network", "social-network",
"social-media", "social-media",
@ -125,89 +133,68 @@ defmodule Plausible.Ingestion.Acquisition do
] ]
end end
defp organic_video?(request, source) do defp organic_video?(source, utm_medium) do
video_source?(source) or String.contains?(query_param(request, "utm_medium"), "video") video_source?(source) or String.contains?(utm_medium, "video")
end end
defp referral?(request, source) do defp referral?(source, utm_medium) do
query_param(request, "utm_medium") in ["referral", "app", "link"] or utm_medium in ["referral", "app", "link"] or source !== ""
!!source
end end
@email_tags ["email", "e-mail", "e_mail", "e mail", "newsletter"] @email_tags ["email", "e-mail", "e_mail", "e mail", "newsletter"]
defp email?(request, source) do defp email?(source, utm_source, utm_medium) do
email_source?(source) or email_source?(source) or
String.contains?(query_param(request, "utm_source"), @email_tags) or String.contains?(utm_source, @email_tags) or
String.contains?(query_param(request, "utm_medium"), @email_tags) String.contains?(utm_medium, @email_tags)
end end
defp affiliates?(request) do defp affiliates?(utm_medium) do
query_param(request, "utm_medium") == "affiliate" utm_medium == "affiliate"
end end
defp audio?(request) do defp audio?(utm_medium) do
query_param(request, "utm_medium") == "audio" utm_medium == "audio"
end end
defp sms?(request) do defp sms?(utm_source, utm_medium) do
query_param(request, "utm_source") == "sms" or utm_source == "sms" or utm_medium == "sms"
query_param(request, "utm_medium") == "sms"
end end
defp mobile_push_notifications?(request, source) do defp mobile_push_notifications?(source, utm_medium) do
medium = query_param(request, "utm_medium") String.ends_with?(utm_medium, "push") or
String.contains?(utm_medium, ["mobile", "notification"]) or
String.ends_with?(medium, "push") or
String.contains?(medium, ["mobile", "notification"]) or
source == "firebase" source == "firebase"
end end
defp shopping_source?(nil), do: false
defp shopping_source?(source) do defp shopping_source?(source) do
@source_categories[source] == "SOURCE_CATEGORY_SHOPPING" @source_categories[source] == "SOURCE_CATEGORY_SHOPPING"
end end
defp search_source?(nil), do: false
defp search_source?(source) do defp search_source?(source) do
@source_categories[source] == "SOURCE_CATEGORY_SEARCH" @source_categories[source] == "SOURCE_CATEGORY_SEARCH"
end end
defp social_source?(nil), do: false
defp social_source?(source) do defp social_source?(source) do
@source_categories[source] == "SOURCE_CATEGORY_SOCIAL" @source_categories[source] == "SOURCE_CATEGORY_SOCIAL"
end end
defp video_source?(nil), do: false
defp video_source?(source) do defp video_source?(source) do
@source_categories[source] == "SOURCE_CATEGORY_VIDEO" @source_categories[source] == "SOURCE_CATEGORY_VIDEO"
end end
defp email_source?(nil), do: false
defp email_source?(source) do defp email_source?(source) do
@source_categories[source] == "SOURCE_CATEGORY_EMAIL" @source_categories[source] == "SOURCE_CATEGORY_EMAIL"
end end
defp shopping_campaign?(request) do defp shopping_campaign?(utm_campaign) do
campaign_name = query_param(request, "utm_campaign") Regex.match?(~r/^(.*(([^a-df-z]|^)shop|shopping).*)$/, utm_campaign)
Regex.match?(~r/^(.*(([^a-df-z]|^)shop|shopping).*)$/, campaign_name)
end end
defp paid_medium?(request) do defp paid_medium?(utm_medium) do
medium = query_param(request, "utm_medium") Regex.match?(~r/^(.*cp.*|ppc|retargeting|paid.*)$/, utm_medium)
Regex.match?(~r/^(.*cp.*|ppc|retargeting|paid.*)$/, medium)
end end
defp paid_source?(request) do defp paid_source?(utm_source) do
query_param(request, "utm_source") Plausible.Ingestion.Source.paid_source?(utm_source)
|> Plausible.Ingestion.Source.paid_source?()
end
defp query_param(request, name) do
String.downcase(request.query_params[name] || "")
end end
end end

View File

@ -118,8 +118,8 @@ defmodule Plausible.Ingestion.Event do
drop_shield_rule_country: &drop_shield_rule_country/2, drop_shield_rule_country: &drop_shield_rule_country/2,
put_user_agent: &put_user_agent/2, put_user_agent: &put_user_agent/2,
put_basic_info: &put_basic_info/2, put_basic_info: &put_basic_info/2,
put_referrer: &put_referrer/2, put_source_info: &put_source_info/2,
put_utm_tags: &put_utm_tags/2, put_channel: &put_channel/2,
put_props: &put_props/2, put_props: &put_props/2,
put_revenue: &put_revenue/2, put_revenue: &put_revenue/2,
put_salts: &put_salts/2, put_salts: &put_salts/2,
@ -250,30 +250,41 @@ defmodule Plausible.Ingestion.Event do
}) })
end end
defp put_referrer(%__MODULE__{} = event, _context) do defp put_source_info(%__MODULE__{} = event, _context) do
source = Plausible.Ingestion.Source.resolve(event.request)
channel = Plausible.Ingestion.Acquisition.get_channel(event.request, source)
update_session_attrs(event, %{
channel: channel,
referrer_source: source,
referrer: Plausible.Ingestion.Source.format_referrer(event.request.referrer),
click_id_param: get_click_id_param(event.request.query_params)
})
end
defp put_utm_tags(%__MODULE__{} = event, _context) do
query_params = event.request.query_params query_params = event.request.query_params
tagged_source =
query_params["utm_source"] ||
query_params["source"] ||
query_params["ref"]
update_session_attrs(event, %{ update_session_attrs(event, %{
referrer_source: Plausible.Ingestion.Source.resolve(event.request),
referrer: Plausible.Ingestion.Source.format_referrer(event.request.referrer),
click_id_param: get_click_id_param(event.request.query_params),
utm_source: tagged_source,
utm_medium: query_params["utm_medium"], utm_medium: query_params["utm_medium"],
utm_source: query_params["utm_source"],
utm_campaign: query_params["utm_campaign"], utm_campaign: query_params["utm_campaign"],
utm_content: query_params["utm_content"], utm_content: query_params["utm_content"],
utm_term: query_params["utm_term"] utm_term: query_params["utm_term"]
}) })
end end
defp put_channel(%__MODULE__{} = event, _context) do
session = event.clickhouse_session_attrs
channel =
Plausible.Ingestion.Acquisition.get_channel(
session[:referrer_source],
session[:utm_medium],
session[:utm_campaign],
session[:utm_source],
session[:click_id_param]
)
update_session_attrs(event, %{channel: channel})
end
defp put_geolocation(%__MODULE__{} = event, _context) do defp put_geolocation(%__MODULE__{} = event, _context) do
case event.request.ip_classification do case event.request.ip_classification do
"anonymous_vpn_ip" -> "anonymous_vpn_ip" ->

View File

@ -406,6 +406,39 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do
session = get_created_session(site) session = get_created_session(site)
assert session.referrer_source == "betalist" assert session.referrer_source == "betalist"
assert session.utm_source == "betalist"
end
test "?ref param behaves like ?utm_source", %{conn: conn, site: site} do
params = %{
name: "pageview",
url: "http://www.example.com/?ref=betalist",
domain: site.domain
}
conn
|> post("/api/event", params)
session = get_created_session(site)
assert session.referrer_source == "betalist"
assert session.utm_source == "betalist"
end
test "?source param behaves like ?utm_source", %{conn: conn, site: site} do
params = %{
name: "pageview",
url: "http://www.example.com/?source=betalist",
domain: site.domain
}
conn
|> post("/api/event", params)
session = get_created_session(site)
assert session.referrer_source == "betalist"
assert session.utm_source == "betalist"
end end
test "if utm_source matches a capitalized form from ref_inspector, the capitalized form is recorded", test "if utm_source matches a capitalized form from ref_inspector, the capitalized form is recorded",
@ -2192,7 +2225,7 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do
} do } do
params = %{ params = %{
name: "pageview", name: "pageview",
url: "http://example.com?utm_source=Google-ads", url: "http://example.com?source=Google-ads",
domain: site.domain domain: site.domain
} }