Create functions and test acquisition channel logic in ClickHouse

Tests were lifted from test/plausible_web/controllers/api/external_controller_test.exs
This commit is contained in:
Karl-Aksel Puulmann 2024-10-18 17:02:51 +03:00
parent 5c1f50956a
commit a954291dfe
3 changed files with 236 additions and 0 deletions

View File

@ -0,0 +1,51 @@
defmodule Plausible.DataMigration.AquisitionChannel do
  @moduledoc """
  Creates the functions used to calculate the acquisition channel in ClickHouse.

  SQL files available at: priv/data_migrations/AquisitionChannel/sql

  The `acquisition_channel_functions` SQL file holds one `CREATE OR REPLACE FUNCTION`
  statement per function, separated by semicolons. Every statement is executed on its
  own, after two textual rewrites:

    * the cluster clause returned by `Plausible.MigrationUtils.on_cluster_statement/1`
      is inserted in front of the `AS` keyword that follows the function name
    * each `$SOURCE_CATEGORY_*` placeholder is turned into a positional
      `Array(String)` query parameter, bound to the sources of that category in
      `priv/ga4-source-categories.csv`
  """

  use Plausible.DataMigration, dir: "AquisitionChannel", repo: Plausible.IngestRepo

  @source_categories_path Application.app_dir(:plausible, "priv/ga4-source-categories.csv")

  # The CSV is read while compiling this module. Registering it as an external
  # resource makes the compiler rebuild the module whenever the file changes.
  @external_resource @source_categories_path

  # Map of category name => list of sources belonging to that category.
  @source_categories @source_categories_path
                     |> File.read!()
                     |> NimbleCSV.RFC4180.parse_string(skip_headers: true)
                     |> Enum.group_by(
                       fn [_source, category] -> category end,
                       fn [source, _category] -> source end
                     )

  # Order matters: the position of a category in this list is the index of the query
  # parameter that its `$<CATEGORY>` placeholder is rewritten to.
  @category_placeholders ~w(
    SOURCE_CATEGORY_SEARCH
    SOURCE_CATEGORY_SHOPPING
    SOURCE_CATEGORY_SOCIAL
    SOURCE_CATEGORY_VIDEO
  )

  # Positional parameters sent along with every statement (`nil` for a category that
  # is missing from the CSV, same as a plain map lookup).
  @source_category_params Enum.map(@category_placeholders, &Map.get(@source_categories, &1))

  @doc """
  Creates or replaces every acquisition channel function.

  ## Options

    * `:quiet` - forwarded to `do_run/3`, defaults to `false`

  Returns `:ok`.
  """
  @spec run(keyword()) :: :ok
  def run(opts \\ []) do
    on_cluster_statement = Plausible.MigrationUtils.on_cluster_statement("sessions_v2")

    unwrap("acquisition_channel_functions")
    |> String.split(";")
    |> Enum.map(&String.trim/1)
    # Whatever follows the last semicolon (typically just a newline) is not a statement.
    |> Enum.reject(&(&1 == ""))
    |> Enum.each(&create_function(&1, on_cluster_statement, opts))
  end

  # Rewrites a single CREATE FUNCTION statement and executes it.
  defp create_function(sql, on_cluster_statement, opts) do
    sql =
      sql
      |> inject_on_cluster(on_cluster_statement)
      |> bind_source_categories()

    # Statements start with `CREATE OR REPLACE FUNCTION <name>`, so the function name
    # is the fifth whitespace-separated token.
    name =
      sql
      |> String.split()
      |> Enum.at(4)

    do_run(name, sql,
      params: @source_category_params,
      quiet: Keyword.get(opts, :quiet, false)
    )
  end

  # Inserts the cluster clause before the first `AS` keyword only. Any whitespace
  # (including a line break) may surround the keyword, and a function replacement is
  # used so the clause is never interpreted as a regex replacement pattern.
  defp inject_on_cluster(sql, on_cluster_statement) do
    String.replace(sql, ~r/\sAS\s/, fn _match -> " #{on_cluster_statement} AS " end,
      global: false
    )
  end

  # Replaces each `$<CATEGORY>` placeholder with its positional query parameter.
  defp bind_source_categories(sql) do
    @category_placeholders
    |> Enum.with_index()
    |> Enum.reduce(sql, fn {category, index}, acc ->
      String.replace(acc, "$#{category}", "{$#{index}:Array(String)}")
    end)
  end
end

View File

@ -0,0 +1,82 @@
CREATE OR REPLACE FUNCTION acquisition_channel_cross_network AS (utm_campaign) ->
-- True when utm_campaign contains the substring cross-network (position yields 0 when it is absent).
position(utm_campaign, 'cross-network') > 0;
CREATE OR REPLACE FUNCTION acquisition_channel_paid_shopping AS (referrer_source, utm_medium, utm_campaign) ->
-- True when the medium is a paid one and, in addition, either the lowercased referrer source
-- is in the shopping source list (a placeholder bound by the migration) or the campaign is a shopping campaign.
acquisition_channel_paid_medium(utm_medium) AND
(has($SOURCE_CATEGORY_SHOPPING, lower(referrer_source)) OR acquisition_channel_shopping_campaign(utm_campaign));
CREATE OR REPLACE FUNCTION acquisition_channel_paid_search AS (referrer_source, utm_medium, click_id_source) ->
-- True in either of two cases:
--   1. the lowercased referrer source is in the search source list and the medium is a paid one
--   2. the referrer source is non-empty and equals click_id_source (compared without lowercasing)
(has($SOURCE_CATEGORY_SEARCH, lower(referrer_source)) and acquisition_channel_paid_medium(utm_medium)) OR
(not empty(referrer_source) AND referrer_source == click_id_source);
CREATE OR REPLACE FUNCTION acquisition_channel_paid_social AS (referrer_source, utm_medium) ->
-- True when the lowercased referrer source is in the social source list and the medium is a paid one.
has($SOURCE_CATEGORY_SOCIAL, lower(referrer_source)) AND acquisition_channel_paid_medium(utm_medium);
CREATE OR REPLACE FUNCTION acquisition_channel_paid_video AS (referrer_source, utm_medium) ->
-- True when the lowercased referrer source is in the video source list and the medium is a paid one.
has($SOURCE_CATEGORY_VIDEO, lower(referrer_source)) AND acquisition_channel_paid_medium(utm_medium);
CREATE OR REPLACE FUNCTION acquisition_channel_display AS (utm_medium) ->
-- True when utm_medium is exactly one of the display advertising media listed below.
utm_medium IN ('display', 'banner', 'expandable', 'interstitial', 'cpm');
CREATE OR REPLACE FUNCTION acquisition_channel_paid_medium AS (utm_medium) ->
-- True when the whole of utm_medium matches the anchored pattern below, that is when it
-- contains cp anywhere, is exactly ppc or retargeting, or starts with paid.
match(utm_medium, '^(.*cp.*|ppc|retargeting|paid.*)$');
CREATE OR REPLACE FUNCTION acquisition_channel_shopping_campaign AS (utm_campaign) ->
-- True when utm_campaign contains shopping, or contains shop either at the very start or
-- directly after a character that is outside the ranges a-d and f-z.
match(utm_campaign, '^(.*(([^a-df-z]|^)shop|shopping).*)$');
CREATE OR REPLACE FUNCTION acquisition_channel_organic_shopping AS (referrer_source, utm_campaign) ->
-- True when the lowercased referrer source is in the shopping source list or the campaign is a
-- shopping campaign. The medium is not inspected here.
has($SOURCE_CATEGORY_SHOPPING, lower(referrer_source)) OR acquisition_channel_shopping_campaign(utm_campaign);
CREATE OR REPLACE FUNCTION acquisition_channel_organic_social AS (referrer_source, utm_medium) ->
-- True when the lowercased referrer source is in the social source list or utm_medium is exactly
-- one of the social media spellings listed below.
has($SOURCE_CATEGORY_SOCIAL, lower(referrer_source)) OR utm_medium IN ( 'social', 'social-network', 'social-media', 'sm', 'social network', 'social media');
CREATE OR REPLACE FUNCTION acquisition_channel_organic_video AS (referrer_source, utm_medium) ->
-- True when the lowercased referrer source is in the video source list or utm_medium contains
-- the substring video.
has($SOURCE_CATEGORY_VIDEO, lower(referrer_source)) OR position(utm_medium, 'video') > 0;
CREATE OR REPLACE FUNCTION acquisition_channel_search_source AS (referrer_source) ->
-- True when the lowercased referrer source is in the search source list.
has($SOURCE_CATEGORY_SEARCH, lower(referrer_source));
CREATE OR REPLACE FUNCTION acquisition_channel_email AS (column) ->
-- True when the value contains email, optionally written with one hyphen, underscore or space
-- between the e and mail. The pattern is not anchored, so it may match anywhere in the value.
-- acquisition_channel applies this to utm_source and to utm_medium.
match(column, 'e[-_ ]?mail');
CREATE OR REPLACE FUNCTION acquisition_channel_affiliates AS (utm_medium) ->
-- True when utm_medium is exactly affiliate.
utm_medium == 'affiliate';
CREATE OR REPLACE FUNCTION acquisition_channel_audio AS (utm_medium) ->
-- True when utm_medium is exactly audio.
utm_medium == 'audio';
CREATE OR REPLACE FUNCTION acquisition_channel_sms AS (column) ->
-- True when the value is exactly sms. acquisition_channel applies this to utm_source and to utm_medium.
column == 'sms';
CREATE OR REPLACE FUNCTION acquisition_channel_mobile_push_notifications AS (utm_medium, referrer_source) ->
-- True when utm_medium ends with push, when utm_medium contains mobile or notification, or when
-- the referrer source is exactly firebase (compared without lowercasing).
endsWith(utm_medium, 'push') or
multiSearchAny(utm_medium, ['mobile', 'notification']) or
referrer_source == 'firebase';
CREATE OR REPLACE FUNCTION acquisition_channel_referral AS (utm_medium, referrer_source) ->
-- True when utm_medium is exactly referral, app or link, or when any referrer source is present.
utm_medium IN ('referral', 'app', 'link') or
not empty(referrer_source);
CREATE OR REPLACE FUNCTION acquisition_channel AS (referrer_source, utm_medium, utm_campaign, utm_source, click_id_source) ->
-- Classifies a visit into exactly one acquisition channel label.
-- multiIf walks the condition and label pairs in the order written and returns the label of the
-- first condition that holds, so the paid checks win over their organic counterparts and the
-- broad Referral check is only reached last. Direct is returned when nothing matches.
-- Email and SMS are each tested against utm_source first and utm_medium second.
-- The parameter list sits on the CREATE line, like every other definition in this file, because
-- the migration finds the spot for its cluster clause by looking at the text right after the name.
multiIf(
acquisition_channel_cross_network(utm_campaign), 'Cross-network',
acquisition_channel_paid_shopping(referrer_source, utm_medium, utm_campaign), 'Paid Shopping',
acquisition_channel_paid_search(referrer_source, utm_medium, click_id_source), 'Paid Search',
acquisition_channel_paid_social(referrer_source, utm_medium), 'Paid Social',
acquisition_channel_paid_video(referrer_source, utm_medium), 'Paid Video',
acquisition_channel_display(utm_medium), 'Display',
acquisition_channel_paid_medium(utm_medium), 'Paid Other',
acquisition_channel_organic_shopping(referrer_source, utm_campaign), 'Organic Shopping',
acquisition_channel_organic_social(referrer_source, utm_medium), 'Organic Social',
acquisition_channel_organic_video(referrer_source, utm_medium), 'Organic Video',
acquisition_channel_search_source(referrer_source), 'Organic Search',
acquisition_channel_email(utm_source), 'Email',
acquisition_channel_email(utm_medium), 'Email',
acquisition_channel_affiliates(utm_medium), 'Affiliates',
acquisition_channel_audio(utm_medium), 'Audio',
acquisition_channel_sms(utm_source), 'SMS',
acquisition_channel_sms(utm_medium), 'SMS',
acquisition_channel_mobile_push_notifications(utm_medium, referrer_source), 'Mobile Push Notifications',
acquisition_channel_referral(utm_medium, referrer_source), 'Referral',
'Direct'
);

View File

@ -0,0 +1,103 @@
defmodule Plausible.Ingestion.EventTest do
  use Plausible.DataCase

  # The ClickHouse functions have to exist before any "clickhouse test" below runs.
  setup_all _context do
    Plausible.DataMigration.AquisitionChannel.run(quiet: true)
  end

  # Query evaluated by every "clickhouse test"; the five positional parameters are
  # produced by `clickhouse_params/1`.
  @channel_query "SELECT acquisition_channel({$0:String}, {$1:String}, {$2:String}, {$3:String}, {$4:String})"

  # Argument order of the `acquisition_channel` ClickHouse function.
  @clickhouse_fields [:referrer_source, :utm_medium, :utm_campaign, :utm_source, :click_id_source]

  # One entry per scenario: the inputs that are present plus the `:expected` channel.
  # Inputs left out of an entry are absent (Elixir side) or empty strings (ClickHouse side).
  @static_tests [
    %{expected: "Direct"},
    %{utm_campaign: "cross-network", expected: "Cross-network"},
    %{utm_campaign: "shopping", utm_medium: "paid", expected: "Paid Shopping"},
    %{referrer_source: "shopify.com", utm_medium: "paid", expected: "Paid Shopping"},
    %{
      referrer_source: "shopify",
      utm_source: "shopify",
      utm_medium: "paid",
      expected: "Paid Shopping"
    },
    %{referrer_source: "DuckDuckGo", utm_medium: "paid", expected: "Paid Search"},
    %{referrer_source: "Google", click_id_source: "Google", expected: "Paid Search"},
    %{referrer_source: "DuckDuckGo", click_id_source: "Google", expected: "Organic Search"},
    %{referrer_source: "Bing", click_id_source: "Bing", expected: "Paid Search"},
    %{referrer_source: "DuckDuckGo", click_id_source: "Bing", expected: "Organic Search"},
    %{
      referrer_source: "google",
      utm_source: "google",
      utm_medium: "paid",
      expected: "Paid Search"
    },
    %{referrer_source: "TikTok", utm_medium: "paid", expected: "Paid Social"},
    %{
      referrer_source: "tiktok",
      utm_source: "tiktok",
      utm_medium: "paid",
      expected: "Paid Social"
    },
    %{referrer_source: "Youtube", utm_medium: "paid", expected: "Paid Video"},
    %{
      referrer_source: "youtube",
      utm_source: "youtube",
      utm_medium: "paid",
      expected: "Paid Video"
    },
    %{utm_medium: "banner", expected: "Display"},
    %{utm_medium: "cpc", expected: "Paid Other"},
    %{referrer_source: "walmart.com", expected: "Organic Shopping"},
    %{referrer_source: "walmart", utm_source: "walmart", expected: "Organic Shopping"},
    %{utm_campaign: "shop", expected: "Organic Shopping"},
    %{referrer_source: "Facebook", expected: "Organic Social"},
    %{referrer_source: "twitter", utm_source: "twitter", expected: "Organic Social"},
    %{utm_medium: "social", expected: "Organic Social"},
    %{referrer_source: "Vimeo", expected: "Organic Video"},
    %{referrer_source: "vimeo", utm_source: "vimeo", expected: "Organic Video"},
    %{utm_medium: "video", expected: "Organic Video"},
    %{referrer_source: "DuckDuckGo", expected: "Organic Search"},
    %{referrer_source: "duckduckgo", utm_source: "duckduckgo", expected: "Organic Search"},
    %{utm_medium: "referral", expected: "Referral"},
    %{referrer_source: "email", utm_source: "email", expected: "Email"},
    %{utm_medium: "email", expected: "Email"},
    %{utm_medium: "affiliate", expected: "Affiliates"},
    %{utm_medium: "audio", expected: "Audio"},
    %{referrer_source: "sms", utm_source: "sms", expected: "SMS"},
    %{utm_medium: "sms", expected: "SMS"},
    %{utm_medium: "app-push", expected: "Mobile Push Notifications"},
    %{utm_medium: "example-mobile", expected: "Mobile Push Notifications"},
    %{referrer_source: "othersite.com", expected: "Referral"}
  ]

  # Every scenario is checked twice: once against the Elixir implementation and once
  # against the ClickHouse functions, with the same expected channel.
  for {scenario, position} <- Enum.with_index(@static_tests, 1) do
    @tag test_data: scenario
    test "static test #{position} - #{Jason.encode!(scenario)}", %{test_data: test_data} do
      request = build_request(test_data)
      channel = Plausible.Ingestion.Acquisition.get_channel(request, test_data[:referrer_source])

      assert channel == test_data.expected
    end

    @tag test_data: scenario
    test "clickhouse test #{position} - #{Jason.encode!(scenario)}", %{test_data: test_data} do
      %{rows: [[channel]]} =
        Plausible.IngestRepo.query!(@channel_query, clickhouse_params(test_data))

      assert channel == test_data.expected
    end
  end

  # Builds the request map handed to the Elixir implementation. A click id parameter
  # is only set when the scenario names the matching click id source.
  defp build_request(test_data) do
    click_id_for = fn provider ->
      if test_data[:click_id_source] == provider, do: "123", else: nil
    end

    %{
      query_params: %{
        "utm_medium" => test_data[:utm_medium],
        "utm_campaign" => test_data[:utm_campaign],
        "utm_source" => test_data[:utm_source],
        "gclid" => click_id_for.("Google"),
        "msclkid" => click_id_for.("Bing")
      }
    }
  end

  # Positional parameters for the ClickHouse query; absent inputs become empty strings.
  defp clickhouse_params(test_data) do
    Enum.map(@clickhouse_fields, fn field -> test_data[field] || "" end)
  end
end