From a954291dfe4f9f430a102b8ccc536128379a23bb Mon Sep 17 00:00:00 2001
From: Karl-Aksel Puulmann
Date: Fri, 18 Oct 2024 17:02:51 +0300
Subject: [PATCH] Create functions and test acquisition channel logic in clickhouse

Tests were lifted from test/plausible_web/controllers/api/external_controller_test.exs
---
 .../data_migration/acquisition_channel.ex     |  70 ++++++++++++
 .../sql/acquisition_channel_functions.sql.eex |  82 ++++++++++++++
 test/plausible/ingestion/acquisition_test.exs | 105 ++++++++++++++++++
 3 files changed, 257 insertions(+)
 create mode 100644 lib/plausible/data_migration/acquisition_channel.ex
 create mode 100644 priv/data_migrations/AquisitionChannel/sql/acquisition_channel_functions.sql.eex
 create mode 100644 test/plausible/ingestion/acquisition_test.exs

diff --git a/lib/plausible/data_migration/acquisition_channel.ex b/lib/plausible/data_migration/acquisition_channel.ex
new file mode 100644
index 000000000..16879d636
--- /dev/null
+++ b/lib/plausible/data_migration/acquisition_channel.ex
@@ -0,0 +1,70 @@
+defmodule Plausible.DataMigration.AquisitionChannel do
+  @moduledoc """
+  Creates functions to calculate acquisition channel in ClickHouse.
+
+  Each `;`-separated statement of the SQL template is executed separately, with
+  the `$SOURCE_CATEGORY_*` placeholders bound to query parameters holding the
+  matching source lists from `priv/ga4-source-categories.csv`.
+
+  SQL files available at: priv/data_migrations/AquisitionChannel/sql
+  """
+  use Plausible.DataMigration, dir: "AquisitionChannel", repo: Plausible.IngestRepo
+
+  # Category name (second CSV column) => list of sources (first CSV column).
+  @source_categories Application.app_dir(:plausible, "priv/ga4-source-categories.csv")
+                     |> File.read!()
+                     |> NimbleCSV.RFC4180.parse_string(skip_headers: true)
+                     |> Enum.group_by(fn [_source, category] -> category end, fn [
+                                                                                   source,
+                                                                                   _category
+                                                                                 ] ->
+                       source
+                     end)
+
+  @doc """
+  Creates (or replaces) every function defined in the SQL template.
+
+  Options:
+
+    * `:quiet` - passed through to `do_run/3`; defaults to `false`
+  """
+  def run(opts \\ []) do
+    on_cluster_statement = Plausible.MigrationUtils.on_cluster_statement("sessions_v2")
+
+    unwrap("acquisition_channel_functions")
+    |> String.split(";")
+    |> Enum.map(&String.trim/1)
+    # Whatever follows the final `;` is whitespace only, not a statement to execute.
+    |> Enum.reject(&(&1 == ""))
+    |> Enum.each(&create_function(&1, on_cluster_statement, opts))
+  end
+
+  # Binds the placeholders of a single `CREATE ... FUNCTION` statement and executes it.
+  defp create_function(sql, on_cluster_statement, opts) do
+    sql =
+      sql
+      # Only the `AS` following the function name is rewritten. Matching any whitespace
+      # also covers statements where `AS` is the last token on its line.
+      |> String.replace(~r/\sAS\s/, " #{on_cluster_statement} AS ", global: false)
+      |> String.replace("$SOURCE_CATEGORY_SEARCH", "{$0:Array(String)}")
+      |> String.replace("$SOURCE_CATEGORY_SHOPPING", "{$1:Array(String)}")
+      |> String.replace("$SOURCE_CATEGORY_SOCIAL", "{$2:Array(String)}")
+      |> String.replace("$SOURCE_CATEGORY_VIDEO", "{$3:Array(String)}")
+
+    # Statements read `CREATE OR REPLACE FUNCTION <name> ...`: the name is the fifth word.
+    name =
+      sql
+      |> String.split()
+      |> Enum.at(4)
+
+    do_run(name, sql,
+      params: [
+        @source_categories["SOURCE_CATEGORY_SEARCH"],
+        @source_categories["SOURCE_CATEGORY_SHOPPING"],
+        @source_categories["SOURCE_CATEGORY_SOCIAL"],
+        @source_categories["SOURCE_CATEGORY_VIDEO"]
+      ],
+      quiet: Keyword.get(opts, :quiet, false)
+    )
+  end
+end
diff --git a/priv/data_migrations/AquisitionChannel/sql/acquisition_channel_functions.sql.eex b/priv/data_migrations/AquisitionChannel/sql/acquisition_channel_functions.sql.eex
new file mode 100644
index 000000000..9ff969430
--- /dev/null
+++ b/priv/data_migrations/AquisitionChannel/sql/acquisition_channel_functions.sql.eex
@@ -0,0 +1,82 @@
+CREATE OR REPLACE FUNCTION acquisition_channel_cross_network AS (utm_campaign) ->
+  position(utm_campaign, 'cross-network') > 0;
+
+CREATE OR REPLACE FUNCTION acquisition_channel_paid_shopping AS (referrer_source, utm_medium, utm_campaign) ->
+  acquisition_channel_paid_medium(utm_medium) AND
+  (has($SOURCE_CATEGORY_SHOPPING, lower(referrer_source)) OR acquisition_channel_shopping_campaign(utm_campaign));
+
+CREATE OR REPLACE FUNCTION acquisition_channel_paid_search AS (referrer_source, utm_medium, click_id_source) ->
+  (has($SOURCE_CATEGORY_SEARCH, lower(referrer_source)) and acquisition_channel_paid_medium(utm_medium)) OR
+  (not empty(referrer_source) AND referrer_source == click_id_source);
+
+CREATE OR REPLACE FUNCTION acquisition_channel_paid_social AS (referrer_source, utm_medium) ->
+  has($SOURCE_CATEGORY_SOCIAL, lower(referrer_source)) AND acquisition_channel_paid_medium(utm_medium);
+
+CREATE OR REPLACE FUNCTION acquisition_channel_paid_video AS (referrer_source, utm_medium) ->
+  has($SOURCE_CATEGORY_VIDEO, lower(referrer_source)) AND acquisition_channel_paid_medium(utm_medium);
+
+CREATE OR REPLACE FUNCTION acquisition_channel_display AS (utm_medium) ->
+  utm_medium IN ('display', 'banner', 'expandable', 'interstitial', 'cpm');
+
+CREATE OR REPLACE FUNCTION acquisition_channel_paid_medium AS (utm_medium) ->
+  match(utm_medium, '^(.*cp.*|ppc|retargeting|paid.*)$');
+
+CREATE OR REPLACE FUNCTION acquisition_channel_shopping_campaign AS (utm_campaign) ->
+  match(utm_campaign, '^(.*(([^a-df-z]|^)shop|shopping).*)$');
+
+CREATE OR REPLACE FUNCTION acquisition_channel_organic_shopping AS (referrer_source, utm_campaign) ->
+  has($SOURCE_CATEGORY_SHOPPING, lower(referrer_source)) OR acquisition_channel_shopping_campaign(utm_campaign);
+
+CREATE OR REPLACE FUNCTION acquisition_channel_organic_social AS (referrer_source, utm_medium) ->
+  has($SOURCE_CATEGORY_SOCIAL, lower(referrer_source)) OR utm_medium IN ( 'social', 'social-network', 'social-media', 'sm', 'social network', 'social media');
+
+CREATE OR REPLACE FUNCTION acquisition_channel_organic_video AS (referrer_source, utm_medium) ->
+  has($SOURCE_CATEGORY_VIDEO, lower(referrer_source)) OR position(utm_medium, 'video') > 0;
+
+CREATE OR REPLACE FUNCTION acquisition_channel_search_source AS (referrer_source) ->
+  has($SOURCE_CATEGORY_SEARCH, lower(referrer_source));
+
+CREATE OR REPLACE FUNCTION acquisition_channel_email AS (column) ->
+  match(column, 'e[-_ ]?mail');
+
+CREATE OR REPLACE FUNCTION acquisition_channel_affiliates AS (utm_medium) ->
+  utm_medium == 'affiliate';
+
+CREATE OR REPLACE FUNCTION acquisition_channel_audio AS (utm_medium) ->
+  utm_medium == 'audio';
+
+CREATE OR REPLACE FUNCTION acquisition_channel_sms AS (column) ->
+  column == 'sms';
+
+CREATE OR REPLACE FUNCTION acquisition_channel_mobile_push_notifications AS (utm_medium, referrer_source) ->
+  endsWith(utm_medium, 'push') or
+  multiSearchAny(utm_medium, ['mobile', 'notification']) or
+  referrer_source == 'firebase';
+
+CREATE OR REPLACE FUNCTION acquisition_channel_referral AS (utm_medium, referrer_source) ->
+  utm_medium IN ('referral', 'app', 'link') or
+  not empty(referrer_source);
+
+CREATE OR REPLACE FUNCTION acquisition_channel AS
+(referrer_source, utm_medium, utm_campaign, utm_source, click_id_source) -> multiIf(
+  acquisition_channel_cross_network(utm_campaign), 'Cross-network',
+  acquisition_channel_paid_shopping(referrer_source, utm_medium, utm_campaign), 'Paid Shopping',
+  acquisition_channel_paid_search(referrer_source, utm_medium, click_id_source), 'Paid Search',
+  acquisition_channel_paid_social(referrer_source, utm_medium), 'Paid Social',
+  acquisition_channel_paid_video(referrer_source, utm_medium), 'Paid Video',
+  acquisition_channel_display(utm_medium), 'Display',
+  acquisition_channel_paid_medium(utm_medium), 'Paid Other',
+  acquisition_channel_organic_shopping(referrer_source, utm_campaign), 'Organic Shopping',
+  acquisition_channel_organic_social(referrer_source, utm_medium), 'Organic Social',
+  acquisition_channel_organic_video(referrer_source, utm_medium), 'Organic Video',
+  acquisition_channel_search_source(referrer_source), 'Organic Search',
+  acquisition_channel_email(utm_source), 'Email',
+  acquisition_channel_email(utm_medium), 'Email',
+  acquisition_channel_affiliates(utm_medium), 'Affiliates',
+  acquisition_channel_audio(utm_medium), 'Audio',
+  acquisition_channel_sms(utm_source), 'SMS',
+  acquisition_channel_sms(utm_medium), 'SMS',
+  acquisition_channel_mobile_push_notifications(utm_medium, referrer_source), 'Mobile Push Notifications',
+  acquisition_channel_referral(utm_medium, referrer_source), 'Referral',
+  'Direct'
+);
diff --git a/test/plausible/ingestion/acquisition_test.exs b/test/plausible/ingestion/acquisition_test.exs
new file mode 100644
index 000000000..ec49dbac1
--- /dev/null
+++ b/test/plausible/ingestion/acquisition_test.exs
@@ -0,0 +1,105 @@
+defmodule Plausible.Ingestion.AcquisitionTest do
+  use Plausible.DataCase
+
+  # The ClickHouse functions must exist before the "clickhouse test" cases query them.
+  setup_all _context do
+    Plausible.DataMigration.AquisitionChannel.run(quiet: true)
+  end
+
+  # Every case is checked against both the Elixir implementation and the ClickHouse function.
+  @static_tests [
+    %{expected: "Direct"},
+    %{utm_campaign: "cross-network", expected: "Cross-network"},
+    %{utm_campaign: "shopping", utm_medium: "paid", expected: "Paid Shopping"},
+    %{referrer_source: "shopify.com", utm_medium: "paid", expected: "Paid Shopping"},
+    %{
+      referrer_source: "shopify",
+      utm_source: "shopify",
+      utm_medium: "paid",
+      expected: "Paid Shopping"
+    },
+    %{referrer_source: "DuckDuckGo", utm_medium: "paid", expected: "Paid Search"},
+    %{referrer_source: "Google", click_id_source: "Google", expected: "Paid Search"},
+    %{referrer_source: "DuckDuckGo", click_id_source: "Google", expected: "Organic Search"},
+    %{referrer_source: "Bing", click_id_source: "Bing", expected: "Paid Search"},
+    %{referrer_source: "DuckDuckGo", click_id_source: "Bing", expected: "Organic Search"},
+    %{
+      referrer_source: "google",
+      utm_source: "google",
+      utm_medium: "paid",
+      expected: "Paid Search"
+    },
+    %{referrer_source: "TikTok", utm_medium: "paid", expected: "Paid Social"},
+    %{
+      referrer_source: "tiktok",
+      utm_source: "tiktok",
+      utm_medium: "paid",
+      expected: "Paid Social"
+    },
+    %{referrer_source: "Youtube", utm_medium: "paid", expected: "Paid Video"},
+    %{
+      referrer_source: "youtube",
+      utm_source: "youtube",
+      utm_medium: "paid",
+      expected: "Paid Video"
+    },
+    %{utm_medium: "banner", expected: "Display"},
+    %{utm_medium: "cpc", expected: "Paid Other"},
+    %{referrer_source: "walmart.com", expected: "Organic Shopping"},
+    %{referrer_source: "walmart", utm_source: "walmart", expected: "Organic Shopping"},
+    %{utm_campaign: "shop", expected: "Organic Shopping"},
+    %{referrer_source: "Facebook", expected: "Organic Social"},
+    %{referrer_source: "twitter", utm_source: "twitter", expected: "Organic Social"},
+    %{utm_medium: "social", expected: "Organic Social"},
+    %{referrer_source: "Vimeo", expected: "Organic Video"},
+    %{referrer_source: "vimeo", utm_source: "vimeo", expected: "Organic Video"},
+    %{utm_medium: "video", expected: "Organic Video"},
+    %{referrer_source: "DuckDuckGo", expected: "Organic Search"},
+    %{referrer_source: "duckduckgo", utm_source: "duckduckgo", expected: "Organic Search"},
+    %{utm_medium: "referral", expected: "Referral"},
+    %{referrer_source: "email", utm_source: "email", expected: "Email"},
+    %{utm_medium: "email", expected: "Email"},
+    %{utm_medium: "affiliate", expected: "Affiliates"},
+    %{utm_medium: "audio", expected: "Audio"},
+    %{referrer_source: "sms", utm_source: "sms", expected: "SMS"},
+    %{utm_medium: "sms", expected: "SMS"},
+    %{utm_medium: "app-push", expected: "Mobile Push Notifications"},
+    %{utm_medium: "example-mobile", expected: "Mobile Push Notifications"},
+    %{referrer_source: "othersite.com", expected: "Referral"}
+  ]
+
+  for {test_data, index} <- Enum.with_index(@static_tests, 1) do
+    @tag test_data: test_data
+    test "static test #{index} - #{Jason.encode!(test_data)}", %{test_data: test_data} do
+      request = %{
+        query_params: %{
+          "utm_medium" => test_data[:utm_medium],
+          "utm_campaign" => test_data[:utm_campaign],
+          "utm_source" => test_data[:utm_source],
+          "gclid" => if(test_data[:click_id_source] == "Google", do: "123", else: nil),
+          "msclkid" => if(test_data[:click_id_source] == "Bing", do: "123", else: nil)
+        }
+      }
+
+      channel = Plausible.Ingestion.Acquisition.get_channel(request, test_data[:referrer_source])
+      assert channel == test_data.expected
+    end
+
+    @tag test_data: test_data
+    test "clickhouse test #{index} - #{Jason.encode!(test_data)}", %{test_data: test_data} do
+      %{rows: [[channel]]} =
+        Plausible.IngestRepo.query!(
+          "SELECT acquisition_channel({$0:String}, {$1:String}, {$2:String}, {$3:String}, {$4:String})",
+          [
+            test_data[:referrer_source] || "",
+            test_data[:utm_medium] || "",
+            test_data[:utm_campaign] || "",
+            test_data[:utm_source] || "",
+            test_data[:click_id_source] || ""
+          ]
+        )
+
+      assert channel == test_data.expected
+    end
+  end
+end