Property test for acquisition channels

This commit is contained in:
Karl-Aksel Puulmann 2024-10-18 18:13:26 +03:00
parent fdca817a7d
commit 3fa0e0e4eb
9 changed files with 1026 additions and 1 deletions

View File

@ -48,3 +48,6 @@ config :plausible, Plausible.HelpScout,
req_opts: [ req_opts: [
plug: {Req.Test, Plausible.HelpScout} plug: {Req.Test, Plausible.HelpScout}
] ]
# Property tests run more iterations when the CI environment variable is set
# (any value counts as set) and fewer otherwise.
stream_data_max_runs = if System.get_env("CI"), do: 1_000, else: 500

config :stream_data, max_runs: stream_data_max_runs

View File

@ -0,0 +1,3 @@
Google
Bing

View File

@ -0,0 +1,211 @@
127.0.0.1
account.ghost.org
ActiveCampaign
adsensecustomsearchads.com
adwords
Adwords
adwords2024
adyogi
ALLINMAIL
am
an
analogue.co
android
api_engagement
app
app_md
aqp.it
Baidu
Bing
bio.link
blog
brave
brevo
browser
camel-buy-btn
campaign-monitor
charlieward.tv
chatbox
chatgpt.com
checkout.stripe.com
Criteo
crm
discord.com
display
dito
dlvr.it
DuckDuckGo
dv360
email
e-mail
Email
en
en.wikipedia.org
Facebook
facebook_ads
facebook_feed_ad
Facebook_Mobile_Feed
facebook-ads
facebook-novo
facebook.com
facebookads
fantasanremo.com
fb
FB_IG
fbads
fireworks.com
flipboard
ga
gab.com
gads
gamelaunchevent
gdn
GitHub
Gmail
go5pmm.com
Google
Google Ads
Google News
Google Paid
Google Shopping
google_ads
google_ads_pmax_branding
google_jobs_apply
google_shopping
google-ads
Google-ads
googleads
GoogleAds
gov.uk
green
Hacker News
haiper.ai
hiyahealth.com
Homepage
Homepage_gps
hotel.hardrock.com
hs_email
ig
igfb
IGShopping
Influencer
Instabox
Instagram
internal
iris
iterable
ividence
jc
jungroup
JunGroup
Klaviyo
l.wl.co
LinkedIn
linkin.bio
linksbio
linktr.ee
Linktree
listrak
LongNguyen
m6s
mailbiz
mailcoach
mailing
manual
Meta
mg
mode_mobile
msn
msn.com
Naver
naver.com
Netcore
newsletter
Newsletter
NLAP
NotcoinEarnEn1
notification
notion.so
operamini
Outbrain
paid_social
Paid_Social
paidsocial
Pangle
Pinterest
pocket-newtab-de-de
preview-mode
projects.raspberrypi.org
pronews.gr
pubmed.ncbi.nlm.nih.gov
pushly
putinho.net
pwa
qa
qr.codes
qr.link
QuynhTran
Qwant
raindrop.io
Rakuten
Reddit
rss
rtbhouse
salesforce
sanesolution.com
sanoma
scan.chainflip.io
search
SEM
Seznam
sfmc
shopimind
sib
Snapchat
snapchat.com
social
start
startpage.com
steamcommunity.com
substack
support.truecaller.com
syndicatedsearch.goog
taaft_role_playing
Taboola
TCAD
Telegram
telephoneannuaire
termsandconditions.game
Threads
TikTok
tiktok.com
tnp.autopay.io
tonkeeper
topai.tools
TramDang
truecaller.com
trusted-web-activity
tv2.dk
Twitter
upday
vauxhall.co.uk
vegagerdin.is
Vendedor
vg
videre.fail
vitat.com.br
Vkontakte
volume-master
web
webpush
webtv
weibo.cn
WhatsApp
xPortal
ya.ru
Yahoo!
Yandex
yandex.kz
Youtube
YouTube

View File

@ -0,0 +1,33 @@
[G] SHOPPING
app
ashop
audience
band_leader_recruitment
br
buffer
chrome
community
cross-network
cta
daily-newsletter
DE_SEA_Brand-Shopping_CONV_AO
facebook
generic
instagram_social
iraparaosite
PatientNavigationP3
paused_qr_code
PMax: Google Shopping - Smart Fabric
post
QR
reach
Sales
scam_alert
shop
shopping
shopping_free_clicks
trueanthem
try_online
unspecified
zalo

View File

@ -0,0 +1,568 @@
articlelink
CPC
{{campaign.name}}
{{placement}}
@reneschmock
/quiz/generique
166053419415
169168922584
42470886
528c5129-ae2d-4aa4-a05a-7cef73a0471b
ad
Ad
ad-banner
ads
Ads
ads_leads
adsmovil
advert
Advertisement
adway
adwords
aff
AFF
affiliate
Affiliate
affiliates
Affiliates
affiliation
afiliados
agenda
akinon_app
AlluringAngels
an
android
announcement_bar_ai_diagramming
announcement_bar_visual_editor
api
app
appconverta
article
articles
atlas-page
Atom Hybrid App
August 2022
automacao
autopost
AV
baihuna
banner
Banner
banner_ad
banner_ads
basicas
BeautifulGirlsoftheWorld
beloud.com
bio
bitly
blast
bln
block_page
blog
BmcAdSearch
bolster
botao
bots
branding_ads
brandsearch
brave
bridge
brightbid
broad
broad-target
broadcast
browser-extension
button
buzzstormer
bwdk
bwm
c
campaign
campaign-email
car-desk
car-mob
carousel
Carrusel
cart_abandonment_email
CartStack
CativatingPrincess
Cercadors
CharmingGirls
Clic
click
cold_conversion
Collection
com.outfit7.mytalkingtomfriends.amazonkids
company_profile
components
comprar-loja
content_vdo
Control
conversao
Conversao
conversion_ads
Corporate
cpa
cpc
CPC
cpc_google
cpc_search
cpc-bm
cpc-eng
cpc-social
cpcUTM
cpe
cpl
cpm
Cpm
CPM
cppv
cps
CPS
cpv
credential_settings
crm
css
cta
d
dashboard
de
demandgen
denni-tisk
dg
direct
Direct
dis_pn
DIS-RES
discover
Discover
DiscoveryIyer
DiscoveryMaya
DiscoveryPatil
display
Display
display_cpc
display_digikokosivu
display_etusivu_kiintea_nakyvyys
display_retargeting
display_somenosto_cpc
display_tuotenosto
docs
domain
dpa
DPP
dsa
dynamic
e-mail
ebook
edm
em
email
Email
EMAIL
email_action
email_button
email_marketing
email_viptarget
email2
emailing_int
embed
EmilyRatajkowskiFans
EmmaWatsonLovers
en
es
exitlinks
extension
extern
facebook
Facebook
Facebook_Instagram
Facebook_Instream_Video
Facebook_Mobile_Feed
Facebook_Mobile_Reels
Facebook_Stories
fapfeeder
fb
FB
fb_prelaunch
fb_v_main
fb_v_preheat
fb_v_prelaunch
fbads
FBAds
fbc-copy
fbc-qr
FBcross
FBRE
features
feed
feedestory
feednews
FlawlessBabyBeauties
flow
FNF2Players-MoreMods
FNF2Players-PlayMoreMods
footer
Formula1Updates
fr
fytooncatalogue
g
gastro-page
gdn
Gemuesegarten
gerentes_digitais
GirlsWorldofWonder
GlamourousGirls
gmb
google
Google
Google Ads
Google PM
google_ads
google-discovery
google-pmax
google-search
Googlemybusinesslisting
GorgeousGirls
GorgeousGirlsCommunity
HavannaWinterFans
header
HeavenlyAngelsWorldwide
hemsida
Home Get your Free Store button
home_screen
homepage
Hub
iframe
ig
image
Image
Imagen
Imagen_CPL
impact
impulsionamento_p_CzRips0M_sK
in-app-bottom-menu
Inaktive
inapp
influencer
Influencer
inproduct
instagram
Instagram
Instagram_Explore
Instagram_Feed
instagram_profile
Instagram_Reels
Instagram_Stories
instagramstories
instainterno
int_kw
integration
interest/homeappliances
interested/Homeandfurniture
interested/Homeappliance
interested/Homeimprovement
intermark
ios
ipfsio
it
itsruntime
Januar 2022
Januar 2023
Jenna__Miller
Jenna_Miller
jobalert
jobboard-jobposting
JunGroup
KellyBrookFans
KimKardashianDailyUpdate
kivraapp
LCM
link
linkclicks-impressions
linkedin
linktree
LiverpoolFanzoneGlobal
LiverpoolLiveNewsEveryday
LiverpoolYouNeverWalkAlone
loc
local
local-listing
LocalSEO
logins
logo-nav
lojafb
lpm
lyr
mail
mail 167 seconnecter
mail 605 donner
Mailing_CPL
MailNDM
map
marketing
marketplace
MASS_IM
Mathella
Mathellaslife
max
Max
max-eng
mensagem
MessiFansForever
meta
Meta
Meta Ads
meta-ads
Michelle - Dishes and Dust Bunnies
mobile
modules
montecarlo
mrt24
native
Native_CPL
Navbar_Item
ncatalogue
newsfeed
newsletter
Newsletter
nl
node_settings_modal-credential_link
null
ocatalogue
octordle
official
ogury
OleksandrZinchenkoFans
ONTRAPORT-email-broadcast
ONTRAPORT-email-campaign
op
org
organic
organic_social
organico
Organico
ORGÂNICO
ORGÂNICO
OrganicSocial
Others
page
paid
Paid
Paid Search
paid social
Paid Social
paid_ad
paid_display
paid_search
paid_social
paid_social
Paid_Social
paid-community
paid-cpc
paid-search
Paid-Search
paid-social
paidsearch
paidsocial
PaidSocial
parceiros
parents_yellow_banner
Park4Night
partner
Partner
partner_dt
partners
partnerships
pc-br-venturebuilder-aithor-sd-def
perf_css_tiers
performance
Performance Max
PflanzenTanzen
phenom-feeds
photo
pinterest
PLA
places
plarium
platform
PLG
pmax
PMax
PMAX
PN
podcast
popcard
popup
PortalsGeneralistes
post
post-promocionado
PP
ppc
PPC
ppc_social
ppd
ppt
premiumcpc
primary_search
print
product_shelf
product_sync
Programatica
programmatic
prospecting
pt
purchase
Purchase
push
Push
PUSH
push_notification
push-notification
pushnotification
pwa
PWA
qr
QR
qrcode
QUE VER
Quora
Quoter
rec
redesocial
redirect
refer_a_friend
referral
Referral
responsive
Responsive Display
retargeting
rev
ro
rss
RTB-RPR
sbrowser
sea
SEA
sea-campaign
search
Search
Search_CPL
search-ads
sekce-z-internetu
SelenaGomezFans
seller
sem
SEM-AlwaysOn-Brand
sheet
sheet1
sin
singlead
site
siteweb
smart_campaign
smartnewsrecirc
smp
sms
SMS
snapchat
Snapchat
soc
SOC-RPR
social
Social
social paid
social_network_link
social_paid
social_traffic
social-media
social-paid
Social-paid
Social+Paid
socialad
socialmedia
socialpaid
socials
SOP
sourei
sponsorship
Static
steady_hoverbutton
steady_paywall_hard
stories
story
studio
subpers3-cryptoenth
subscriber
SugarGirls
superpwa
tabloide_digital
taboola_news
target
TaylorSwiftFans
TC_app
TCApp
teaser
template_library
tester signup
text
text-ad
textlink
TheHaalandHooligans
TheWarriorsFans
tiktok
TikTok
topmenu
TopNav
tr
trafego_cpc
traffic
Traffic
trafficback
transactional
transactional marketing
trigger_emails
tt
TVOnline
twannl
twitch
twitter
Ugc
universal
upsell_tab
vdoad
vdoads
video
Video
video-description
web
web_push
webcast
webpush
website
Website
website_topai
webstories
webstory
welcome_page_chrome
WeloveDemiRose
whatsapp
WhatsApp
whatsblack
widget
widget_test
Widgets
world resources institute
WOW
www
xml_feed
yellow_banner
youchat
youtube
YouTube
yt
yummly
z-boxiku
zalo
zerion_homepage

View File

@ -0,0 +1,179 @@
google
facebook
convertkit
fb
TCAD
adwords
bing
Instabox
Meta
campaign-monitor
flipboard
newsletter
vg
Facebook
youtube
ig
volume-master
ggsem
Klaviyo
google_ads
Google
meta
pinterest
app
ExactTarget
blog
listrak
instagram
iterable
google-ads
tiktok
GoogleAds
mg
TramDang
SEM
adyogi
Google-ads
outbrain
igfb
rakuten
msn
email
mode_mobile
brevo
twitter
fpf_forum
meawwcom
beehiiv
trusted-web-activity
link
taboola
facebook-ads
pocket-newtab-de-de
sfmc
linkedin
ActiveCampaign
operamini
brave
FB_IG
internal
gads
google_jobs_apply
facebook_ads
googleads
IGShopping
edsp
hs_email
jc
NotcoinEarnEn1
facebook.com
web
mailing
emBlue
ALLINMAIL
TikTok
Vendedor
google_shopping
criteo
camel-buy-btn
tonkeeper
Google Shopping
green
pushly
paid_social
notification
dfy
pinterest.com
Pangle
reddit
Influencer
kwai
Linktree
mailbiz
xPortal
snapchat
dv360
NLAP
iris
Instagram
qa
shopimind
LongNguyen
Spend With Pennies - Main List - All Updates
social
an
paidsocial
fbads
ga
substack
webpush
Homepage_gps
ividence
pwa
Bask Email
dito
JunGroup
salesforce
Newsletter
facebookads
manual
EventyrsportNyhedsbrev
browser
preview-mode
mex1
MwFacebook
Google Paid
Homepage
webtv
display
Adwords
jungroup
Paid_Social
api_engagement
chatsonic-chrome-extension
mailcoach
rtbhouse
YouTube
m6s
facebook-novo
copernica
gdn
linksbio
Facebook_Mobile_Feed
sanoma
app_md
en
facebook_feed_ad
QuynhTran
upday
yandex
am
google_ads_pmax_branding
crm
Netcore
sib
Episerver
whatsapp
boliga
responsys
rss
linktree
RD Station
homescreen
Link da Bio do RD Station
Olex plotter
Email
google_search
mkt
sklik
meta.com
camel-footer
iw
taaft
app_mc
CartStack
n8n_app
gb_weekly
sem

View File

@ -148,7 +148,8 @@ defmodule Plausible.MixProject do
{:ex_json_schema, "~> 0.10.2"}, {:ex_json_schema, "~> 0.10.2"},
{:odgn_json_pointer, "~> 3.0.1"}, {:odgn_json_pointer, "~> 3.0.1"},
{:phoenix_bakery, "~> 0.1.2", only: [:ce, :ce_dev, :ce_test]}, {:phoenix_bakery, "~> 0.1.2", only: [:ce, :ce_dev, :ce_test]},
{:site_encrypt, github: "sasa1977/site_encrypt", only: [:ce, :ce_dev, :ce_test]} {:site_encrypt, github: "sasa1977/site_encrypt", only: [:ce, :ce_dev, :ce_test]},
{:stream_data, "~> 1.1.2", only: [:test, :ce_test]}
] ]
end end

View File

@ -139,6 +139,7 @@
"siphash": {:hex, :siphash, "3.2.0", "ec03fd4066259218c85e2a4b8eec4bb9663bc02b127ea8a0836db376ba73f2ed", [:make, :mix], [], "hexpm", "ba3810701c6e95637a745e186e8a4899087c3b079ba88fb8f33df054c3b0b7c3"}, "siphash": {:hex, :siphash, "3.2.0", "ec03fd4066259218c85e2a4b8eec4bb9663bc02b127ea8a0836db376ba73f2ed", [:make, :mix], [], "hexpm", "ba3810701c6e95637a745e186e8a4899087c3b079ba88fb8f33df054c3b0b7c3"},
"site_encrypt": {:git, "https://github.com/sasa1977/site_encrypt.git", "046fbeca11b889604dafd2df6a71001f8abe5e2c", []}, "site_encrypt": {:git, "https://github.com/sasa1977/site_encrypt.git", "046fbeca11b889604dafd2df6a71001f8abe5e2c", []},
"ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.7", "354c321cf377240c7b8716899e182ce4890c5938111a1296add3ec74cf1715df", [:make, :mix, :rebar3], [], "hexpm", "fe4c190e8f37401d30167c8c405eda19469f34577987c76dde613e838bbc67f8"}, "ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.7", "354c321cf377240c7b8716899e182ce4890c5938111a1296add3ec74cf1715df", [:make, :mix, :rebar3], [], "hexpm", "fe4c190e8f37401d30167c8c405eda19469f34577987c76dde613e838bbc67f8"},
"stream_data": {:hex, :stream_data, "1.1.2", "05499eaec0443349ff877aaabc6e194e82bda6799b9ce6aaa1aadac15a9fdb4d", [:mix], [], "hexpm", "129558d2c77cbc1eb2f4747acbbea79e181a5da51108457000020a906813a1a9"},
"sweet_xml": {:hex, :sweet_xml, "0.7.4", "a8b7e1ce7ecd775c7e8a65d501bc2cd933bff3a9c41ab763f5105688ef485d08", [:mix], [], "hexpm", "e7c4b0bdbf460c928234951def54fe87edf1a170f6896675443279e2dbeba167"}, "sweet_xml": {:hex, :sweet_xml, "0.7.4", "a8b7e1ce7ecd775c7e8a65d501bc2cd933bff3a9c41ab763f5105688ef485d08", [:mix], [], "hexpm", "e7c4b0bdbf460c928234951def54fe87edf1a170f6896675443279e2dbeba167"},
"tailwind": {:hex, :tailwind, "0.2.2", "9e27288b568ede1d88517e8c61259bc214a12d7eed271e102db4c93fcca9b2cd", [:mix], [{:castore, ">= 0.0.0", [hex: :castore, repo: "hexpm", optional: false]}], "hexpm", "ccfb5025179ea307f7f899d1bb3905cd0ac9f687ed77feebc8f67bdca78565c4"}, "tailwind": {:hex, :tailwind, "0.2.2", "9e27288b568ede1d88517e8c61259bc214a12d7eed271e102db4c93fcca9b2cd", [:mix], [{:castore, ">= 0.0.0", [hex: :castore, repo: "hexpm", optional: false]}], "hexpm", "ccfb5025179ea307f7f899d1bb3905cd0ac9f687ed77feebc8f67bdca78565c4"},
"telemetry": {:hex, :telemetry, "1.3.0", "fedebbae410d715cf8e7062c96a1ef32ec22e764197f70cda73d82778d61e7a2", [:rebar3], [], "hexpm", "7015fc8919dbe63764f4b4b87a95b7c0996bd539e0d499be6ec9d7f3875b79e6"}, "telemetry": {:hex, :telemetry, "1.3.0", "fedebbae410d715cf8e7062c96a1ef32ec22e764197f70cda73d82778d61e7a2", [:rebar3], [], "hexpm", "7015fc8919dbe63764f4b4b87a95b7c0996bd539e0d499be6ec9d7f3875b79e6"},

View File

@ -1,5 +1,7 @@
defmodule Plausible.Ingestion.EventTest do defmodule Plausible.Ingestion.EventTest do
use Plausible.DataCase use Plausible.DataCase
use ExUnitProperties
import StreamData
setup_all do setup_all do
Plausible.DataMigration.AquisitionChannel.run(quiet: true) Plausible.DataMigration.AquisitionChannel.run(quiet: true)
@ -74,6 +76,30 @@ defmodule Plausible.Ingestion.EventTest do
end end
end end
property "reference implementation matches clickhouse implementation" do
  # One independent generator per input column, each built from its own
  # fixture file by gen_column/1.
  column_generators = %{
    referrer_source: gen_column("fixture/acquisition_channel/referrer_source.txt"),
    utm_medium: gen_column("fixture/acquisition_channel/utm_medium.txt"),
    utm_campaign: gen_column("fixture/acquisition_channel/utm_campaign.txt"),
    utm_source: gen_column("fixture/acquisition_channel/utm_source.txt"),
    click_id_source: gen_column("fixture/acquisition_channel/click_id_source.txt")
  }

  test_data_generator = fixed_map(column_generators)

  # Both implementations must agree on every generated combination of columns.
  check all(test_data <- test_data_generator) do
    assert clickhouse_channel(test_data) == reference_channel(test_data)
  end
end
# Builds a StreamData generator for a single acquisition-channel column.
#
# `filename` is the path of a text fixture holding one sample value per line.
# The returned generator yields either one of those sample values or an
# arbitrary ASCII string of at most 10 characters, so the property is fed
# both realistic and unexpected input.
#
# Raises `File.Error` if the fixture cannot be read.
def gen_column(filename) do
  # Split on line breaks only: fixture values such as "Google Ads" or
  # "Hacker News" contain spaces and must be kept intact. Splitting on all
  # whitespace would break them into fragments that are not real samples.
  samples =
    filename
    |> File.read!()
    |> String.split(["\r\n", "\n"], trim: true)

  one_of([
    # member_of/1 picks through StreamData's own generation and shrinks toward
    # earlier entries; values produced by repeatedly/1 never shrink.
    member_of(samples),
    string(:ascii, max_length: 10)
  ])
end
def reference_channel(test_data) do def reference_channel(test_data) do
request = %{ request = %{
query_params: %{ query_params: %{