2019-09-02 14:29:19 +03:00
|
|
|
defmodule Plausible.Factory do
|
|
|
|
use ExMachina.Ecto, repo: Plausible.Repo
|
2023-10-10 20:35:17 +03:00
|
|
|
require Plausible.Billing.Subscription.Status
|
|
|
|
alias Plausible.Billing.Subscription
|
2020-06-08 10:35:13 +03:00
|
|
|
|
2024-10-21 10:35:23 +03:00
|
|
|
# Default team: fixed name, trial expiring 30 days from today.
def team_factory do
  trial_end = Timex.shift(Timex.today(), days: 30)

  %Plausible.Teams.Team{trial_expiry_date: trial_end, name: "My Team"}
end
|
|
|
|
|
|
|
|
# Team membership for a freshly built user, defaulting to the :viewer role.
def team_membership_factory do
  member = build(:user)

  %Plausible.Teams.Membership{role: :viewer, user: member}
end
|
|
|
|
|
|
|
|
# Guest membership wrapping a team membership built with the :guest role.
def guest_membership_factory do
  parent_membership = build(:team_membership, role: :guest)

  %Plausible.Teams.GuestMembership{team_membership: parent_membership}
end
|
|
|
|
|
|
|
|
# Team invitation with a generated invitation id, a sequenced e-mail address
# and the :admin role.
def team_invitation_factory do
  id = Nanoid.generate()
  invitee_email = sequence(:email, &"email-#{&1}@example.com")

  %Plausible.Teams.Invitation{
    role: :admin,
    invitation_id: id,
    email: invitee_email
  }
end
|
|
|
|
|
|
|
|
# Guest invitation with the :editor role, attached to a team invitation
# built with the :guest role.
def guest_invitation_factory do
  parent_invitation = build(:team_invitation, role: :guest)

  %Plausible.Teams.GuestInvitation{
    team_invitation: parent_invitation,
    role: :editor
  }
end
|
|
|
|
|
2024-10-28 13:11:47 +03:00
|
|
|
# Site transfer with a generated transfer id and a sequenced e-mail address.
def site_transfer_factory do
  id = Nanoid.generate()
  recipient_email = sequence(:email, &"email-#{&1}@example.com")

  %Plausible.Teams.SiteTransfer{transfer_id: id, email: recipient_email}
end
|
|
|
|
|
2019-09-02 14:29:19 +03:00
|
|
|
# Verified user with a sequenced e-mail and a 30-day trial.
#
# The plain-text password is read from `attrs[:password]` (default
# "password") and stored hashed in `password_hash`. Caller-supplied attrs are
# merged over the defaults.
def user_factory(attrs) do
  plain_password = Map.get(attrs, :password, "password")

  %Plausible.Auth.User{
    name: "Jane Smith",
    email: sequence(:email, &"email-#{&1}@example.com"),
    password_hash: Plausible.Auth.Password.hash(plain_password),
    trial_expiry_date: Timex.shift(Timex.today(), days: 30),
    email_verified: true
  }
  |> merge_attributes(attrs)
end
|
|
|
|
|
2020-12-09 12:00:14 +03:00
|
|
|
# Traffic-change notification of type :spike with a threshold of 10.
def spike_notification_factory,
  do: %Plausible.Site.TrafficChangeNotification{type: :spike, threshold: 10}
|
|
|
|
|
|
|
|
# Traffic-change notification of type :drop with a threshold of 1.
def drop_notification_factory,
  do: %Plausible.Site.TrafficChangeNotification{type: :drop, threshold: 1}
|
|
|
|
|
2023-10-11 15:40:01 +03:00
|
|
|
# Site in the UTC timezone with a sequenced domain.
#
# Unless the caller passes any of :memberships, :members or :owner, a single
# freshly built user is added under :members. Caller attrs are merged over the
# defaults.
def site_factory(attrs) do
  # The é exercises unicode support in domain names
  domain = sequence(:domain, &"é-#{&1}.example.com")

  membership_keys = [:memberships, :members, :owner]

  attrs =
    if Enum.any?(membership_keys, &Map.has_key?(attrs, &1)) do
      attrs
    else
      Map.put_new(attrs, :members, [build(:user)])
    end

  %Plausible.Site{
    native_stats_start_at: ~N[2000-01-01 00:00:00],
    domain: domain,
    timezone: "UTC"
  }
  |> merge_attributes(attrs)
end
|
|
|
|
|
2021-06-16 15:00:07 +03:00
|
|
|
# Site membership for a freshly built user, defaulting to the :viewer role.
def site_membership_factory do
  member = build(:user)

  %Plausible.Site.Membership{role: :viewer, user: member}
end
|
|
|
|
|
2024-02-28 11:34:04 +03:00
|
|
|
# Completed, non-legacy :universal_analytics import covering the 200 days up
# to and including today (UTC), for a freshly built site and importing user.
def site_import_factory do
  last_day = Date.utc_today()
  first_day = Date.add(last_day, -200)

  %Plausible.Imported.SiteImport{
    site: build(:site),
    imported_by: build(:user),
    start_date: first_day,
    end_date: last_day,
    source: :universal_analytics,
    status: :completed,
    legacy: false
  }
end
|
|
|
|
|
2020-05-18 12:44:52 +03:00
|
|
|
# Single-pageview, non-bounce session row with a sequenced hostname, a random
# site id in 1000..10_000 and SipHash-derived session/user ids.
def ch_session_factory do
  host = sequence(:domain, &"example-#{&1}.com")
  # hash_key/0 only reads application config, so one lookup serves both ids.
  key = hash_key()

  session_id = SipHash.hash!(key, Ecto.UUID.generate())
  user_id = SipHash.hash!(key, Ecto.UUID.generate())
  site_id = Enum.random(1000..10_000)
  started_at = Timex.now()
  last_seen_at = Timex.now()

  %Plausible.ClickhouseSessionV2{
    sign: 1,
    session_id: session_id,
    user_id: user_id,
    hostname: host,
    site_id: site_id,
    entry_page: "/",
    pageviews: 1,
    events: 1,
    start: started_at,
    timestamp: last_seen_at,
    is_bounce: false
  }
end
|
|
|
|
|
2024-11-12 17:59:14 +03:00
|
|
|
# An event built by event_factory/1 whose :name is forced to "pageview"
# (this overrides any :name given in attrs).
def pageview_factory(attrs) do
  attrs
  |> event_factory()
  |> Map.put(:name, "pageview")
end
|
|
|
|
|
2024-11-12 17:59:14 +03:00
|
|
|
# Event row with a sequenced hostname, a random site id in 1000..10_000,
# pathname "/", the current UTC time truncated to seconds, and SipHash-derived
# user/session ids.
#
# Raises if attrs carries a truthy :acquisition_channel. Caller attrs are
# merged over the defaults, then lazy attributes are evaluated.
def event_factory(attrs) do
  if Map.get(attrs, :acquisition_channel) do
    raise "Acquisition channel cannot be written directly since it's a materialized column."
  end

  hostname = sequence(:domain, &"example-#{&1}.com")

  event = %Plausible.ClickhouseEventV2{
    hostname: hostname,
    site_id: Enum.random(1000..10_000),
    pathname: "/",
    timestamp: NaiveDateTime.utc_now() |> NaiveDateTime.truncate(:second),
    user_id: SipHash.hash!(hash_key(), Ecto.UUID.generate()),
    session_id: SipHash.hash!(hash_key(), Ecto.UUID.generate())
  }

  event
  |> merge_attributes(attrs)
  |> evaluate_lazy_attributes()
end
|
|
|
|
|
2024-08-09 12:12:00 +03:00
|
|
|
# Goal built from caller attrs, with a derived :display_name when none is given.
#
# Derivation (first match wins):
#   * :page_path is a binary or zero-arity function -> "Visit " <> path
#   * :event_name is a binary or zero-arity function -> the event name
#   * otherwise attrs are left untouched
#
# Zero-arity functions are invoked exactly once and the result is written back
# to the attribute, so a non-deterministic function (e.g. one backed by a
# sequence) cannot produce a display name that disagrees with the stored value.
def goal_factory(attrs) do
  attrs =
    if Map.has_key?(attrs, :display_name) do
      attrs
    else
      case attrs do
        %{page_path: path} when is_binary(path) ->
          Map.put(attrs, :display_name, "Visit " <> path)

        %{page_path: path_fun} when is_function(path_fun, 0) ->
          path = path_fun.()

          attrs
          |> Map.put(:display_name, "Visit " <> path)
          |> Map.put(:page_path, path)

        %{event_name: event_name} when is_binary(event_name) ->
          Map.put(attrs, :display_name, event_name)

        %{event_name: event_name_fun} when is_function(event_name_fun, 0) ->
          event_name = event_name_fun.()

          attrs
          |> Map.put(:display_name, event_name)
          |> Map.put(:event_name, event_name)

        _ ->
          attrs
      end
    end

  merge_attributes(%Plausible.Goal{}, attrs)
end
|
|
|
|
|
2019-09-02 14:29:19 +03:00
|
|
|
# Active USD subscription with sequenced Paddle subscription/plan ids and
# both bill dates set to today.
def subscription_factory do
  subscription_id = sequence(:paddle_subscription_id, &"subscription-#{&1}")
  plan_id = sequence(:paddle_plan_id, &"plan-#{&1}")

  %Plausible.Billing.Subscription{
    paddle_subscription_id: subscription_id,
    paddle_plan_id: plan_id,
    cancel_url: "cancel.com",
    # NOTE(review): same value as cancel_url - possibly a copy-paste; confirm.
    update_url: "cancel.com",
    status: Subscription.Status.active(),
    next_bill_amount: "6.00",
    next_bill_date: Timex.today(),
    last_bill_date: Timex.today(),
    currency_code: "USD"
  }
end
|
|
|
|
|
2023-10-18 11:48:47 +03:00
|
|
|
# Default subscription pinned to Paddle plan id "857097".
def growth_subscription_factory,
  do: build(:subscription, paddle_plan_id: "857097")
|
|
|
|
|
|
|
|
# Default subscription pinned to Paddle plan id "857087".
def business_subscription_factory,
  do: build(:subscription, paddle_plan_id: "857087")
|
|
|
|
|
2021-10-20 17:49:11 +03:00
|
|
|
# Monthly enterprise plan with a sequenced Paddle plan id and fixed limits:
# 1M pageviews/month, 3000 API requests/hour, 100 sites, 10 team members.
def enterprise_plan_factory do
  plan_id = sequence(:paddle_plan_id, &"plan-#{&1}")

  %Plausible.Billing.EnterprisePlan{
    billing_interval: :monthly,
    paddle_plan_id: plan_id,
    site_limit: 100,
    team_member_limit: 10,
    monthly_pageview_limit: 1_000_000,
    hourly_api_request_limit: 3000
  }
end
|
|
|
|
|
2019-09-02 14:29:19 +03:00
|
|
|
# Google auth record with a sequenced e-mail, placeholder tokens and an
# expiry one day from now.
def google_auth_factory do
  account_email = sequence(:google_auth_email, &"email-#{&1}@example.com")
  expires_at = Timex.shift(Timex.now(), days: 1)

  %Plausible.Site.GoogleAuth{
    email: account_email,
    access_token: "123",
    refresh_token: "123",
    expires: expires_at
  }
end
|
2020-01-16 16:13:03 +03:00
|
|
|
|
2020-01-22 12:16:53 +03:00
|
|
|
# Weekly report struct with all fields at their defaults.
def weekly_report_factory, do: %Plausible.Site.WeeklyReport{}
|
|
|
|
|
|
|
|
# Monthly report struct with all fields at their defaults.
def monthly_report_factory, do: %Plausible.Site.MonthlyReport{}
|
2020-01-29 12:29:11 +03:00
|
|
|
|
|
|
|
# Shared link with a fixed name and a generated slug.
def shared_link_factory do
  slug = Nanoid.generate()

  %Plausible.Site.SharedLink{slug: slug, name: "Link name"}
end
|
2021-01-07 16:16:04 +03:00
|
|
|
|
2021-06-16 15:00:07 +03:00
|
|
|
# Invitation with a generated invitation id, a sequenced e-mail address and
# the :admin role.
def invitation_factory do
  id = Nanoid.generate()
  invitee_email = sequence(:email, &"email-#{&1}@example.com")

  %Plausible.Auth.Invitation{
    role: :admin,
    invitation_id: id,
    email: invitee_email
  }
end
|
|
|
|
|
2021-02-05 12:23:30 +03:00
|
|
|
# API key built from 64 random bytes, URL-safe base64 encoded and cut to the
# first 64 bytes. The hash and the 6-byte prefix are derived from that key.
def api_key_factory do
  encoded = Base.url_encode64(:crypto.strong_rand_bytes(64))
  raw_key = binary_part(encoded, 0, 64)

  %Plausible.Auth.ApiKey{
    name: "api-key-name",
    key: raw_key,
    key_hash: Plausible.Auth.ApiKey.do_hash(raw_key),
    key_prefix: binary_part(raw_key, 0, 6)
  }
end
|
|
|
|
|
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
|
|
|
# Plain-map row for the "imported_visitors" table, dated today.
def imported_visitors_factory do
  counters = %{visitors: 1, pageviews: 1, bounces: 0, visits: 1, visit_duration: 10}

  Map.merge(%{table: "imported_visitors", date: Timex.today()}, counters)
end
|
|
|
|
|
|
|
|
# Plain-map row for the "imported_sources" table, dated today, with an
# empty source.
def imported_sources_factory do
  counters = %{visitors: 1, visits: 1, bounces: 0, visit_duration: 10}

  Map.merge(%{table: "imported_sources", date: Timex.today(), source: ""}, counters)
end
|
|
|
|
|
|
|
|
# Plain-map row for the "imported_pages" table, dated today, with an
# empty page.
def imported_pages_factory do
  counters = %{visitors: 1, pageviews: 1, exits: 0, time_on_page: 10}

  Map.merge(%{table: "imported_pages", date: Timex.today(), page: ""}, counters)
end
|
|
|
|
|
|
|
|
# Plain-map row for the "imported_entry_pages" table, dated today, with an
# empty entry page.
def imported_entry_pages_factory do
  counters = %{visitors: 1, entrances: 1, bounces: 0, visit_duration: 10}

  Map.merge(
    %{table: "imported_entry_pages", date: Timex.today(), entry_page: ""},
    counters
  )
end
|
|
|
|
|
|
|
|
# Plain-map row for the "imported_exit_pages" table, dated today, with an
# empty exit page.
def imported_exit_pages_factory do
  counters = %{visitors: 1, exits: 1}

  Map.merge(
    %{table: "imported_exit_pages", date: Timex.today(), exit_page: ""},
    counters
  )
end
|
|
|
|
|
2024-05-09 15:13:19 +03:00
|
|
|
# Plain-map row for the "imported_custom_events" table, dated today, with an
# empty event name and link URL.
def imported_custom_events_factory do
  counters = %{visitors: 1, events: 1}

  Map.merge(
    %{table: "imported_custom_events", date: Timex.today(), name: "", link_url: ""},
    counters
  )
end
|
|
|
|
|
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
|
|
|
# Plain-map row for the "imported_locations" table, dated today, with empty
# country/region and city 0.
def imported_locations_factory do
  counters = %{visitors: 1, visits: 1, bounces: 0, visit_duration: 10}
  location = %{country: "", region: "", city: 0}

  %{table: "imported_locations", date: Timex.today()}
  |> Map.merge(location)
  |> Map.merge(counters)
end
|
|
|
|
|
|
|
|
# Plain-map row for the "imported_devices" table, dated today, with an
# empty device.
def imported_devices_factory do
  counters = %{visitors: 1, visits: 1, bounces: 0, visit_duration: 10}

  Map.merge(%{table: "imported_devices", date: Timex.today(), device: ""}, counters)
end
|
|
|
|
|
|
|
|
# Plain-map row for the "imported_browsers" table, dated today, with an
# empty browser.
def imported_browsers_factory do
  counters = %{visitors: 1, visits: 1, bounces: 0, visit_duration: 10}

  Map.merge(%{table: "imported_browsers", date: Timex.today(), browser: ""}, counters)
end
|
|
|
|
|
|
|
|
# Plain-map row for the "imported_operating_systems" table, dated today, with
# an empty operating system.
def imported_operating_systems_factory do
  counters = %{visitors: 1, visits: 1, bounces: 0, visit_duration: 10}

  Map.merge(
    %{table: "imported_operating_systems", date: Timex.today(), operating_system: ""},
    counters
  )
end
|
|
|
|
|
2024-02-12 16:55:20 +03:00
|
|
|
# Shield IP rule for an address from Plausible.TestUtils.random_ip/0, with a
# fixed description and author.
def ip_rule_factory do
  address = Plausible.TestUtils.random_ip()

  %Plausible.Shield.IPRule{
    added_by: "Mr Seed <user@plausible.test>",
    description: "Test IP Rule",
    inet: address
  }
end
|
|
|
|
|
2024-02-27 14:08:13 +03:00
|
|
|
# Shield country rule with only the author set; other fields keep their
# struct defaults.
def country_rule_factory,
  do: %Plausible.Shield.CountryRule{added_by: "Mr Seed <user@plausible.test>"}
|
|
|
|
|
2021-01-07 16:16:04 +03:00
|
|
|
# First 16 bytes of the endpoint's :secret_key_base, used as the SipHash key
# by the session and event factories. Raises if the key is missing.
defp hash_key() do
  endpoint_config = Application.get_env(:plausible, PlausibleWeb.Endpoint)
  secret_key_base = Keyword.fetch!(endpoint_config, :secret_key_base)

  binary_part(secret_key_base, 0, 16)
end
|
2019-09-02 14:29:19 +03:00
|
|
|
end
|