2019-09-02 14:29:19 +03:00
defmodule PlausibleWeb.SiteController do
use PlausibleWeb , :controller
use Plausible.Repo
2023-09-13 15:55:29 +03:00
alias Plausible.Sites
2023-12-13 13:47:50 +03:00
alias Plausible.Billing.Quota
2019-09-02 14:29:19 +03:00
plug PlausibleWeb.RequireAccountPlug
2021-06-16 15:00:07 +03:00
plug PlausibleWeb.AuthorizeSiteAccess ,
2023-11-02 15:18:11 +03:00
[ :owner , :admin , :super_admin ] when action not in [ :new , :create_site ]
2020-03-31 16:16:21 +03:00
2019-09-02 14:29:19 +03:00
@doc """
Renders the "new site" form using the focus layout.

The template receives a blank site changeset plus the current user's quota
information: whether they have no sites yet, their site limit, and whether
`Quota.ensure_can_add_new_site/1` returns anything other than `:ok`.
"""
def new(conn, _params) do
  user = conn.assigns[:current_user]

  assigns = [
    changeset: Plausible.Site.changeset(%Plausible.Site{}),
    first_site?: Quota.site_usage(user) == 0,
    site_limit: Quota.site_limit(user),
    site_limit_exceeded?: Quota.ensure_can_add_new_site(user) != :ok,
    layout: {PlausibleWeb.LayoutView, "focus.html"}
  ]

  render(conn, "new.html", assigns)
end
@doc """
Creates a site for the current user from the submitted "site" params.

On success, sends the welcome email when the user had no sites before this
request, stores an "offer email report" flag in the session and redirects to
the snippet page. When the site limit is hit, or the params are invalid, the
"new site" form is rendered again with the relevant changeset and quota
assigns.
"""
def create_site(conn, %{"site" => site_params}) do
  current_user = conn.assigns[:current_user]
  # Captured before the site is created, so it reflects the state prior to
  # this request.
  no_sites_yet? = Quota.site_usage(current_user) == 0
  focus_layout = {PlausibleWeb.LayoutView, "focus.html"}

  case Sites.create(current_user, site_params) do
    {:ok, %{site: site}} ->
      if no_sites_yet? do
        current_user
        |> PlausibleWeb.Email.welcome_email()
        |> Plausible.Mailer.send()
      end

      snippet_path = Routes.site_path(conn, :add_snippet, site.domain)

      conn
      |> put_session(site.domain <> "_offer_email_report", true)
      |> redirect(external: snippet_path)

    {:error, {:over_limit, limit}} ->
      render(conn, "new.html",
        changeset: Plausible.Site.changeset(%Plausible.Site{}),
        first_site?: no_sites_yet?,
        site_limit: limit,
        site_limit_exceeded?: true,
        layout: focus_layout
      )

    {:error, _step, changeset, _changes} ->
      render(conn, "new.html",
        changeset: changeset,
        first_site?: no_sites_yet?,
        site_limit: Quota.site_limit(current_user),
        site_limit_exceeded?: false,
        layout: focus_layout
      )
  end
end
2021-06-16 15:00:07 +03:00
@doc """
Renders the tracking snippet page for the current site.

`is_first_site` is true when the current user has no membership on any site
other than this one.
"""
def add_snippet(conn, _params) do
  current_user = conn.assigns[:current_user]
  site = conn.assigns[:site]

  memberships_on_other_sites =
    from membership in Plausible.Site.Membership,
      where: membership.user_id == ^current_user.id and membership.site_id != ^site.id

  render(conn, "snippet.html",
    site: site,
    skip_plausible_tracking: true,
    is_first_site: not Repo.exists?(memberships_on_other_sites),
    layout: {PlausibleWeb.LayoutView, "focus.html"}
  )
end
2023-06-22 10:00:07 +03:00
@doc """
Toggles the dashboard visibility of a billing feature for the current site.

This clause only matches when "setting" is one of `conversions_enabled`,
`funnels_enabled` or `props_enabled`, "set" is "true" or "false", and the
redirect target "r" starts with a slash. The feature module is looked up by
its toggle field, `toggle/2` is called with the requested override, and the
user is redirected to "r" with a success or error flash.
"""
def update_feature_visibility(conn, %{
      "setting" => setting,
      "r" => "/" <> _rest = redirect_path,
      "set" => value
    })
    when setting in ~w[conversions_enabled funnels_enabled props_enabled] and
           value in ["true", "false"] do
  site = conn.assigns[:site]
  toggle_field = String.to_existing_atom(setting)
  enable? = value == "true"

  feature_mod =
    Enum.find(Plausible.Billing.Feature.list(), fn mod ->
      mod.toggle_field() == toggle_field
    end)

  {flash_kind, message} =
    case feature_mod.toggle(site, override: enable?) do
      {:ok, updated_site} ->
        # Report the value actually stored on the updated site rather than
        # the value that was requested.
        if Map.fetch!(updated_site, toggle_field) do
          {:success, "#{feature_mod.display_name()} are now visible again on your dashboard"}
        else
          {:success, "#{feature_mod.display_name()} are now hidden from your dashboard"}
        end

      {:error, _reason} ->
        {:error,
         "Something went wrong. Failed to toggle #{feature_mod.display_name()} on your dashboard."}
    end

  conn
  |> put_flash(flash_kind, message)
  |> redirect(to: redirect_path)
end
2019-09-02 14:29:19 +03:00
@doc """
Redirects to the general settings page of the site given by "website".
"""
def settings(conn, %{"website" => website}) do
  general_settings_path = Routes.site_path(conn, :settings_general, website)
  redirect(conn, to: general_settings_path)
end
2021-06-16 15:00:07 +03:00
@doc """
Renders the general settings page for the current site with a changeset
built from the site and no changes.
"""
def settings_general(conn, _params) do
  current_site = conn.assigns[:site]

  render(conn, "settings_general.html",
    site: current_site,
    changeset: Plausible.Site.changeset(current_site, %{}),
    dogfood_page_path: "/:dashboard/settings/general",
    layout: {PlausibleWeb.LayoutView, "site_settings.html"}
  )
end
2020-11-16 16:38:44 +03:00
2021-06-16 15:00:07 +03:00
@doc """
Renders the people settings page for the current site, with memberships
(including their users) and invitations preloaded.
"""
def settings_people(conn, _params) do
  site_with_people =
    Repo.preload(conn.assigns[:site], memberships: :user, invitations: [])

  render(conn, "settings_people.html",
    site: site_with_people,
    dogfood_page_path: "/:dashboard/settings/people",
    layout: {PlausibleWeb.LayoutView, "site_settings.html"}
  )
end
@doc """
Renders the visibility settings page for the current site along with all of
the site's shared links.
"""
def settings_visibility(conn, _params) do
  site = conn.assigns[:site]

  links_for_site =
    from link in Plausible.Site.SharedLink, where: link.site_id == ^site.id

  render(conn, "settings_visibility.html",
    site: site,
    shared_links: Repo.all(links_for_site),
    dogfood_page_path: "/:dashboard/settings/visibility",
    layout: {PlausibleWeb.LayoutView, "site_settings.html"}
  )
end
2021-06-16 15:00:07 +03:00
@doc """
Renders the goals settings page for the current site.

The site's owner is preloaded and replaced with the result of
`Plausible.Users.with_subscription/1` before rendering.
"""
def settings_goals(conn, _params) do
  site = Repo.preload(conn.assigns[:site], [:owner])
  site = %{site | owner: Plausible.Users.with_subscription(site.owner)}

  render(conn, "settings_goals.html",
    site: site,
    dogfood_page_path: "/:dashboard/settings/goals",
    connect_live_socket: true,
    layout: {PlausibleWeb.LayoutView, "site_settings.html"}
  )
end
2023-06-22 10:00:07 +03:00
@doc """
Renders the funnels settings page for the current site.

The site's owner is preloaded and replaced with the result of
`Plausible.Users.with_subscription/1` before rendering.
"""
def settings_funnels(conn, _params) do
  site = Repo.preload(conn.assigns[:site], [:owner])
  site = %{site | owner: Plausible.Users.with_subscription(site.owner)}

  render(conn, "settings_funnels.html",
    site: site,
    dogfood_page_path: "/:dashboard/settings/funnels",
    connect_live_socket: true,
    layout: {PlausibleWeb.LayoutView, "site_settings.html"}
  )
end
2023-07-27 17:46:32 +03:00
@doc """
Renders the custom properties settings page for the current site.

The site's owner is preloaded and replaced with the result of
`Plausible.Users.with_subscription/1` before rendering.
"""
def settings_props(conn, _params) do
  site = Repo.preload(conn.assigns[:site], [:owner])
  site = %{site | owner: Plausible.Users.with_subscription(site.owner)}

  render(conn, "settings_props.html",
    site: site,
    dogfood_page_path: "/:dashboard/settings/properties",
    layout: {PlausibleWeb.LayoutView, "site_settings.html"},
    connect_live_socket: true
  )
end
2021-06-16 15:00:07 +03:00
@doc """
Renders the email reports settings page for the current site.

Looks up the site's weekly report, monthly report and spike notification
records by `site_id`; each assign is `nil` when no record exists.
"""
def settings_email_reports(conn, _params) do
  site = conn.assigns[:site]

  weekly = Repo.get_by(Plausible.Site.WeeklyReport, site_id: site.id)
  monthly = Repo.get_by(Plausible.Site.MonthlyReport, site_id: site.id)
  spike = Repo.get_by(Plausible.Site.SpikeNotification, site_id: site.id)

  render(conn, "settings_email_reports.html",
    site: site,
    weekly_report: weekly,
    monthly_report: monthly,
    spike_notification: spike,
    dogfood_page_path: "/:dashboard/settings/email-reports",
    layout: {PlausibleWeb.LayoutView, "site_settings.html"}
  )
end
2021-06-16 15:00:07 +03:00
@doc """
Renders the danger zone settings page for the current site.
"""
def settings_danger_zone(conn, _params) do
  render(conn, "settings_danger_zone.html",
    site: conn.assigns[:site],
    dogfood_page_path: "/:dashboard/settings/danger-zone",
    layout: {PlausibleWeb.LayoutView, "site_settings.html"}
  )
end
2023-10-18 15:01:17 +03:00
@doc """
Renders the integrations settings page for the current site.

Verified Search Console properties are fetched only when the site has a
Google auth record (otherwise the assign is `nil`). The imported pageview
count is read only when the site has imported data (otherwise it is `0`).
"""
def settings_integrations(conn, _params) do
  site = Repo.preload(conn.assigns.site, [:google_auth])

  verified_properties =
    if site.google_auth,
      do: Plausible.Google.Api.fetch_verified_properties(site.google_auth)

  pageviews_from_import =
    if site.imported_data,
      do: Plausible.Stats.Clickhouse.imported_pageview_count(site),
      else: 0

  plugins_tokens_present? = Plausible.Plugins.API.Tokens.any?(site)

  render(conn, "settings_integrations.html",
    site: site,
    imported_pageviews: pageviews_from_import,
    has_plugins_tokens?: plugins_tokens_present?,
    search_console_domains: verified_properties,
    dogfood_page_path: "/:dashboard/settings/integrations",
    connect_live_socket: true,
    layout: {PlausibleWeb.LayoutView, "site_settings.html"}
  )
end
2024-02-12 16:55:20 +03:00
@doc """
Renders the shields settings page for the current site.
"""
def settings_shields(conn, _params) do
  render(conn, "settings_shields.html",
    site: conn.assigns.site,
    dogfood_page_path: "/:dashboard/settings/shields",
    connect_live_socket: true,
    layout: {PlausibleWeb.LayoutView, "site_settings.html"}
  )
end
2021-06-16 15:00:07 +03:00
@doc """
Sets the property on the site's Google auth record from the submitted
"google_auth" params, then redirects to the integrations settings page.

Raises if the update fails (`Repo.update!/1`).
"""
def update_google_auth(conn, %{"google_auth" => attrs}) do
  site = Repo.preload(conn.assigns[:site], :google_auth)

  site.google_auth
  |> Plausible.Site.GoogleAuth.set_property(attrs)
  |> Repo.update!()

  integrations_path = Routes.site_path(conn, :settings_integrations, site.domain)

  conn
  |> put_flash(:success, "Google integration saved successfully")
  |> redirect(external: integrations_path)
end
2021-06-16 15:00:07 +03:00
@doc """
Deletes the site's Google auth record and redirects to the integrations
settings page with a success flash.

Raises if the delete fails (`Repo.delete!/1`).
"""
def delete_google_auth(conn, _params) do
  site = Repo.preload(conn.assigns[:site], :google_auth)

  Repo.delete!(site.google_auth)

  conn
  |> put_flash(:success, "Google account unlinked from Plausible")
  |> redirect(external: Routes.site_path(conn, :settings_integrations, site.domain))
end
2021-06-16 15:00:07 +03:00
@doc """
Updates the current site from the submitted "site" params.

On success, clears the `authorized_site__<domain>` session entry for the
updated domain and redirects to the general settings page. On failure,
re-renders the general settings page with the invalid changeset and an
error flash.
"""
def update_settings(conn, %{"site" => site_params}) do
  current_site = conn.assigns[:site]

  update_result =
    current_site
    |> Plausible.Site.update_changeset(site_params)
    |> Repo.update()

  case update_result do
    {:ok, updated_site} ->
      general_path = Routes.site_path(conn, :settings_general, updated_site.domain)

      conn
      |> put_session("authorized_site__" <> updated_site.domain, nil)
      |> put_flash(:success, "Your site settings have been saved")
      |> redirect(external: general_path)

    {:error, invalid_changeset} ->
      conn
      |> put_flash(:error, "Could not update your site settings")
      |> render("settings_general.html",
        site: current_site,
        changeset: invalid_changeset,
        layout: {PlausibleWeb.LayoutView, "site_settings.html"}
      )
  end
end
2021-06-16 15:00:07 +03:00
@doc """
Calls `Plausible.Purge.reset!/1` for the current site and redirects to the
danger zone settings page with a success flash.
"""
def reset_stats(conn, _params) do
  site = conn.assigns[:site]
  danger_zone_path = Routes.site_path(conn, :settings_danger_zone, site.domain)

  Plausible.Purge.reset!(site)

  conn
  |> put_flash(:success, "#{site.domain} stats will be reset in a few minutes")
  |> redirect(external: danger_zone_path)
end
2021-06-16 15:00:07 +03:00
@doc """
Runs `Plausible.Site.Removal.run/1` for the current site's domain and
redirects to the sites list with a success flash.
"""
def delete_site(conn, _params) do
  %{domain: domain} = conn.assigns[:site]

  Plausible.Site.Removal.run(domain)

  conn
  |> put_flash(:success, "Your site and page views deletion process has started.")
  |> redirect(to: "/sites")
end
2021-06-16 15:00:07 +03:00
@doc """
Applies `Plausible.Site.make_public/1` to the current site, persists it and
redirects to the visibility settings page.

Raises if the update fails (`Repo.update!/1`).
"""
def make_public(conn, _params) do
  public_changeset = Plausible.Site.make_public(conn.assigns[:site])
  site = Repo.update!(public_changeset)

  conn
  |> put_flash(:success, "Stats for #{site.domain} are now public.")
  |> redirect(external: Routes.site_path(conn, :settings_visibility, site.domain))
end
2021-06-16 15:00:07 +03:00
@doc """
Applies `Plausible.Site.make_private/1` to the current site, persists it and
redirects to the visibility settings page.

Raises if the update fails (`Repo.update!/1`).
"""
def make_private(conn, _params) do
  private_changeset = Plausible.Site.make_private(conn.assigns[:site])
  site = Repo.update!(private_changeset)

  conn
  |> put_flash(:success, "Stats for #{site.domain} are now private.")
  |> redirect(external: Routes.site_path(conn, :settings_visibility, site.domain))
end
2021-06-16 15:00:07 +03:00
@doc """
Inserts a weekly report record for the current site with the current user's
email as its only recipient, then redirects to the email reports settings.

The insert result is passed to `tolerate_unique_contraint_violation/2` with
the `weekly_reports_site_id_index` constraint name, and that call must
return `:ok`.
"""
def enable_weekly_report(conn, _params) do
  site = conn.assigns[:site]

  attrs = %{
    site_id: site.id,
    recipients: [conn.assigns[:current_user].email]
  }

  insert_result =
    %Plausible.Site.WeeklyReport{}
    |> Plausible.Site.WeeklyReport.changeset(attrs)
    |> Repo.insert()

  :ok = tolerate_unique_contraint_violation(insert_result, "weekly_reports_site_id_index")

  conn
  |> put_flash(:success, "You will receive an email report every Monday going forward")
  |> redirect(external: Routes.site_path(conn, :settings_email_reports, site.domain))
end
2021-06-16 15:00:07 +03:00
@doc """
Deletes all weekly report records for the current site and redirects to the
email reports settings page.
"""
def disable_weekly_report(conn, _params) do
  site = conn.assigns[:site]

  weekly_reports_for_site =
    from report in Plausible.Site.WeeklyReport, where: report.site_id == ^site.id

  Repo.delete_all(weekly_reports_for_site)

  conn
  |> put_flash(:success, "You will not receive weekly email reports going forward")
  |> redirect(external: Routes.site_path(conn, :settings_email_reports, site.domain))
end
2021-06-16 15:00:07 +03:00
@doc """
Adds "recipient" to the current site's weekly report and redirects to the
email reports settings page.

Raises if the update fails (`Repo.update!/1`).
"""
def add_weekly_report_recipient(conn, %{"recipient" => recipient}) do
  site = conn.assigns[:site]
  weekly_report = Repo.get_by(Plausible.Site.WeeklyReport, site_id: site.id)

  weekly_report
  |> Plausible.Site.WeeklyReport.add_recipient(recipient)
  |> Repo.update!()

  reports_path = Routes.site_path(conn, :settings_email_reports, site.domain)

  conn
  |> put_flash(:success, "Added #{recipient} as a recipient for the weekly report")
  |> redirect(external: reports_path)
end
2021-06-16 15:00:07 +03:00
@doc """
Removes "recipient" from the current site's weekly report and redirects to
the email reports settings page.

Raises if the update fails (`Repo.update!/1`).
"""
def remove_weekly_report_recipient(conn, %{"recipient" => recipient}) do
  site = conn.assigns[:site]
  weekly_report = Repo.get_by(Plausible.Site.WeeklyReport, site_id: site.id)

  weekly_report
  |> Plausible.Site.WeeklyReport.remove_recipient(recipient)
  |> Repo.update!()

  reports_path = Routes.site_path(conn, :settings_email_reports, site.domain)

  conn
  |> put_flash(:success, "Removed #{recipient} as a recipient for the weekly report")
  |> redirect(external: reports_path)
end
2021-06-16 15:00:07 +03:00
@doc """
Inserts a monthly report record for the current site with the current
user's email as its only recipient, then redirects to the email reports
settings.

The insert result is passed to `tolerate_unique_contraint_violation/2` with
the `monthly_reports_site_id_index` constraint name, and that call must
return `:ok`.
"""
def enable_monthly_report(conn, _params) do
  site = conn.assigns[:site]

  attrs = %{
    site_id: site.id,
    recipients: [conn.assigns[:current_user].email]
  }

  changeset = Plausible.Site.MonthlyReport.changeset(%Plausible.Site.MonthlyReport{}, attrs)
  insert_result = Repo.insert(changeset)

  :ok = tolerate_unique_contraint_violation(insert_result, "monthly_reports_site_id_index")

  conn
  |> put_flash(:success, "You will receive an email report every month going forward")
  |> redirect(external: Routes.site_path(conn, :settings_email_reports, site.domain))
end
2021-06-16 15:00:07 +03:00
@doc """
Deletes all monthly report records for the current site and redirects to
the email reports settings page.
"""
def disable_monthly_report(conn, _params) do
  site = conn.assigns[:site]

  monthly_reports_for_site =
    from report in Plausible.Site.MonthlyReport, where: report.site_id == ^site.id

  Repo.delete_all(monthly_reports_for_site)

  conn
  |> put_flash(:success, "You will not receive monthly email reports going forward")
  |> redirect(external: Routes.site_path(conn, :settings_email_reports, site.domain))
end
2021-06-16 15:00:07 +03:00
@doc """
Adds "recipient" to the current site's monthly report and redirects to the
email reports settings page.

Raises if the update fails (`Repo.update!/1`).
"""
def add_monthly_report_recipient(conn, %{"recipient" => recipient}) do
  site = conn.assigns[:site]
  monthly_report = Repo.get_by(Plausible.Site.MonthlyReport, site_id: site.id)

  monthly_report
  |> Plausible.Site.MonthlyReport.add_recipient(recipient)
  |> Repo.update!()

  reports_path = Routes.site_path(conn, :settings_email_reports, site.domain)

  conn
  |> put_flash(:success, "Added #{recipient} as a recipient for the monthly report")
  |> redirect(external: reports_path)
end
2021-06-16 15:00:07 +03:00
@doc """
Removes "recipient" from the current site's monthly report and redirects to
the email reports settings page.

Raises if the update fails (`Repo.update!/1`).
"""
def remove_monthly_report_recipient(conn, %{"recipient" => recipient}) do
  site = conn.assigns[:site]
  monthly_report = Repo.get_by(Plausible.Site.MonthlyReport, site_id: site.id)

  monthly_report
  |> Plausible.Site.MonthlyReport.remove_recipient(recipient)
  |> Repo.update!()

  reports_path = Routes.site_path(conn, :settings_email_reports, site.domain)

  conn
  |> put_flash(:success, "Removed #{recipient} as a recipient for the monthly report")
  |> redirect(external: reports_path)
end
2021-06-16 15:00:07 +03:00
@doc """
Inserts a spike notification record for the current site with a threshold
of 10 and the current user's email as its only recipient.

Redirects to the email reports settings page in both outcomes, with a
success flash when the insert succeeds and an error flash when it fails.
"""
def enable_spike_notification(conn, _params) do
  site = conn.assigns[:site]

  attrs = %{
    site_id: site.id,
    threshold: 10,
    recipients: [conn.assigns[:current_user].email]
  }

  insert_result =
    %Plausible.Site.SpikeNotification{}
    |> Plausible.Site.SpikeNotification.changeset(attrs)
    |> Repo.insert()

  # Both outcomes land on the same page; only the flash differs.
  conn =
    case insert_result do
      {:ok, _notification} ->
        # The previous message was missing its verb ("You will a notification ...").
        put_flash(
          conn,
          :success,
          "You will receive a notification with traffic spikes going forward"
        )

      {:error, _changeset} ->
        put_flash(conn, :error, "Unable to create a spike notification")
    end

  redirect(conn, external: Routes.site_path(conn, :settings_email_reports, site.domain))
end
2021-06-16 15:00:07 +03:00
@doc """
Deletes all spike notification records for the current site and redirects
to the email reports settings page.
"""
def disable_spike_notification(conn, _params) do
  site = conn.assigns[:site]

  notifications_for_site =
    from notification in Plausible.Site.SpikeNotification,
      where: notification.site_id == ^site.id

  Repo.delete_all(notifications_for_site)

  conn
  |> put_flash(:success, "Spike notification disabled")
  |> redirect(external: Routes.site_path(conn, :settings_email_reports, site.domain))
end
2021-06-16 15:00:07 +03:00
# Applies the submitted params to the site's SpikeNotification record and
# redirects to the email reports settings page.
#
# Uses `Repo.update!/1`, so a changeset that fails to persist raises instead
# of producing an error flash.
def update_spike_notification(conn, %{" spike_notification " => params}) do
  site = conn.assigns[:site]

  Plausible.Site.SpikeNotification
  |> Repo.get_by(site_id: site.id)
  |> Plausible.Site.SpikeNotification.changeset(params)
  |> Repo.update!()

  settings_path = Routes.site_path(conn, :settings_email_reports, site.domain)

  conn
  |> put_flash(:success, " Notification settings updated ")
  |> redirect(external: settings_path)
end
2021-06-16 15:00:07 +03:00
# Adds `recipient` to the site's SpikeNotification record and redirects to the
# email reports settings page with a confirmation flash.
#
# Persists with `Repo.update!/1`, which raises when the update fails.
def add_spike_notification_recipient(conn, %{" recipient " => recipient}) do
  site = conn.assigns[:site]

  notification = Repo.get_by(Plausible.Site.SpikeNotification, site_id: site.id)
  changeset = Plausible.Site.SpikeNotification.add_recipient(notification, recipient)
  Repo.update!(changeset)

  settings_path = Routes.site_path(conn, :settings_email_reports, site.domain)

  conn
  |> put_flash(:success, " Added #{ recipient } as a recipient for the traffic spike notification ")
  |> redirect(external: settings_path)
end
2021-06-16 15:00:07 +03:00
# Removes `recipient` from the site's SpikeNotification record and redirects
# to the email reports settings page with a confirmation flash.
#
# Persists with `Repo.update!/1`, which raises when the update fails.
def remove_spike_notification_recipient(conn, %{" recipient " => recipient}) do
  site = conn.assigns[:site]

  Repo.get_by(Plausible.Site.SpikeNotification, site_id: site.id)
  |> Plausible.Site.SpikeNotification.remove_recipient(recipient)
  |> Repo.update!()

  conn
  |> put_flash(
    :success,
    # This action edits the traffic spike notification; the message previously
    # named the monthly report, copied from the monthly-report action.
    " Removed #{ recipient } as a recipient for the traffic spike notification "
  )
  |> redirect(external: Routes.site_path(conn, :settings_email_reports, site.domain))
end
2021-06-16 15:00:07 +03:00
# Renders the "new shared link" form for the current site with an empty
# SharedLink changeset, using the focus layout. Sets the
# :skip_plausible_tracking assign on the conn before rendering.
def new_shared_link(conn, _params) do
  template_assigns = [
    site: conn.assigns[:site],
    changeset: Plausible.Site.SharedLink.changeset(%Plausible.Site.SharedLink{}, %{}),
    layout: {PlausibleWeb.LayoutView, " focus.html "}
  ]

  conn
  |> assign(:skip_plausible_tracking, true)
  |> render(" new_shared_link.html ", template_assigns)
end
2021-06-16 15:00:07 +03:00
# Creates a shared link for the current site from the submitted name and
# password. On success redirects to the visibility settings page; on failure
# re-renders the form with the returned changeset.
def create_shared_link(conn, %{" shared_link " => link}) do
  site = conn.assigns[:site]
  name = link[" name "]
  password = link[" password "]

  case Sites.create_shared_link(site, name, password) do
    {:error, changeset} ->
      conn
      |> assign(:skip_plausible_tracking, true)
      |> render(" new_shared_link.html ",
        site: site,
        changeset: changeset,
        layout: {PlausibleWeb.LayoutView, " focus.html "}
      )

    {:ok, _created} ->
      visibility_path = Routes.site_path(conn, :settings_visibility, site.domain)
      redirect(conn, external: visibility_path)
  end
end
2021-06-16 15:00:07 +03:00
# Renders the edit form for the shared link identified by `slug`.
#
# The lookup is restricted to links whose site_id is the current site's id,
# the same scoping `delete_shared_link/2` applies, so a slug belonging to a
# different site cannot be opened through this site's route. When no matching
# link exists the user is sent back to the visibility settings page with an
# error flash instead of a changeset being built from `nil`.
def edit_shared_link(conn, %{" slug " => slug}) do
  site = conn.assigns[:site]

  case Repo.get_by(Plausible.Site.SharedLink, slug: slug, site_id: site.id) do
    nil ->
      conn
      |> put_flash(:error, " Could not find Shared Link ")
      |> redirect(external: Routes.site_path(conn, :settings_visibility, site.domain))

    shared_link ->
      changeset = Plausible.Site.SharedLink.changeset(shared_link, %{})

      conn
      |> assign(:skip_plausible_tracking, true)
      |> render(" edit_shared_link.html ",
        site: site,
        changeset: changeset,
        layout: {PlausibleWeb.LayoutView, " focus.html "}
      )
  end
end
2021-06-16 15:00:07 +03:00
# Updates the shared link identified by `slug` with the submitted params.
#
# The lookup is restricted to links whose site_id is the current site's id,
# the same scoping `delete_shared_link/2` applies, so a link owned by another
# site cannot be modified through this site's route. Outcomes:
#   * no matching link -> error flash, redirect to visibility settings
#   * update succeeds  -> redirect to visibility settings
#   * update fails     -> re-render the edit form with the failed changeset
def update_shared_link(conn, %{" slug " => slug, " shared_link " => params}) do
  site = conn.assigns[:site]

  case Repo.get_by(Plausible.Site.SharedLink, slug: slug, site_id: site.id) do
    nil ->
      conn
      |> put_flash(:error, " Could not find Shared Link ")
      |> redirect(external: Routes.site_path(conn, :settings_visibility, site.domain))

    shared_link ->
      changeset = Plausible.Site.SharedLink.changeset(shared_link, params)

      case Repo.update(changeset) do
        {:ok, _updated} ->
          redirect(conn, external: Routes.site_path(conn, :settings_visibility, site.domain))

        {:error, changeset} ->
          conn
          |> assign(:skip_plausible_tracking, true)
          |> render(" edit_shared_link.html ",
            site: site,
            changeset: changeset,
            layout: {PlausibleWeb.LayoutView, " focus.html "}
          )
      end
  end
end
2021-06-16 15:00:07 +03:00
# Deletes the shared link matching both `slug` and the current site's id, then
# redirects to the visibility settings page. The flash is a success message
# when exactly one row was deleted and an error message when none was.
def delete_shared_link(conn, %{" slug " => slug}) do
  site = conn.assigns[:site]
  site_id = site.id

  link_query =
    from link in Plausible.Site.SharedLink,
      where: link.slug == ^slug,
      where: link.site_id == ^site_id

  case Repo.delete_all(link_query) do
    {0, _} ->
      conn
      |> put_flash(:error, " Could not find Shared Link ")
      |> redirect(external: Routes.site_path(conn, :settings_visibility, site.domain))

    {1, _} ->
      conn
      |> put_flash(:success, " Shared Link deleted ")
      |> redirect(external: Routes.site_path(conn, :settings_visibility, site.domain))
  end
end
2022-03-24 12:49:45 +03:00
# Renders the Google Analytics user-metric form for the current site, passing
# the selected view id and the Google credentials from the request params
# through to the template unchanged.
def import_from_google_user_metric_notice(conn, %{
      " view_id " => view_id,
      " access_token " => access_token,
      " refresh_token " => refresh_token,
      " expires_at " => expires_at
    }) do
  template_assigns = [
    site: conn.assigns[:site],
    view_id: view_id,
    access_token: access_token,
    refresh_token: refresh_token,
    expires_at: expires_at,
    layout: {PlausibleWeb.LayoutView, " focus.html "}
  ]

  conn
  |> assign(:skip_plausible_tracking, true)
  |> render(" import_from_google_user_metric_form.html ", template_assigns)
end
2022-09-26 12:29:56 +03:00
# Lists the Google Analytics views available to `access_token` and renders the
# view selection form. If listing fails, redirects to the site's general
# settings page with an error flash; authentication failures get their own
# message, every other error gets a generic one.
def import_from_google_view_id_form(conn, %{
      " access_token " => access_token,
      " refresh_token " => refresh_token,
      " expires_at " => expires_at
    }) do
  case Plausible.Google.Api.list_views(access_token) do
    {:ok, view_ids} ->
      conn
      |> assign(:skip_plausible_tracking, true)
      |> render(" import_from_google_view_id_form.html ",
        access_token: access_token,
        refresh_token: refresh_token,
        expires_at: expires_at,
        site: conn.assigns.site,
        view_ids: view_ids,
        layout: {PlausibleWeb.LayoutView, " focus.html "}
      )

    {:error, reason} ->
      message =
        case reason do
          :authentication_failed ->
            " We were unable to authenticate your Google Analytics account. Please check that you have granted us permission to 'See and download your Google Analytics data' and try again. "

          _any ->
            " We were unable to list your Google Analytics properties. If the problem persists, please contact support for assistance. "
        end

      conn
      |> put_flash(:error, message)
      |> redirect(to: Routes.site_path(conn, :settings_general, conn.assigns.site.domain))
  end
end
2022-03-24 12:49:45 +03:00
# Cutoff date used by import_from_google_view_id/2: when a view's analytics
# start date is before this date, the user is redirected to the user-metric
# notice instead of straight to the import confirmation step.
# see https://stackoverflow.com/a/57416769
@google_analytics_new_user_metric_date ~D[ 2016-08-24 ]
2022-09-26 12:29:56 +03:00
# Handles selection of a Google Analytics view for import.
#
# Looks up the analytics start date for `view_id` and then:
#   * `{:ok, nil}`  -> re-renders the view selection form with a
#                      "nothing to import" error
#   * `{:ok, date}` -> redirects to the user-metric notice when `date` is
#                      before @google_analytics_new_user_metric_date, and to
#                      the import confirmation step otherwise
# The view id and Google credentials are forwarded as query params on redirect.
def import_from_google_view_id(conn, %{
      " view_id " => view_id,
      " access_token " => access_token,
      " refresh_token " => refresh_token,
      " expires_at " => expires_at
    }) do
  site = conn.assigns[:site]

  case Plausible.Google.HTTP.get_analytics_start_date(view_id, access_token) do
    {:ok, nil} ->
      # The form needs the list of views again; a failed listing raises here.
      {:ok, view_ids} = Plausible.Google.Api.list_views(access_token)

      conn
      |> assign(:skip_plausible_tracking, true)
      |> render(" import_from_google_view_id_form.html ",
        access_token: access_token,
        refresh_token: refresh_token,
        expires_at: expires_at,
        site: site,
        view_ids: view_ids,
        selected_view_id_error: " No data found. Nothing to import ",
        layout: {PlausibleWeb.LayoutView, " focus.html "}
      )

    {:ok, date} ->
      next_action =
        if Timex.before?(date, @google_analytics_new_user_metric_date) do
          :import_from_google_user_metric_notice
        else
          :import_from_google_confirm
        end

      next_path =
        Routes.site_path(conn, next_action, site.domain,
          view_id: view_id,
          access_token: access_token,
          refresh_token: refresh_token,
          expires_at: expires_at
        )

      redirect(conn, to: next_path)
  end
end
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convert GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as a separate line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferrer' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is to just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by site_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
2022-09-26 12:29:56 +03:00
# Renders "import_from_google_confirm.html" (focus layout) for the selected
# Google Analytics view, together with a proposed import date range.
#
# Params (string keys, all required for this clause to match):
#   * "view_id"       - identifier of the GA view chosen by the user
#   * "access_token"  - Google OAuth access token
#   * "refresh_token" - Google OAuth refresh token
#   * "expires_at"    - expiry of the access token
#
# The tokens are passed through to the template unchanged.
def import_from_google_confirm(conn, %{
      "view_id" => view_id,
      "access_token" => access_token,
      "refresh_token" => refresh_token,
      "expires_at" => expires_at
    }) do
  # Assertive access: a missing :site assign fails right here instead of
  # surfacing later as a nil. Same access style as import_from_google/2.
  site = conn.assigns.site

  start_date = Plausible.Google.HTTP.get_analytics_start_date(view_id, access_token)

  # Use the site's stats start date when there is one; otherwise fall back to
  # today's date in the site's timezone.
  end_date = Plausible.Sites.stats_start_date(site) || Timex.today(site.timezone)

  # Any result other than {:ok, {name, id}} raises a MatchError.
  # NOTE(review): no graceful error path for a failed Google API call - confirm
  # that crashing is the intended behaviour here.
  {:ok, {view_name, view_id}} = Plausible.Google.Api.get_view(access_token, view_id)

  conn
  |> assign(:skip_plausible_tracking, true)
  |> render("import_from_google_confirm.html",
    access_token: access_token,
    refresh_token: refresh_token,
    expires_at: expires_at,
    site: site,
    selected_view_id: view_id,
    selected_view_id_name: view_name,
    start_date: start_date,
    end_date: end_date,
    layout: {PlausibleWeb.LayoutView, "focus.html"}
  )
end
# Creates a new Universal Analytics import for the current site via
# Plausible.Imported.UniversalAnalytics.new_import/3, then redirects to the
# site's integrations settings page with a success flash.
#
# Params (string keys, all required for this clause to match):
#   * "view_id"                   - identifier of the GA view to import from
#   * "start_date" / "end_date"   - date range to import
#   * "access_token", "refresh_token", "expires_at" - Google OAuth credentials,
#     forwarded to the import as :access_token, :refresh_token and
#     :token_expires_at
def import_from_google(conn, %{
      "view_id" => view_id,
      "start_date" => start_date,
      "end_date" => end_date,
      "access_token" => access_token,
      "refresh_token" => refresh_token,
      "expires_at" => expires_at
    }) do
  site = conn.assigns.site
  current_user = conn.assigns.current_user

  # Any result other than {:ok, _} raises a MatchError, so the flash and
  # redirect below only run once the import has been created.
  {:ok, _} =
    Plausible.Imported.UniversalAnalytics.new_import(
      site,
      current_user,
      view_id: view_id,
      start_date: start_date,
      end_date: end_date,
      access_token: access_token,
      refresh_token: refresh_token,
      token_expires_at: expires_at
    )

  conn
  |> put_flash(:success, "Import scheduled. An email will be sent when it completes.")
  |> redirect(external: Routes.site_path(conn, :settings_integrations, site.domain))
end
def forget_imported ( conn , _params ) do
2024-02-14 11:32:36 +03:00
site = conn . assigns . site
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data with native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convert GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as a separate line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferrer' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is to just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by site_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
2024-02-14 11:32:36 +03:00
import_ids =
site
|> Plausible.Imported . list_all_imports ( )
|> Enum . map ( & &1 . id )
import_ids =
if site . imported_data do
[ 0 | import_ids ]
else
import_ids
end
2022-03-23 12:58:36 +03:00
2024-02-14 11:32:36 +03:00
if import_ids != [ ] do
Oban . cancel_all_jobs (
from j in Oban.Job ,
where :
j . queue == " analytics_imports " and
fragment ( " (? ->> 'import_id')::int " , j . args ) in ^ import_ids
)
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data with native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convert GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as a separate line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferrer' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is to just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by site_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
2024-02-14 11:32:36 +03:00
Plausible.Purge . delete_imported_stats! ( site )
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data with native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convert GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as a separate line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferrer' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is to just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by site_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
2024-02-14 11:32:36 +03:00
Plausible.Imported . delete_imports_for_site ( site )
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data with native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convert GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as a separate line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferrer' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is to just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by site_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
2024-02-14 11:32:36 +03:00
site
|> Plausible.Site . remove_imported_data ( )
|> Repo . update! ( )
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data with native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convert GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as a separate line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferrer' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is to just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by site_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
end
2024-02-14 11:32:36 +03:00
conn
|> put_flash ( :success , " Imported data has been cleared " )
|> redirect ( external : Routes . site_path ( conn , :settings_integrations , site . domain ) )
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data with native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convert GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as a separate line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferrer' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is to just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by site_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
end
2023-04-04 11:55:12 +03:00
# Renders the "change domain" form for the site assigned to the connection.
#
# The form is backed by `Plausible.Site.update_changeset/1`, built from
# `conn.assigns.site`. The page is rendered inside the "focus.html" layout
# with the `skip_plausible_tracking` assign set to `true`.
def change_domain(conn, _params) do
  changeset = Plausible.Site.update_changeset(conn.assigns.site)

  render(conn, "change_domain.html",
    skip_plausible_tracking: true,
    changeset: changeset,
    layout: {PlausibleWeb.LayoutView, "focus.html"}
  )
end
# Handles submission of the "change domain" form.
#
# Expects params shaped as `%{"site" => %{"domain" => new_domain}}` and passes
# the new domain, together with `conn.assigns.site`, to
# `Plausible.Site.Domain.change/2`.
#
#   * `{:ok, updated_site}`  - sets a success flash and redirects to the
#     `:add_snippet_after_domain_change` route, built with the updated domain.
#   * `{:error, changeset}` - re-renders the form with the returned changeset,
#     using the same assigns and layout as `change_domain/2`.
def change_domain_submit(conn, %{"site" => %{"domain" => new_domain}}) do
  case Plausible.Site.Domain.change(conn.assigns.site, new_domain) do
    {:ok, updated_site} ->
      conn
      |> put_flash(:success, "Website domain changed successfully")
      |> redirect(
        # The route must be generated from the *updated* domain, since the
        # old one no longer identifies the site after a successful change.
        external: Routes.site_path(conn, :add_snippet_after_domain_change, updated_site.domain)
      )

    {:error, changeset} ->
      render(conn, "change_domain.html",
        skip_plausible_tracking: true,
        changeset: changeset,
        layout: {PlausibleWeb.LayoutView, "focus.html"}
      )
  end
end
# Renders the snippet page shown after a domain change.
#
# Reads the site from `conn.assigns[:site]`, sets the
# `skip_plausible_tracking` assign to `true` and renders
# "snippet_after_domain_change.html" inside the "focus.html" layout.
def add_snippet_after_domain_change(conn, _params) do
  site = conn.assigns[:site]

  conn
  |> assign(:skip_plausible_tracking, true)
  |> render("snippet_after_domain_change.html",
    site: site,
    layout: {PlausibleWeb.LayoutView, "focus.html"}
  )
end
2023-07-27 15:27:01 +03:00
# Collapses an insert result into `:ok` when the only failure is a violation
# of the unique constraint called `name` on `:site_id`.
#
#   * `{:ok, _}` becomes `:ok`.
#   * `{:error, changeset_like}` whose `errors` is exactly one `:site_id`
#     entry carrying `constraint: :unique, constraint_name: name` becomes `:ok`.
#     `name` appears in both arguments of that clause, so the constraint name
#     in the error must be equal to the `name` passed in for it to match.
#   * Any other value is returned unchanged.
defp tolerate_unique_contraint_violation({:ok, _}, _name), do: :ok

defp tolerate_unique_contraint_violation(
       {:error,
        %{
          errors: [
            site_id: {_, [constraint: :unique, constraint_name: name]}
          ]
        }},
       name
     ),
     do: :ok

defp tolerate_unique_contraint_violation(other, _name), do: other
2019-09-02 14:29:19 +03:00
end