2019-11-19 07:30:42 +03:00
defmodule PlausibleWeb.Api.StatsController do
2023-11-20 14:52:20 +03:00
use Plausible
2019-11-19 07:30:42 +03:00
use PlausibleWeb , :controller
use Plausible.Repo
2024-01-04 12:13:37 +03:00
use PlausibleWeb.Plugs.ErrorHandler
2024-02-14 11:32:36 +03:00
alias Plausible.Imported.SiteImport
2021-07-23 13:44:05 +03:00
alias Plausible.Stats
2024-01-29 13:16:47 +03:00
alias Plausible.Stats . { Query , Comparisons }
2023-10-17 16:00:00 +03:00
alias PlausibleWeb.Api.Helpers , as : H
2019-11-19 07:30:42 +03:00
2023-01-13 20:04:09 +03:00
require Logger
2023-11-22 17:34:47 +03:00
@revenue_metrics on_full_build ( do : Plausible.Stats.Goal.Revenue . revenue_metrics ( ) , else : [ ] )
2023-06-27 11:04:35 +03:00
plug ( :validate_common_input )
2023-05-04 15:09:43 +03:00
2022-11-15 00:41:51 +03:00
@doc """
Returns a time - series based on given parameters .
## Parameters
This API accepts the following parameters :
* ` period ` - x - axis of the graph , e . g . ` 12 mo ` , ` day ` , ` custom ` .
* ` metric ` - y - axis of the graph , e . g . ` visits ` , ` visitors ` , ` pageviews ` .
See the Stats API [ " Metrics " ] ( https :/ / plausible . io / docs / stats - api #metrics)
section for more details . Defaults to ` visitors ` .
* ` interval ` - granularity of the time - series data . You can think of it as
a ` GROUP BY ` clause . Possible values are ` minute ` , ` hour ` , ` date ` , ` week ` ,
and ` month ` . The default depends on the ` period ` parameter . Check
` Plausible.Query . from / 2 ` for each default .
* ` filters ` - optional filters to drill down data . See the Stats API
[ " Filtering " ] ( https :/ / plausible . io / docs / stats - api #filtering) section for
more details .
* ` with_imported ` - boolean indicating whether to include Google Analytics
imported data or not . Defaults to ` false ` .
Full example :
` ` ` elixir
%{
" from " = > " 2021-09-06 " ,
" interval " = > " month " ,
" metric " = > " visitors " ,
" period " = > " custom " ,
" to " = > " 2021-12-13 "
}
` ` `
## Response
Returns a map with the following keys :
* ` plot ` - list of values for the requested metric representing the y - axis
of the graph .
* ` labels ` - list of date times representing the x - axis of the graph .
* ` present_index ` - index of the element representing the current date in
` labels ` and ` plot ` lists .
* ` interval ` - the interval used for querying .
* ` with_imported ` - boolean indicating whether the Google Analytics data
was queried or not .
* ` imported_source ` - the source of the imported data , when applicable .
Currently only Google Analytics is supported .
* ` full_intervals ` - map of dates indicating whether the interval has been
cut off by the requested date range or not . For example , if looking at a
month week - by - week , some weeks may be cut off by the month boundaries .
It ' s useful to adjust the graph display slightly in case the interval is
not ' full ' so that the user understands why the numbers might be lower for
those partial periods .
Full example :
` ` ` elixir
%{
" full_intervals " = > %{
" 2021-09-01 " = > false ,
" 2021-10-01 " = > true ,
" 2021-11-01 " = > true ,
" 2021-12-01 " = > false
} ,
" imported_source " = > nil ,
" interval " = > " month " ,
" labels " = > [ " 2021-09-01 " , " 2021-10-01 " , " 2021-11-01 " , " 2021-12-01 " ] ,
" plot " = > [ 0 , 0 , 0 , 0 ] ,
" present_index " = > nil ,
" with_imported " = > false
}
` ` `
"""
2019-11-19 07:30:42 +03:00
def main_graph ( conn , params ) do
site = conn . assigns [ :site ]
2023-08-08 15:49:19 +03:00
with :ok <- validate_params ( site , params ) do
2024-01-29 13:16:47 +03:00
query = Query . from ( site , params )
2022-04-13 10:38:47 +03:00
2022-10-11 15:42:14 +03:00
selected_metric =
if ! params [ " metric " ] || params [ " metric " ] == " conversions " do
2023-02-07 16:00:49 +03:00
:visitors
2022-10-11 15:42:14 +03:00
else
2023-02-07 16:00:49 +03:00
String . to_existing_atom ( params [ " metric " ] )
2022-10-11 15:42:14 +03:00
end
2021-08-19 15:41:08 +03:00
2022-10-11 15:42:14 +03:00
timeseries_query =
if query . period == " realtime " do
% Query { query | period : " 30m " }
else
query
end
2022-04-13 10:38:47 +03:00
2023-02-07 16:00:49 +03:00
timeseries_result = Stats . timeseries ( site , timeseries_query , [ selected_metric ] )
2022-10-11 15:42:14 +03:00
2023-04-13 16:01:54 +03:00
comparison_opts = parse_comparison_opts ( params )
2023-04-26 12:06:40 +03:00
{ comparison_query , comparison_result } =
2023-04-13 16:01:54 +03:00
case Comparisons . compare ( site , query , params [ " comparison " ] , comparison_opts ) do
2023-04-26 12:06:40 +03:00
{ :ok , comparison_query } ->
{ comparison_query , Stats . timeseries ( site , comparison_query , [ selected_metric ] ) }
{ :error , :not_supported } ->
{ nil , nil }
2023-02-07 16:00:49 +03:00
end
2023-03-22 15:31:44 +03:00
labels = label_timeseries ( timeseries_result , comparison_result )
present_index = present_index_for ( site , query , labels )
full_intervals = build_full_intervals ( query , labels )
2024-02-14 11:32:36 +03:00
site_import = Plausible.Imported . get_earliest_import ( site )
2022-10-11 15:42:14 +03:00
json ( conn , %{
2023-02-07 16:00:49 +03:00
plot : plot_timeseries ( timeseries_result , selected_metric ) ,
2022-10-11 15:42:14 +03:00
labels : labels ,
2023-02-07 16:00:49 +03:00
comparison_plot : comparison_result && plot_timeseries ( comparison_result , selected_metric ) ,
2023-03-22 15:31:44 +03:00
comparison_labels : comparison_result && label_timeseries ( comparison_result , nil ) ,
2022-10-11 15:42:14 +03:00
present_index : present_index ,
interval : query . interval ,
2023-04-26 12:06:40 +03:00
with_imported : with_imported? ( query , comparison_query ) ,
2024-02-14 11:32:36 +03:00
imported_source : site_import && SiteImport . label ( site_import ) ,
2022-11-15 00:41:51 +03:00
full_intervals : full_intervals
2022-10-11 15:42:14 +03:00
} )
else
2022-11-15 00:41:51 +03:00
{ :error , message } when is_binary ( message ) -> bad_request ( conn , message )
end
end
2023-02-07 16:00:49 +03:00
defp plot_timeseries ( timeseries , metric ) do
2023-07-12 12:25:34 +03:00
Enum . map ( timeseries , fn row ->
case row [ metric ] do
nil -> 0
% Money { } = money -> Decimal . to_float ( money . amount )
value -> value
end
end )
2023-02-07 16:00:49 +03:00
end
2023-03-22 15:31:44 +03:00
defp label_timeseries ( main_result , nil ) do
Enum . map ( main_result , & &1 . date )
end
@blank_value " __blank__ "
defp label_timeseries ( main_result , comparison_result ) do
blanks_to_fill = Enum . count ( comparison_result ) - Enum . count ( main_result )
if blanks_to_fill > 0 do
blanks = List . duplicate ( @blank_value , blanks_to_fill )
Enum . map ( main_result , & &1 . date ) ++ blanks
else
Enum . map ( main_result , & &1 . date )
end
2023-02-07 16:00:49 +03:00
end
2022-11-15 00:41:51 +03:00
defp build_full_intervals ( %{ interval : " week " , date_range : range } , labels ) do
for label <- labels , into : %{ } do
interval_start = Timex . beginning_of_week ( label )
interval_end = Timex . end_of_week ( label )
within_interval? = Enum . member? ( range , interval_start ) && Enum . member? ( range , interval_end )
{ label , within_interval? }
end
end
defp build_full_intervals ( %{ interval : " month " , date_range : range } , labels ) do
for label <- labels , into : %{ } do
interval_start = Timex . beginning_of_month ( label )
interval_end = Timex . end_of_month ( label )
within_interval? = Enum . member? ( range , interval_start ) && Enum . member? ( range , interval_end )
{ label , within_interval? }
2022-10-11 15:42:14 +03:00
end
2022-04-13 10:38:47 +03:00
end
2022-11-15 00:41:51 +03:00
defp build_full_intervals ( _query , _labels ) do
nil
end
2022-04-13 10:38:47 +03:00
def top_stats ( conn , params ) do
site = conn . assigns [ :site ]
2023-08-08 15:49:19 +03:00
with :ok <- validate_params ( site , params ) do
2024-01-29 13:16:47 +03:00
query = Query . from ( site , params )
2023-04-11 15:59:10 +03:00
2023-04-13 16:01:54 +03:00
comparison_opts = parse_comparison_opts ( params )
2023-03-22 15:31:44 +03:00
2023-04-11 15:59:10 +03:00
comparison_query =
2023-04-26 14:22:33 +03:00
case Stats.Comparisons . compare ( site , query , params [ " comparison " ] , comparison_opts ) do
2023-04-11 15:59:10 +03:00
{ :ok , query } -> query
{ :error , _cause } -> nil
end
2022-10-11 15:42:14 +03:00
2023-04-11 15:59:10 +03:00
{ top_stats , sample_percent } = fetch_top_stats ( site , query , comparison_query )
2022-04-13 10:38:47 +03:00
2024-02-14 11:32:36 +03:00
site_import = Plausible.Imported . get_earliest_import ( site )
2022-10-11 15:42:14 +03:00
json ( conn , %{
top_stats : top_stats ,
interval : query . interval ,
sample_percent : sample_percent ,
2023-04-26 12:06:40 +03:00
with_imported : with_imported? ( query , comparison_query ) ,
2024-02-14 11:32:36 +03:00
imported_source : site_import && SiteImport . label ( site_import ) ,
2023-04-11 15:59:10 +03:00
comparing_from : comparison_query && comparison_query . date_range . first ,
comparing_to : comparison_query && comparison_query . date_range . last ,
from : query . date_range . first ,
to : query . date_range . last
2022-10-11 15:42:14 +03:00
} )
else
2022-11-15 00:41:51 +03:00
{ :error , message } when is_binary ( message ) -> bad_request ( conn , message )
2022-10-11 15:42:14 +03:00
end
2019-11-19 07:30:42 +03:00
end
2021-07-23 13:44:05 +03:00
defp present_index_for ( site , query , dates ) do
case query . interval do
2021-08-13 11:28:51 +03:00
" hour " ->
current_date =
Timex . now ( site . timezone )
|> Timex . format! ( " {YYYY}-{0M}-{0D} {h24}:00:00 " )
Enum . find_index ( dates , & ( &1 == current_date ) )
2021-07-23 13:44:05 +03:00
" date " ->
current_date =
Timex . now ( site . timezone )
|> Timex . to_date ( )
Enum . find_index ( dates , & ( &1 == current_date ) )
2022-11-15 00:41:51 +03:00
" week " ->
current_date =
Timex . now ( site . timezone )
|> Timex . to_date ( )
|> date_or_weekstart ( query )
Enum . find_index ( dates , & ( &1 == current_date ) )
2021-08-13 11:28:51 +03:00
" month " ->
2021-07-23 13:44:05 +03:00
current_date =
Timex . now ( site . timezone )
2021-08-13 11:28:51 +03:00
|> Timex . to_date ( )
|> Timex . beginning_of_month ( )
2021-07-23 13:44:05 +03:00
Enum . find_index ( dates , & ( &1 == current_date ) )
" minute " ->
2022-11-15 00:41:51 +03:00
current_date =
Timex . now ( site . timezone )
|> Timex . format! ( " {YYYY}-{0M}-{0D} {h24}:{0m}:00 " )
Enum . find_index ( dates , & ( &1 == current_date ) )
end
end
defp date_or_weekstart ( date , query ) do
weekstart = Timex . beginning_of_week ( date )
if Enum . member? ( query . date_range , weekstart ) do
weekstart
else
date
2021-07-23 13:44:05 +03:00
end
end
2023-01-09 10:31:55 +03:00
defp fetch_top_stats (
site ,
2023-03-07 18:52:26 +03:00
% Query { period : " realtime " , filters : %{ " event:goal " = > _goal } } = query ,
2023-04-11 15:59:10 +03:00
_comparison_query
2023-01-09 10:31:55 +03:00
) do
query_30m = % Query { query | period : " 30m " }
%{
visitors : %{ value : unique_conversions } ,
events : %{ value : total_conversions }
} = Stats . aggregate ( site , query_30m , [ :visitors , :events ] )
stats = [
%{
name : " Current visitors " ,
value : Stats . current_visitors ( site )
} ,
%{
name : " Unique conversions (last 30 min) " ,
value : unique_conversions
} ,
%{
name : " Total conversions (last 30 min) " ,
value : total_conversions
}
]
{ stats , 100 }
end
2023-04-11 15:59:10 +03:00
defp fetch_top_stats ( site , % Query { period : " realtime " } = query , _comparison_query ) do
2021-08-19 15:41:08 +03:00
query_30m = % Query { query | period : " 30m " }
2021-07-23 13:44:05 +03:00
%{
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
visitors : %{ value : visitors } ,
pageviews : %{ value : pageviews }
} = Stats . aggregate ( site , query_30m , [ :visitors , :pageviews ] )
2021-07-23 13:44:05 +03:00
2021-03-24 12:19:10 +03:00
stats = [
2020-07-14 16:52:26 +03:00
%{
2020-08-18 14:14:56 +03:00
name : " Current visitors " ,
2021-07-23 13:44:05 +03:00
value : Stats . current_visitors ( site )
2020-07-14 16:52:26 +03:00
} ,
2020-12-22 16:54:41 +03:00
%{
name : " Unique visitors (last 30 min) " ,
2021-07-23 13:44:05 +03:00
value : visitors
2020-12-22 16:54:41 +03:00
} ,
2020-07-14 16:52:26 +03:00
%{
name : " Pageviews (last 30 min) " ,
2021-07-23 13:44:05 +03:00
value : pageviews
2020-07-14 16:52:26 +03:00
}
]
2021-03-24 12:19:10 +03:00
2021-08-18 15:24:44 +03:00
{ stats , 100 }
2020-07-14 16:52:26 +03:00
end
2023-06-22 21:36:43 +03:00
defp fetch_top_stats ( site , % Query { filters : %{ " event:goal " = > _ } } = query , comparison_query ) do
2024-02-15 20:43:35 +03:00
query_without_filters = Query . remove_event_filters ( query , [ :goal , :props ] )
metrics = [ :visitors , :events ] ++ @revenue_metrics
results_without_filters =
site
|> Stats . aggregate ( query_without_filters , [ :visitors ] )
|> transform_keys ( %{ visitors : :unique_visitors } )
2019-11-25 12:17:18 +03:00
2024-02-15 20:43:35 +03:00
results =
site
|> Stats . aggregate ( query , metrics )
|> transform_keys ( %{ visitors : :converted_visitors , events : :completions } )
|> Map . merge ( results_without_filters )
comparison =
if comparison_query do
comparison_query_without_filters =
Query . remove_event_filters ( comparison_query , [ :goal , :props ] )
comparison_without_filters =
site
|> Stats . aggregate ( comparison_query_without_filters , [ :visitors ] )
|> transform_keys ( %{ visitors : :unique_visitors } )
site
|> Stats . aggregate ( comparison_query , metrics )
|> transform_keys ( %{ visitors : :converted_visitors , events : :completions } )
|> Map . merge ( comparison_without_filters )
end
conversion_rate = %{
cr : %{ value : calculate_cr ( results . unique_visitors . value , results . converted_visitors . value ) }
}
comparison_conversion_rate =
if comparison do
value =
calculate_cr ( comparison . unique_visitors . value , comparison . converted_visitors . value )
%{ cr : %{ value : value } }
else
nil
end
2021-03-24 12:19:10 +03:00
2023-06-22 21:36:43 +03:00
[
2024-02-15 20:43:35 +03:00
top_stats_entry ( results , comparison , " Unique visitors " , :unique_visitors ) ,
top_stats_entry ( results , comparison , " Unique conversions " , :converted_visitors ) ,
top_stats_entry ( results , comparison , " Total conversions " , :completions ) ,
2023-11-22 17:34:47 +03:00
on_full_build do
top_stats_entry ( results , comparison , " Average revenue " , :average_revenue , & format_money / 1 )
end ,
on_full_build do
top_stats_entry ( results , comparison , " Total revenue " , :total_revenue , & format_money / 1 )
end ,
2024-02-15 20:43:35 +03:00
top_stats_entry ( conversion_rate , comparison_conversion_rate , " Conversion rate " , :cr )
2023-06-22 21:36:43 +03:00
]
|> Enum . reject ( & is_nil / 1 )
|> then ( & { &1 , 100 } )
2019-11-25 12:17:18 +03:00
end
2023-04-11 15:59:10 +03:00
defp fetch_top_stats ( site , query , comparison_query ) do
2021-07-23 13:44:05 +03:00
metrics =
if query . filters [ " event:page " ] do
2023-03-07 14:58:36 +03:00
[
:visitors ,
:visits ,
:pageviews ,
:bounce_rate ,
:time_on_page ,
:sample_percent
]
2021-07-23 13:44:05 +03:00
else
2023-03-07 14:58:36 +03:00
[
:visitors ,
:visits ,
:pageviews ,
:views_per_visit ,
:bounce_rate ,
:visit_duration ,
:sample_percent
]
2020-11-03 12:20:11 +03:00
end
2019-11-25 12:17:18 +03:00
2021-07-23 13:44:05 +03:00
current_results = Stats . aggregate ( site , query , metrics )
2023-04-11 15:59:10 +03:00
prev_results = comparison_query && Stats . aggregate ( site , comparison_query , metrics )
2021-05-19 10:21:43 +03:00
2021-03-24 12:19:10 +03:00
stats =
[
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
top_stats_entry ( current_results , prev_results , " Unique visitors " , :visitors ) ,
2023-03-07 14:58:36 +03:00
top_stats_entry ( current_results , prev_results , " Total visits " , :visits ) ,
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
top_stats_entry ( current_results , prev_results , " Total pageviews " , :pageviews ) ,
2023-03-07 14:58:36 +03:00
top_stats_entry ( current_results , prev_results , " Views per visit " , :views_per_visit ) ,
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
top_stats_entry ( current_results , prev_results , " Bounce rate " , :bounce_rate ) ,
top_stats_entry ( current_results , prev_results , " Visit duration " , :visit_duration ) ,
top_stats_entry ( current_results , prev_results , " Time on page " , :time_on_page )
2021-03-24 12:19:10 +03:00
]
|> Enum . filter ( & &1 )
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
{ stats , current_results [ :sample_percent ] [ :value ] }
2021-07-23 13:44:05 +03:00
end
2023-06-22 21:36:43 +03:00
defp top_stats_entry ( current_results , prev_results , name , key , formatter \\ & &1 ) do
2021-07-23 13:44:05 +03:00
if current_results [ key ] do
2023-03-07 18:52:26 +03:00
value = get_in ( current_results , [ key , :value ] )
2023-04-26 14:22:33 +03:00
if prev_results do
prev_value = get_in ( prev_results , [ key , :value ] )
2023-12-21 15:56:06 +03:00
change = Stats.Compare . calculate_change ( key , prev_value , value )
2023-06-22 21:36:43 +03:00
%{
name : name ,
value : formatter . ( value ) ,
comparison_value : formatter . ( prev_value ) ,
change : change
}
2023-04-26 14:22:33 +03:00
else
2023-06-22 21:36:43 +03:00
%{ name : name , value : formatter . ( value ) }
2023-04-26 14:22:33 +03:00
end
2021-07-23 13:44:05 +03:00
end
end
2020-09-28 11:29:24 +03:00
def sources ( conn , params ) do
2019-11-19 07:30:42 +03:00
site = conn . assigns [ :site ]
2024-02-15 20:43:35 +03:00
2024-01-29 13:16:47 +03:00
query = Query . from ( site , params )
2021-07-23 13:44:05 +03:00
2024-02-15 20:43:35 +03:00
pagination = parse_pagination ( params )
2024-02-15 12:18:57 +03:00
2024-02-15 20:43:35 +03:00
metrics =
if params [ " detailed " ] , do : [ :visitors , :bounce_rate , :visit_duration ] , else : [ :visitors ]
2021-07-23 13:44:05 +03:00
res =
Stats . breakdown ( site , query , " visit:source " , metrics , pagination )
2024-02-15 20:43:35 +03:00
|> add_cr ( site , query , pagination , :source , " visit:source " )
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
|> transform_keys ( %{ source : :name } )
2021-07-23 13:44:05 +03:00
2021-10-26 16:54:50 +03:00
if params [ " csv " ] do
2021-11-12 16:18:35 +03:00
if Map . has_key? ( query . filters , " event:goal " ) do
res
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
|> transform_keys ( %{ visitors : :conversions } )
|> to_csv ( [ :name , :conversions , :conversion_rate ] )
2021-11-12 16:18:35 +03:00
else
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
res |> to_csv ( [ :name , :visitors , :bounce_rate , :visit_duration ] )
2021-11-12 16:18:35 +03:00
end
2021-10-26 16:54:50 +03:00
else
json ( conn , res )
end
2019-11-19 07:30:42 +03:00
end
2023-11-20 14:52:20 +03:00
on_full_build do
def funnel ( conn , %{ " id " = > funnel_id } = params ) do
site = Plausible.Repo . preload ( conn . assigns . site , :owner )
with :ok <- Plausible.Billing.Feature.Funnels . check_availability ( site . owner ) ,
:ok <- validate_params ( site , params ) ,
2024-01-29 13:16:47 +03:00
query <- Query . from ( site , params ) ,
2023-11-20 14:52:20 +03:00
:ok <- validate_funnel_query ( query ) ,
{ funnel_id , " " } <- Integer . parse ( funnel_id ) ,
{ :ok , funnel } <- Stats . funnel ( site , query , funnel_id ) do
json ( conn , funnel )
else
{ :error , { :invalid_funnel_query , due_to } } ->
bad_request (
conn ,
" We are unable to show funnels when the dashboard is filtered by #{ due_to } " ,
%{
level : :normal
}
)
{ :error , :funnel_not_found } ->
conn
|> put_status ( 404 )
|> json ( %{ error : " Funnel not found " } )
|> halt ( )
{ :error , :upgrade_required } ->
H . payment_required (
conn ,
" #{ Plausible.Billing.Feature.Funnels . display_name ( ) } is part of the Plausible Business plan. To get access to this feature, please upgrade your account. "
)
_ ->
bad_request ( conn , " There was an error with your request " )
end
2023-06-27 11:04:35 +03:00
end
2023-11-20 14:52:20 +03:00
defp validate_funnel_query ( query ) do
case query do
_ when is_map_key ( query . filters , " event:goal " ) ->
{ :error , { :invalid_funnel_query , " goals " } }
2023-06-27 11:04:35 +03:00
2023-11-20 14:52:20 +03:00
_ when is_map_key ( query . filters , " event:page " ) ->
{ :error , { :invalid_funnel_query , " pages " } }
2023-06-27 11:04:35 +03:00
2023-11-20 14:52:20 +03:00
_ when query . period == " realtime " ->
{ :error , { :invalid_funnel_query , " realtime period " } }
2023-06-27 11:04:35 +03:00
2023-11-20 14:52:20 +03:00
_ ->
:ok
end
2023-06-27 11:04:35 +03:00
end
end
2020-09-28 11:29:24 +03:00
def utm_mediums ( conn , params ) do
2020-01-16 16:40:06 +03:00
site = conn . assigns [ :site ]
2024-02-15 20:43:35 +03:00
2024-01-29 13:16:47 +03:00
query = Query . from ( site , params )
2024-02-15 20:43:35 +03:00
2021-07-23 13:44:05 +03:00
pagination = parse_pagination ( params )
2024-02-15 20:43:35 +03:00
metrics = [ :visitors , :bounce_rate , :visit_duration ]
2021-07-23 13:44:05 +03:00
res =
Stats . breakdown ( site , query , " visit:utm_medium " , metrics , pagination )
2024-02-15 20:43:35 +03:00
|> add_cr ( site , query , pagination , :utm_medium , " visit:utm_medium " )
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
|> transform_keys ( %{ utm_medium : :name } )
2021-07-23 13:44:05 +03:00
2021-10-26 16:54:50 +03:00
if params [ " csv " ] do
2021-11-12 16:18:35 +03:00
if Map . has_key? ( query . filters , " event:goal " ) do
res
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
|> transform_keys ( %{ visitors : :conversions } )
|> to_csv ( [ :name , :conversions , :conversion_rate ] )
2021-11-12 16:18:35 +03:00
else
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
res |> to_csv ( [ :name , :visitors , :bounce_rate , :visit_duration ] )
2021-11-12 16:18:35 +03:00
end
2021-10-26 16:54:50 +03:00
else
json ( conn , res )
end
2020-09-28 11:29:24 +03:00
end
def utm_campaigns ( conn , params ) do
site = conn . assigns [ :site ]
2024-02-15 20:43:35 +03:00
2024-01-29 13:16:47 +03:00
query = Query . from ( site , params )
2024-02-15 20:43:35 +03:00
2021-07-23 13:44:05 +03:00
pagination = parse_pagination ( params )
2024-02-15 20:43:35 +03:00
metrics = [ :visitors , :bounce_rate , :visit_duration ]
2021-07-23 13:44:05 +03:00
res =
Stats . breakdown ( site , query , " visit:utm_campaign " , metrics , pagination )
2024-02-15 20:43:35 +03:00
|> add_cr ( site , query , pagination , :utm_campaign , " visit:utm_campaign " )
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
|> transform_keys ( %{ utm_campaign : :name } )
2021-07-23 13:44:05 +03:00
2021-10-26 16:54:50 +03:00
if params [ " csv " ] do
2021-11-12 16:18:35 +03:00
if Map . has_key? ( query . filters , " event:goal " ) do
res
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
|> transform_keys ( %{ visitors : :conversions } )
|> to_csv ( [ :name , :conversions , :conversion_rate ] )
2021-11-12 16:18:35 +03:00
else
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
res |> to_csv ( [ :name , :visitors , :bounce_rate , :visit_duration ] )
2021-11-12 16:18:35 +03:00
end
2021-10-26 16:54:50 +03:00
else
json ( conn , res )
end
2020-09-28 11:29:24 +03:00
end
2020-01-16 16:40:06 +03:00
2021-12-16 12:02:09 +03:00
def utm_contents ( conn , params ) do
site = conn . assigns [ :site ]
2024-02-15 20:43:35 +03:00
2024-01-29 13:16:47 +03:00
query = Query . from ( site , params )
2024-02-15 20:43:35 +03:00
2021-12-16 12:02:09 +03:00
pagination = parse_pagination ( params )
2024-02-15 20:43:35 +03:00
metrics = [ :visitors , :bounce_rate , :visit_duration ]
2021-12-16 12:02:09 +03:00
res =
Stats . breakdown ( site , query , " visit:utm_content " , metrics , pagination )
2024-02-15 20:43:35 +03:00
|> add_cr ( site , query , pagination , :utm_content , " visit:utm_content " )
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
|> transform_keys ( %{ utm_content : :name } )
2021-12-16 12:02:09 +03:00
2021-12-16 16:50:37 +03:00
if params [ " csv " ] do
if Map . has_key? ( query . filters , " event:goal " ) do
res
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
|> transform_keys ( %{ visitors : :conversions } )
|> to_csv ( [ :name , :conversions , :conversion_rate ] )
2021-12-16 16:50:37 +03:00
else
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
res |> to_csv ( [ :name , :visitors , :bounce_rate , :visit_duration ] )
2021-12-16 16:50:37 +03:00
end
else
json ( conn , res )
end
2021-12-16 12:02:09 +03:00
end
def utm_terms ( conn , params ) do
site = conn . assigns [ :site ]
2024-02-15 20:43:35 +03:00
2024-01-29 13:16:47 +03:00
query = Query . from ( site , params )
2024-02-15 20:43:35 +03:00
2021-12-16 12:02:09 +03:00
pagination = parse_pagination ( params )
2024-02-15 20:43:35 +03:00
metrics = [ :visitors , :bounce_rate , :visit_duration ]
2021-12-16 12:02:09 +03:00
res =
Stats . breakdown ( site , query , " visit:utm_term " , metrics , pagination )
2024-02-15 20:43:35 +03:00
|> add_cr ( site , query , pagination , :utm_term , " visit:utm_term " )
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
|> transform_keys ( %{ utm_term : :name } )
2021-12-16 12:02:09 +03:00
2021-12-16 16:50:37 +03:00
if params [ " csv " ] do
if Map . has_key? ( query . filters , " event:goal " ) do
res
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
|> transform_keys ( %{ visitors : :conversions } )
|> to_csv ( [ :name , :conversions , :conversion_rate ] )
2021-12-16 16:50:37 +03:00
else
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
res |> to_csv ( [ :name , :visitors , :bounce_rate , :visit_duration ] )
2021-12-16 16:50:37 +03:00
end
else
json ( conn , res )
end
2021-12-16 12:02:09 +03:00
end
2020-09-28 11:29:24 +03:00
def utm_sources ( conn , params ) do
site = conn . assigns [ :site ]
2024-02-15 20:43:35 +03:00
2024-01-29 13:16:47 +03:00
query = Query . from ( site , params )
2024-02-15 20:43:35 +03:00
2021-07-23 13:44:05 +03:00
pagination = parse_pagination ( params )
2024-02-15 20:43:35 +03:00
metrics = [ :visitors , :bounce_rate , :visit_duration ]
2021-07-23 13:44:05 +03:00
res =
Stats . breakdown ( site , query , " visit:utm_source " , metrics , pagination )
2024-02-15 20:43:35 +03:00
|> add_cr ( site , query , pagination , :utm_source , " visit:utm_source " )
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
|> transform_keys ( %{ utm_source : :name } )
2021-07-23 13:44:05 +03:00
2021-10-26 16:54:50 +03:00
if params [ " csv " ] do
2021-11-12 16:18:35 +03:00
if Map . has_key? ( query . filters , " event:goal " ) do
res
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
|> transform_keys ( %{ visitors : :conversions } )
|> to_csv ( [ :name , :conversions , :conversion_rate ] )
2021-11-12 16:18:35 +03:00
else
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
res |> to_csv ( [ :name , :visitors , :bounce_rate , :visit_duration ] )
2021-11-12 16:18:35 +03:00
end
2021-10-26 16:54:50 +03:00
else
json ( conn , res )
end
2020-01-16 16:40:06 +03:00
end
2019-11-20 11:42:45 +03:00
2023-08-07 10:32:01 +03:00
def referrers ( conn , params ) do
site = conn . assigns [ :site ]
2024-02-15 20:43:35 +03:00
2024-01-29 13:16:47 +03:00
query = Query . from ( site , params )
2024-02-15 20:43:35 +03:00
2023-08-07 10:32:01 +03:00
pagination = parse_pagination ( params )
2024-02-15 20:43:35 +03:00
metrics = [ :visitors , :bounce_rate , :visit_duration ]
2023-08-07 10:32:01 +03:00
res =
Stats . breakdown ( site , query , " visit:referrer " , metrics , pagination )
2024-02-15 20:43:35 +03:00
|> add_cr ( site , query , pagination , :referrer , " visit:referrer " )
2023-08-07 10:32:01 +03:00
|> transform_keys ( %{ referrer : :name } )
if params [ " csv " ] do
if Map . has_key? ( query . filters , " event:goal " ) do
res
|> transform_keys ( %{ visitors : :conversions } )
|> to_csv ( [ :name , :conversions , :conversion_rate ] )
else
res |> to_csv ( [ :name , :visitors , :bounce_rate , :visit_duration ] )
end
else
json ( conn , res )
end
end
2019-11-19 07:30:42 +03:00
def referrer_drilldown ( conn , %{ " referrer " = > " Google " } = params ) do
site = conn . assigns [ :site ] |> Repo . preload ( :google_auth )
2021-09-21 11:53:21 +03:00
query =
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
Query . from ( site , params )
2024-02-01 12:27:11 +03:00
|> Query . put_filter ( " visit:source " , " Google " )
2019-11-19 07:30:42 +03:00
2020-06-08 10:35:13 +03:00
search_terms =
if site . google_auth && site . google_auth . property && ! query . filters [ " goal " ] do
2021-01-07 16:16:04 +03:00
google_api ( ) . fetch_stats ( site , query , params [ " limit " ] || 9 )
2020-06-08 10:35:13 +03:00
end
2019-11-19 07:30:42 +03:00
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
%{ :visitors = > %{ value : total_visitors } } = Stats . aggregate ( site , query , [ :visitors ] )
2021-07-23 13:44:05 +03:00
2022-10-24 10:34:02 +03:00
user_id = get_session ( conn , :current_user_id )
is_admin = user_id && Plausible.Sites . has_admin_access? ( user_id , site )
2019-11-19 07:30:42 +03:00
case search_terms do
nil ->
2021-06-16 15:00:07 +03:00
json ( conn , %{ not_configured : true , is_admin : is_admin , total_visitors : total_visitors } )
2020-06-08 10:35:13 +03:00
2019-11-19 07:30:42 +03:00
{ :ok , terms } ->
json ( conn , %{ search_terms : terms , total_visitors : total_visitors } )
2020-06-08 10:35:13 +03:00
2022-10-24 10:34:02 +03:00
{ :error , _ } ->
conn
|> put_status ( 502 )
|> json ( %{
not_configured : true ,
is_admin : is_admin ,
total_visitors : total_visitors
} )
2019-11-19 07:30:42 +03:00
end
end
def referrer_drilldown ( conn , %{ " referrer " = > referrer } = params ) do
site = conn . assigns [ :site ]
2021-07-23 13:44:05 +03:00
query =
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
Query . from ( site , params )
2024-02-01 12:27:11 +03:00
|> Query . put_filter ( " visit:source " , referrer )
2019-11-19 07:30:42 +03:00
2021-07-23 13:44:05 +03:00
pagination = parse_pagination ( params )
2024-02-15 20:43:35 +03:00
metrics =
if params [ " detailed " ] , do : [ :visitors , :bounce_rate , :visit_duration ] , else : [ :visitors ]
2021-07-23 13:44:05 +03:00
referrers =
Stats . breakdown ( site , query , " visit:referrer " , metrics , pagination )
2024-02-15 20:43:35 +03:00
|> add_cr ( site , query , pagination , :referrer , " visit:referrer " )
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
|> transform_keys ( %{ referrer : :name } )
2020-01-16 16:40:06 +03:00
2023-07-21 07:35:41 +03:00
json ( conn , referrers )
2020-01-16 16:40:06 +03:00
end
2019-11-19 07:30:42 +03:00
def pages ( conn , params ) do
site = conn . assigns [ :site ]
2024-01-29 13:16:47 +03:00
query = Query . from ( site , params )
2021-07-23 13:44:05 +03:00
2024-02-15 20:43:35 +03:00
metrics =
2021-07-23 13:44:05 +03:00
if params [ " detailed " ] ,
2024-02-15 20:43:35 +03:00
do : [ :visitors , :pageviews , :bounce_rate , :time_on_page ] ,
else : [ :visitors ]
2021-07-23 13:44:05 +03:00
pagination = parse_pagination ( params )
2019-11-19 07:30:42 +03:00
2021-07-23 13:44:05 +03:00
pages =
Stats . breakdown ( site , query , " event:page " , metrics , pagination )
2024-02-15 20:43:35 +03:00
|> add_cr ( site , query , pagination , :page , " event:page " )
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
|> transform_keys ( %{ page : :name } )
2021-07-23 13:44:05 +03:00
2021-10-26 16:54:50 +03:00
if params [ " csv " ] do
2021-11-12 16:18:35 +03:00
if Map . has_key? ( query . filters , " event:goal " ) do
pages
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
|> transform_keys ( %{ visitors : :conversions } )
|> to_csv ( [ :name , :conversions , :conversion_rate ] )
2021-11-12 16:18:35 +03:00
else
2022-07-12 23:46:47 +03:00
pages |> to_csv ( [ :name , :visitors , :pageviews , :bounce_rate , :time_on_page ] )
2021-11-12 16:18:35 +03:00
end
2021-10-26 16:54:50 +03:00
else
json ( conn , pages )
end
2019-11-19 07:30:42 +03:00
end
2020-07-30 11:18:28 +03:00
def entry_pages ( conn , params ) do
site = conn . assigns [ :site ]
2024-01-29 13:16:47 +03:00
query = Query . from ( site , params )
2021-07-23 13:44:05 +03:00
pagination = parse_pagination ( params )
2024-02-15 20:43:35 +03:00
metrics = [ :visitors , :visits , :visit_duration ]
2021-07-23 13:44:05 +03:00
entry_pages =
2021-08-16 11:58:36 +03:00
Stats . breakdown ( site , query , " visit:entry_page " , metrics , pagination )
2024-02-15 20:43:35 +03:00
|> add_cr ( site , query , pagination , :entry_page , " visit:entry_page " )
2023-07-31 11:33:37 +03:00
|> transform_keys ( %{ entry_page : :name } )
2020-07-30 11:18:28 +03:00
2021-10-26 16:54:50 +03:00
if params [ " csv " ] do
2021-11-12 16:18:35 +03:00
if Map . has_key? ( query . filters , " event:goal " ) do
2023-07-31 11:33:37 +03:00
to_csv ( entry_pages , [ :name , :visitors , :conversion_rate ] , [
:name ,
:conversions ,
:conversion_rate
] )
2021-11-12 16:18:35 +03:00
else
2023-07-31 11:33:37 +03:00
to_csv ( entry_pages , [ :name , :visitors , :visits , :visit_duration ] , [
:name ,
:unique_entrances ,
:total_entrances ,
:visit_duration
] )
2021-11-12 16:18:35 +03:00
end
2021-10-26 16:54:50 +03:00
else
json ( conn , entry_pages )
end
Adds entry and exit pages to Top Pages module (#712)
* Initial Pass
* Adds support for page visits counting by referrer
* Includes goal selection in entry and exit computation
* Adds goal-based entry and exit page stats, formatting, code cleanup
* Changelog
* Format
* Exit rate, visit duration, updated tests
* I keep forgetting to format :/
* Tests, last time
* Fixes double counting, exit rate >100%, relevant tests
* Fixes exit pages on filter and goal states
* Adds entry and exit filters, fixes various bugs
* Fixes discussed issues
* Format
* Fixes impossible case in tests
Originally, there were only 2 pageviews for `test-site.com`,`/` on `2019-01-01`, but that doesn't make sense when there were 3 sessions that exited on the same site/date.
* Format
* Removes boolean function parameter in favor of separate function
* Adds support for queries that use `page` filter as `entry-page`
* Format
* Makes loader/title interaction in sources report consistent
2021-02-26 12:02:37 +03:00
end
def exit_pages ( conn , params ) do
site = conn . assigns [ :site ]
2024-01-29 13:16:47 +03:00
query = Query . from ( site , params )
2021-07-23 13:44:05 +03:00
{ limit , page } = parse_pagination ( params )
2024-02-15 20:43:35 +03:00
metrics = [ :visitors , :visits ]
2021-07-23 13:44:05 +03:00
exit_pages =
Stats . breakdown ( site , query , " visit:exit_page " , metrics , { limit , page } )
2024-02-15 20:43:35 +03:00
|> add_cr ( site , query , { limit , page } , :exit_page , " visit:exit_page " )
2023-07-31 11:33:37 +03:00
|> add_exit_rate ( site , query , limit )
|> transform_keys ( %{ exit_page : :name } )
2021-07-23 13:44:05 +03:00
2021-10-26 16:54:50 +03:00
if params [ " csv " ] do
2021-11-12 16:18:35 +03:00
if Map . has_key? ( query . filters , " event:goal " ) do
2023-07-31 11:33:37 +03:00
to_csv ( exit_pages , [ :name , :visitors , :conversion_rate ] , [
:name ,
:conversions ,
:conversion_rate
] )
2021-11-12 16:18:35 +03:00
else
2023-07-31 11:33:37 +03:00
to_csv ( exit_pages , [ :name , :visitors , :visits , :exit_rate ] , [
:name ,
:unique_exits ,
:total_exits ,
:exit_rate
] )
2021-11-12 16:18:35 +03:00
end
2021-10-26 16:54:50 +03:00
else
json ( conn , exit_pages )
end
2020-07-30 11:18:28 +03:00
end
2023-07-31 11:33:37 +03:00
defp add_exit_rate ( breakdown_results , site , query , limit ) do
if Query . has_event_filters? ( query ) do
breakdown_results
else
pages = Enum . map ( breakdown_results , & &1 [ :exit_page ] )
total_visits_query =
Query . put_filter ( query , " event:page " , { :member , pages } )
|> Query . put_filter ( " event:name " , { :is , " pageview " } )
total_pageviews =
Stats . breakdown ( site , total_visits_query , " event:page " , [ :pageviews ] , { limit , 1 } )
Enum . map ( breakdown_results , fn result ->
exit_rate =
case Enum . find ( total_pageviews , & ( &1 [ :page ] == result [ :exit_page ] ) ) do
%{ pageviews : pageviews } ->
Float . floor ( result [ :visits ] / pageviews * 100 )
nil ->
nil
end
Map . put ( result , :exit_rate , exit_rate )
end )
end
end
2019-11-19 07:30:42 +03:00
def countries ( conn , params ) do
site = conn . assigns [ :site ]
2024-01-29 13:16:47 +03:00
query = site |> Query . from ( params )
2021-10-14 11:55:43 +03:00
pagination = parse_pagination ( params )
2021-07-23 13:44:05 +03:00
countries =
2024-02-15 20:43:35 +03:00
Stats . breakdown ( site , query , " visit:country " , [ :visitors ] , pagination )
|> add_cr ( site , query , { 300 , 1 } , :country , " visit:country " )
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
|> transform_keys ( %{ country : :code } )
2023-07-28 21:44:56 +03:00
|> add_percentages ( site , query )
2019-11-19 07:30:42 +03:00
2021-10-26 16:54:50 +03:00
if params [ " csv " ] do
2021-11-12 16:18:35 +03:00
countries =
countries
|> Enum . map ( fn country ->
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
country_info = get_country ( country [ :code ] )
Map . put ( country , :name , country_info . name )
2021-11-12 16:18:35 +03:00
end )
if Map . has_key? ( query . filters , " event:goal " ) do
countries
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
|> transform_keys ( %{ visitors : :conversions } )
|> to_csv ( [ :name , :conversions , :conversion_rate ] )
2021-11-12 16:18:35 +03:00
else
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
countries |> to_csv ( [ :name , :visitors ] )
2021-11-12 16:18:35 +03:00
end
2021-10-26 16:54:50 +03:00
else
2021-11-04 15:20:39 +03:00
countries =
2021-12-01 16:31:50 +03:00
Enum . map ( countries , fn row ->
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
country = get_country ( row [ :code ] )
if country do
Map . merge ( row , %{
name : country . name ,
flag : country . flag ,
alpha_3 : country . alpha_3 ,
code : country . alpha_2
} )
else
Map . merge ( row , %{
name : row [ :code ] ,
flag : " " ,
alpha_3 : " " ,
code : " "
} )
end
2021-11-04 15:20:39 +03:00
end )
2021-10-26 16:54:50 +03:00
json ( conn , countries )
end
2019-11-19 07:30:42 +03:00
end
2021-11-23 12:39:09 +03:00
def regions ( conn , params ) do
site = conn . assigns [ :site ]
2024-01-29 13:16:47 +03:00
query = site |> Query . from ( params )
2021-11-23 12:39:09 +03:00
pagination = parse_pagination ( params )
2022-01-04 12:08:06 +03:00
regions =
2024-02-15 20:43:35 +03:00
Stats . breakdown ( site , query , " visit:region " , [ :visitors ] , pagination )
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
|> transform_keys ( %{ region : :code } )
2021-11-23 12:39:09 +03:00
|> Enum . map ( fn region ->
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
region_entry = Location . get_subdivision ( region [ :code ] )
2022-01-18 19:03:50 +03:00
2022-01-18 20:13:35 +03:00
if region_entry do
2022-01-18 19:41:15 +03:00
country_entry = get_country ( region_entry . country_code )
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
Map . merge ( region , %{ name : region_entry . name , country_flag : country_entry . flag } )
2022-01-18 19:03:50 +03:00
else
2023-01-13 20:04:09 +03:00
Logger . warning ( " Could not find region info - code: #{ inspect ( region [ :code ] ) } " )
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
Map . merge ( region , %{ name : region [ :code ] } )
2022-01-18 19:03:50 +03:00
end
2021-11-23 12:39:09 +03:00
end )
2022-01-04 12:08:06 +03:00
if params [ " csv " ] do
if Map . has_key? ( query . filters , " event:goal " ) do
regions
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
|> transform_keys ( %{ visitors : :conversions } )
|> to_csv ( [ :name , :conversions , :conversion_rate ] )
2022-01-04 12:08:06 +03:00
else
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
regions |> to_csv ( [ :name , :visitors ] )
2022-01-04 12:08:06 +03:00
end
else
json ( conn , regions )
end
2021-11-23 12:39:09 +03:00
end
def cities ( conn , params ) do
site = conn . assigns [ :site ]
2024-01-29 13:16:47 +03:00
query = site |> Query . from ( params )
2021-11-23 12:39:09 +03:00
pagination = parse_pagination ( params )
cities =
2024-02-15 20:43:35 +03:00
Stats . breakdown ( site , query , " visit:city " , [ :visitors ] , pagination )
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
|> transform_keys ( %{ city : :code } )
2021-11-23 12:39:09 +03:00
|> Enum . map ( fn city ->
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
city_info = Location . get_city ( city [ :code ] )
2021-12-09 15:21:26 +03:00
if city_info do
2022-01-18 19:41:15 +03:00
country_info = get_country ( city_info . country_code )
2021-12-09 15:21:26 +03:00
Map . merge ( city , %{
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
name : city_info . name ,
country_flag : country_info . flag
2021-12-09 15:21:26 +03:00
} )
else
2023-01-13 20:04:09 +03:00
Logger . warning ( " Could not find city info - code: #{ inspect ( city [ :code ] ) } " )
2022-01-18 19:03:50 +03:00
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
Map . merge ( city , %{ name : " N/A " } )
2021-12-09 15:21:26 +03:00
end
2021-11-23 12:39:09 +03:00
end )
2022-01-04 12:08:06 +03:00
if params [ " csv " ] do
if Map . has_key? ( query . filters , " event:goal " ) do
cities
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
|> transform_keys ( %{ visitors : :conversions } )
|> to_csv ( [ :name , :conversions , :conversion_rate ] )
2022-01-04 12:08:06 +03:00
else
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
cities |> to_csv ( [ :name , :visitors ] )
2022-01-04 12:08:06 +03:00
end
else
json ( conn , cities )
end
2021-11-23 12:39:09 +03:00
end
2019-11-19 07:30:42 +03:00
def browsers ( conn , params ) do
site = conn . assigns [ :site ]
2024-01-29 13:16:47 +03:00
query = Query . from ( site , params )
2021-07-23 13:44:05 +03:00
pagination = parse_pagination ( params )
browsers =
2024-02-15 20:43:35 +03:00
Stats . breakdown ( site , query , " visit:browser " , [ :visitors ] , pagination )
|> add_cr ( site , query , pagination , :browser , " visit:browser " )
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
|> transform_keys ( %{ browser : :name } )
2023-07-28 21:44:56 +03:00
|> add_percentages ( site , query )
2019-11-19 07:30:42 +03:00
2021-10-26 16:54:50 +03:00
if params [ " csv " ] do
2021-11-12 16:18:35 +03:00
if Map . has_key? ( query . filters , " event:goal " ) do
browsers
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
|> transform_keys ( %{ visitors : :conversions } )
|> to_csv ( [ :name , :conversions , :conversion_rate ] )
2021-11-12 16:18:35 +03:00
else
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
browsers |> to_csv ( [ :name , :visitors ] )
2021-11-12 16:18:35 +03:00
end
2021-10-26 16:54:50 +03:00
else
json ( conn , browsers )
end
2019-11-19 07:30:42 +03:00
end
2020-11-10 16:18:59 +03:00
def browser_versions ( conn , params ) do
site = conn . assigns [ :site ]
2024-01-29 13:16:47 +03:00
query = Query . from ( site , params )
2021-07-23 13:44:05 +03:00
pagination = parse_pagination ( params )
versions =
2024-02-15 20:43:35 +03:00
Stats . breakdown ( site , query , " visit:browser_version " , [ :visitors ] , pagination )
|> add_cr ( site , query , pagination , :browser_version , " visit:browser_version " )
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
|> transform_keys ( %{ browser_version : :name } )
2023-07-28 21:44:56 +03:00
|> add_percentages ( site , query )
2020-11-10 16:18:59 +03:00
2023-12-12 16:39:08 +03:00
if params [ " csv " ] do
if Map . has_key? ( query . filters , " event:goal " ) do
versions
|> transform_keys ( %{
name : :version ,
browser : :name ,
visitors : :conversions
} )
|> to_csv ( [ :name , :version , :conversions , :conversion_rate ] )
else
versions
|> transform_keys ( %{ name : :version , browser : :name } )
|> to_csv ( [ :name , :version , :visitors ] )
end
else
json ( conn , versions )
end
2020-11-10 16:18:59 +03:00
end
2019-11-19 07:30:42 +03:00
def operating_systems ( conn , params ) do
site = conn . assigns [ :site ]
2024-01-29 13:16:47 +03:00
query = Query . from ( site , params )
2021-07-23 13:44:05 +03:00
pagination = parse_pagination ( params )
2019-11-19 07:30:42 +03:00
2021-07-23 13:44:05 +03:00
systems =
2024-02-15 20:43:35 +03:00
Stats . breakdown ( site , query , " visit:os " , [ :visitors ] , pagination )
|> add_cr ( site , query , pagination , :os , " visit:os " )
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
|> transform_keys ( %{ os : :name } )
2023-07-28 21:44:56 +03:00
|> add_percentages ( site , query )
2021-07-23 13:44:05 +03:00
2021-10-26 16:54:50 +03:00
if params [ " csv " ] do
2021-11-12 16:18:35 +03:00
if Map . has_key? ( query . filters , " event:goal " ) do
systems
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
|> transform_keys ( %{ visitors : :conversions } )
|> to_csv ( [ :name , :conversions , :conversion_rate ] )
2021-11-12 16:18:35 +03:00
else
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
systems |> to_csv ( [ :name , :visitors ] )
2021-11-12 16:18:35 +03:00
end
2021-10-26 16:54:50 +03:00
else
json ( conn , systems )
end
2019-11-19 07:30:42 +03:00
end
2020-11-10 16:18:59 +03:00
def operating_system_versions ( conn , params ) do
site = conn . assigns [ :site ]
2024-01-29 13:16:47 +03:00
query = Query . from ( site , params )
2021-07-23 13:44:05 +03:00
pagination = parse_pagination ( params )
versions =
2024-02-15 20:43:35 +03:00
Stats . breakdown ( site , query , " visit:os_version " , [ :visitors ] , pagination )
|> add_cr ( site , query , pagination , :os_version , " visit:os_version " )
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
|> transform_keys ( %{ os_version : :name } )
2023-07-28 21:44:56 +03:00
|> add_percentages ( site , query )
2020-11-10 16:18:59 +03:00
2021-07-23 13:44:05 +03:00
json ( conn , versions )
2020-11-10 16:18:59 +03:00
end
2019-11-19 07:30:42 +03:00
def screen_sizes ( conn , params ) do
site = conn . assigns [ :site ]
2024-01-29 13:16:47 +03:00
query = Query . from ( site , params )
2021-07-23 13:44:05 +03:00
pagination = parse_pagination ( params )
sizes =
2024-02-15 20:43:35 +03:00
Stats . breakdown ( site , query , " visit:device " , [ :visitors ] , pagination )
|> add_cr ( site , query , pagination , :device , " visit:device " )
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
|> transform_keys ( %{ device : :name } )
2023-07-28 21:44:56 +03:00
|> add_percentages ( site , query )
2019-11-19 07:30:42 +03:00
2021-10-26 16:54:50 +03:00
if params [ " csv " ] do
2021-11-12 16:18:35 +03:00
if Map . has_key? ( query . filters , " event:goal " ) do
sizes
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
|> transform_keys ( %{ visitors : :conversions } )
|> to_csv ( [ :name , :conversions , :conversion_rate ] )
2021-11-12 16:18:35 +03:00
else
[Continued] Google Analytics import (#1753)
* Add has_imported_stats boolean to Site
* Add Google Analytics import panel to general settings
* Get GA profiles to display in import settings panel
* Add import_from_google method as entrypoint to import data
* Add imported_visitors table
* Remove conflicting code from migration
* Import visitors data into clickhouse database
* Pass another dataset to main graph for rendering in red
This adds another entry to the JSON data returned via the main graph API
called `imported_plot`, which is similar to `plot` in form but will be
completed with previously imported data. Currently it simply returns
the values from `plot` / 2. The data is rendered in the main graph in
red without fill, and without an indicator for the present. Rationale:
imported data will not continue to grow so there is no projection
forward, only backwards.
* Hook imported GA data to dashboard timeseries plot
* Add settings option to forget imported data
* Import sources from google analytics
* Merge imported sources when queried
* Merge imported source data native data when querying sources
* Start converting metrics to atoms so they can be subqueried
This changes "visitors" and in some places "sources" to atoms. This does
not change the behaviour of the functions - the tests all pass unchanged
following this commit. This is necessary as joining subqueries requires
that the keys in `select` statements be atoms and not strings.
* Convery GA (direct) source to empty string
* Import utm campaign and utm medium from GA
* format
* Import all data types from GA into new tables
* Handle large amounts of more data more safely
* Fix some mistakes in tables
* Make GA requests in chunks of 5 queries
* Only display imported timeseries when there is no filter
* Correctly show last 30 minutes timeseries when 'realtime'
* Add with_imported key to Query struct
* Account for injected :is_not filter on sources from dashboard
* Also add tentative imported_utm_sources table
This needs a bit more work on the google import side, as GA do not
report sources and utm sources as distinct things.
* Return imported data to dashboard for rest of Sources panel
This extends the merge_imported function definition for sources to
utm_sources, utm_mediums and utm_campaigns too. This appears to be
working on the DB side but something is incomplete on the client side.
* Clear imported stats from all tables when requested
* Merge entry pages and exit pages from imported data into unfiltered dashboard view
This requires converting the `"visits"` and `"visit_duration"` metrics
to atoms so that they can be used in ecto subqueries.
* Display imported devices, browsers and OSs on dashboard
* Display imported country data on dashboard
* Add more metrics to entries/exits for modals
* make sure data is returned via API with correct keys
* Import regions and cities from GA
* Capitalize device upon import to match native data
* Leave query limits/offsets until after possibly joining with imported data
* Also import timeOnPage and pageviews for pages from GA
* imported_countries -> imported_locations
* Get timeOnPage and pageviews for pages from GA
These are needed for the pages modal, and for calculating exit rates for
exit pages.
* Add indicator to dashboard when imported data is being used
* Don't show imported data as separately line on main graph
* "bounce_rate" -> :bounce_rate, so it works in subqueries
* Drop imported browser and OS versions
These are not needed.
* Toggle displaying imported data by clicking indicator
* Parse referrers with RefInspector
- Use 'ga:fullReferrer' instead of 'ga:source'. This provides the actual
referrer host + path, whereas 'ga:source' includes utm_mediums and
other values when relevant.
- 'ga:fullReferror' does however include search engine names directly,
so they are manually checked for as RefInspector won't pick up on
these.
* Keep imported data indicator on dashboard and strikethrough when hidden
* Add unlink google button to import panel
* Rename some GA browsers and OSes to plausible versions
* Get main top pages and exit pages panels working correctly with imported data
* mix format
* Fetch time_on_pages for imported data when needed
* entry pages need to fetch bounces from GA
* "sample_percent" -> :sample_percent as only atoms can be used in subqueries
* Calculate bounce_rate for joined native and imported data for top pages modal
* Flip some query bindings around to be less misleading
* Fixup entry page modal visit durations
* mix format
* Fetch bounces and visit_duration for sources from GA
* add more source metrics used for data in modals
* Make sources modals display correct values
* imported_visitors: bounce_rate -> bounces, avg_visit_duration -> visit_duration
* Merge imported data into aggregate stats
* Reformat top graph side icons
* Ensure sample_percent is yielded from aggregate data
* filter event_props should be strings
* Hide imported data from frontend when using filter
* Fix existing tests
* fix tests
* Fix imported indicator appearing when filtering
* comma needed, lost when rebasing
* Import utm_terms and utm_content from GA
* Merge imported utm_term and utm_content
* Rename imported Countries data as Locations
* Set imported city schema field to int
* Remove utm_terms and utm_content when clearing imported
* Clean locations import from Google Analytics
- Country and region should be set to "" when GA provides "(not set)"
- City should be set to 0 for "unknown", as we cannot reliably import
city data from GA.
* Display imported region and city in dashboard
* os -> operating_system in some parts of code
The inconsistency of using os in some places and operating_system in
others causes trouble with subqueries and joins for the native and
imported data, which would require additional logic to account for. The
simplest solution is the just use a consistent word for all uses. This
doesn't make any user-facing or database changes.
* to_atom -> to_existing_atom
* format
* "events" metric -> :events
* ignore imported data when "events" in metrics
* update "bounce_rate"
* atomise some more metrics from new city and region api
* atomise some more metrics for email handlers
* "conversion_rate" -> :conversion_rate during csv export
* Move imported data stats code to own module
* Move imported timeseries function to Stats.Imported
* Use Timex.parse to import dates from GA
* has_imported_stats -> imported_source
* "time_on_page" -> :time_on_page
* Convert imported GA data to UTC
* Clean up GA request code a bit
There was some weird logic here with two separate lists that really
ought to be together, so this merges those.
* Fail sooner if GA timezone can't be identified
* Link imported tables to site by id
* imported_utm_content -> imported_utm_contents
* Imported GA from all of time
* Reorganise GA data fetch logic
- Fetch data from the start of time (2005)
- Check whether no data was fetched, and if so, inform user and don't
consider data to be imported.
* Clarify removal of "visits" data when it isn't in metrics
* Apply location filters from API
This makes it consistent with the sources etc which filter out 'Direct /
None' on the API side. These filters are used by both the native and
imported data handling code, which would otherwise both duplicate the
filters in their `where` clauses.
* Do not use changeset for setting site.imported_source
* Add all metrics to all dimensions
* Run GA import in the background
* Send email when GA import completes
* Add handler to insert imported data into tests and imported_browsers_factory
* Add remaining import data test factories
* Add imported location data to test
* Test main graph with imported data
* Add imported data to operating systems tests
* Add imported data to pages tests
* Add imported data to entry pages tests
* Add imported data to exit pages tests
* Add imported data to devices tests
* Add imported data to sources tests
* Add imported data to UTM tests
* Add new test module for the data import step
* Test import of sources GA data
* Test import of utm_mediums GA data
* Test import of utm_campaigns GA data
* Add tests for UTM terms
* Add tests for UTM contents
* Add test for importing pages and entry pages data from GA
* Add test for importing exit page data
* Fix module file name typo
* Add test for importing location data from GA
* Add test for importing devices data from GA
* Add test for importing browsers data from GA
* Add test for importing OS data from GA
* Paginate GA requests to download all data
* Bump clickhouse_ecto version
* Move RefInspector wrapper function into module
* Drop timezone transform on import
* Order imported by side_id then date
* More strings -> atoms
Also changes a conditional to be a bit nicer
* Remove parallelisation of data import
* Split sources and UTM sources from fetched GA data
GA has only a "source" dimension and no "UTM source" dimension. Instead
it returns these combined. The logic herein to tease these apart is:
1. "(direct)" -> it's a direct source
2. if the source is a domain -> it's a source
3. "google" -> it's from adwords; let's make this a UTM source "adwords"
4. else -> just a UTM source
* Keep prop names in queries as strings
* fix typo
* Fix import
* Insert data to clickhouse in batches
* Fix link when removing imported data
* Merge source tables
* Import hostname as well as pathname
* Record start and end time of imported data
* Track import progress
* Fix month interval with imported data
* Do not JOIN when imported date range has no overlap
* Fix time on page using exits
Co-authored-by: mcol <mcol@posteo.net>
2022-03-11 00:04:59 +03:00
sizes |> to_csv ( [ :name , :visitors ] )
2021-11-12 16:18:35 +03:00
end
2021-10-26 16:54:50 +03:00
else
json ( conn , sizes )
end
2019-11-19 07:30:42 +03:00
end
2024-02-15 20:43:35 +03:00
defp calculate_cr ( nil , _converted_visitors ) , do : nil
defp calculate_cr ( unique_visitors , converted_visitors ) do
if unique_visitors > 0 ,
do : Float . round ( converted_visitors / unique_visitors * 100 , 1 ) ,
else : 0.0
end
2019-11-19 07:30:42 +03:00
def conversions ( conn , params ) do
2023-08-01 21:33:53 +03:00
pagination = parse_pagination ( params )
2023-06-14 11:32:08 +03:00
site = Plausible.Repo . preload ( conn . assigns . site , :goals )
2024-01-29 13:16:47 +03:00
query = Query . from ( site , params )
2021-08-19 15:41:08 +03:00
query =
if query . period == " realtime " do
% Query { query | period : " 30m " }
else
query
end
2024-02-15 20:43:35 +03:00
total_q = Query . remove_event_filters ( query , [ :goal , :props ] )
%{ visitors : %{ value : total_visitors } } = Stats . aggregate ( site , total_q , [ :visitors ] )
metrics =
on_full_build do
if Enum . any? ( site . goals , & Plausible.Goal.Revenue . revenue? / 1 ) and
Plausible.Billing.Feature.RevenueGoals . enabled? ( site ) do
[ :visitors , :events ] ++ @revenue_metrics
else
[ :visitors , :events ]
end
else
[ :visitors , :events ]
end
2023-06-14 11:32:08 +03:00
2020-11-03 12:20:11 +03:00
conversions =
2023-06-14 11:32:08 +03:00
site
2023-08-01 21:33:53 +03:00
|> Stats . breakdown ( query , " event:goal " , metrics , pagination )
2023-07-31 11:33:37 +03:00
|> transform_keys ( %{ goal : :name } )
2020-11-03 12:20:11 +03:00
|> Enum . map ( fn goal ->
goal
2024-02-15 20:43:35 +03:00
|> Map . put ( :conversion_rate , calculate_cr ( total_visitors , goal [ :visitors ] ) )
2023-06-22 21:36:43 +03:00
|> Enum . map ( & format_revenue_metric / 1 )
|> Map . new ( )
2020-11-03 12:20:11 +03:00
end )
2019-11-19 07:30:42 +03:00
2021-10-26 16:54:50 +03:00
if params [ " csv " ] do
2023-07-31 11:33:37 +03:00
to_csv ( conversions , [ :name , :visitors , :events ] , [
:name ,
:unique_conversions ,
:total_conversions
] )
2021-10-26 16:54:50 +03:00
else
json ( conn , conversions )
end
2020-10-28 12:09:04 +03:00
end
2023-07-17 18:00:52 +03:00
def custom_prop_values ( conn , params ) do
2023-10-17 16:00:00 +03:00
site = Plausible.Repo . preload ( conn . assigns . site , :owner )
2023-11-28 12:30:35 +03:00
prop_key = Map . fetch! ( params , " prop_key " )
2023-10-17 16:00:00 +03:00
2023-11-28 12:30:35 +03:00
case Plausible.Props . ensure_prop_key_accessible ( prop_key , site . owner ) do
2023-10-17 16:00:00 +03:00
:ok ->
props = breakdown_custom_prop_values ( site , params )
json ( conn , props )
{ :error , :upgrade_required } ->
H . payment_required (
conn ,
" #{ Plausible.Billing.Feature.Props . display_name ( ) } is part of the Plausible Business plan. To get access to this feature, please upgrade your account. "
)
end
2023-07-24 17:27:44 +03:00
end
def all_custom_prop_values ( conn , params ) do
2023-10-17 16:00:00 +03:00
site = conn . assigns . site
2024-01-29 13:16:47 +03:00
query = Query . from ( site , params )
2023-07-24 17:27:44 +03:00
prop_names = Plausible.Stats.CustomProps . fetch_prop_names ( site , query )
2024-01-02 15:31:08 +03:00
prop_names =
if Plausible.Billing.Feature.Props . enabled? ( site ) do
prop_names
else
prop_names |> Enum . filter ( & ( &1 in Plausible.Props . internal_keys ( ) ) )
end
2023-07-24 17:27:44 +03:00
2024-01-02 15:31:08 +03:00
if not Enum . empty? ( prop_names ) do
values =
prop_names
|> Enum . map ( fn prop_key ->
breakdown_custom_prop_values ( site , Map . put ( params , " prop_key " , prop_key ) )
|> Enum . map ( & Map . put ( &1 , :property , prop_key ) )
|> transform_keys ( %{ :name = > :value } )
end )
|> Enum . concat ( )
percent_or_cr =
if query . filters [ " event:goal " ] ,
do : :conversion_rate ,
else : :percentage
2023-07-24 17:27:44 +03:00
2024-01-02 15:31:08 +03:00
to_csv ( values , [ :property , :value , :visitors , :events , percent_or_cr ] )
end
2023-07-24 17:27:44 +03:00
end
defp breakdown_custom_prop_values ( site , %{ " prop_key " = > prop_key } = params ) do
2023-08-01 15:52:31 +03:00
pagination = parse_pagination ( params )
prefixed_prop = " event:props: " <> prop_key
2023-07-28 21:44:56 +03:00
query =
Query . from ( site , params )
|> Map . put ( :include_imported , false )
2023-07-17 18:00:52 +03:00
2023-08-01 15:52:31 +03:00
metrics =
2024-02-15 20:43:35 +03:00
if full_build? ( ) and Map . has_key? ( query . filters , " event:goal " ) do
2024-02-15 12:18:57 +03:00
[ :visitors , :events ] ++ @revenue_metrics
2024-02-15 20:43:35 +03:00
else
[ :visitors , :events ]
2023-08-01 15:52:31 +03:00
end
2023-07-17 18:00:52 +03:00
2024-02-15 20:43:35 +03:00
props =
Stats . breakdown ( site , query , prefixed_prop , metrics , pagination )
|> transform_keys ( %{ prop_key = > :name } )
|> Enum . map ( fn entry ->
Enum . map ( entry , & format_revenue_metric / 1 )
|> Map . new ( )
end )
|> add_percentages ( site , query )
if Map . has_key? ( query . filters , " event:goal " ) do
total_q = Query . remove_event_filters ( query , [ :goal , :props ] )
%{ visitors : %{ value : total_unique_visitors } } = Stats . aggregate ( site , total_q , [ :visitors ] )
Enum . map ( props , fn prop ->
Map . put ( prop , :conversion_rate , calculate_cr ( total_unique_visitors , prop . visitors ) )
end )
else
props
end
2023-07-17 18:00:52 +03:00
end
2019-11-19 07:30:42 +03:00
def current_visitors ( conn , _ ) do
2020-07-30 11:18:28 +03:00
site = conn . assigns [ :site ]
2021-07-23 13:44:05 +03:00
json ( conn , Stats . current_visitors ( site ) )
2019-11-19 07:30:42 +03:00
end
2021-01-07 16:16:04 +03:00
defp google_api ( ) , do : Application . fetch_env! ( :plausible , :google_api )
2021-03-25 12:55:15 +03:00
2021-06-21 14:42:16 +03:00
def filter_suggestions ( conn , params ) do
site = conn . assigns [ :site ]
2024-01-29 13:16:47 +03:00
query = Query . from ( site , params )
2023-05-04 15:09:43 +03:00
json (
conn ,
Stats . filter_suggestions ( site , query , params [ " filter_name " ] , params [ " q " ] )
)
2021-07-23 13:44:05 +03:00
end
2023-06-22 21:36:43 +03:00
defp transform_keys ( result , keys_to_replace ) when is_map ( result ) do
for { key , val } <- result , do : { Map . get ( keys_to_replace , key , key ) , val } , into : %{ }
end
defp transform_keys ( results , keys_to_replace ) when is_list ( results ) do
Enum . map ( results , & transform_keys ( &1 , keys_to_replace ) )
2021-07-23 13:44:05 +03:00
end
defp parse_pagination ( params ) do
2022-10-11 15:42:14 +03:00
limit = to_int ( params [ " limit " ] , 9 )
page = to_int ( params [ " page " ] , 1 )
2021-07-23 13:44:05 +03:00
{ limit , page }
end
2022-10-11 15:42:14 +03:00
defp to_int ( string , default ) when is_binary ( string ) do
case Integer . parse ( string ) do
{ i , " " } when is_integer ( i ) ->
i
_ ->
default
end
end
defp to_int ( _ , default ) , do : default
2023-07-28 21:44:56 +03:00
defp add_percentages ( [ _ | _ ] = breakdown_result , site , query )
2023-07-17 18:00:52 +03:00
when not is_map_key ( query . filters , " event:goal " ) do
2023-07-28 21:44:56 +03:00
%{ visitors : %{ value : total_visitors } } = Stats . aggregate ( site , query , [ :visitors ] )
2021-09-20 17:17:11 +03:00
2023-07-28 21:44:56 +03:00
breakdown_result
|> Enum . map ( fn stat ->
Map . put ( stat , :percentage , Float . round ( stat . visitors / total_visitors * 100 , 1 ) )
2021-09-20 17:17:11 +03:00
end )
end
2023-07-28 21:44:56 +03:00
defp add_percentages ( breakdown_result , _ , _ ) , do : breakdown_result
2024-02-15 20:43:35 +03:00
defp add_cr ( [ _ | _ ] = breakdown_results , site , query , pagination , key_name , filter_name )
when is_map_key ( query . filters , " event:goal " ) do
items = Enum . map ( breakdown_results , fn item -> Map . fetch! ( item , key_name ) end )
query_without_goal =
query
|> Query . put_filter ( filter_name , { :member , items } )
|> Query . remove_event_filters ( [ :goal , :props ] )
# Here, we're always only interested in the first page of results
# - the :member filter makes sure that the results always match with
# the items in the given breakdown_results list
pagination = { elem ( pagination , 0 ) , 1 }
res_without_goal =
Stats . breakdown ( site , query_without_goal , filter_name , [ :visitors ] , pagination )
Enum . map ( breakdown_results , fn item ->
without_goal =
Enum . find ( res_without_goal , fn s ->
Map . fetch! ( s , key_name ) == Map . fetch! ( item , key_name )
end )
item
|> Map . put ( :total_visitors , without_goal . visitors )
|> Map . put ( :conversion_rate , calculate_cr ( without_goal . visitors , item . visitors ) )
end )
end
defp add_cr ( breakdown_results , _ , _ , _ , _ , _ ) , do : breakdown_results
2023-07-31 11:33:37 +03:00
defp to_csv ( list , columns ) , do : to_csv ( list , columns , columns )
defp to_csv ( list , columns , column_names ) do
2021-10-26 16:54:50 +03:00
list
2023-07-31 11:33:37 +03:00
|> Enum . map ( fn row -> Enum . map ( columns , & row [ &1 ] ) end )
|> ( fn res -> [ column_names | res ] end ) . ( )
2021-10-26 16:54:50 +03:00
|> CSV . encode ( )
|> Enum . join ( )
end
2022-01-18 19:41:15 +03:00
defp get_country ( code ) do
case Location . get_country ( code ) do
nil ->
2023-01-13 20:04:09 +03:00
Logger . warning ( " Could not find country info - code: #{ inspect ( code ) } " )
2022-01-18 19:41:15 +03:00
% Location.Country {
alpha_2 : code ,
alpha_3 : " N/A " ,
name : code ,
flag : nil
}
country ->
country
end
end
2022-10-11 15:42:14 +03:00
2023-05-04 15:09:43 +03:00
defp validate_common_input ( conn , _opts ) do
2023-08-08 15:49:19 +03:00
case validate_params ( conn . assigns [ :site ] , conn . params ) do
2023-05-04 15:09:43 +03:00
:ok -> conn
{ :error , message } when is_binary ( message ) -> bad_request ( conn , message )
end
end
2023-08-08 15:49:19 +03:00
defp validate_params ( site , params ) do
with { :ok , dates } <- validate_dates ( params ) ,
2022-11-15 00:41:51 +03:00
:ok <- validate_interval ( params ) ,
2023-08-08 15:49:19 +03:00
do : validate_interval_granularity ( site , params , dates )
2022-11-15 00:41:51 +03:00
end
2023-05-04 15:09:43 +03:00
defp validate_dates ( params ) do
params
|> Map . take ( [ " from " , " to " , " date " ] )
2023-08-08 15:49:19 +03:00
|> Enum . reduce_while ( { :ok , %{ } } , fn { key , value } , { :ok , acc } ->
2023-05-04 15:09:43 +03:00
case Date . from_iso8601 ( value ) do
2023-08-08 15:49:19 +03:00
{ :ok , date } ->
{ :cont , { :ok , Map . put ( acc , key , date ) } }
2023-05-04 15:09:43 +03:00
_ ->
{ :halt ,
{ :error ,
" Failed to parse ' #{ key } ' argument. Only ISO 8601 dates are allowed, e.g. `2019-09-07`, `2020-01-01` " } }
end
end )
2022-11-15 00:41:51 +03:00
end
defp validate_interval ( params ) do
with %{ " interval " = > interval } <- params ,
true <- Plausible.Stats.Interval . valid? ( interval ) do
2022-10-11 15:42:14 +03:00
:ok
2022-11-15 00:41:51 +03:00
else
%{ } ->
:ok
false ->
values = Enum . join ( Plausible.Stats.Interval . list ( ) , " , " )
{ :error , " Invalid value for interval. Accepted values are: #{ values } " }
2022-10-11 15:42:14 +03:00
end
end
2023-08-08 15:49:19 +03:00
defp validate_interval_granularity ( site , params , dates ) do
case params do
%{ " interval " = > interval , " period " = > " custom " , " from " = > _ , " to " = > _ } ->
if Plausible.Stats.Interval . valid_for_period? ( " custom " , interval ,
site : site ,
from : dates [ " from " ] ,
to : dates [ " to " ]
) do
:ok
else
{ :error ,
" Invalid combination of interval and period. Custom ranges over 12 months must come with greater granularity, e.g. `period=custom,interval=week` " }
end
2022-11-15 00:41:51 +03:00
2023-08-08 15:49:19 +03:00
%{ " interval " = > interval , " period " = > period } ->
if Plausible.Stats.Interval . valid_for_period? ( period , interval , site : site ) do
:ok
else
{ :error ,
" Invalid combination of interval and period. Interval must be smaller than the selected period, e.g. `period=day,interval=minute` " }
end
_ ->
:ok
2022-11-15 00:41:51 +03:00
end
2022-10-11 15:42:14 +03:00
end
2023-06-27 11:04:35 +03:00
defp bad_request ( conn , message , extra \\ %{ } ) do
payload = Map . merge ( extra , %{ error : message } )
2022-10-11 15:42:14 +03:00
conn
|> put_status ( 400 )
2023-06-27 11:04:35 +03:00
|> json ( payload )
2023-05-04 15:09:43 +03:00
|> halt ( )
2022-10-11 15:42:14 +03:00
end
2023-04-13 16:01:54 +03:00
defp parse_comparison_opts ( params ) do
[
from : params [ " compare_from " ] ,
to : params [ " compare_to " ] ,
2023-05-18 13:05:24 +03:00
match_day_of_week? : params [ " match_day_of_week " ] == " true "
2023-04-13 16:01:54 +03:00
]
end
2023-04-26 12:06:40 +03:00
defp with_imported? ( source_query , comparison_query ) do
cond do
source_query . include_imported -> true
comparison_query && comparison_query . include_imported -> true
true -> false
end
end
2023-11-22 17:34:47 +03:00
on_full_build do
defdelegate format_revenue_metric ( metric_value ) , to : PlausibleWeb.Controllers.API.Revenue
defdelegate format_money ( money ) , to : PlausibleWeb.Controllers.API.Revenue
else
defp format_revenue_metric ( { metric , value } ) do
{ metric , value }
end
end
2019-11-19 07:30:42 +03:00
end