More robust scheduling for email reports (#189)

* More robust scheduling for email reports

* Update config for new email report jobs

* Only build docker on master

* Use Postgres 12 in travis

* Cache elixir files

* use Postgres 12 in travis

* Specify env on one line

* Wrong postgres version
This commit is contained in:
Uku Taht 2020-06-28 13:24:54 +03:00 committed by GitHub
parent 9465ff4c6c
commit 80b98762f4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 321 additions and 300 deletions

View File

@ -8,10 +8,24 @@ before_install:
- echo "CREATE DATABASE plausible_test" > $HOME/init.sql
- docker pull yandex/clickhouse-server
- docker run -d -p 8123:8123 --ulimit nofile=262144:262144 --volume=$HOME/init.sql:/docker-entrypoint-initdb.d/init.sql yandex/clickhouse-server
- docker build -t plausible/analytics .
# setting up postgres 12 is quite a pain, see:
# https://travis-ci.community/t/test-against-postgres-12/6768/8
- sudo sed -i 's/port = 5433/port = 5432/' /etc/postgresql/12/main/postgresql.conf
- sudo cp /etc/postgresql/{9.4,12}/main/pg_hba.conf
- sudo pg_ctlcluster 12 main restart
env:
- MIX_ENV=test
- MIX_ENV=test PGVER=12
script: mix coveralls.travis
addons:
postgresql: '12'
apt:
packages:
- postgresql-12
- postgresql-client-12
cache:
directories:
- _build
- deps
deploy:
provider: script
script: bash docker_push

View File

@ -91,7 +91,7 @@ crontab = [
# hourly
{"0 * * * *", Plausible.Workers.SendSiteSetupEmails},
#  hourly
{"0 * * * *", Plausible.Workers.SendEmailReports},
{"0 * * * *", Plausible.Workers.ScheduleEmailReports},
# Daily at midnight
{"0 0 * * *", Plausible.Workers.FetchTweets},
# Daily at midday
@ -106,9 +106,10 @@ queues = [
provision_ssl_certificates: 1,
fetch_tweets: 1,
check_stats_emails: 1,
email_reports: 1,
site_setup_emails: 1,
trial_notification_emails: 1
trial_notification_emails: 1,
schedule_email_reports: 1,
send_email_reports: 1
]
config :plausible, Oban,

View File

@ -141,7 +141,7 @@ crontab = [
# hourly
{"0 * * * *", Plausible.Workers.SendSiteSetupEmails},
#  hourly
{"0 * * * *", Plausible.Workers.SendEmailReports},
{"0 * * * *", Plausible.Workers.ScheduleEmailReports},
# Daily at midnight
{"0 0 * * *", Plausible.Workers.FetchTweets},
# Daily at midday
@ -156,9 +156,10 @@ queues = [
provision_ssl_certificates: 1,
fetch_tweets: 1,
check_stats_emails: 1,
email_reports: 1,
site_setup_emails: 1,
trial_notification_emails: 1
trial_notification_emails: 1,
schedule_email_reports: 1,
send_email_reports: 1
]
config :plausible, Oban,

View File

@ -1,3 +1,4 @@
#!/bin/bash
# Builds the plausible/analytics image and pushes it to the Docker registry.
# Registry credentials are read from DOCKER_USERNAME and DOCKER_PASSWORD.

# Abort on the first failing command or unset variable, so a failed login or
# build never falls through to `docker push`.
set -euo pipefail

echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin
docker build -t plausible/analytics .
docker push plausible/analytics

View File

@ -115,7 +115,7 @@ defmodule PlausibleWeb.Email do
def weekly_report(email, site, assigns) do
base_email()
|> to(email)
|> from("Plausible Insights <info@plausible.io>")
|> from("Plausible Analytics <info@plausible.io>")
|> tag("weekly-report")
|> subject("#{assigns[:name]} report for #{site.domain}")
|> render("weekly_report.html", Keyword.put(assigns, :site, site))

View File

@ -10,7 +10,7 @@ We will keep recording stats for another month to give you time to upgrade.
<br /><br />
Thanks,<br />
Uku Taht<br />
Founder, Plausible Insights
Founder, Plausible Analytics
<br /><br />
--
<br /><br />

View File

@ -21,7 +21,7 @@
<%= render @view_module, @view_template, assigns %>
<p class="text-center text-gray-500 text-xs py-8">
©2020 Plausible Insights. All rights reserved.
©2020 Plausible Analytics. All rights reserved.
</p>
<script type="text/javascript" src="<%= Routes.static_path(@conn, "/js/app.js") %>"></script>

View File

@ -0,0 +1,86 @@
defmodule Plausible.Workers.ScheduleEmailReports do
  @moduledoc """
  Oban worker that makes sure every site with a weekly or monthly email report
  has a pending `Plausible.Workers.SendEmailReport` job queued for it.
  """

  use Plausible.Repo
  use Oban.Worker, queue: :schedule_email_reports
  alias Plausible.Workers.SendEmailReport
  require Logger

  @impl Oban.Worker
  @doc """
  Schedules the next weekly and monthly report for every site that does not
  already have one pending.

  Weekly reports are scheduled for the start of next week and monthly reports
  for the first day of next month, both nine hours after local midnight in the
  site's timezone (see `monday_9am/1` and `first_of_month_9am/1`).

  Running the job repeatedly is harmless, so it can run as often as the crontab
  dictates: a site is skipped while it still has a `SendEmailReport` job for the
  same interval whose state is neither `completed` nor `discarded`.
  """
  def perform(_args, _job) do
    schedule_weekly_emails()
    schedule_monthly_emails()
  end

  # Finds sites that have a weekly report configured but no active weekly
  # SendEmailReport job, and enqueues one for each.
  defp schedule_weekly_emails do
    weekly_jobs =
      from(j in Oban.Job,
        where:
          j.worker == "Plausible.Workers.SendEmailReport" and
            fragment("(? ->> 'interval')", j.args) == "weekly"
      )

    sites =
      Repo.all(
        from s in Plausible.Site,
          join: wr in Plausible.Site.WeeklyReport,
          on: wr.site_id == s.id,
          # Only jobs that are still active take part in the join, so a site
          # whose jobs are all completed/discarded ends up with a nil `job`.
          left_join: job in subquery(weekly_jobs),
          on:
            fragment("(? -> 'site_id')::int", job.args) == s.id and
              job.state not in ["completed", "discarded"],
          where: is_nil(job),
          preload: [weekly_report: wr]
      )

    enqueue_reports(sites, "weekly", &monday_9am/1)
  end

  @doc """
  Returns the start of next week in `timezone`, shifted forward by nine hours.
  """
  # NOTE(review): the nine hours are added to local midnight; if a DST change
  # falls inside that window the wall-clock result may not be 09:00 — confirm
  # how Timex.shift/2 treats hour shifts across transitions.
  def monday_9am(timezone) do
    Timex.now(timezone)
    |> Timex.shift(weeks: 1)
    |> Timex.beginning_of_week()
    |> Timex.shift(hours: 9)
  end

  # Finds sites that have a monthly report configured but no active monthly
  # SendEmailReport job, and enqueues one for each.
  defp schedule_monthly_emails do
    monthly_jobs =
      from(j in Oban.Job,
        where:
          j.worker == "Plausible.Workers.SendEmailReport" and
            fragment("(? ->> 'interval')", j.args) == "monthly"
      )

    sites =
      Repo.all(
        from s in Plausible.Site,
          join: mr in Plausible.Site.MonthlyReport,
          on: mr.site_id == s.id,
          left_join: job in subquery(monthly_jobs),
          on:
            fragment("(? -> 'site_id')::int", job.args) == s.id and
              job.state not in ["completed", "discarded"],
          where: is_nil(job),
          preload: [monthly_report: mr]
      )

    enqueue_reports(sites, "monthly", &first_of_month_9am/1)
  end

  @doc """
  Returns the start of next month in `timezone`, shifted forward by nine hours.
  """
  # NOTE(review): same DST caveat as monday_9am/1 — a transition on the 1st of
  # the month could move the wall-clock time away from 09:00; confirm.
  def first_of_month_9am(timezone) do
    Timex.now(timezone)
    |> Timex.shift(months: 1)
    |> Timex.beginning_of_month()
    |> Timex.shift(hours: 9)
  end

  # Inserts one SendEmailReport job per site for `interval`, scheduled at the
  # time `scheduled_at_fun` returns for the site's timezone. Raises if an
  # insert fails (Oban.insert!/1).
  defp enqueue_reports(sites, interval, scheduled_at_fun) do
    Enum.each(sites, fn site ->
      %{site_id: site.id, interval: interval}
      |> SendEmailReport.new(scheduled_at: scheduled_at_fun.(site.timezone))
      |> Oban.insert!()
    end)

    :ok
  end
end

View File

@ -0,0 +1,81 @@
defmodule Plausible.Workers.SendEmailReport do
  @moduledoc """
  Oban worker that emails one site's weekly or monthly stats report to every
  recipient configured for that report.

  The worker is declared with `max_attempts: 1`.
  """

  use Plausible.Repo
  use Oban.Worker, queue: :send_email_reports, max_attempts: 1
  alias Plausible.Stats.Query
  alias Plausible.Stats.Clickhouse, as: Stats
  require Logger

  @impl Oban.Worker
  @doc """
  Sends the report selected by the job args to all of its recipients.

  Expected args:

    * `"site_id"` - id of the `Plausible.Site` to report on
    * `"interval"` - `"weekly"` (last 7 days) or `"monthly"` (previous
      calendar month in the site's timezone)

  Returns `:ok`. If the site or its report configuration no longer exists when
  the job runs, nothing is sent and the job still returns `:ok`.
  """
  def perform(%{"interval" => "weekly", "site_id" => site_id}, _job) do
    case load_site(site_id, :weekly_report) do
      %{weekly_report: %{recipients: recipients}} = site ->
        query = Query.from(site.timezone, %{"period" => "7d"})
        send_reports(site, recipients, "Weekly", "weekly-report", query)

      # The site or its weekly report was removed after the job was scheduled.
      _ ->
        :ok
    end
  end

  def perform(%{"interval" => "monthly", "site_id" => site_id}, _job) do
    case load_site(site_id, :monthly_report) do
      %{monthly_report: %{recipients: recipients}} = site ->
        last_month =
          Timex.now(site.timezone)
          |> Timex.shift(months: -1)
          |> Timex.beginning_of_month()

        query =
          Query.from(site.timezone, %{
            "period" => "month",
            "date" => Timex.format!(last_month, "{ISOdate}")
          })

        month_name = Timex.format!(last_month, "{Mfull}")
        send_reports(site, recipients, month_name, "monthly-report", query)

      # The site or its monthly report was removed after the job was scheduled.
      _ ->
        :ok
    end
  end

  # Loads the site with the given report association preloaded; nil when the
  # site does not exist (Repo.preload/2 passes nil through).
  defp load_site(site_id, report_assoc) do
    Plausible.Site
    |> Repo.get(site_id)
    |> Repo.preload(report_assoc)
  end

  # Nothing to send: skip the stats queries entirely.
  defp send_reports(_site, [], _name, _report_path, _query), do: :ok

  # The stats do not depend on the recipient, so they are computed once per
  # site and reused for every email.
  defp send_reports(site, recipients, name, report_path, query) do
    assigns = report_assigns(site, query, name)

    for email <- recipients do
      send_report(email, site, unsubscribe_link(site, report_path, email), assigns)
    end

    :ok
  end

  # The address is form-encoded because characters such as `+` or `&` would
  # otherwise be misread when the query string is decoded.
  defp unsubscribe_link(site, report_path, email) do
    PlausibleWeb.Endpoint.url() <>
      "/sites/#{URI.encode_www_form(site.domain)}/#{report_path}/unsubscribe" <>
      "?email=#{URI.encode_www_form(email)}"
  end

  # Builds the template assigns shared by every recipient of one report.
  defp report_assigns(site, query, name) do
    {pageviews, unique_visitors} = Stats.pageviews_and_visitors(site, query)

    {change_pageviews, change_visitors} =
      Stats.compare_pageviews_and_visitors(site, query, {pageviews, unique_visitors})

    bounce_rate = Stats.bounce_rate(site, query)
    prev_bounce_rate = Stats.bounce_rate(site, Query.shift_back(query))
    # nil when the previous period has no bounce rate to compare against.
    change_bounce_rate = if prev_bounce_rate > 0, do: bounce_rate - prev_bounce_rate

    [
      unique_visitors: unique_visitors,
      change_visitors: change_visitors,
      pageviews: pageviews,
      change_pageviews: change_pageviews,
      bounce_rate: bounce_rate,
      change_bounce_rate: change_bounce_rate,
      referrers: Stats.top_referrers(site, query),
      pages: Stats.top_pages(site, query),
      query: query,
      name: name
    ]
  end

  # Renders and delivers the report to one recipient. Delivery is best-effort:
  # a failure is logged and does not stop the remaining recipients.
  defp send_report(email, site, link, assigns) do
    user = Plausible.Auth.find_user_by(email: email)
    login_link = user && Plausible.Sites.is_owner?(user.id, site)

    template =
      PlausibleWeb.Email.weekly_report(
        email,
        site,
        Keyword.merge(assigns, unsubscribe_link: link, login_link: login_link)
      )

    try do
      Plausible.Mailer.send_email(template)
    rescue
      error ->
        Logger.error(
          "Failed to send #{assigns[:name]} report for #{site.domain}: " <>
            Exception.message(error)
        )

        nil
    end
  end
end

View File

@ -1,173 +0,0 @@
# Oban worker on the :email_reports queue. A single job run looks up every site
# that is due a weekly or monthly report, sends the emails itself, and records
# fully delivered reports in the sent_weekly_reports / sent_monthly_reports
# tables so they are not sent again.
defmodule Plausible.Workers.SendEmailReports do
use Plausible.Repo
use Oban.Worker, queue: :email_reports
require Logger
alias Plausible.Stats.Query
alias Plausible.Stats.Clickhouse, as: Stats
@impl Oban.Worker
@doc """
The email report should be sent on Monday at 9am according to the timezone
of the site. This job runs every hour to be able to send it with hourly precision.
"""
def perform(args, _job) do
# An optional "current_time" arg (ISO 8601 extended format) overrides the
# clock; otherwise the current time is used.
current_time =
if args["current_time"],
do: Timex.parse!(args["current_time"], "{ISO:Extended}"),
else: Timex.now()
send_weekly_emails(current_time)
send_monthly_emails(current_time)
:ok
end
# Sends the 7-day report for every site with a weekly report where, in the
# site's timezone at `job_start`, it is Monday at or after 9am and no
# sent_weekly_reports row exists yet for that ISO year/week.
defp send_weekly_emails(job_start) do
sites =
Repo.all(
from s in Plausible.Site,
join: wr in Plausible.Site.WeeklyReport,
on: wr.site_id == s.id,
left_join: se in "sent_weekly_reports",
on:
se.site_id == s.id and
se.year ==
fragment("EXTRACT(isoyear from (? at time zone ?))", ^job_start, s.timezone) and
se.week == fragment("EXTRACT(week from (? at time zone ?))", ^job_start, s.timezone),
# We haven't sent a report for this site on this week
where: is_nil(se),
# It's monday in the local timezone
where: fragment("EXTRACT(dow from (? at time zone ?))", ^job_start, s.timezone) == 1,
# It's after 9am
where: fragment("EXTRACT(hour from (? at time zone ?))", ^job_start, s.timezone) >= 9,
preload: [weekly_report: wr]
)
for site <- sites do
query = Query.from(site.timezone, %{"period" => "7d"})
# `sent` holds one boolean per recipient (the result of send_report/5).
sent = Enum.map(site.weekly_report.recipients, fn email ->
Logger.info("Sending weekly report for #{URI.encode_www_form(site.domain)} to #{email}")
# NOTE(review): the recipient address is interpolated into the query string
# without encoding, unlike the domain — confirm addresses containing
# characters such as `+` survive the round trip.
unsubscribe_link =
PlausibleWeb.Endpoint.url() <>
"/sites/#{URI.encode_www_form(site.domain)}/weekly-report/unsubscribe?email=#{email}"
send_report(email, site, "Weekly", unsubscribe_link, query)
end)
# Only mark the week as sent when every recipient's delivery succeeded.
if Enum.all?(sent), do: weekly_report_sent(site, job_start)
end
end
# Sends the previous month's report for every site with a monthly report where,
# in the site's timezone at `job_start`, it is the 1st of the month at or after
# 9am and no sent_monthly_reports row exists yet for that year/month.
defp send_monthly_emails(job_start) do
sites =
Repo.all(
from s in Plausible.Site,
join: mr in Plausible.Site.MonthlyReport,
on: mr.site_id == s.id,
left_join: se in "sent_monthly_reports",
on:
se.site_id == s.id and
se.year == fragment("EXTRACT(year from (? at time zone ?))", ^job_start, s.timezone) and
se.month ==
fragment("EXTRACT(month from (? at time zone ?))", ^job_start, s.timezone),
# We haven't sent a report for this site this month
where: is_nil(se),
# It's the 1st of the month in the local timezone
where: fragment("EXTRACT(day from (? at time zone ?))", ^job_start, s.timezone) == 1,
# It's after 9am
where: fragment("EXTRACT(hour from (? at time zone ?))", ^job_start, s.timezone) >= 9,
preload: [monthly_report: mr]
)
for site <- sites do
# First day of the month before `job_start`, in the site's timezone; used
# both as the query date and for the month name passed to send_report/5.
last_month =
job_start
|> Timex.Timezone.convert(site.timezone)
|> Timex.shift(months: -1)
|> Timex.beginning_of_month()
query =
Query.from(site.timezone, %{
"period" => "month",
"date" => Timex.format!(last_month, "{ISOdate}")
})
sent = Enum.map(site.monthly_report.recipients, fn email ->
Logger.info("Sending monthly report for #{site.domain} to #{email}")
unsubscribe_link =
PlausibleWeb.Endpoint.url() <>
"/sites/#{URI.encode_www_form(site.domain)}/monthly-report/unsubscribe?email=#{email}"
send_report(email, site, Timex.format!(last_month, "{Mfull}"), unsubscribe_link, query)
end)
# Only mark the month as sent when every recipient's delivery succeeded.
if Enum.all?(sent), do: monthly_report_sent(site, job_start)
end
end
# Gathers the stats for `query`, renders the report email for one recipient and
# delivers it. Returns true when delivery did not raise, false otherwise.
defp send_report(email, site, name, unsubscribe_link, query) do
{pageviews, unique_visitors} = Stats.pageviews_and_visitors(site, query)
{change_pageviews, change_visitors} =
Stats.compare_pageviews_and_visitors(site, query, {pageviews, unique_visitors})
bounce_rate = Stats.bounce_rate(site, query)
prev_bounce_rate = Stats.bounce_rate(site, Query.shift_back(query))
# nil when the previous period's bounce rate is not greater than zero.
change_bounce_rate = if prev_bounce_rate > 0, do: bounce_rate - prev_bounce_rate
referrers = Stats.top_referrers(site, query)
pages = Stats.top_pages(site, query)
user = Plausible.Auth.find_user_by(email: email)
# nil when no user has this email; otherwise the result of is_owner?/2.
login_link = user && Plausible.Sites.is_owner?(user.id, site)
# The weekly_report template is used for both weekly and monthly emails;
# `name` distinguishes them.
template = PlausibleWeb.Email.weekly_report(email, site,
unique_visitors: unique_visitors,
change_visitors: change_visitors,
pageviews: pageviews,
change_pageviews: change_pageviews,
bounce_rate: bounce_rate,
change_bounce_rate: change_bounce_rate,
referrers: referrers,
unsubscribe_link: unsubscribe_link,
login_link: login_link,
pages: pages,
query: query,
name: name
)
# Any exception raised during delivery is swallowed and reported as false.
try do
Plausible.Mailer.send_email(template)
true
rescue
_ -> false
end
end
# Records that the weekly report for `site` was sent in the ISO week of `time`.
# NOTE(review): year/week come from `time` via DateTime.to_date/1 without
# converting to the site's timezone, while the lookup query extracts them in
# the site's timezone — confirm the two agree near week boundaries.
defp weekly_report_sent(site, time) do
{year, week} = time |> DateTime.to_date() |> Timex.iso_week()
Repo.insert_all("sent_weekly_reports", [
%{
site_id: site.id,
year: year,
week: week,
timestamp: Timex.now()
}
])
end
# Records that the monthly report for `site` was sent in the month of `time`.
# NOTE(review): same timezone caveat as weekly_report_sent/2 — confirm.
defp monthly_report_sent(site, time) do
date = DateTime.to_date(time)
Repo.insert_all("sent_monthly_reports", [
%{
site_id: site.id,
year: date.year,
month: date.month,
timestamp: Timex.now()
}
])
end
end

View File

@ -0,0 +1,76 @@
defmodule Plausible.Workers.ScheduleEmailReportsTest do
  use Plausible.DataCase
  use Oban.Testing, repo: Plausible.Repo
  alias Plausible.Workers.{ScheduleEmailReports, SendEmailReport}

  # Enqueues one scheduler job and drains its queue.
  defp run_scheduler do
    %{}
    |> ScheduleEmailReports.new()
    |> Oban.insert!()

    Oban.drain_queue(:schedule_email_reports)
  end

  # Inserts a US/Eastern site together with a report built by `report_factory`
  # (:weekly_report or :monthly_report) and returns the site.
  defp site_with_report(report_factory) do
    site = insert(:site, domain: "test-site.com", timezone: "US/Eastern")
    insert(report_factory, site: site, recipients: ["user@email.com"])
    site
  end

  # Number of SendEmailReport jobs currently enqueued.
  defp enqueued_report_count, do: length(all_enqueued(worker: SendEmailReport))

  describe "weekly reports" do
    test "schedules weekly report on Monday 9am local timezone" do
      site = site_with_report(:weekly_report)

      run_scheduler()

      assert_enqueued(
        worker: SendEmailReport,
        args: %{site_id: site.id, interval: "weekly"},
        scheduled_at: ScheduleEmailReports.monday_9am(site.timezone)
      )
    end

    test "does not schedule more than one weekly report at a time" do
      site_with_report(:weekly_report)

      run_scheduler()
      run_scheduler()

      assert enqueued_report_count() == 1
    end

    test "schedules a new report as soon as a previous one is completed" do
      site_with_report(:weekly_report)

      run_scheduler()
      # Mark every Oban job as completed so the site has no active job left.
      Repo.update_all("oban_jobs", set: [state: "completed"])
      assert enqueued_report_count() == 0

      run_scheduler()
      assert enqueued_report_count() == 1
    end
  end

  describe "monthly_reports" do
    test "schedules monthly report on first of the next month at 9am local timezone" do
      site = site_with_report(:monthly_report)

      run_scheduler()

      assert_enqueued(
        worker: SendEmailReport,
        args: %{site_id: site.id, interval: "monthly"},
        scheduled_at: ScheduleEmailReports.first_of_month_9am(site.timezone)
      )
    end

    test "does not schedule more than one monthly report at a time" do
      site_with_report(:monthly_report)

      run_scheduler()
      run_scheduler()

      assert enqueued_report_count() == 1
    end

    test "schedules a new report as soon as a previous one is completed" do
      site_with_report(:monthly_report)

      run_scheduler()
      # Mark every Oban job as completed so the site has no active job left.
      Repo.update_all("oban_jobs", set: [state: "completed"])
      assert enqueued_report_count() == 0

      run_scheduler()
      assert enqueued_report_count() == 1
    end
  end
end

View File

@ -0,0 +1,49 @@
defmodule Plausible.Workers.SendEmailReportTest do
  use Plausible.DataCase
  use Bamboo.Test
  alias Plausible.Workers.SendEmailReport

  @recipients ["user@email.com", "user2@email.com"]

  # Enqueues a SendEmailReport job for `site` and drains its queue.
  defp deliver_report(site, interval) do
    %{"site_id" => site.id, "interval" => interval}
    |> SendEmailReport.new()
    |> Oban.insert!()

    Oban.drain_queue(:send_email_reports)
  end

  # Asserts that an email with `subject` was delivered to each recipient, in
  # the order the recipients are listed.
  defp assert_delivered_to_all(subject) do
    for recipient <- @recipients do
      assert_email_delivered_with(subject: subject, to: [nil: recipient])
    end
  end

  describe "weekly reports" do
    test "sends weekly report to all recipients" do
      site = insert(:site, domain: "test-site.com", timezone: "US/Eastern")
      insert(:weekly_report, site: site, recipients: @recipients)

      deliver_report(site, "weekly")

      assert_delivered_to_all("Weekly report for #{site.domain}")
    end
  end

  describe "monthly_reports" do
    test "sends monthly report to all recipients" do
      site = insert(:site, domain: "test-site.com", timezone: "US/Eastern")
      insert(:monthly_report, site: site, recipients: @recipients)

      # Full name of the previous month in the site's timezone, as expected in
      # the email subject.
      previous_month_start =
        Timex.now(site.timezone)
        |> Timex.shift(months: -1)
        |> Timex.beginning_of_month()

      last_month = Timex.format!(previous_month_start, "{Mfull}")

      deliver_report(site, "monthly")

      assert_delivered_to_all("#{last_month} report for #{site.domain}")
    end
  end
end

View File

@ -1,115 +0,0 @@
defmodule Plausible.Workers.EmailReportsTest do
  use Plausible.DataCase
  use Bamboo.Test
  alias Plausible.Workers.SendEmailReports

  # Runs the worker as if the current time were `time`.
  defp run_at(time) do
    %{"current_time" => time}
    |> SendEmailReports.new()
    |> Oban.insert!()

    Oban.drain_queue(:email_reports)
  end

  # Start of the current week, shifted by `offset` (a Timex.shift/2 keyword list).
  defp week_start_plus(offset) do
    Timex.now()
    |> Timex.beginning_of_week()
    |> Timex.shift(offset)
  end

  # Start of the current month, shifted by `offset` (a Timex.shift/2 keyword list).
  defp month_start_plus(offset) do
    Timex.now()
    |> Timex.beginning_of_month()
    |> Timex.shift(offset)
  end

  # Parses an ISO 8601 timestamp into a DateTime, failing the test on bad input.
  defp fixed_time(iso8601) do
    {:ok, time, _offset} = DateTime.from_iso8601(iso8601)
    time
  end

  describe "weekly reports" do
    test "sends weekly report on Monday 9am local timezone" do
      site = insert(:site, domain: "test-site.com", timezone: "US/Eastern")
      insert(:weekly_report, site: site, recipients: ["user@email.com"])

      # 2pm UTC is 10am EST
      run_at(week_start_plus(hours: 14))

      assert_email_delivered_with(
        subject: "Weekly report for #{site.domain}",
        to: [nil: "user@email.com"]
      )
    end

    test "does not send a report on Monday before 9am in local timezone" do
      site = insert(:site, timezone: "US/Eastern")
      insert(:weekly_report, site: site, recipients: ["user@email.com"])

      # 12pm UTC is 8am EST
      run_at(week_start_plus(hours: 12))

      assert_no_emails_delivered()
    end

    test "does not send a report on Tuesday" do
      site = insert(:site)
      insert(:weekly_report, site: site, recipients: ["user@email.com"])

      run_at(week_start_plus(days: 1, hours: 10))

      assert_no_emails_delivered()
    end

    test "does not send the same report multiple times on the same week" do
      site = insert(:site)
      insert(:weekly_report, site: site, recipients: ["user@email.com"])
      time = week_start_plus(hours: 10)

      run_at(time)

      assert_email_delivered_with(
        subject: "Weekly report for #{site.domain}",
        to: [nil: "user@email.com"]
      )

      # A second run for the same moment must not deliver anything new.
      run_at(time)
      assert_no_emails_delivered()
    end
  end

  describe "monthly_reports" do
    test "sends monthly report on the 1st of the month after 9am local timezone" do
      site = insert(:site, timezone: "US/Eastern")
      insert(:monthly_report, site: site, recipients: ["user@email.com"])

      run_at(fixed_time("2019-04-01T14:00:00Z"))

      assert_email_delivered_with(
        subject: "March report for #{site.domain}",
        to: [nil: "user@email.com"]
      )
    end

    test "does not send a report on the 1st of the month before 9am in local timezone" do
      site = insert(:site, timezone: "US/Eastern")
      insert(:monthly_report, site: site, recipients: ["user@email.com"])

      # 12pm UTC is 8am EST
      run_at(month_start_plus(hours: 12))

      assert_no_emails_delivered()
    end

    test "does not send a report on the 2nd of the month" do
      site = insert(:site)
      insert(:monthly_report, site: site, recipients: ["user@email.com"])

      run_at(month_start_plus(days: 1, hours: 10))

      assert_no_emails_delivered()
    end

    test "does not send the same report multiple times on the same month" do
      site = insert(:site)
      insert(:monthly_report, site: site, recipients: ["user@email.com"])
      time = fixed_time("2019-02-01T11:00:00Z")

      run_at(time)

      assert_email_delivered_with(
        subject: "January report for #{site.domain}",
        to: [nil: "user@email.com"]
      )

      # A second run for the same moment must not deliver anything new.
      run_at(time)
      assert_no_emails_delivered()
    end
  end
end