Mirror of https://github.com/plausible/analytics.git, synced 2024-11-29 05:57:19 +03:00
CSV imports (no UI) (#3895)
* encode/decode date range in filenames

* Update lib/plausible/imported/csv_importer.ex

Co-authored-by: Adrian Gruntkowski <adrian.gruntkowski@gmail.com>

* Update lib/plausible/imported/csv_importer.ex

Co-authored-by: Adrian Gruntkowski <adrian.gruntkowski@gmail.com>

* drop unused functions

* send failure email if there is no data to export

* use PlausibleWeb.Email.mailer_email_from()

* ensure we get dates from minmax date query

---------

Co-authored-by: Adrian Gruntkowski <adrian.gruntkowski@gmail.com>
This commit is contained in:
parent 4242b52be4
commit 279e89c693
.env.dev
@@ -27,3 +27,4 @@ S3_SECRET_ACCESS_KEY=minioadmin
 S3_REGION=us-east-1
 S3_ENDPOINT=http://localhost:10000
 S3_EXPORTS_BUCKET=dev-exports
+S3_IMPORTS_BUCKET=dev-imports

.env.test
@@ -22,3 +22,4 @@ S3_SECRET_ACCESS_KEY=minioadmin
 S3_REGION=us-east-1
 S3_ENDPOINT=http://localhost:10000
 S3_EXPORTS_BUCKET=test-exports
+S3_IMPORTS_BUCKET=test-imports
config/runtime.exs
@@ -736,6 +736,10 @@ unless s3_disabled? do
      %{
        name: "S3_EXPORTS_BUCKET",
        example: "my-csv-exports-bucket"
+     },
+     %{
+       name: "S3_IMPORTS_BUCKET",
+       example: "my-csv-imports-bucket"
      }
    ]

@@ -771,5 +775,7 @@ unless s3_disabled? do
      host: s3_host,
      port: s3_port

-  config :plausible, Plausible.S3, exports_bucket: s3_env_value.("S3_EXPORTS_BUCKET")
+  config :plausible, Plausible.S3,
+    exports_bucket: s3_env_value.("S3_EXPORTS_BUCKET"),
+    imports_bucket: s3_env_value.("S3_IMPORTS_BUCKET")
 end
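With both buckets configured, the grouped S3 settings can be read back at runtime as a keyword list. A minimal sketch of what that looks like, assuming the dev bucket names from .env.dev above:

    iex> Application.fetch_env!(:plausible, Plausible.S3)
    [exports_bucket: "dev-exports", imports_bucket: "dev-imports"]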
lib/plausible/imported/csv_importer.ex
@@ -20,7 +20,13 @@ defmodule Plausible.Imported.CSVImporter do

   @impl true
   def import_data(site_import, opts) do
-    %{id: import_id, site_id: site_id} = site_import
+    %{
+      id: import_id,
+      site_id: site_id,
+      start_date: start_date,
+      end_date: end_date
+    } = site_import

     uploads = Keyword.fetch!(opts, :uploads)

     %{access_key_id: s3_access_key_id, secret_access_key: s3_secret_access_key} =
@@ -31,14 +37,10 @@ defmodule Plausible.Imported.CSVImporter do
      |> Keyword.replace!(:pool_size, 1)
      |> Ch.start_link()

-    ranges =
-      Enum.map(uploads, fn upload ->
-        %{"filename" => filename, "s3_url" => s3_url} = upload
-
-        ".csv" = Path.extname(filename)
-        table = Path.rootname(filename)
-        ensure_importable_table!(table)
-
-        s3_structure = input_structure!(table)
+    Enum.each(uploads, fn upload ->
+      %{"filename" => filename, "s3_url" => s3_url} = upload
+
+      {table, _, _} = parse_filename!(filename)
+      s3_structure = input_structure!(table)

       statement =
@@ -46,6 +48,7 @@ defmodule Plausible.Imported.CSVImporter do
         INSERT INTO {table:Identifier} \
         SELECT {site_id:UInt64} AS site_id, *, {import_id:UInt64} AS import_id \
         FROM s3({s3_url:String},{s3_access_key_id:String},{s3_secret_access_key:String},{s3_format:String},{s3_structure:String}) \
+        WHERE date >= {start_date:Date} AND date <= {end_date:Date}\
         """

       params =
@@ -57,26 +60,13 @@ defmodule Plausible.Imported.CSVImporter do
          "s3_access_key_id" => s3_access_key_id,
          "s3_secret_access_key" => s3_secret_access_key,
          "s3_format" => "CSVWithNames",
-         "s3_structure" => s3_structure
+         "s3_structure" => s3_structure,
+         "start_date" => start_date,
+         "end_date" => end_date
        }

       Ch.query!(ch, statement, params, timeout: :infinity)
-
-      %Ch.Result{rows: [[min_date, max_date]]} =
-        Ch.query!(
-          ch,
-          "SELECT min(date), max(date) FROM {table:Identifier} WHERE site_id = {site_id:UInt64} AND import_id = {import_id:UInt64}",
-          %{"table" => table, "site_id" => site_id, "import_id" => import_id}
-        )
-
-      Date.range(min_date, max_date)
     end)
-
-    {:ok,
-     %{
-       start_date: Enum.min_by(ranges, & &1.first, Date).first,
-       end_date: Enum.max_by(ranges, & &1.last, Date).last
-     }}
   rescue
     # we are cancelling on any argument or ClickHouse errors
     e in [ArgumentError, Ch.Error] ->
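Each upload is now imported with an explicit date window taken from the site import, so any stray rows outside the range encoded in the filename are dropped by the WHERE clause instead of being imported. A rough sketch of the params bound for one upload (the ids, URL, and MinIO credentials are illustrative, not values from this diff):

    params = %{
      "table" => "imported_pages",
      "site_id" => 123,
      "import_id" => 456,
      "s3_url" => "http://localhost:10000/test-imports/123/imported_pages_20211230_20220101.csv",
      "s3_access_key_id" => "minioadmin",
      "s3_secret_access_key" => "minioadmin",
      "s3_format" => "CSVWithNames",
      "s3_structure" => input_structure!("imported_pages"),
      "start_date" => ~D[2021-12-30],
      "end_date" => ~D[2022-01-01]
    }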
@@ -103,12 +93,85 @@ defmodule Plausible.Imported.CSVImporter do
       "date Date, visitors UInt64, pageviews UInt64, bounces UInt64, visits UInt64, visit_duration UInt64"
   }

-  for {table, input_structure} <- input_structures do
-    defp input_structure!(unquote(table)), do: unquote(input_structure)
-    defp ensure_importable_table!(unquote(table)), do: :ok
-  end
-
-  defp ensure_importable_table!(table) do
-    raise ArgumentError, "table #{table} is not supported for data import"
-  end
+  @doc """
+  Extracts min/max date range from a list of uploads.
+
+  Examples:
+
+      iex> date_range([
+      ...>   %{"filename" => "imported_devices_20190101_20210101.csv"},
+      ...>   "imported_pages_20200101_20220101.csv"
+      ...> ])
+      Date.range(~D[2019-01-01], ~D[2022-01-01])
+
+      iex> date_range([])
+      ** (ArgumentError) empty uploads
+
+  """
+  @spec date_range([String.t() | %{String.t() => String.t()}, ...]) :: Date.Range.t()
+  def date_range([_ | _] = uploads), do: date_range(uploads, _start_date = nil, _end_date = nil)
+  def date_range([]), do: raise(ArgumentError, "empty uploads")
+
+  defp date_range([upload | uploads], prev_start_date, prev_end_date) do
+    filename =
+      case upload do
+        %{"filename" => filename} -> filename
+        filename when is_binary(filename) -> filename
+      end
+
+    {_table, start_date, end_date} = parse_filename!(filename)
+
+    start_date =
+      if prev_start_date do
+        Enum.min([start_date, prev_start_date], Date)
+      else
+        start_date
+      end
+
+    end_date =
+      if prev_end_date do
+        Enum.max([end_date, prev_end_date], Date)
+      else
+        end_date
+      end
+
+    date_range(uploads, start_date, end_date)
+  end
+
+  defp date_range([], first, last), do: Date.range(first, last)
+
+  @spec parse_date!(String.t()) :: Date.t()
+  defp parse_date!(date) do
+    date |> Timex.parse!("{YYYY}{0M}{0D}") |> NaiveDateTime.to_date()
+  end
+
+  @doc """
+  Extracts table name and min/max dates from the filename.
+
+  Examples:
+
+      iex> parse_filename!("my_data.csv")
+      ** (ArgumentError) invalid filename
+
+      iex> parse_filename!("imported_devices_00010101_20250101.csv")
+      {"imported_devices", ~D[0001-01-01], ~D[2025-01-01]}
+
+  """
+  @spec parse_filename!(String.t()) ::
+          {table :: String.t(), start_date :: Date.t(), end_date :: Date.t()}
+  def parse_filename!(filename)
+
+  for {table, input_structure} <- input_structures do
+    defp input_structure!(unquote(table)), do: unquote(input_structure)
+
+    def parse_filename!(
+          <<unquote(table)::bytes, ?_, start_date::8-bytes, ?_, end_date::8-bytes, ".csv">>
+        ) do
+      {unquote(table), parse_date!(start_date), parse_date!(end_date)}
+    end
+  end
+
+  def parse_filename!(_filename) do
+    raise ArgumentError, "invalid filename"
+  end
 end
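The for comprehension compiles one parse_filename!/1 head per importable table, so validating the table name and decoding the dates happen in a single binary match. A sketch of the equivalent standalone match for one table (the filename is taken from the tests below):

    <<"imported_devices", ?_, start_date::8-bytes, ?_, end_date::8-bytes, ".csv">> =
      "imported_devices_20211230_20220102.csv"

    start_date
    #=> "20211230"
    parse_date!(start_date)
    #=> ~D[2021-12-30]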
lib/plausible/s3.ex
@@ -18,9 +18,64 @@ defmodule Plausible.S3 do
   @spec exports_bucket :: String.t()
   def exports_bucket, do: config(:exports_bucket)

+  @doc """
+  Returns the pre-configured S3 bucket for CSV imports.
+
+      config :plausible, Plausible.S3,
+        imports_bucket: System.fetch_env!("S3_IMPORTS_BUCKET")
+
+  Example:
+
+      iex> imports_bucket()
+      "test-imports"
+
+  """
+  @spec imports_bucket :: String.t()
+  def imports_bucket, do: config(:imports_bucket)
+
   defp config, do: Application.fetch_env!(:plausible, __MODULE__)
   defp config(key), do: Keyword.fetch!(config(), key)

+  @doc """
+  Presigns an upload for an imported file.
+
+  In the current implementation the bucket always goes into the path component.
+
+  Example:
+
+      iex> %{
+      ...>   s3_url: "http://localhost:10000/test-imports/123/imported_browsers.csv",
+      ...>   presigned_url: "http://localhost:10000/test-imports/123/imported_browsers.csv?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=minioadmin" <> _
+      ...> } = import_presign_upload(_site_id = 123, _filename = "imported_browsers.csv")
+
+  """
+  def import_presign_upload(site_id, filename) do
+    config = ExAws.Config.new(:s3)
+    s3_path = Path.join(to_string(site_id), filename)
+    bucket = imports_bucket()
+    {:ok, presigned_url} = ExAws.S3.presigned_url(config, :put, bucket, s3_path)
+    %{s3_url: extract_s3_url(presigned_url), presigned_url: presigned_url}
+  end
+
+  # to make ClickHouse see MinIO in dev and test envs we replace
+  # the host in the S3 URL with whatever's set in S3_CLICKHOUSE_HOST env var
+  if Mix.env() in [:dev, :test, :small_dev, :small_test] do
+    defp extract_s3_url(presigned_url) do
+      [s3_url, _] = String.split(presigned_url, "?")
+
+      if ch_host = System.get_env("S3_CLICKHOUSE_HOST") do
+        URI.to_string(%URI{URI.parse(s3_url) | host: ch_host})
+      else
+        s3_url
+      end
+    end
+  else
+    defp extract_s3_url(presigned_url) do
+      [s3_url, _] = String.split(presigned_url, "?")
+      s3_url
+    end
+  end
+
   @doc """
   Chunks and uploads Zip archive to the provided S3 destination.

@@ -77,6 +132,12 @@ defmodule Plausible.S3 do

   @doc """
   Returns `access_key_id` and `secret_access_key` to be used by ClickHouse during imports from S3.
+
+  Example:
+
+      iex> import_clickhouse_credentials()
+      %{access_key_id: "minioadmin", secret_access_key: "minioadmin"}
+
   """
   @spec import_clickhouse_credentials ::
           %{access_key_id: String.t(), secret_access_key: String.t()}
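The presigned URL points at whatever host the app itself uses for MinIO (localhost in dev/test), which ClickHouse may not be able to resolve from where it runs; extract_s3_url strips the query string and, in dev/test, swaps in the host from S3_CLICKHOUSE_HOST. A quick sketch of the substitution, assuming S3_CLICKHOUSE_HOST=minio:

    presigned_url =
      "http://localhost:10000/test-imports/123/imported_browsers.csv?X-Amz-Algorithm=AWS4-HMAC-SHA256"

    [s3_url, _query] = String.split(presigned_url, "?")
    URI.to_string(%URI{URI.parse(s3_url) | host: "minio"})
    #=> "http://minio:10000/test-imports/123/imported_browsers.csv"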
lib/workers/export_csv.ex
@@ -25,22 +25,34 @@ defmodule Plausible.Workers.ExportCSV do
      |> Keyword.replace!(:pool_size, 1)
      |> Ch.start_link()

-    # NOTE: should we use site.timezone?
-    # %Ch.Result{rows: [[min_date, max_date]]} =
-    #   Ch.query!(
-    #     ch,
-    #     "SELECT toDate(min(timestamp)), toDate(max(timestamp)) FROM events_v2 WHERE site_id={site_id:UInt64}",
-    #     %{"site_id" => site_id}
-    #   )
+    %Ch.Result{rows: [[%Date{} = min_date, %Date{} = max_date]]} =
+      Ch.query!(
+        ch,
+        "SELECT toDate(min(timestamp)), toDate(max(timestamp)) FROM events_v2 WHERE site_id={site_id:UInt64}",
+        %{"site_id" => site_id}
+      )

-    download_url =
-      DBConnection.run(
-        ch,
-        fn conn ->
-          conn
-          |> Plausible.Exports.stream_archive(
-            # date_range: Date.range(min_date, max_date)
-            Plausible.Exports.export_queries(site_id, extname: ".csv"),
-            format: "CSVWithNames"
-          )
-          |> Plausible.S3.export_upload_multipart(s3_bucket, s3_path, s3_config_overrides(args))
+    if max_date == ~D[1970-01-01] do
+      # NOTE: replace with proper Plausible.Email template
+      Plausible.Mailer.deliver_now!(
+        Bamboo.Email.new_email(
+          from: PlausibleWeb.Email.mailer_email_from(),
+          to: email,
+          subject: "EXPORT FAILURE",
+          text_body: "there is nothing to export"
+        )
+      )
+    else
+      download_url =
+        DBConnection.run(
+          ch,
+          fn conn ->
+            conn
+            |> Plausible.Exports.stream_archive(
+              Plausible.Exports.export_queries(site_id,
+                date_range: Date.range(min_date, max_date),
+                extname: ".csv"
+              ),
+              format: "CSVWithNames"
+            )
+            |> Plausible.S3.export_upload_multipart(s3_bucket, s3_path, s3_config_overrides(args))

@@ -51,7 +63,7 @@ defmodule Plausible.Workers.ExportCSV do
       # NOTE: replace with proper Plausible.Email template
       Plausible.Mailer.deliver_now!(
         Bamboo.Email.new_email(
-          from: "plausible@email.com",
+          from: PlausibleWeb.Email.mailer_email_from(),
           to: email,
           subject: "EXPORT SUCCESS",
           text_body: """

@@ -62,6 +74,7 @@ defmodule Plausible.Workers.ExportCSV do
           """
         )
       )
+    end

     :ok
   end
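The ~D[1970-01-01] guard works because ClickHouse aggregates over an empty set return the column type's default value for non-Nullable columns, and toDate of that default is the Unix epoch. A sketch of the empty-site case (the site_id is hypothetical):

    # a site with no events_v2 rows
    %Ch.Result{rows: [[~D[1970-01-01], ~D[1970-01-01]]]} =
      Ch.query!(
        ch,
        "SELECT toDate(min(timestamp)), toDate(max(timestamp)) FROM events_v2 WHERE site_id={site_id:UInt64}",
        %{"site_id" => site_id}
      )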
test/plausible/config_test.exs
@@ -198,7 +198,8 @@ defmodule Plausible.ConfigTest do
        {"S3_SECRET_ACCESS_KEY", nil},
        {"S3_REGION", nil},
        {"S3_ENDPOINT", nil},
-       {"S3_EXPORTS_BUCKET", nil}
+       {"S3_EXPORTS_BUCKET", nil},
+       {"S3_IMPORTS_BUCKET", nil}
      ]

      result =
@@ -211,13 +212,14 @@ defmodule Plausible.ConfigTest do
      assert %ArgumentError{} = result

      assert Exception.message(result) == """
-            Missing S3 configuration. Please set S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY, S3_REGION, S3_ENDPOINT, S3_EXPORTS_BUCKET environment variable(s):
+            Missing S3 configuration. Please set S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY, S3_REGION, S3_ENDPOINT, S3_EXPORTS_BUCKET, S3_IMPORTS_BUCKET environment variable(s):

             \tS3_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE
             \tS3_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
             \tS3_REGION=us-east-1
             \tS3_ENDPOINT=https://<ACCOUNT_ID>.r2.cloudflarestorage.com
             \tS3_EXPORTS_BUCKET=my-csv-exports-bucket
+            \tS3_IMPORTS_BUCKET=my-csv-imports-bucket
             """
     end

@@ -227,7 +229,8 @@ defmodule Plausible.ConfigTest do
        {"S3_SECRET_ACCESS_KEY", nil},
        {"S3_REGION", "eu-north-1"},
        {"S3_ENDPOINT", nil},
-       {"S3_EXPORTS_BUCKET", "my-exports"}
+       {"S3_EXPORTS_BUCKET", "my-exports"},
+       {"S3_IMPORTS_BUCKET", nil}
      ]

      result =
@@ -240,10 +243,11 @@ defmodule Plausible.ConfigTest do
      assert %ArgumentError{} = result

      assert Exception.message(result) == """
-            Missing S3 configuration. Please set S3_SECRET_ACCESS_KEY, S3_ENDPOINT environment variable(s):
+            Missing S3 configuration. Please set S3_SECRET_ACCESS_KEY, S3_ENDPOINT, S3_IMPORTS_BUCKET environment variable(s):

             \tS3_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
             \tS3_ENDPOINT=https://<ACCOUNT_ID>.r2.cloudflarestorage.com
+            \tS3_IMPORTS_BUCKET=my-csv-imports-bucket
             """
     end

@@ -253,7 +257,8 @@ defmodule Plausible.ConfigTest do
        {"S3_SECRET_ACCESS_KEY", "minioadmin"},
        {"S3_REGION", "us-east-1"},
        {"S3_ENDPOINT", "http://localhost:6000"},
-       {"S3_EXPORTS_BUCKET", "my-exports"}
+       {"S3_EXPORTS_BUCKET", "my-exports"},
+       {"S3_IMPORTS_BUCKET", "my-imports"}
      ]

      config = runtime_config(env)
@@ -266,8 +271,9 @@ defmodule Plausible.ConfigTest do
                 s3: [scheme: "http://", host: "localhost", port: 6000]
               ]

-     assert get_in(runtime_config(env), [:plausible, Plausible.S3]) == [
-              exports_bucket: "my-exports"
+     assert get_in(config, [:plausible, Plausible.S3]) == [
+              exports_bucket: "my-exports",
+              imports_bucket: "my-imports"
             ]
    end
  end
test/plausible/imported/csv_importer_test.exs
@@ -4,6 +4,8 @@ defmodule Plausible.Imported.CSVImporterTest do
   alias Testcontainers.MinioContainer
   require SiteImport

+  doctest CSVImporter, import: true
+
   @moduletag :minio

   setup_all do
@@ -55,9 +57,12 @@ defmodule Plausible.Imported.CSVImporterTest do
        "imported_visitors"
      ]

+     start_date = "20231001"
+     end_date = "20240102"
+
      uploads =
        Enum.map(tables, fn table ->
-         filename = "#{table}.csv"
+         filename = "#{table}_#{start_date}_#{end_date}.csv"

          %{
            "filename" => filename,
@@ -65,11 +70,12 @@ defmodule Plausible.Imported.CSVImporterTest do
          }
        end)

+     date_range = CSVImporter.date_range(uploads)
+
      assert {:ok, job} =
               CSVImporter.new_import(site, user,
-                # to satisfy the non null constraints on the table I'm providing "0" dates (according to ClickHouse)
-                start_date: ~D[1970-01-01],
-                end_date: ~D[1970-01-01],
+                start_date: date_range.first,
+                end_date: date_range.last,
                 uploads: uploads
               )
@@ -80,8 +86,8 @@ defmodule Plausible.Imported.CSVImporterTest do
              %{
                id: ^import_id,
                source: :csv,
-               start_date: ~D[1970-01-01],
-               end_date: ~D[1970-01-01],
+               start_date: ~D[2023-10-01],
+               end_date: ~D[2024-01-02],
                status: SiteImport.pending()
              }
            ] = Plausible.Imported.list_all_imports(site)
@@ -97,7 +103,7 @@ defmodule Plausible.Imported.CSVImporterTest do
    test "imports tables from S3", %{site: site, user: user, s3: s3, container: minio} do
      csvs = [
        %{
-         name: "imported_browsers.csv",
+         name: "imported_browsers_20211230_20211231.csv",
          body: """
          "date","browser","visitors","visits","visit_duration","bounces"
          "2021-12-30","Amazon Silk",2,2,0,2
@@ -122,7 +128,7 @@ defmodule Plausible.Imported.CSVImporterTest do
          """
        },
        %{
-         name: "imported_devices.csv",
+         name: "imported_devices_20211230_20220102.csv",
          body: """
          "date","device","visitors","visits","visit_duration","bounces"
          "2021-12-30","Desktop",25,28,75,27
@@ -140,7 +146,7 @@ defmodule Plausible.Imported.CSVImporterTest do
          """
        },
        %{
-         name: "imported_entry_pages.csv",
+         name: "imported_entry_pages_20211230_20211231.csv",
          body: """
          "date","visitors","entrances","visit_duration","bounces","entry_page"
          "2021-12-30",6,6,0,6,"/14776416252794997127"
@@ -173,7 +179,7 @@ defmodule Plausible.Imported.CSVImporterTest do
          """
        },
        %{
-         name: "imported_exit_pages.csv",
+         name: "imported_exit_pages_20211230_20211231.csv",
          body: """
          "date","visitors","exits","exit_page"
          "2021-12-30",6,6,"/14776416252794997127"
@@ -198,7 +204,7 @@ defmodule Plausible.Imported.CSVImporterTest do
          """
        },
        %{
-         name: "imported_locations.csv",
+         name: "imported_locations_20211230_20211231.csv",
          body: """
          "date","country","region","city","visitors","visits","visit_duration","bounces"
          "2021-12-30","AU","",0,1,1,43,0
@@ -235,7 +241,7 @@ defmodule Plausible.Imported.CSVImporterTest do
          """
        },
        %{
-         name: "imported_operating_systems.csv",
+         name: "imported_operating_systems_20211230_20220101.csv",
          body: """
          "date","operating_system","visitors","visits","visit_duration","bounces"
          "2021-12-30","Android",25,26,254,24
@@ -255,7 +261,7 @@ defmodule Plausible.Imported.CSVImporterTest do
          """
        },
        %{
-         name: "imported_pages.csv",
+         name: "imported_pages_20211230_20220101.csv",
          body: """
          "date","visitors","pageviews","exits","time_on_page","hostname","page"
          "2021-12-30",1,1,0,43,"lucky.numbers.com","/14776416252794997127"
@@ -277,7 +283,7 @@ defmodule Plausible.Imported.CSVImporterTest do
          """
        },
        %{
-         name: "imported_sources.csv",
+         name: "imported_sources_20211230_20220106.csv",
          body: """
          "date","source","utm_medium","utm_campaign","utm_content","utm_term","visitors","visits","visit_duration","bounces"
          "2021-12-30","","","","","",25,26,254,24
@@ -307,7 +313,7 @@ defmodule Plausible.Imported.CSVImporterTest do
          """
        },
        %{
-         name: "imported_visitors.csv",
+         name: "imported_visitors_20111225_20111230.csv",
          body: """
          "date","visitors","pageviews","bounces","visits","visit_duration"
          "2011-12-25",5,50,2,7,8640
@@ -327,13 +333,12 @@ defmodule Plausible.Imported.CSVImporterTest do
          %{"filename" => name, "s3_url" => minio_url(minio, "imports", key)}
        end

+     date_range = CSVImporter.date_range(uploads)
+
      {:ok, job} =
-       CSVImporter.new_import(
-         site,
-         user,
-         # to satisfy the non null constraints on the table I'm providing "0" dates (according to ClickHouse)
-         start_date: ~D[1970-01-01],
-         end_date: ~D[1970-01-01],
+       CSVImporter.new_import(site, user,
+         start_date: date_range.first,
+         end_date: date_range.last,
          uploads: uploads
        )
@@ -341,7 +346,6 @@ defmodule Plausible.Imported.CSVImporterTest do

      assert :ok = Plausible.Workers.ImportAnalytics.perform(job)

-     # on successfull import the start and end dates are updated
      assert %SiteImport{
               start_date: ~D[2011-12-25],
               end_date: ~D[2022-01-06],
@@ -355,7 +359,7 @@ defmodule Plausible.Imported.CSVImporterTest do
    test "fails on invalid CSV", %{site: site, user: user, s3: s3, container: minio} do
      csvs = [
        %{
-         name: "imported_browsers.csv",
+         name: "imported_browsers_20211230_20211231.csv",
          body: """
          "date","browser","visitors","visits","visit_duration","bounces"
          "2021-12-30","Amazon Silk",2,2,0,2
@@ -368,7 +372,7 @@ defmodule Plausible.Imported.CSVImporterTest do
          """
        },
        %{
-         name: "imported_devices.csv",
+         name: "imported_devices_20211230_20211231.csv",
          body: """
          "date","device","visitors","visit_duration","bounces"
          "2021-12-30","Desktop",28,ehhhh....
@@ -383,12 +387,12 @@ defmodule Plausible.Imported.CSVImporterTest do
          %{"filename" => name, "s3_url" => minio_url(minio, "imports", key)}
        end

+     date_range = CSVImporter.date_range(uploads)
+
      {:ok, job} =
-       CSVImporter.new_import(
-         site,
-         user,
-         start_date: ~D[1970-01-01],
-         end_date: ~D[1970-01-01],
+       CSVImporter.new_import(site, user,
+         start_date: date_range.first,
+         end_date: date_range.last,
          uploads: uploads
        )
@@ -508,12 +512,12 @@ defmodule Plausible.Imported.CSVImporterTest do
      end)

      # run importer
+     date_range = CSVImporter.date_range(uploads)
+
      {:ok, job} =
-       CSVImporter.new_import(
-         site,
-         user,
-         start_date: ~D[1970-01-01],
-         end_date: ~D[1970-01-01],
+       CSVImporter.new_import(site, user,
+         start_date: date_range.first,
+         end_date: date_range.last,
          uploads: uploads
        )
@@ -533,7 +537,13 @@ defmodule Plausible.Imported.CSVImporterTest do
    end

    defp minio_url(minio, bucket, key) do
+     arch = to_string(:erlang.system_info(:system_architecture))
+
+     if String.contains?(arch, "darwin") do
+       Path.join(["http://#{minio.ip_address}:9000", bucket, key])
+     else
        port = minio |> MinioContainer.connection_opts() |> Keyword.fetch!(:port)
        Path.join(["http://172.17.0.1:#{port}", bucket, key])
+     end
    end
  end
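Across these fixtures, date_range/1 folds every filename-encoded window into one overall span, which is what the SiteImport assertion above checks (2011-12-25 through 2022-01-06). A minimal sketch using only the two extreme files:

      CSVImporter.date_range([
        %{"filename" => "imported_visitors_20111225_20111230.csv"},
        %{"filename" => "imported_sources_20211230_20220106.csv"}
      ])
      #=> Date.range(~D[2011-12-25], ~D[2022-01-06])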
test/plausible/s3_test.exs (new file, +4)
@@ -0,0 +1,4 @@
+defmodule Plausible.S3Test do
+  use ExUnit.Case, async: true
+  doctest Plausible.S3, import: true
+end
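This new test file runs the iex> examples from the Plausible.S3 @doc strings above as doctests; import: true lets them call imports_bucket/0 and import_presign_upload/2 without the module prefix.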