mirror of
https://github.com/plausible/analytics.git
synced 2024-11-25 15:34:22 +03:00
CSV imports (no UI) (#3895)
* encode/decode date range in filenames
* Update lib/plausible/imported/csv_importer.ex
  Co-authored-by: Adrian Gruntkowski <adrian.gruntkowski@gmail.com>
* Update lib/plausible/imported/csv_importer.ex
  Co-authored-by: Adrian Gruntkowski <adrian.gruntkowski@gmail.com>
* drop unused functions
* send failure email if there is no data to export
* use PlausibleWeb.Email.mailer_email_from()
* ensure we get dates from minmax date query
---------
Co-authored-by: Adrian Gruntkowski <adrian.gruntkowski@gmail.com>
This commit is contained in:
parent
4242b52be4
commit
279e89c693
@ -27,3 +27,4 @@ S3_SECRET_ACCESS_KEY=minioadmin
|
||||
S3_REGION=us-east-1
|
||||
S3_ENDPOINT=http://localhost:10000
|
||||
S3_EXPORTS_BUCKET=dev-exports
|
||||
S3_IMPORTS_BUCKET=dev-imports
|
||||
|
@ -22,3 +22,4 @@ S3_SECRET_ACCESS_KEY=minioadmin
|
||||
S3_REGION=us-east-1
|
||||
S3_ENDPOINT=http://localhost:10000
|
||||
S3_EXPORTS_BUCKET=test-exports
|
||||
S3_IMPORTS_BUCKET=test-imports
|
||||
|
@ -736,6 +736,10 @@ unless s3_disabled? do
|
||||
%{
|
||||
name: "S3_EXPORTS_BUCKET",
|
||||
example: "my-csv-exports-bucket"
|
||||
},
|
||||
%{
|
||||
name: "S3_IMPORTS_BUCKET",
|
||||
example: "my-csv-imports-bucket"
|
||||
}
|
||||
]
|
||||
|
||||
@ -771,5 +775,7 @@ unless s3_disabled? do
|
||||
host: s3_host,
|
||||
port: s3_port
|
||||
|
||||
config :plausible, Plausible.S3, exports_bucket: s3_env_value.("S3_EXPORTS_BUCKET")
|
||||
config :plausible, Plausible.S3,
|
||||
exports_bucket: s3_env_value.("S3_EXPORTS_BUCKET"),
|
||||
imports_bucket: s3_env_value.("S3_IMPORTS_BUCKET")
|
||||
end
|
||||
|
@ -20,7 +20,13 @@ defmodule Plausible.Imported.CSVImporter do
|
||||
|
||||
@impl true
|
||||
def import_data(site_import, opts) do
|
||||
%{id: import_id, site_id: site_id} = site_import
|
||||
%{
|
||||
id: import_id,
|
||||
site_id: site_id,
|
||||
start_date: start_date,
|
||||
end_date: end_date
|
||||
} = site_import
|
||||
|
||||
uploads = Keyword.fetch!(opts, :uploads)
|
||||
|
||||
%{access_key_id: s3_access_key_id, secret_access_key: s3_secret_access_key} =
|
||||
@ -31,52 +37,36 @@ defmodule Plausible.Imported.CSVImporter do
|
||||
|> Keyword.replace!(:pool_size, 1)
|
||||
|> Ch.start_link()
|
||||
|
||||
ranges =
|
||||
Enum.map(uploads, fn upload ->
|
||||
%{"filename" => filename, "s3_url" => s3_url} = upload
|
||||
Enum.each(uploads, fn upload ->
|
||||
%{"filename" => filename, "s3_url" => s3_url} = upload
|
||||
|
||||
".csv" = Path.extname(filename)
|
||||
table = Path.rootname(filename)
|
||||
ensure_importable_table!(table)
|
||||
{table, _, _} = parse_filename!(filename)
|
||||
s3_structure = input_structure!(table)
|
||||
|
||||
s3_structure = input_structure!(table)
|
||||
statement =
|
||||
"""
|
||||
INSERT INTO {table:Identifier} \
|
||||
SELECT {site_id:UInt64} AS site_id, *, {import_id:UInt64} AS import_id \
|
||||
FROM s3({s3_url:String},{s3_access_key_id:String},{s3_secret_access_key:String},{s3_format:String},{s3_structure:String}) \
|
||||
WHERE date >= {start_date:Date} AND date <= {end_date:Date}\
|
||||
"""
|
||||
|
||||
statement =
|
||||
"""
|
||||
INSERT INTO {table:Identifier} \
|
||||
SELECT {site_id:UInt64} AS site_id, *, {import_id:UInt64} AS import_id \
|
||||
FROM s3({s3_url:String},{s3_access_key_id:String},{s3_secret_access_key:String},{s3_format:String},{s3_structure:String})\
|
||||
"""
|
||||
params =
|
||||
%{
|
||||
"table" => table,
|
||||
"site_id" => site_id,
|
||||
"import_id" => import_id,
|
||||
"s3_url" => s3_url,
|
||||
"s3_access_key_id" => s3_access_key_id,
|
||||
"s3_secret_access_key" => s3_secret_access_key,
|
||||
"s3_format" => "CSVWithNames",
|
||||
"s3_structure" => s3_structure,
|
||||
"start_date" => start_date,
|
||||
"end_date" => end_date
|
||||
}
|
||||
|
||||
params =
|
||||
%{
|
||||
"table" => table,
|
||||
"site_id" => site_id,
|
||||
"import_id" => import_id,
|
||||
"s3_url" => s3_url,
|
||||
"s3_access_key_id" => s3_access_key_id,
|
||||
"s3_secret_access_key" => s3_secret_access_key,
|
||||
"s3_format" => "CSVWithNames",
|
||||
"s3_structure" => s3_structure
|
||||
}
|
||||
|
||||
Ch.query!(ch, statement, params, timeout: :infinity)
|
||||
|
||||
%Ch.Result{rows: [[min_date, max_date]]} =
|
||||
Ch.query!(
|
||||
ch,
|
||||
"SELECT min(date), max(date) FROM {table:Identifier} WHERE site_id = {site_id:UInt64} AND import_id = {import_id:UInt64}",
|
||||
%{"table" => table, "site_id" => site_id, "import_id" => import_id}
|
||||
)
|
||||
|
||||
Date.range(min_date, max_date)
|
||||
end)
|
||||
|
||||
{:ok,
|
||||
%{
|
||||
start_date: Enum.min_by(ranges, & &1.first, Date).first,
|
||||
end_date: Enum.max_by(ranges, & &1.last, Date).last
|
||||
}}
|
||||
Ch.query!(ch, statement, params, timeout: :infinity)
|
||||
end)
|
||||
rescue
|
||||
# we are cancelling on any argument or ClickHouse errors
|
||||
e in [ArgumentError, Ch.Error] ->
|
||||
@ -103,12 +93,85 @@ defmodule Plausible.Imported.CSVImporter do
|
||||
"date Date, visitors UInt64, pageviews UInt64, bounces UInt64, visits UInt64, visit_duration UInt64"
|
||||
}
|
||||
|
||||
for {table, input_structure} <- input_structures do
|
||||
defp input_structure!(unquote(table)), do: unquote(input_structure)
|
||||
defp ensure_importable_table!(unquote(table)), do: :ok
|
||||
@doc """
Computes the overall date range covered by a list of uploads.

Each upload is either a filename or a map with a `"filename"` key. The
result runs from the earliest start date to the latest end date encoded in
those filenames. An empty list raises `ArgumentError`.

Examples:

    iex> date_range([
    ...>   %{"filename" => "imported_devices_20190101_20210101.csv"},
    ...>   "imported_pages_20200101_20220101.csv"
    ...> ])
    Date.range(~D[2019-01-01], ~D[2022-01-01])

    iex> date_range([])
    ** (ArgumentError) empty uploads

"""
@spec date_range([String.t() | %{String.t() => String.t()}, ...]) :: Date.Range.t()
def date_range([]) do
  raise ArgumentError, "empty uploads"
end

def date_range([_ | _] = uploads) do
  # no dates have been seen yet, so both accumulators start out as nil
  date_range(uploads, nil, nil)
end
|
||||
|
||||
# Folds one upload into the running range and recurses on the rest.
# `prev_start_date` / `prev_end_date` are nil until the first upload is processed.
defp date_range([upload | uploads], prev_start_date, prev_end_date) do
  name =
    case upload do
      plain when is_binary(plain) -> plain
      %{"filename" => from_map} -> from_map
    end

  {_table, file_start, file_end} = parse_filename!(name)

  # List.wrap/1 maps the initial nil to [] and a previous date to [date],
  # so the first upload's dates are taken as-is
  earliest = Enum.min([file_start | List.wrap(prev_start_date)], Date)
  latest = Enum.max([file_end | List.wrap(prev_end_date)], Date)

  date_range(uploads, earliest, latest)
end
|
||||
|
||||
defp ensure_importable_table!(table) do
|
||||
raise ArgumentError, "table #{table} is not supported for data import"
|
||||
# Base case: no uploads are left, so turn the accumulated dates into a range.
defp date_range([], first, last) do
  Date.range(first, last)
end
|
||||
|
||||
# Parses a compact `YYYYMMDD` string into a date; raises if Timex cannot parse it.
@spec parse_date!(String.t()) :: Date.t()
defp parse_date!(date) do
  parsed = Timex.parse!(date, "{YYYY}{0M}{0D}")
  NaiveDateTime.to_date(parsed)
end
|
||||
|
||||
@doc """
Splits an upload filename into its table name and the date range it covers.

The filename must look like `<table>_<YYYYMMDD>_<YYYYMMDD>.csv`, where
`<table>` is one of the tables listed in `input_structures`. A filename that
does not match this shape raises `ArgumentError`.

Examples:

    iex> parse_filename!("my_data.csv")
    ** (ArgumentError) invalid filename

    iex> parse_filename!("imported_devices_00010101_20250101.csv")
    {"imported_devices", ~D[0001-01-01], ~D[2025-01-01]}

"""
@spec parse_filename!(String.t()) ::
        {table :: String.t(), start_date :: Date.t(), end_date :: Date.t()}
def parse_filename!(filename)

# One `input_structure!/1` clause and one `parse_filename!/1` clause are
# generated per entry in `input_structures`, so each table name is matched
# as a literal binary prefix.
for {table_name, structure} <- input_structures do
  defp input_structure!(unquote(table_name)), do: unquote(structure)

  def parse_filename!(
        <<unquote(table_name)::bytes, ?_, first_day::8-bytes, ?_, last_day::8-bytes, ".csv">>
      ) do
    {unquote(table_name), parse_date!(first_day), parse_date!(last_day)}
  end
end

# Fallback for filenames that match none of the generated clauses.
def parse_filename!(_filename), do: raise(ArgumentError, "invalid filename")
|
||||
end
|
||||
|
@ -18,9 +18,64 @@ defmodule Plausible.S3 do
|
||||
@spec exports_bucket :: String.t()
|
||||
def exports_bucket, do: config(:exports_bucket)
|
||||
|
||||
@doc """
The name of the S3 bucket used for CSV imports.

It is read from this module's application config:

    config :plausible, Plausible.S3,
      imports_bucket: System.fetch_env!("S3_IMPORTS_BUCKET")

Example:

    iex> imports_bucket()
    "test-imports"

"""
@spec imports_bucket :: String.t()
def imports_bucket do
  config(:imports_bucket)
end
|
||||
|
||||
# The keyword list configured for this module under the :plausible app;
# raises if it has not been set.
defp config do
  Application.fetch_env!(:plausible, __MODULE__)
end

# A single required entry from that keyword list; raises if the key is absent.
defp config(key) do
  settings = config()
  Keyword.fetch!(settings, key)
end
|
||||
|
||||
@doc """
Generates a presigned `PUT` URL for uploading an import file.

The object key is `<site_id>/<filename>` inside the imports bucket. The
returned map holds the presigned URL and `s3_url`, which is derived from it
by `extract_s3_url/1`.

In the current implementation the bucket always goes into the path component.

Example:

    iex> %{
    ...>   s3_url: "http://localhost:10000/test-imports/123/imported_browsers.csv",
    ...>   presigned_url: "http://localhost:10000/test-imports/123/imported_browsers.csv?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=minioadmin" <> _
    ...> } = import_presign_upload(_site_id = 123, _filename = "imported_browsers.csv")

"""
def import_presign_upload(site_id, filename) do
  aws_config = ExAws.Config.new(:s3)
  object_key = site_id |> to_string() |> Path.join(filename)

  # a failed presign is treated as a bug, hence the assertive match
  {:ok, url} = ExAws.S3.presigned_url(aws_config, :put, imports_bucket(), object_key)

  %{s3_url: extract_s3_url(url), presigned_url: url}
end
|
||||
|
||||
# Strips the query string from a presigned URL.
#
# To make ClickHouse see MinIO in dev and test envs, the host of the resulting
# URL is additionally replaced with the value of the S3_CLICKHOUSE_HOST env var
# when that variable is set. Other envs get the URL unchanged.
if Mix.env() in [:dev, :test, :small_dev, :small_test] do
  defp extract_s3_url(presigned_url) do
    [base_url, _query] = String.split(presigned_url, "?")

    case System.get_env("S3_CLICKHOUSE_HOST") do
      nil ->
        base_url

      clickhouse_host ->
        parsed = URI.parse(base_url)
        URI.to_string(%{parsed | host: clickhouse_host})
    end
  end
else
  defp extract_s3_url(presigned_url) do
    [base_url, _query] = String.split(presigned_url, "?")
    base_url
  end
end
|
||||
|
||||
@doc """
|
||||
Chunks and uploads Zip archive to the provided S3 destination.
|
||||
|
||||
@ -77,6 +132,12 @@ defmodule Plausible.S3 do
|
||||
|
||||
@doc """
|
||||
Returns `access_key_id` and `secret_access_key` to be used by ClickHouse during imports from S3.
|
||||
|
||||
Example:
|
||||
|
||||
iex> import_clickhouse_credentials()
|
||||
%{access_key_id: "minioadmin", secret_access_key: "minioadmin"}
|
||||
|
||||
"""
|
||||
@spec import_clickhouse_credentials ::
|
||||
%{access_key_id: String.t(), secret_access_key: String.t()}
|
||||
|
@ -25,43 +25,56 @@ defmodule Plausible.Workers.ExportCSV do
|
||||
|> Keyword.replace!(:pool_size, 1)
|
||||
|> Ch.start_link()
|
||||
|
||||
# NOTE: should we use site.timezone?
|
||||
# %Ch.Result{rows: [[min_date, max_date]]} =
|
||||
# Ch.query!(
|
||||
# ch,
|
||||
# "SELECT toDate(min(timestamp)), toDate(max(timestamp)) FROM events_v2 WHERE site_id={site_id:UInt64}",
|
||||
# %{"site_id" => site_id}
|
||||
# )
|
||||
|
||||
download_url =
|
||||
DBConnection.run(
|
||||
%Ch.Result{rows: [[%Date{} = min_date, %Date{} = max_date]]} =
|
||||
Ch.query!(
|
||||
ch,
|
||||
fn conn ->
|
||||
conn
|
||||
|> Plausible.Exports.stream_archive(
|
||||
# date_range: Date.range(min_date, max_date)
|
||||
Plausible.Exports.export_queries(site_id, extname: ".csv"),
|
||||
format: "CSVWithNames"
|
||||
)
|
||||
|> Plausible.S3.export_upload_multipart(s3_bucket, s3_path, s3_config_overrides(args))
|
||||
end,
|
||||
timeout: :infinity
|
||||
"SELECT toDate(min(timestamp)), toDate(max(timestamp)) FROM events_v2 WHERE site_id={site_id:UInt64}",
|
||||
%{"site_id" => site_id}
|
||||
)
|
||||
|
||||
# NOTE: replace with proper Plausible.Email template
|
||||
Plausible.Mailer.deliver_now!(
|
||||
Bamboo.Email.new_email(
|
||||
from: "plausible@email.com",
|
||||
to: email,
|
||||
subject: "EXPORT SUCCESS",
|
||||
text_body: """
|
||||
download it from #{download_url}! hurry up! you have 24 hours!"
|
||||
""",
|
||||
html_body: """
|
||||
download it from <a href="#{download_url}">here</a>! hurry up! you have 24 hours!
|
||||
"""
|
||||
if max_date == ~D[1970-01-01] do
|
||||
# NOTE: replace with proper Plausible.Email template
|
||||
Plausible.Mailer.deliver_now!(
|
||||
Bamboo.Email.new_email(
|
||||
from: PlausibleWeb.Email.mailer_email_from(),
|
||||
to: email,
|
||||
subject: "EXPORT FAILURE",
|
||||
text_body: "there is nothing to export"
|
||||
)
|
||||
)
|
||||
)
|
||||
else
|
||||
download_url =
|
||||
DBConnection.run(
|
||||
ch,
|
||||
fn conn ->
|
||||
conn
|
||||
|> Plausible.Exports.stream_archive(
|
||||
Plausible.Exports.export_queries(site_id,
|
||||
date_range: Date.range(min_date, max_date),
|
||||
extname: ".csv"
|
||||
),
|
||||
format: "CSVWithNames"
|
||||
)
|
||||
|> Plausible.S3.export_upload_multipart(s3_bucket, s3_path, s3_config_overrides(args))
|
||||
end,
|
||||
timeout: :infinity
|
||||
)
|
||||
|
||||
# NOTE: replace with proper Plausible.Email template
|
||||
Plausible.Mailer.deliver_now!(
|
||||
Bamboo.Email.new_email(
|
||||
from: PlausibleWeb.Email.mailer_email_from(),
|
||||
to: email,
|
||||
subject: "EXPORT SUCCESS",
|
||||
text_body: """
|
||||
download it from #{download_url}! hurry up! you have 24 hours!"
|
||||
""",
|
||||
html_body: """
|
||||
download it from <a href="#{download_url}">here</a>! hurry up! you have 24 hours!
|
||||
"""
|
||||
)
|
||||
)
|
||||
end
|
||||
|
||||
:ok
|
||||
end
|
||||
|
@ -198,7 +198,8 @@ defmodule Plausible.ConfigTest do
|
||||
{"S3_SECRET_ACCESS_KEY", nil},
|
||||
{"S3_REGION", nil},
|
||||
{"S3_ENDPOINT", nil},
|
||||
{"S3_EXPORTS_BUCKET", nil}
|
||||
{"S3_EXPORTS_BUCKET", nil},
|
||||
{"S3_IMPORTS_BUCKET", nil}
|
||||
]
|
||||
|
||||
result =
|
||||
@ -211,13 +212,14 @@ defmodule Plausible.ConfigTest do
|
||||
assert %ArgumentError{} = result
|
||||
|
||||
assert Exception.message(result) == """
|
||||
Missing S3 configuration. Please set S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY, S3_REGION, S3_ENDPOINT, S3_EXPORTS_BUCKET environment variable(s):
|
||||
Missing S3 configuration. Please set S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY, S3_REGION, S3_ENDPOINT, S3_EXPORTS_BUCKET, S3_IMPORTS_BUCKET environment variable(s):
|
||||
|
||||
\tS3_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE
|
||||
\tS3_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
|
||||
\tS3_REGION=us-east-1
|
||||
\tS3_ENDPOINT=https://<ACCOUNT_ID>.r2.cloudflarestorage.com
|
||||
\tS3_EXPORTS_BUCKET=my-csv-exports-bucket
|
||||
\tS3_IMPORTS_BUCKET=my-csv-imports-bucket
|
||||
"""
|
||||
end
|
||||
|
||||
@ -227,7 +229,8 @@ defmodule Plausible.ConfigTest do
|
||||
{"S3_SECRET_ACCESS_KEY", nil},
|
||||
{"S3_REGION", "eu-north-1"},
|
||||
{"S3_ENDPOINT", nil},
|
||||
{"S3_EXPORTS_BUCKET", "my-exports"}
|
||||
{"S3_EXPORTS_BUCKET", "my-exports"},
|
||||
{"S3_IMPORTS_BUCKET", nil}
|
||||
]
|
||||
|
||||
result =
|
||||
@ -240,10 +243,11 @@ defmodule Plausible.ConfigTest do
|
||||
assert %ArgumentError{} = result
|
||||
|
||||
assert Exception.message(result) == """
|
||||
Missing S3 configuration. Please set S3_SECRET_ACCESS_KEY, S3_ENDPOINT environment variable(s):
|
||||
Missing S3 configuration. Please set S3_SECRET_ACCESS_KEY, S3_ENDPOINT, S3_IMPORTS_BUCKET environment variable(s):
|
||||
|
||||
\tS3_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
|
||||
\tS3_ENDPOINT=https://<ACCOUNT_ID>.r2.cloudflarestorage.com
|
||||
\tS3_IMPORTS_BUCKET=my-csv-imports-bucket
|
||||
"""
|
||||
end
|
||||
|
||||
@ -253,7 +257,8 @@ defmodule Plausible.ConfigTest do
|
||||
{"S3_SECRET_ACCESS_KEY", "minioadmin"},
|
||||
{"S3_REGION", "us-east-1"},
|
||||
{"S3_ENDPOINT", "http://localhost:6000"},
|
||||
{"S3_EXPORTS_BUCKET", "my-exports"}
|
||||
{"S3_EXPORTS_BUCKET", "my-exports"},
|
||||
{"S3_IMPORTS_BUCKET", "my-imports"}
|
||||
]
|
||||
|
||||
config = runtime_config(env)
|
||||
@ -266,8 +271,9 @@ defmodule Plausible.ConfigTest do
|
||||
s3: [scheme: "http://", host: "localhost", port: 6000]
|
||||
]
|
||||
|
||||
assert get_in(runtime_config(env), [:plausible, Plausible.S3]) == [
|
||||
exports_bucket: "my-exports"
|
||||
assert get_in(config, [:plausible, Plausible.S3]) == [
|
||||
exports_bucket: "my-exports",
|
||||
imports_bucket: "my-imports"
|
||||
]
|
||||
end
|
||||
end
|
||||
|
@ -4,6 +4,8 @@ defmodule Plausible.Imported.CSVImporterTest do
|
||||
alias Testcontainers.MinioContainer
|
||||
require SiteImport
|
||||
|
||||
doctest CSVImporter, import: true
|
||||
|
||||
@moduletag :minio
|
||||
|
||||
setup_all do
|
||||
@ -55,9 +57,12 @@ defmodule Plausible.Imported.CSVImporterTest do
|
||||
"imported_visitors"
|
||||
]
|
||||
|
||||
start_date = "20231001"
|
||||
end_date = "20240102"
|
||||
|
||||
uploads =
|
||||
Enum.map(tables, fn table ->
|
||||
filename = "#{table}.csv"
|
||||
filename = "#{table}_#{start_date}_#{end_date}.csv"
|
||||
|
||||
%{
|
||||
"filename" => filename,
|
||||
@ -65,11 +70,12 @@ defmodule Plausible.Imported.CSVImporterTest do
|
||||
}
|
||||
end)
|
||||
|
||||
date_range = CSVImporter.date_range(uploads)
|
||||
|
||||
assert {:ok, job} =
|
||||
CSVImporter.new_import(site, user,
|
||||
# to satisfy the non null constraints on the table I'm providing "0" dates (according to ClickHouse)
|
||||
start_date: ~D[1970-01-01],
|
||||
end_date: ~D[1970-01-01],
|
||||
start_date: date_range.first,
|
||||
end_date: date_range.last,
|
||||
uploads: uploads
|
||||
)
|
||||
|
||||
@ -80,8 +86,8 @@ defmodule Plausible.Imported.CSVImporterTest do
|
||||
%{
|
||||
id: ^import_id,
|
||||
source: :csv,
|
||||
start_date: ~D[1970-01-01],
|
||||
end_date: ~D[1970-01-01],
|
||||
start_date: ~D[2023-10-01],
|
||||
end_date: ~D[2024-01-02],
|
||||
status: SiteImport.pending()
|
||||
}
|
||||
] = Plausible.Imported.list_all_imports(site)
|
||||
@ -97,7 +103,7 @@ defmodule Plausible.Imported.CSVImporterTest do
|
||||
test "imports tables from S3", %{site: site, user: user, s3: s3, container: minio} do
|
||||
csvs = [
|
||||
%{
|
||||
name: "imported_browsers.csv",
|
||||
name: "imported_browsers_20211230_20211231.csv",
|
||||
body: """
|
||||
"date","browser","visitors","visits","visit_duration","bounces"
|
||||
"2021-12-30","Amazon Silk",2,2,0,2
|
||||
@ -122,7 +128,7 @@ defmodule Plausible.Imported.CSVImporterTest do
|
||||
"""
|
||||
},
|
||||
%{
|
||||
name: "imported_devices.csv",
|
||||
name: "imported_devices_20211230_20220102.csv",
|
||||
body: """
|
||||
"date","device","visitors","visits","visit_duration","bounces"
|
||||
"2021-12-30","Desktop",25,28,75,27
|
||||
@ -140,7 +146,7 @@ defmodule Plausible.Imported.CSVImporterTest do
|
||||
"""
|
||||
},
|
||||
%{
|
||||
name: "imported_entry_pages.csv",
|
||||
name: "imported_entry_pages_20211230_20211231.csv",
|
||||
body: """
|
||||
"date","visitors","entrances","visit_duration","bounces","entry_page"
|
||||
"2021-12-30",6,6,0,6,"/14776416252794997127"
|
||||
@ -173,7 +179,7 @@ defmodule Plausible.Imported.CSVImporterTest do
|
||||
"""
|
||||
},
|
||||
%{
|
||||
name: "imported_exit_pages.csv",
|
||||
name: "imported_exit_pages_20211230_20211231.csv",
|
||||
body: """
|
||||
"date","visitors","exits","exit_page"
|
||||
"2021-12-30",6,6,"/14776416252794997127"
|
||||
@ -198,7 +204,7 @@ defmodule Plausible.Imported.CSVImporterTest do
|
||||
"""
|
||||
},
|
||||
%{
|
||||
name: "imported_locations.csv",
|
||||
name: "imported_locations_20211230_20211231.csv",
|
||||
body: """
|
||||
"date","country","region","city","visitors","visits","visit_duration","bounces"
|
||||
"2021-12-30","AU","",0,1,1,43,0
|
||||
@ -235,7 +241,7 @@ defmodule Plausible.Imported.CSVImporterTest do
|
||||
"""
|
||||
},
|
||||
%{
|
||||
name: "imported_operating_systems.csv",
|
||||
name: "imported_operating_systems_20211230_20220101.csv",
|
||||
body: """
|
||||
"date","operating_system","visitors","visits","visit_duration","bounces"
|
||||
"2021-12-30","Android",25,26,254,24
|
||||
@ -255,7 +261,7 @@ defmodule Plausible.Imported.CSVImporterTest do
|
||||
"""
|
||||
},
|
||||
%{
|
||||
name: "imported_pages.csv",
|
||||
name: "imported_pages_20211230_20220101.csv",
|
||||
body: """
|
||||
"date","visitors","pageviews","exits","time_on_page","hostname","page"
|
||||
"2021-12-30",1,1,0,43,"lucky.numbers.com","/14776416252794997127"
|
||||
@ -277,7 +283,7 @@ defmodule Plausible.Imported.CSVImporterTest do
|
||||
"""
|
||||
},
|
||||
%{
|
||||
name: "imported_sources.csv",
|
||||
name: "imported_sources_20211230_20220106.csv",
|
||||
body: """
|
||||
"date","source","utm_medium","utm_campaign","utm_content","utm_term","visitors","visits","visit_duration","bounces"
|
||||
"2021-12-30","","","","","",25,26,254,24
|
||||
@ -307,7 +313,7 @@ defmodule Plausible.Imported.CSVImporterTest do
|
||||
"""
|
||||
},
|
||||
%{
|
||||
name: "imported_visitors.csv",
|
||||
name: "imported_visitors_20111225_20111230.csv",
|
||||
body: """
|
||||
"date","visitors","pageviews","bounces","visits","visit_duration"
|
||||
"2011-12-25",5,50,2,7,8640
|
||||
@ -327,13 +333,12 @@ defmodule Plausible.Imported.CSVImporterTest do
|
||||
%{"filename" => name, "s3_url" => minio_url(minio, "imports", key)}
|
||||
end
|
||||
|
||||
date_range = CSVImporter.date_range(uploads)
|
||||
|
||||
{:ok, job} =
|
||||
CSVImporter.new_import(
|
||||
site,
|
||||
user,
|
||||
# to satisfy the non null constraints on the table I'm providing "0" dates (according to ClickHouse)
|
||||
start_date: ~D[1970-01-01],
|
||||
end_date: ~D[1970-01-01],
|
||||
CSVImporter.new_import(site, user,
|
||||
start_date: date_range.first,
|
||||
end_date: date_range.last,
|
||||
uploads: uploads
|
||||
)
|
||||
|
||||
@ -341,7 +346,6 @@ defmodule Plausible.Imported.CSVImporterTest do
|
||||
|
||||
assert :ok = Plausible.Workers.ImportAnalytics.perform(job)
|
||||
|
||||
# on successful import the start and end dates are updated
|
||||
assert %SiteImport{
|
||||
start_date: ~D[2011-12-25],
|
||||
end_date: ~D[2022-01-06],
|
||||
@ -355,7 +359,7 @@ defmodule Plausible.Imported.CSVImporterTest do
|
||||
test "fails on invalid CSV", %{site: site, user: user, s3: s3, container: minio} do
|
||||
csvs = [
|
||||
%{
|
||||
name: "imported_browsers.csv",
|
||||
name: "imported_browsers_20211230_20211231.csv",
|
||||
body: """
|
||||
"date","browser","visitors","visits","visit_duration","bounces"
|
||||
"2021-12-30","Amazon Silk",2,2,0,2
|
||||
@ -368,7 +372,7 @@ defmodule Plausible.Imported.CSVImporterTest do
|
||||
"""
|
||||
},
|
||||
%{
|
||||
name: "imported_devices.csv",
|
||||
name: "imported_devices_20211230_20211231.csv",
|
||||
body: """
|
||||
"date","device","visitors","visit_duration","bounces"
|
||||
"2021-12-30","Desktop",28,ehhhh....
|
||||
@ -383,12 +387,12 @@ defmodule Plausible.Imported.CSVImporterTest do
|
||||
%{"filename" => name, "s3_url" => minio_url(minio, "imports", key)}
|
||||
end
|
||||
|
||||
date_range = CSVImporter.date_range(uploads)
|
||||
|
||||
{:ok, job} =
|
||||
CSVImporter.new_import(
|
||||
site,
|
||||
user,
|
||||
start_date: ~D[1970-01-01],
|
||||
end_date: ~D[1970-01-01],
|
||||
CSVImporter.new_import(site, user,
|
||||
start_date: date_range.first,
|
||||
end_date: date_range.last,
|
||||
uploads: uploads
|
||||
)
|
||||
|
||||
@ -508,12 +512,12 @@ defmodule Plausible.Imported.CSVImporterTest do
|
||||
end)
|
||||
|
||||
# run importer
|
||||
date_range = CSVImporter.date_range(uploads)
|
||||
|
||||
{:ok, job} =
|
||||
CSVImporter.new_import(
|
||||
site,
|
||||
user,
|
||||
start_date: ~D[1970-01-01],
|
||||
end_date: ~D[1970-01-01],
|
||||
CSVImporter.new_import(site, user,
|
||||
start_date: date_range.first,
|
||||
end_date: date_range.last,
|
||||
uploads: uploads
|
||||
)
|
||||
|
||||
@ -533,7 +537,13 @@ defmodule Plausible.Imported.CSVImporterTest do
|
||||
end
|
||||
|
||||
defp minio_url(minio, bucket, key) do
|
||||
port = minio |> MinioContainer.connection_opts() |> Keyword.fetch!(:port)
|
||||
Path.join(["http://172.17.0.1:#{port}", bucket, key])
|
||||
arch = to_string(:erlang.system_info(:system_architecture))
|
||||
|
||||
if String.contains?(arch, "darwin") do
|
||||
Path.join(["http://#{minio.ip_address}:9000", bucket, key])
|
||||
else
|
||||
port = minio |> MinioContainer.connection_opts() |> Keyword.fetch!(:port)
|
||||
Path.join(["http://172.17.0.1:#{port}", bucket, key])
|
||||
end
|
||||
end
|
||||
end
|
||||
|
4
test/plausible/s3_test.exs
Normal file
4
test/plausible/s3_test.exs
Normal file
@ -0,0 +1,4 @@
|
||||
# Runs the `iex>` examples from the Plausible.S3 docs as ExUnit tests.
defmodule Plausible.S3Test do
|
||||
use ExUnit.Case, async: true
|
||||
# `import: true` lets the examples call Plausible.S3 functions without the module prefix
doctest Plausible.S3, import: true
|
||||
end
|
Loading…
Reference in New Issue
Block a user