Ignore unknown countries (#2556)

* Ignore XX and T1 countries

* Add fallback if country_code=nil

* Lookup city overrides directly in CityOverrides module

* Changelog

* Add empty moduledoc

* Remove redundant comment
This commit is contained in:
Uku Taht 2023-01-03 15:35:23 +02:00 committed by GitHub
parent 47e21121db
commit 1785653b1e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 96 additions and 38 deletions

View File

@ -9,6 +9,7 @@ All notable changes to this project will be documented in this file.
### Changed
- Reject events with long URIs and data URIs plausible/analytics#2536
- Always show direct traffic in sources reports plausible/analytics#2531
- Stop recording XX and T1 country codes plausible/analytics#2556
## Fixed
- Cascade delete sent_renewal_notifications table when user is deleted plausible/analytics#2549

View File

@ -56,7 +56,9 @@ config :geolix,
{1, 1, 1, 1} => %{country: %{iso_code: "US"}},
{2, 2, 2, 2} => geolix_sample_lookup,
{1, 1, 1, 1, 1, 1, 1, 1} => %{country: %{iso_code: "US"}},
{0, 0, 0, 0} => %{country: %{iso_code: "ZZ"}}
{0, 0, 0, 0} => %{country: %{iso_code: "ZZ"}, city: %{geoname_id: 123_123}},
{0, 0, 0, 1} => %{country: %{iso_code: "XX"}, subdivisions: [%{iso_code: "IDF"}]},
{0, 0, 0, 2} => %{country: %{iso_code: "T1"}, subdivisions: [%{}, %{iso_code: "IDF"}]}
}
}
]

View File

@ -198,5 +198,5 @@ defmodule Plausible.Ingestion.CityOverrides do
# Hackney -> London
2_647_694 => 2_643_743
}
def get, do: @overrides
def get(key, default), do: Map.get(@overrides, key, default)
end

View File

@ -5,7 +5,7 @@ defmodule Plausible.Ingestion.Event do
are uniformly either buffered in batches (to Clickhouse) or dropped
(e.g. due to spam blocklist) from the processing pipeline.
"""
alias Plausible.Ingestion.{Request, CityOverrides}
alias Plausible.Ingestion.Request
alias Plausible.ClickhouseEvent
alias Plausible.Site.GateKeeper
@ -168,39 +168,9 @@ defmodule Plausible.Ingestion.Event do
end
defp put_geolocation(%__MODULE__{} = event) do
result = Geolix.lookup(event.request.remote_ip, where: :geolocation)
result = Plausible.Ingestion.Geolocation.lookup(event.request.remote_ip)
country_code =
get_in(result, [:country, :iso_code])
|> ignore_unknown_country()
city_geoname_id = get_in(result, [:city, :geoname_id])
city_geoname_id = Map.get(CityOverrides.get(), city_geoname_id, city_geoname_id)
subdivision1_code =
case result do
%{subdivisions: [%{iso_code: iso_code} | _rest]} ->
country_code <> "-" <> iso_code
_ ->
""
end
subdivision2_code =
case result do
%{subdivisions: [_first, %{iso_code: iso_code} | _rest]} ->
country_code <> "-" <> iso_code
_ ->
""
end
update_attrs(event, %{
country_code: country_code,
subdivision1_code: subdivision1_code,
subdivision2_code: subdivision2_code,
city_geoname_id: city_geoname_id
})
update_attrs(event, result)
end
defp put_screen_size(%__MODULE__{} = event) do
@ -372,9 +342,6 @@ defmodule Plausible.Ingestion.Event do
end
end
defp ignore_unknown_country("ZZ"), do: nil
defp ignore_unknown_country(country), do: country
defp generate_user_id(request, domain, hostname, salt) do
cond do
is_nil(salt) ->

View File

@ -0,0 +1,47 @@
defmodule Plausible.Ingestion.Geolocation do
@moduledoc false
alias Plausible.Ingestion.CityOverrides
def lookup(remote_ip) do
result = Geolix.lookup(remote_ip, where: :geolocation)
country_code =
get_in(result, [:country, :iso_code])
|> ignore_unknown_country()
city_geoname_id = country_code && get_in(result, [:city, :geoname_id])
city_geoname_id = CityOverrides.get(city_geoname_id, city_geoname_id)
%{
country_code: country_code,
subdivision1_code: subdivision1_code(country_code, result),
subdivision2_code: subdivision2_code(country_code, result),
city_geoname_id: city_geoname_id
}
end
defp subdivision1_code(country_code, %{subdivisions: [%{iso_code: iso_code} | _rest]})
when not is_nil(country_code) do
country_code <> "-" <> iso_code
end
defp subdivision1_code(_, _), do: nil
defp subdivision2_code(country_code, %{subdivisions: [_first, %{iso_code: iso_code} | _rest]})
when not is_nil(country_code) do
country_code <> "-" <> iso_code
end
defp subdivision2_code(_, _), do: nil
@ignored_countries [
# Worldwide
"ZZ",
# Disputed territory
"XX",
# Tor exit node
"T1"
]
defp ignore_unknown_country(code) when code in @ignored_countries, do: nil
defp ignore_unknown_country(country), do: country
end

View File

@ -685,6 +685,47 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do
pageview = get_event(domain)
assert pageview.country_code == <<0, 0>>
assert pageview.subdivision1_code == ""
assert pageview.subdivision2_code == ""
assert pageview.city_geoname_id == 0
end
test "ignores disputed territory code XX", %{conn: conn, domain: domain} do
params = %{
name: "pageview",
domain: domain,
url: "http://gigride.live/"
}
conn
|> put_req_header("x-forwarded-for", "0.0.0.1")
|> post("/api/event", params)
pageview = get_event(domain)
assert pageview.country_code == <<0, 0>>
assert pageview.subdivision1_code == ""
assert pageview.subdivision2_code == ""
assert pageview.city_geoname_id == 0
end
test "ignores TOR exit node country code T1", %{conn: conn, domain: domain} do
params = %{
name: "pageview",
domain: domain,
url: "http://gigride.live/"
}
conn
|> put_req_header("x-forwarded-for", "0.0.0.2")
|> post("/api/event", params)
pageview = get_event(domain)
assert pageview.country_code == <<0, 0>>
assert pageview.subdivision1_code == ""
assert pageview.subdivision2_code == ""
assert pageview.city_geoname_id == 0
end
test "scrubs port from x-forwarded-for", %{conn: conn, domain: domain} do