2021-08-04 12:01:50 +03:00
|
|
|
defmodule PlausibleWeb.Favicon do
|
2022-09-23 13:22:43 +03:00
|
|
|
@referer_domains_file "priv/referer_favicon_domains.json"
|
|
|
|
@moduledoc """
|
2022-09-28 14:55:46 +03:00
|
|
|
A Plug that fetches favicon images from DuckDuckGo and returns them
|
|
|
|
to the Plausible frontend.
|
2022-09-23 13:22:43 +03:00
|
|
|
|
2022-09-28 14:55:46 +03:00
|
|
|
The proxying is there so we can reduce the number of third-party domains that
|
|
|
|
the browser clients need to connect to. Our goal is to have 0 third-party domain
|
|
|
|
connections on the website for privacy reasons.
|
2022-09-23 13:22:43 +03:00
|
|
|
|
2022-09-28 14:55:46 +03:00
|
|
|
This module also maps between categorized sources and their respective URLs for favicons.
|
|
|
|
What does that mean exactly? During ingestion we use `PlausibleWeb.RefInspector.parse/1` to
|
|
|
|
categorize our referrer sources like so:
|
2022-09-23 13:22:43 +03:00
|
|
|
|
2022-09-28 14:55:46 +03:00
|
|
|
google.com -> Google
|
|
|
|
google.co.uk -> Google
|
|
|
|
google.com.au -> Google
|
2022-09-23 13:22:43 +03:00
|
|
|
|
2022-09-28 14:55:46 +03:00
|
|
|
So when we show Google as a source in the dashboard, the request to this plug will come as:
|
|
|
|
https://plausible.io/favicon/sources/Google
|
2022-09-23 13:22:43 +03:00
|
|
|
|
2022-09-28 14:55:46 +03:00
|
|
|
Now, when we want to show a favicon for Google, we need to convert Google -> google.com or
|
|
|
|
some other hostname owned by Google:
|
|
|
|
https://icons.duckduckgo.com/ip3/google.com.ico
|
2022-09-23 13:22:43 +03:00
|
|
|
|
2022-09-28 14:55:46 +03:00
|
|
|
The mapping from source category -> source hostname is stored in "#{@referer_domains_file}" and
|
|
|
|
managed by `Mix.Tasks.GenerateReferrerFavicons.run/1`
|
2022-09-23 13:22:43 +03:00
|
|
|
"""
|
2021-08-04 12:01:50 +03:00
|
|
|
import Plug.Conn
|
2022-08-15 10:41:48 +03:00
|
|
|
alias Plausible.HTTPClient
|
2021-08-04 12:01:50 +03:00
|
|
|
|
2022-09-28 14:55:46 +03:00
|
|
|
# The placeholder icon is read once, at compile time, and embedded in the
# compiled module. Declaring the file as an `@external_resource` makes the
# compiler recompile this module when the icon file changes, so edits to the
# icon are picked up by incremental builds.
@placeholder_icon_location "priv/placeholder_favicon.ico"
@external_resource @placeholder_icon_location
@placeholder_icon File.read!(@placeholder_icon_location)
|
|
|
|
|
2021-08-04 12:01:50 +03:00
|
|
|
@doc """
Builds the options handed to `call/2`.

Reads the JSON file at `@referer_domains_file` from the `:plausible`
application directory, decodes it and returns it under the
`:favicon_domains` key. The incoming options are ignored.

Raises if the file cannot be read or does not contain valid JSON.
"""
def init(_opts) do
  favicon_domains =
    :plausible
    |> Application.app_dir(@referer_domains_file)
    |> File.read!()
    |> Jason.decode!()

  [favicon_domains: favicon_domains]
end
|
|
|
|
|
2022-09-28 14:55:46 +03:00
|
|
|
# Body DuckDuckGo sometimes returns instead of a real icon: the 8-byte PNG
# signature with no image data following it.
@ddg_broken_icon <<137, 80, 78, 71, 13, 10, 26, 10>>

@doc """
Proxies HTTP request to DuckDuckGo favicon service. Swallows hop-by-hop HTTP
headers that should not be forwarded as defined in [RFC 2616](https://www.rfc-editor.org/rfc/rfc2616#section-13.5.1)

Requests whose path is not `/favicon/sources/:source` are returned untouched
so that the rest of the plug pipeline can handle them.

## Placeholder

Cases where we show a placeholder icon instead:

1. In case of network error to DuckDuckGo
2. In case of any status code other than 200 from DuckDuckGo
3. In case of broken image response body from DuckDuckGo
4. In case the `:source` path segment is not valid percent-encoding

I'm not sure why DDG sometimes returns a broken PNG image in their response
but we filter that out. When the icon request fails, we show a placeholder
favicon instead. The placeholder is an emoji from
[https://favicon.io/emoji-favicons/](https://favicon.io/emoji-favicons/)

DuckDuckGo favicon service has some issues with [SVG favicons](https://css-tricks.com/svg-favicons-and-all-the-fun-things-we-can-do-with-them/).
For some reason, they return them with `content-type=image/x-icon` whereas SVG
icons should be returned with `content-type=image/svg+xml`. This Plug detects
when the response body starts with `<svg` and will override the `Content-Type`
to correct it.

## Untrusted source names

The `:source` path segment is controlled by the client. After it is decoded
and mapped to a hostname, it is percent-encoded again before being placed in
the DuckDuckGo URL, so characters such as `/`, `?` or `#` cannot change the
path or query of the upstream request.

## Preventing XSS vulnerabilities

SVGs may contain `<script>` tags, and as these SVGs come from external
sources, we need to prevent untrusted code from running on the browser.

- This Plug sets a strict `Content-Security-Policy` header telling the browser
  not to run scripts.

- This Plug sets `Content-Disposition=attachment` to prevent the SVG from
  rendering when navigating to `/favicon/sources/:domain` directly.

- Browsers do not execute scripts from `<img>` tags, therefore it is safe to
  use `<img src="https://plausible.io/favicon/sources/dummy.site"></img>`
"""
def call(conn, favicon_domains: favicon_domains) do
  case conn.path_info do
    ["favicon", "sources", "placeholder"] ->
      send_placeholder(conn)

    ["favicon", "sources", source] ->
      case fetch_icon(source, favicon_domains) do
        {:ok, %Finch.Response{status: 200, body: body, headers: headers}}
        when body != @ddg_broken_icon ->
          conn
          |> forward_headers(headers)
          |> maybe_override_content_type(body)
          |> prevent_javascript_execution()
          |> send_resp(200, body)
          |> halt()

        # Network errors, non-200 responses, the broken PNG body and
        # undecodable source names all end up here.
        _ ->
          send_placeholder(conn)
      end

    _ ->
      conn
  end
end

# Resolves the raw path segment to a hostname and requests its icon from
# DuckDuckGo. Returns the HTTP client result, or `:error` when the segment
# cannot be decoded.
defp fetch_icon(source, favicon_domains) do
  with {:ok, clean_source} <- decode_source(source) do
    # Known source names (e.g. "Google") map to a hostname; anything else is
    # assumed to already be a hostname.
    domain = Map.get(favicon_domains, clean_source, clean_source)
    HTTPClient.impl().get(icon_url(domain))
  end
end

# Decodes the percent-encoded path segment. `URI.decode_www_form/1` raises
# `ArgumentError` on malformed input (e.g. "%zz"); this is client-supplied
# data, so report `:error` instead of crashing the request.
defp decode_source(source) do
  {:ok, URI.decode_www_form(source)}
rescue
  ArgumentError -> :error
end

# Builds the DuckDuckGo icon URL, percent-encoding everything except
# unreserved characters so the value stays a single path segment.
defp icon_url(domain) do
  "https://icons.duckduckgo.com/ip3/#{URI.encode(domain, &URI.char_unreserved?/1)}.ico"
end
|
2022-08-16 14:31:01 +03:00
|
|
|
|
2022-09-28 14:55:46 +03:00
|
|
|
# Responds with the placeholder icon embedded at compile time and halts the
# pipeline. The response is marked publicly cacheable for 2592000 seconds.
defp send_placeholder(conn) do
  prepared =
    conn
    |> put_resp_content_type("image/x-icon")
    |> put_resp_header("cache-control", "public, max-age=2592000")

  halt(send_resp(prepared, 200, @placeholder_icon))
end
|
|
|
|
|
2022-09-23 13:22:43 +03:00
|
|
|
# Upstream response headers that are passed on to the client; every other
# upstream header is dropped.
@forwarded_headers ["content-type", "cache-control", "expires"]

# Replaces the connection's response headers with the allow-listed subset of
# the upstream `headers`. Note that this overwrites the whole `resp_headers`
# list rather than merging into it. Header names are compared as-is.
defp forward_headers(conn, headers) do
  allowed = Enum.reject(headers, fn {name, _value} -> name not in @forwarded_headers end)

  %Plug.Conn{conn | resp_headers: allowed}
end
|
2022-10-04 13:20:51 +03:00
|
|
|
|
|
|
|
# Sets the content type to `image/svg+xml` when the body starts with `<svg`;
# any other body leaves the connection unchanged.
defp maybe_override_content_type(conn, body) do
  case body do
    <<"<svg", _remainder::binary>> -> put_resp_content_type(conn, "image/svg+xml")
    _other -> conn
  end
end
|
2022-12-02 12:33:24 +03:00
|
|
|
|
|
|
|
# Adds the two response headers that keep a proxied icon from running code:
# a CSP that forbids all scripts, and a content disposition of `attachment`.
defp prevent_javascript_execution(conn) do
  scripts_blocked = put_resp_header(conn, "content-security-policy", "script-src 'none'")

  put_resp_header(scripts_blocked, "content-disposition", "attachment")
end
|
2021-08-04 12:01:50 +03:00
|
|
|
end
|