Verification improvements (#4132)

* Simplify browserless script and catch errors

* Improve fetching body:
    - follow at most 4 redirects
    - rely on Req default timeouts (wait much longer to account for slow
      sites)

* Improve installation check:
  - rely on Req default timeouts, wait longer
  - log service errors as warnings
  - use stealth mode to bypass captchas

* Stop skipping document parsing for bodies larger than 500,000 bytes

* Improve diagnostics for known failures

* Another round of diagnostic improvements

* Format

* Add a test for callback status 500
This commit is contained in:
hq1 2024-05-24 10:40:59 +02:00 committed by GitHub
parent c81cb16933
commit ee61094023
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 194 additions and 45 deletions

View File

@ -17,9 +17,7 @@ defmodule Plausible.Verification.Checks.FetchBody do
Keyword.merge( Keyword.merge(
[ [
base_url: url, base_url: url,
max_redirects: 2, max_redirects: 4,
connect_options: [timeout: 4_000],
receive_timeout: 4_000,
max_retries: 3, max_retries: 3,
retry_log_level: :warning retry_log_level: :warning
], ],
@ -38,7 +36,7 @@ defmodule Plausible.Verification.Checks.FetchBody do
end end
end end
defp extract_document(state, response) when byte_size(response.body) <= 500_000 do defp extract_document(state, response) do
with true <- html?(response), with true <- html?(response),
{:ok, document} <- Floki.parse_document(response.body) do {:ok, document} <- Floki.parse_document(response.body) do
state state
@ -50,10 +48,6 @@ defmodule Plausible.Verification.Checks.FetchBody do
end end
end end
defp extract_document(state, response) when byte_size(response.body) > 500_000 do
state
end
defp html?(%Req.Response{headers: headers}) do defp html?(%Req.Response{headers: headers}) do
headers headers
|> Map.get("content-type", "") |> Map.get("content-type", "")

View File

@ -1,4 +1,6 @@
defmodule Plausible.Verification.Checks.Installation do defmodule Plausible.Verification.Checks.Installation do
require Logger
@verification_script_filename "verification/verify_plausible_installed.js" @verification_script_filename "verification/verify_plausible_installed.js"
@verification_script_path Path.join(:code.priv_dir(:plausible), @verification_script_filename) @verification_script_path Path.join(:code.priv_dir(:plausible), @verification_script_filename)
@external_resource @verification_script_path @external_resource @verification_script_path
@ -34,8 +36,7 @@ defmodule Plausible.Verification.Checks.Installation do
}), }),
retry: :transient, retry: :transient,
retry_log_level: :warning, retry_log_level: :warning,
max_retries: 2, max_retries: 2
receive_timeout: 6_000
] ]
extra_opts = Application.get_env(:plausible, __MODULE__)[:req_opts] || [] extra_opts = Application.get_env(:plausible, __MODULE__)[:req_opts] || []
@ -46,16 +47,22 @@ defmodule Plausible.Verification.Checks.Installation do
%{ %{
status: 200, status: 200,
body: %{ body: %{
"data" => %{"plausibleInstalled" => installed?, "callbackStatus" => callback_status} "data" =>
%{"plausibleInstalled" => installed?, "callbackStatus" => callback_status} = data
} }
}} }}
when is_boolean(installed?) -> when is_boolean(installed?) ->
if data["error"] do
Logger.warning("Browserless error: #{Map.get(data, "error")}")
end
put_diagnostics(state, plausible_installed?: installed?, callback_status: callback_status) put_diagnostics(state, plausible_installed?: installed?, callback_status: callback_status)
{:ok, %{status: status}} -> {:ok, %{status: status}} ->
put_diagnostics(state, plausible_installed?: false, service_error: status) put_diagnostics(state, plausible_installed?: false, service_error: status)
{:error, %{reason: reason}} -> {:error, %{reason: reason}} ->
Logger.warning("Browserless error: #{inspect(reason)}")
put_diagnostics(state, plausible_installed?: false, service_error: reason) put_diagnostics(state, plausible_installed?: false, service_error: reason)
end end
end end
@ -64,6 +71,6 @@ defmodule Plausible.Verification.Checks.Installation do
config = Application.fetch_env!(:plausible, __MODULE__) config = Application.fetch_env!(:plausible, __MODULE__)
token = Keyword.fetch!(config, :token) token = Keyword.fetch!(config, :token)
endpoint = Keyword.fetch!(config, :endpoint) endpoint = Keyword.fetch!(config, :endpoint)
Path.join(endpoint, "function?token=#{token}") Path.join(endpoint, "function?token=#{token}&stealth")
end end
end end

View File

@ -14,7 +14,7 @@ defmodule Plausible.Verification.Diagnostics do
body_fetched?: false, body_fetched?: false,
wordpress_likely?: false, wordpress_likely?: false,
gtm_likely?: false, gtm_likely?: false,
callback_status: -1, callback_status: 0,
proxy_likely?: false, proxy_likely?: false,
data_domain_mismatch?: false, data_domain_mismatch?: false,
wordpress_plugin?: false wordpress_plugin?: false
@ -35,13 +35,14 @@ defmodule Plausible.Verification.Diagnostics do
plausible_installed?: true, plausible_installed?: true,
snippets_found_in_head: 1, snippets_found_in_head: 1,
snippets_found_in_body: 0, snippets_found_in_body: 0,
callback_status: 202, callback_status: callback_status,
snippet_found_after_busting_cache?: false, snippet_found_after_busting_cache?: false,
service_error: nil, service_error: nil,
data_domain_mismatch?: false data_domain_mismatch?: false
}, },
_url _url
) do )
when callback_status in [200, 202] do
%Result{ok?: true} %Result{ok?: true}
end end
@ -150,13 +151,74 @@ defmodule Plausible.Verification.Diagnostics do
def interpret( def interpret(
%__MODULE__{ %__MODULE__{
snippets_found_in_body: 0,
snippets_found_in_head: 1,
plausible_installed?: true, plausible_installed?: true,
callback_status: callback_status, wordpress_likely?: false,
callback_status: -1
},
_url
) do
%Result{
ok?: false,
errors: ["We encountered a problem trying to verify your website"],
recommendations: [
{"The integration may be working but as you're running an older version of our script, we cannot verify it automatically. Please manually check your integration or update to use the latest script",
"https://plausible.io/docs/troubleshoot-integration"}
]
}
end
# Failure case: exactly one snippet in <head>, none in <body>, Plausible detected
# as installed, the site looks like WordPress but the official plugin was not
# detected, and the callback status is -1.
# The browserless verification script reports -1 when the Plausible callback
# fires without a status in its options.
# The result recommends installing the WordPress plugin.
def interpret(
%__MODULE__{
snippets_found_in_body: 0,
snippets_found_in_head: 1,
plausible_installed?: true,
wordpress_likely?: true,
wordpress_plugin?: false,
callback_status: -1
},
_url
) do
%Result{
ok?: false,
errors: ["We encountered a problem trying to verify your website"],
recommendations: [
{"The integration may be working but as you're running an older version of our script, we cannot verify it automatically. Please install our WordPress plugin to use the built-in proxy",
"https://plausible.io/wordpress-analytics-plugin"}
]
}
end
# Failure case: exactly one snippet in <head>, none in <body>, Plausible detected
# as installed, the site looks like WordPress with the official plugin detected,
# and the callback status is -1.
# The browserless verification script reports -1 when the Plausible callback
# fires without a status in its options.
# The result recommends toggling the plugin's proxy and clearing the WordPress cache.
def interpret(
%__MODULE__{
snippets_found_in_body: 0,
snippets_found_in_head: 1,
plausible_installed?: true,
wordpress_likely?: true,
wordpress_plugin?: true,
callback_status: -1
},
_url
) do
%Result{
ok?: false,
errors: ["We encountered a problem trying to verify your website"],
recommendations: [
{"The integration may be working but as you're running an older version of our script, we cannot verify it automatically. Please disable and then enable the proxy in our WordPress plugin, then clear your WordPress cache",
"https://plausible.io/wordpress-analytics-plugin"}
]
}
end
def interpret(
%__MODULE__{
plausible_installed?: true,
callback_status: 0,
proxy_likely?: true proxy_likely?: true
}, },
_url _url
) ) do
when callback_status != 202 do
%Result{ %Result{
ok?: false, ok?: false,
errors: ["We encountered an error with your Plausible proxy"], errors: ["We encountered an error with your Plausible proxy"],
@ -359,6 +421,21 @@ defmodule Plausible.Verification.Diagnostics do
} }
end end
# Success case for non-standard integrations: no snippet was found in <head> or
# <body> (nor after busting the cache), yet Plausible was detected as installed,
# there was no service error, and the callback status was 200 or 202.
# NOTE(review): clause order matters for pattern matching; this clause sits
# directly before the catch-all `interpret(rating, url)` — confirm no earlier
# clause shadows it.
def interpret(
%__MODULE__{
plausible_installed?: true,
snippets_found_in_head: 0,
snippets_found_in_body: 0,
callback_status: callback_status,
snippet_found_after_busting_cache?: false,
service_error: nil
},
_url
)
when callback_status in [200, 202] do
%Result{ok?: true}
end
def interpret(rating, url) do def interpret(rating, url) do
Sentry.capture_message("Unhandled case for site verification: #{url}", Sentry.capture_message("Unhandled case for site verification: #{url}",
extra: %{ extra: %{

View File

@ -5,38 +5,32 @@ export default async function({ page, context }) {
} }
await page.setUserAgent(context.userAgent); await page.setUserAgent(context.userAgent);
await page.goto(context.url); await page.goto(context.url);
await page.waitForNetworkIdle({ idleTime: 1000 });
const plausibleInstalled = await page.evaluate(() => { try {
window.__plausible = true; await page.waitForFunction('window.plausible', { timeout: 4000 });
if (typeof (window.plausible) === "function") { await page.evaluate(() => {
window.__plausible = true;
window.plausible('verification-agent-test', { window.plausible('verification-agent-test', {
callback: function(options) { callback: function(options) {
window.plausibleCallbackResult = () => options && options.status ? options.status : 1; window.plausibleCallbackResult = () => options && options.status ? options.status : -1;
} }
}); });
return true; });
} else {
window.plausibleCallbackResult = () => 0;
return false;
}
});
await page.waitForFunction('window.plausibleCallbackResult', { timeout: 2000 }); try {
const callbackStatus = await page.evaluate(() => { await page.waitForFunction('window.plausibleCallbackResult', { timeout: 3000 });
if (typeof (window.plausibleCallbackResult) === "function") { const status = await page.evaluate(() => { return window.plausibleCallbackResult() });
return window.plausibleCallbackResult(); return { data: { plausibleInstalled: true, callbackStatus: status } };
} else { } catch ({ err, message }) {
return 0; return { data: { plausibleInstalled: true, callbackStatus: 0, error: message } };
} }
}); } catch ({ err, message }) {
return {
return { data: {
data: { plausibleInstalled: false, callbackStatus: 0, error: message
plausibleInstalled, callbackStatus }
}, };
type: "application/json" }
};
} }

View File

@ -105,7 +105,7 @@ defmodule Plausible.Verification.ChecksTest do
assert interpretation.recommendations == [] assert interpretation.recommendations == []
end end
test "fetching will not follow more than 2 redirect" do test "fetching will give up at 5th redirect" do
test = self() test = self()
stub_fetch_body(fn conn -> stub_fetch_body(fn conn ->
@ -120,6 +120,8 @@ defmodule Plausible.Verification.ChecksTest do
result = run_checks() result = run_checks()
assert_receive :redirect_sent
assert_receive :redirect_sent
assert_receive :redirect_sent assert_receive :redirect_sent
assert_receive :redirect_sent assert_receive :redirect_sent
assert_receive :redirect_sent assert_receive :redirect_sent
@ -750,6 +752,81 @@ defmodule Plausible.Verification.ChecksTest do
"https://plausible.io/wordpress-analytics-plugin "} "https://plausible.io/wordpress-analytics-plugin "}
] ]
end end
# Callback status -1 on a plain (non-WordPress) page must yield the
# "older version of our script" recommendation pointing at the troubleshooting docs.
# NOTE(review): `plausible_installed(true, -1)` presumably builds the installation
# stub payload (installed flag, callback status) — helper is not visible here.
test "callback handling not found for non-wordpress site" do
stub_fetch_body(200, @normal_body)
stub_installation(200, plausible_installed(true, -1))
result = run_checks()
interpretation = Checks.interpret_diagnostics(result)
assert interpretation.errors == ["We encountered a problem trying to verify your website"]
assert interpretation.recommendations == [
{"The integration may be working but as you're running an older version of our script, we cannot verify it automatically. Please manually check your integration or update to use the latest script",
"https://plausible.io/docs/troubleshoot-integration"}
]
end
# Callback status -1 on a WordPress-looking page must recommend installing
# the WordPress plugin.
# NOTE(review): `@normal_body_wordpress` is presumably a WordPress fixture without
# the official plugin — fixture is not visible here.
test "callback handling not found for wordpress site" do
stub_fetch_body(200, @normal_body_wordpress)
stub_installation(200, plausible_installed(true, -1))
result = run_checks()
interpretation = Checks.interpret_diagnostics(result)
assert interpretation.errors == ["We encountered a problem trying to verify your website"]
assert interpretation.recommendations == [
{"The integration may be working but as you're running an older version of our script, we cannot verify it automatically. Please install our WordPress plugin to use the built-in proxy",
"https://plausible.io/wordpress-analytics-plugin"}
]
end
# Callback status -1 on a WordPress page using the fixture named for the official
# plugin must recommend toggling the plugin's proxy and clearing the WordPress cache.
# NOTE(review): fixture contents are not visible here; behaviour inferred from the
# fixture name and the asserted recommendation.
test "callback handling not found for wordpress site using our plugin" do
stub_fetch_body(200, @normal_body_wordpress_official_plugin)
stub_installation(200, plausible_installed(true, -1))
result = run_checks()
interpretation = Checks.interpret_diagnostics(result)
assert interpretation.errors == ["We encountered a problem trying to verify your website"]
assert interpretation.recommendations == [
{
"The integration may be working but as you're running an older version of our script, we cannot verify it automatically. Please disable and then enable the proxy in our WordPress plugin, then clear your WordPress cache",
"https://plausible.io/wordpress-analytics-plugin"
}
]
end
# When the fetched body uses the no-snippet fixture but the installation stub
# reports installed with callback status 202, verification must succeed with
# no errors and no recommendations.
test "non-standard integration where the snippet cannot be found but it works ok in headless" do
stub_fetch_body(200, @body_no_snippet)
stub_installation(200, plausible_installed(true, 202))
result = run_checks()
interpretation = Checks.interpret_diagnostics(result)
assert interpretation.ok?
assert interpretation.errors == []
assert interpretation.recommendations == []
end
# A callback status outside the accepted set (here 500) must not be treated as
# success, even though the installation stub reports Plausible as installed.
# NOTE(review): the `interpret` clause that produces this generic failure message
# is not visible in this view.
test "fails due to callback status being something unlikely like 500" do
stub_fetch_body(200, @normal_body)
stub_installation(200, plausible_installed(true, 500))
result = run_checks()
interpretation = Checks.interpret_diagnostics(result)
refute interpretation.ok?
assert interpretation.errors == ["Your Plausible integration is not working"]
assert interpretation.recommendations == [
{"Please manually check your integration to make sure that the Plausible snippet has been inserted correctly",
"https://plausible.io/docs/troubleshoot-integration"}
]
end
end end
defp run_checks(extra_opts \\ []) do defp run_checks(extra_opts \\ []) do