Verification improvements (#4132)

* Simplify browserless script and catch errors

* Improve fetching body:
    - follow up to 4 redirects
    - rely on Req default timeouts (wait much longer to account for slow
      sites)

* Improve installation check:
  - rely on Req default timeouts, wait longer
  - log service errors as warnings
  - use stealth mode to bypass captchas

* Stop cutting off bodies that are too large

* Improve diagnostics for known failures

* Another round of diagnostic improvements

* Format

* Add a test for callback status 500
This commit is contained in:
hq1 2024-05-24 10:40:59 +02:00 committed by GitHub
parent c81cb16933
commit ee61094023
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 194 additions and 45 deletions

View File

@ -17,9 +17,7 @@ defmodule Plausible.Verification.Checks.FetchBody do
Keyword.merge(
[
base_url: url,
max_redirects: 2,
connect_options: [timeout: 4_000],
receive_timeout: 4_000,
max_redirects: 4,
max_retries: 3,
retry_log_level: :warning
],
@ -38,7 +36,7 @@ defmodule Plausible.Verification.Checks.FetchBody do
end
end
defp extract_document(state, response) when byte_size(response.body) <= 500_000 do
defp extract_document(state, response) do
with true <- html?(response),
{:ok, document} <- Floki.parse_document(response.body) do
state
@ -50,10 +48,6 @@ defmodule Plausible.Verification.Checks.FetchBody do
end
end
defp extract_document(state, response) when byte_size(response.body) > 500_000 do
state
end
defp html?(%Req.Response{headers: headers}) do
headers
|> Map.get("content-type", "")

View File

@ -1,4 +1,6 @@
defmodule Plausible.Verification.Checks.Installation do
require Logger
@verification_script_filename "verification/verify_plausible_installed.js"
@verification_script_path Path.join(:code.priv_dir(:plausible), @verification_script_filename)
@external_resource @verification_script_path
@ -34,8 +36,7 @@ defmodule Plausible.Verification.Checks.Installation do
}),
retry: :transient,
retry_log_level: :warning,
max_retries: 2,
receive_timeout: 6_000
max_retries: 2
]
extra_opts = Application.get_env(:plausible, __MODULE__)[:req_opts] || []
@ -46,16 +47,22 @@ defmodule Plausible.Verification.Checks.Installation do
%{
status: 200,
body: %{
"data" => %{"plausibleInstalled" => installed?, "callbackStatus" => callback_status}
"data" =>
%{"plausibleInstalled" => installed?, "callbackStatus" => callback_status} = data
}
}}
when is_boolean(installed?) ->
if data["error"] do
Logger.warning("Browserless error: #{Map.get(data, "error")}")
end
put_diagnostics(state, plausible_installed?: installed?, callback_status: callback_status)
{:ok, %{status: status}} ->
put_diagnostics(state, plausible_installed?: false, service_error: status)
{:error, %{reason: reason}} ->
Logger.warning("Browserless error: #{inspect(reason)}")
put_diagnostics(state, plausible_installed?: false, service_error: reason)
end
end
@ -64,6 +71,6 @@ defmodule Plausible.Verification.Checks.Installation do
config = Application.fetch_env!(:plausible, __MODULE__)
token = Keyword.fetch!(config, :token)
endpoint = Keyword.fetch!(config, :endpoint)
Path.join(endpoint, "function?token=#{token}")
Path.join(endpoint, "function?token=#{token}&stealth")
end
end

View File

@ -14,7 +14,7 @@ defmodule Plausible.Verification.Diagnostics do
body_fetched?: false,
wordpress_likely?: false,
gtm_likely?: false,
callback_status: -1,
callback_status: 0,
proxy_likely?: false,
data_domain_mismatch?: false,
wordpress_plugin?: false
@ -35,13 +35,14 @@ defmodule Plausible.Verification.Diagnostics do
plausible_installed?: true,
snippets_found_in_head: 1,
snippets_found_in_body: 0,
callback_status: 202,
callback_status: callback_status,
snippet_found_after_busting_cache?: false,
service_error: nil,
data_domain_mismatch?: false
},
_url
) do
)
when callback_status in [200, 202] do
%Result{ok?: true}
end
@ -150,13 +151,74 @@ defmodule Plausible.Verification.Diagnostics do
def interpret(
%__MODULE__{
snippets_found_in_body: 0,
snippets_found_in_head: 1,
plausible_installed?: true,
callback_status: callback_status,
wordpress_likely?: false,
callback_status: -1
},
_url
) do
%Result{
ok?: false,
errors: ["We encountered a problem trying to verify your website"],
recommendations: [
{"The integration may be working but as you're running an older version of our script, we cannot verify it automatically. Please manually check your integration or update to use the latest script",
"https://plausible.io/docs/troubleshoot-integration"}
]
}
end
# Matches diagnostics with exactly one snippet in <head>, none in <body>,
# `plausible_installed?: true`, a site flagged as likely WordPress that does
# not use the official plugin (`wordpress_plugin?: false`), and a callback
# status of -1.
# Returns a failing result recommending installation of the WordPress plugin
# so its built-in proxy can be used.
# NOTE(review): -1 appears to mean the script's callback ran without
# reporting a status — confirm against the verification script.
def interpret(
%__MODULE__{
snippets_found_in_body: 0,
snippets_found_in_head: 1,
plausible_installed?: true,
wordpress_likely?: true,
wordpress_plugin?: false,
callback_status: -1
},
_url
) do
%Result{
ok?: false,
errors: ["We encountered a problem trying to verify your website"],
recommendations: [
{"The integration may be working but as you're running an older version of our script, we cannot verify it automatically. Please install our WordPress plugin to use the built-in proxy",
"https://plausible.io/wordpress-analytics-plugin"}
]
}
end
# Matches diagnostics with exactly one snippet in <head>, none in <body>,
# `plausible_installed?: true`, a site flagged as likely WordPress that
# already uses the official plugin (`wordpress_plugin?: true`), and a callback
# status of -1.
# Returns a failing result recommending that the plugin's proxy be disabled
# and re-enabled, followed by clearing the WordPress cache.
# NOTE(review): -1 appears to mean the script's callback ran without
# reporting a status — confirm against the verification script.
def interpret(
%__MODULE__{
snippets_found_in_body: 0,
snippets_found_in_head: 1,
plausible_installed?: true,
wordpress_likely?: true,
wordpress_plugin?: true,
callback_status: -1
},
_url
) do
%Result{
ok?: false,
errors: ["We encountered a problem trying to verify your website"],
recommendations: [
{"The integration may be working but as you're running an older version of our script, we cannot verify it automatically. Please disable and then enable the proxy in our WordPress plugin, then clear your WordPress cache",
"https://plausible.io/wordpress-analytics-plugin"}
]
}
end
def interpret(
%__MODULE__{
plausible_installed?: true,
callback_status: 0,
proxy_likely?: true
},
_url
)
when callback_status != 202 do
) do
%Result{
ok?: false,
errors: ["We encountered an error with your Plausible proxy"],
@ -359,6 +421,21 @@ defmodule Plausible.Verification.Diagnostics do
}
end
# Success case for integrations where no snippet was found in either <head> or
# <body> (and none after busting the cache), yet `plausible_installed?` is
# true, there is no service error, and the callback status is 200 or 202.
# The URL argument is ignored.
def interpret(
%__MODULE__{
plausible_installed?: true,
snippets_found_in_head: 0,
snippets_found_in_body: 0,
callback_status: callback_status,
snippet_found_after_busting_cache?: false,
service_error: nil
},
_url
)
when callback_status in [200, 202] do
%Result{ok?: true}
end
def interpret(rating, url) do
Sentry.capture_message("Unhandled case for site verification: #{url}",
extra: %{

View File

@ -5,38 +5,32 @@ export default async function({ page, context }) {
}
await page.setUserAgent(context.userAgent);
await page.goto(context.url);
await page.waitForNetworkIdle({ idleTime: 1000 });
const plausibleInstalled = await page.evaluate(() => {
window.__plausible = true;
if (typeof (window.plausible) === "function") {
try {
await page.waitForFunction('window.plausible', { timeout: 4000 });
await page.evaluate(() => {
window.__plausible = true;
window.plausible('verification-agent-test', {
callback: function(options) {
window.plausibleCallbackResult = () => options && options.status ? options.status : 1;
window.plausibleCallbackResult = () => options && options.status ? options.status : -1;
}
});
return true;
} else {
window.plausibleCallbackResult = () => 0;
return false;
}
});
});
await page.waitForFunction('window.plausibleCallbackResult', { timeout: 2000 });
const callbackStatus = await page.evaluate(() => {
if (typeof (window.plausibleCallbackResult) === "function") {
return window.plausibleCallbackResult();
} else {
return 0;
try {
await page.waitForFunction('window.plausibleCallbackResult', { timeout: 3000 });
const status = await page.evaluate(() => { return window.plausibleCallbackResult() });
return { data: { plausibleInstalled: true, callbackStatus: status } };
} catch ({ err, message }) {
return { data: { plausibleInstalled: true, callbackStatus: 0, error: message } };
}
});
return {
data: {
plausibleInstalled, callbackStatus
},
type: "application/json"
};
} catch ({ err, message }) {
return {
data: {
plausibleInstalled: false, callbackStatus: 0, error: message
}
};
}
}

View File

@ -105,7 +105,7 @@ defmodule Plausible.Verification.ChecksTest do
assert interpretation.recommendations == []
end
test "fetching will not follow more than 2 redirect" do
test "fetching will give up at 5th redirect" do
test = self()
stub_fetch_body(fn conn ->
@ -120,6 +120,8 @@ defmodule Plausible.Verification.ChecksTest do
result = run_checks()
assert_receive :redirect_sent
assert_receive :redirect_sent
assert_receive :redirect_sent
assert_receive :redirect_sent
assert_receive :redirect_sent
@ -750,6 +752,81 @@ defmodule Plausible.Verification.ChecksTest do
"https://plausible.io/wordpress-analytics-plugin "}
]
end
test "callback handling not found for non-wordpress site" do
stub_fetch_body(200, @normal_body)
stub_installation(200, plausible_installed(true, -1))
result = run_checks()
interpretation = Checks.interpret_diagnostics(result)
assert interpretation.errors == ["We encountered a problem trying to verify your website"]
assert interpretation.recommendations == [
{"The integration may be working but as you're running an older version of our script, we cannot verify it automatically. Please manually check your integration or update to use the latest script",
"https://plausible.io/docs/troubleshoot-integration"}
]
end
test "callback handling not found for wordpress site" do
stub_fetch_body(200, @normal_body_wordpress)
stub_installation(200, plausible_installed(true, -1))
result = run_checks()
interpretation = Checks.interpret_diagnostics(result)
assert interpretation.errors == ["We encountered a problem trying to verify your website"]
assert interpretation.recommendations == [
{"The integration may be working but as you're running an older version of our script, we cannot verify it automatically. Please install our WordPress plugin to use the built-in proxy",
"https://plausible.io/wordpress-analytics-plugin"}
]
end
test "callback handling not found for wordpress site using our plugin" do
stub_fetch_body(200, @normal_body_wordpress_official_plugin)
stub_installation(200, plausible_installed(true, -1))
result = run_checks()
interpretation = Checks.interpret_diagnostics(result)
assert interpretation.errors == ["We encountered a problem trying to verify your website"]
assert interpretation.recommendations == [
{
"The integration may be working but as you're running an older version of our script, we cannot verify it automatically. Please disable and then enable the proxy in our WordPress plugin, then clear your WordPress cache",
"https://plausible.io/wordpress-analytics-plugin"
}
]
end
test "non-standard integration where the snippet cannot be found but it works ok in headless" do
stub_fetch_body(200, @body_no_snippet)
stub_installation(200, plausible_installed(true, 202))
result = run_checks()
interpretation = Checks.interpret_diagnostics(result)
assert interpretation.ok?
assert interpretation.errors == []
assert interpretation.recommendations == []
end
test "fails due to callback status being something unlikely like 500" do
stub_fetch_body(200, @normal_body)
stub_installation(200, plausible_installed(true, 500))
result = run_checks()
interpretation = Checks.interpret_diagnostics(result)
refute interpretation.ok?
assert interpretation.errors == ["Your Plausible integration is not working"]
assert interpretation.recommendations == [
{"Please manually check your integration to make sure that the Plausible snippet has been inserted correctly",
"https://plausible.io/docs/troubleshoot-integration"}
]
end
end
defp run_checks(extra_opts \\ []) do