From 8dc25dffc2807d6ac4858ba96489eb0044bc8cdf Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Sat, 28 Oct 2023 09:31:24 -0400 Subject: [PATCH] LibWebView: Protect URL highlighting against partially-typed URLs The current helpers assume that a valid URL is a full URL (i.e. contains the "://" separator between the scheme and domain). This isn't true, as "file:" alone is parsed as a valid URL. We must also avoid simply searching for the parsed public suffix in the original URL string. For example, "com" is a public suffix. If we search for that in the URL "com.com", we will think the public suffix starts at index 0. --- Meta/Lagom/CMakeLists.txt | 1 + Tests/CMakeLists.txt | 1 + Tests/LibWebView/CMakeLists.txt | 7 +++ Tests/LibWebView/TestWebViewURL.cpp | 77 +++++++++++++++++++++++++++ Userland/Libraries/LibWebView/URL.cpp | 42 +++++++++------ 5 files changed, 113 insertions(+), 15 deletions(-) create mode 100644 Tests/LibWebView/CMakeLists.txt create mode 100644 Tests/LibWebView/TestWebViewURL.cpp diff --git a/Meta/Lagom/CMakeLists.txt b/Meta/Lagom/CMakeLists.txt index 6b7b83e9fef..7c54d280dd4 100644 --- a/Meta/Lagom/CMakeLists.txt +++ b/Meta/Lagom/CMakeLists.txt @@ -651,6 +651,7 @@ if (BUILD_LAGOM) LibTimeZone LibUnicode LibVideo + LibWebView LibXML ) if (ENABLE_LAGOM_LIBWEB) diff --git a/Tests/CMakeLists.txt b/Tests/CMakeLists.txt index c8b3c504223..9dfe755bbfb 100644 --- a/Tests/CMakeLists.txt +++ b/Tests/CMakeLists.txt @@ -26,6 +26,7 @@ add_subdirectory(LibUnicode) add_subdirectory(LibVideo) add_subdirectory(LibWasm) add_subdirectory(LibWeb) +add_subdirectory(LibWebView) add_subdirectory(LibXML) add_subdirectory(LibCrypto) add_subdirectory(LibTLS) diff --git a/Tests/LibWebView/CMakeLists.txt b/Tests/LibWebView/CMakeLists.txt new file mode 100644 index 00000000000..3022b1113a4 --- /dev/null +++ b/Tests/LibWebView/CMakeLists.txt @@ -0,0 +1,7 @@ +set(TEST_SOURCES + TestWebViewURL.cpp +) + +foreach(source IN LISTS TEST_SOURCES) + serenity_test("${source}" LibWebView LIBS LibWebView) +endforeach() diff --git a/Tests/LibWebView/TestWebViewURL.cpp b/Tests/LibWebView/TestWebViewURL.cpp new file mode 100644 index 00000000000..3b5ca7e5674 --- /dev/null +++ b/Tests/LibWebView/TestWebViewURL.cpp @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2023, Tim Flynn + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include + +static void compare_url_parts(StringView url, WebView::URLParts const& expected) +{ + auto result = WebView::break_url_into_parts(url); + VERIFY(result.has_value()); + + EXPECT_EQ(result->scheme_and_subdomain, expected.scheme_and_subdomain); + EXPECT_EQ(result->effective_tld_plus_one, expected.effective_tld_plus_one); + EXPECT_EQ(result->remainder, expected.remainder); +} + +TEST_CASE(invalid_url) +{ + EXPECT(!WebView::break_url_into_parts(""sv).has_value()); + EXPECT(!WebView::break_url_into_parts(":"sv).has_value()); + EXPECT(!WebView::break_url_into_parts(":/"sv).has_value()); + EXPECT(!WebView::break_url_into_parts("://"sv).has_value()); + + EXPECT(!WebView::break_url_into_parts("f"sv).has_value()); + EXPECT(!WebView::break_url_into_parts("fi"sv).has_value()); + EXPECT(!WebView::break_url_into_parts("fil"sv).has_value()); + EXPECT(!WebView::break_url_into_parts("file"sv).has_value()); + EXPECT(!WebView::break_url_into_parts("file:"sv).has_value()); + EXPECT(!WebView::break_url_into_parts("file:/"sv).has_value()); + + EXPECT(!WebView::break_url_into_parts("h"sv).has_value()); + EXPECT(!WebView::break_url_into_parts("ht"sv).has_value()); + EXPECT(!WebView::break_url_into_parts("htt"sv).has_value()); + EXPECT(!WebView::break_url_into_parts("http"sv).has_value()); + EXPECT(!WebView::break_url_into_parts("http:"sv).has_value()); + EXPECT(!WebView::break_url_into_parts("http:/"sv).has_value()); + EXPECT(!WebView::break_url_into_parts("http://"sv).has_value()); + + EXPECT(!WebView::break_url_into_parts("https"sv).has_value()); + EXPECT(!WebView::break_url_into_parts("https:"sv).has_value()); + EXPECT(!WebView::break_url_into_parts("https:/"sv).has_value()); + EXPECT(!WebView::break_url_into_parts("https://"sv).has_value()); +} + +TEST_CASE(file_url) +{ + compare_url_parts("file://"sv, { "file://"sv, ""sv, {} }); + compare_url_parts("file://a"sv, { "file://"sv, "a"sv, {} }); + compare_url_parts("file:///a"sv, { "file://"sv, "/a"sv, {} }); + compare_url_parts("file:///abc"sv, { "file://"sv, "/abc"sv, {} }); +} + +TEST_CASE(http_url) +{ + compare_url_parts("http://a"sv, { "http://"sv, "a"sv, {} }); + compare_url_parts("http://abc"sv, { "http://"sv, "abc"sv, {} }); + compare_url_parts("http://com"sv, { "http://"sv, "com"sv, {} }); + compare_url_parts("http://abc."sv, { "http://"sv, "abc."sv, {} }); + compare_url_parts("http://abc.c"sv, { "http://"sv, "abc.c"sv, {} }); + compare_url_parts("http://abc.com"sv, { "http://"sv, "abc.com"sv, {} }); + compare_url_parts("http://abc.com."sv, { "http://"sv, "abc.com."sv, {} }); + compare_url_parts("http://abc.com."sv, { "http://"sv, "abc.com."sv, {} }); + compare_url_parts("http://abc.com.org"sv, { "http://abc."sv, "com.org"sv, {} }); + compare_url_parts("http://abc.com.org.gov"sv, { "http://abc.com."sv, "org.gov"sv, {} }); + + compare_url_parts("http://abc/path"sv, { "http://"sv, "abc"sv, "/path"sv }); + compare_url_parts("http://abc#anchor"sv, { "http://"sv, "abc"sv, "#anchor"sv }); + compare_url_parts("http://abc?query"sv, { "http://"sv, "abc"sv, "?query"sv }); + + compare_url_parts("http://abc.def.com"sv, { "http://abc."sv, "def.com"sv, {} }); + compare_url_parts("http://abc.def.com/path"sv, { "http://abc."sv, "def.com"sv, "/path"sv }); + compare_url_parts("http://abc.def.com#anchor"sv, { "http://abc."sv, "def.com"sv, "#anchor"sv }); + compare_url_parts("http://abc.def.com?query"sv, { "http://abc."sv, "def.com"sv, "?query"sv }); +} diff --git a/Userland/Libraries/LibWebView/URL.cpp b/Userland/Libraries/LibWebView/URL.cpp index 780907d939e..7df85d15cde 100644 --- a/Userland/Libraries/LibWebView/URL.cpp +++ b/Userland/Libraries/LibWebView/URL.cpp @@ -107,27 +107,35 @@ static URLParts break_file_url_into_parts(URL const& url, StringView url_string) static URLParts break_web_url_into_parts(URL const& url, StringView url_string) { - auto host = MUST(url.serialized_host()); + auto scheme = url_string.substring_view(0, url.scheme().bytes_as_string_view().length() + "://"sv.length()); + auto url_without_scheme = url_string.substring_view(scheme.length()); - auto public_suffix = get_public_suffix(host); - if (!public_suffix.has_value()) - return {}; + StringView domain; + StringView remainder; - auto public_suffix_start = url_string.find(*public_suffix); - auto public_suffix_end = public_suffix_start.value() + public_suffix->bytes_as_string_view().length(); + if (auto index = url_without_scheme.find_any_of("/?#"sv); index.has_value()) { + domain = url_without_scheme.substring_view(0, *index); + remainder = url_without_scheme.substring_view(*index); + } else { + domain = url_without_scheme; + } - auto scheme_and_subdomain = url_string.substring_view(0, *public_suffix_start); - scheme_and_subdomain = scheme_and_subdomain.trim("."sv, TrimMode::Right); + auto public_suffix = get_public_suffix(domain); + if (!public_suffix.has_value() || !domain.ends_with(*public_suffix)) + return { scheme, domain, remainder }; - if (auto index = scheme_and_subdomain.find_last('.'); index.has_value()) - scheme_and_subdomain = scheme_and_subdomain.substring_view(0, *index + 1); - else - scheme_and_subdomain = scheme_and_subdomain.substring_view(0, url.scheme().bytes_as_string_view().length() + "://"sv.length()); + auto subdomain = domain.substring_view(0, domain.length() - public_suffix->bytes_as_string_view().length()); + subdomain = subdomain.trim("."sv, TrimMode::Right); - auto effective_tld_plus_one = url_string.substring_view(scheme_and_subdomain.length(), public_suffix_end - scheme_and_subdomain.length()); - auto remainder = url_string.substring_view(public_suffix_end); + if (auto index = subdomain.find_last('.'); index.has_value()) { + subdomain = subdomain.substring_view(0, *index + 1); + domain = domain.substring_view(subdomain.length()); + } else { + subdomain = {}; + } - return URLParts { scheme_and_subdomain, effective_tld_plus_one, remainder }; + auto scheme_and_subdomain = url_string.substring_view(0, scheme.length() + subdomain.length()); + return { scheme_and_subdomain, domain, remainder }; } Optional break_url_into_parts(StringView url_string) @@ -136,6 +144,10 @@ Optional break_url_into_parts(StringView url_string) if (!url.is_valid()) return {}; + auto scheme_length = url.scheme().bytes_as_string_view().length(); + if (!url_string.substring_view(scheme_length).starts_with("://"sv)) + return {}; + if (url.scheme() == "file"sv) return break_file_url_into_parts(url, url_string); if (url.scheme().is_one_of("http"sv, "https"sv, "gemini"sv))