AK: Fix url host parsing check for 'ends in a number'

I misunderstood the spec step for checking whether the host 'ends in a
number'. We can't simply check whether it ends with a digit; the check
is actually an algorithm, which is required to avoid treating hosts that
merely end with a digit as IPv4 hosts.

Implement this missing step, and add a test to cover this.
This commit is contained in:
Shannon Booth 2023-07-25 19:43:00 +12:00 committed by Tim Flynn
parent 715b6f868f
commit 177b04dcfc
Notes: sideshowbarker 2024-07-17 05:21:12 +09:00
2 changed files with 42 additions and 1 deletions

View File

@ -518,6 +518,37 @@ static Optional<Array<u16, 8>> parse_ipv6_address(StringView input)
return address;
}
// https://url.spec.whatwg.org/#ends-in-a-number-checker
// Returns true when the last dot-separated label of `input` (ignoring a single trailing empty label)
// is either made up solely of ASCII digits or is accepted by parse_ipv4_number(); false otherwise.
static bool ends_in_a_number_checker(StringView input)
{
    // 1. Let parts be the result of strictly splitting input on U+002E (.).
    auto parts = input.split_view("."sv, SplitBehavior::KeepEmpty);

    // NOTE: A strict split per the spec always yields at least one item, and an empty input ends up
    //       returning false in step 2.1. Defensively return the same result if split_view() hands
    //       back no items at all, so that last() below is never called on an empty list.
    if (parts.is_empty())
        return false;

    // 2. If the last item in parts is the empty string, then:
    if (parts.last().is_empty()) {
        // 1. If parts's size is 1, then return false.
        if (parts.size() == 1)
            return false;

        // 2. Remove the last item from parts.
        parts.take_last();
    }

    // 3. Let last be the last item in parts.
    auto last = parts.last();

    // 4. If last is non-empty and contains only ASCII digits, then return true.
    if (!last.is_empty() && all_of(last, is_ascii_digit))
        return true;

    // 5. If parsing last as an IPv4 number does not return failure, then return true.
    if (parse_ipv4_number(last).has_value())
        return true;

    // 6. Return false.
    return false;
}
// https://url.spec.whatwg.org/#concept-host-parser
// NOTE: This is a very bare-bones implementation.
static Optional<DeprecatedString> parse_host(StringView input, bool is_not_special = false)
@ -565,7 +596,7 @@ static Optional<DeprecatedString> parse_host(StringView input, bool is_not_speci
}
// 8. If asciiDomain ends in a number, then return the result of IPv4 parsing asciiDomain.
if (is_ascii_digit(ascii_domain[ascii_domain.length() - 1])) {
if (ends_in_a_number_checker(ascii_domain)) {
auto ipv4_host = parse_ipv4_address(ascii_domain);
if (!ipv4_host.has_value())
return {};

View File

@ -38,6 +38,16 @@ TEST_CASE(basic)
EXPECT(url.query().is_null());
EXPECT(url.fragment().is_null());
}
{
URL url("https://www.serenityos.org1/index.html"sv);
EXPECT_EQ(url.is_valid(), true);
EXPECT_EQ(url.scheme(), "https");
EXPECT_EQ(url.host(), "www.serenityos.org1");
EXPECT_EQ(url.port_or_default(), 443);
EXPECT_EQ(url.serialize_path(), "/index.html");
EXPECT(url.query().is_null());
EXPECT(url.fragment().is_null());
}
{
URL url("https://localhost:1234/~anon/test/page.html"sv);
EXPECT_EQ(url.is_valid(), true);