mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-12-26 20:55:35 +03:00
AK: Implement IPv6 host parsing in URLParser
This is just a straight (and fairly inefficient) implementation of IPv6 parsing and serialization from the URL spec. Note that we don't use AK::IPv6Address here because the URL spec requires a specific serialization behavior.
This commit is contained in:
parent
545fdc849f
commit
f0ec104131
Notes:
sideshowbarker
2024-07-16 23:52:10 +09:00
Author: https://github.com/awesomekling Commit: https://github.com/SerenityOS/serenity/commit/f0ec104131 Pull-request: https://github.com/SerenityOS/serenity/pull/20065 Reviewed-by: https://github.com/LucasChollet
306
AK/URLParser.cpp
306
AK/URLParser.cpp
@ -50,17 +50,319 @@ static Optional<DeprecatedString> parse_ipv4_address(StringView input)
|
||||
return input;
|
||||
}
|
||||
|
||||
// https://url.spec.whatwg.org/#concept-ipv6-serializer
|
||||
static ErrorOr<String> serialize_ipv6_address(Array<u16, 8> const& address)
|
||||
{
|
||||
// 1. Let output be the empty string.
|
||||
StringBuilder output;
|
||||
|
||||
// 2. Let compress be an index to the first IPv6 piece in the first longest sequences of address’s IPv6 pieces that are 0.
|
||||
// 3. If there is no sequence of address’s IPv6 pieces that are 0 that is longer than 1, then set compress to null.
|
||||
Optional<size_t> compress;
|
||||
size_t longest_sequence_length = 0;
|
||||
size_t current_sequence_length = 0;
|
||||
size_t current_sequence_start = 0;
|
||||
for (size_t i = 0; i < 8; ++i) {
|
||||
if (address[i] == 0) {
|
||||
if (current_sequence_length == 0)
|
||||
current_sequence_start = i;
|
||||
++current_sequence_length;
|
||||
} else {
|
||||
if (current_sequence_length > longest_sequence_length) {
|
||||
longest_sequence_length = current_sequence_length;
|
||||
compress = current_sequence_start;
|
||||
}
|
||||
current_sequence_length = 0;
|
||||
}
|
||||
}
|
||||
// 4. Let ignore0 be false.
|
||||
auto ignore0 = false;
|
||||
|
||||
// 5. For each pieceIndex in the range 0 to 7, inclusive:
|
||||
for (size_t piece_index = 0; piece_index <= 7; ++piece_index) {
|
||||
// 1. If ignore0 is true and address[pieceIndex] is 0, then continue.
|
||||
if (ignore0 && address[piece_index] == 0)
|
||||
continue;
|
||||
|
||||
// 2. Otherwise, if ignore0 is true, set ignore0 to false.
|
||||
if (ignore0)
|
||||
ignore0 = false;
|
||||
|
||||
// 3. If compress is pieceIndex, then:
|
||||
if (compress == piece_index) {
|
||||
// 1. Let separator be "::" if pieceIndex is 0, and U+003A (:) otherwise.
|
||||
auto separator = piece_index == 0 ? "::"sv : ":"sv;
|
||||
|
||||
// 2. Append separator to output.
|
||||
output.append(separator);
|
||||
|
||||
// 3. Set ignore0 to true and continue.
|
||||
ignore0 = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
// 4. Append address[pieceIndex], represented as the shortest possible lowercase hexadecimal number, to output.
|
||||
output.appendff("{:x}", address[piece_index]);
|
||||
|
||||
// 5. If pieceIndex is not 7, then append U+003A (:) to output.
|
||||
if (piece_index != 7)
|
||||
output.append(':');
|
||||
}
|
||||
|
||||
// 6. Return output.
|
||||
return output.to_string();
|
||||
}
|
||||
|
||||
// https://url.spec.whatwg.org/#concept-ipv6-parser
|
||||
static Optional<Array<u16, 8>> parse_ipv6_address(StringView input)
|
||||
{
|
||||
// 1. Let address be a new IPv6 address whose IPv6 pieces are all 0.
|
||||
Array<u16, 8> address {};
|
||||
|
||||
// 2. Let pieceIndex be 0.
|
||||
size_t piece_index = 0;
|
||||
|
||||
// 3. Let compress be null.
|
||||
Optional<size_t> compress;
|
||||
|
||||
Vector<u32> code_points;
|
||||
code_points.ensure_capacity(input.length());
|
||||
for (auto code_point : Utf8View { input }) {
|
||||
code_points.append(code_point);
|
||||
}
|
||||
|
||||
// 4. Let pointer be a pointer for input.
|
||||
size_t pointer = 0;
|
||||
auto c = [&]() -> u32 {
|
||||
if (pointer >= code_points.size())
|
||||
return end_of_file;
|
||||
return code_points[pointer];
|
||||
};
|
||||
|
||||
auto remaining = [&]() -> ReadonlySpan<u32> {
|
||||
if ((pointer + 1) >= code_points.size())
|
||||
return {};
|
||||
return code_points.span().slice(pointer + 1);
|
||||
};
|
||||
|
||||
// 5. If c is U+003A (:), then:
|
||||
if (c() == ':') {
|
||||
// 1. If remaining does not start with U+003A (:), IPv6-invalid-compression validation error, return failure.
|
||||
if (remaining().is_empty() || remaining()[0] != ':') {
|
||||
report_validation_error();
|
||||
return {};
|
||||
}
|
||||
|
||||
// 2. Increase pointer by 2.
|
||||
pointer += 2;
|
||||
|
||||
// 3. Increase pieceIndex by 1 and then set compress to pieceIndex.
|
||||
++piece_index;
|
||||
compress = piece_index;
|
||||
}
|
||||
|
||||
// 6. While c is not the EOF code point:
|
||||
while (c() != end_of_file) {
|
||||
// 1. If pieceIndex is 8, IPv6-too-many-pieces validation error, return failure.
|
||||
if (piece_index == 8) {
|
||||
report_validation_error();
|
||||
return {};
|
||||
}
|
||||
|
||||
// 2. If c is U+003A (:), then:
|
||||
if (c() == ':') {
|
||||
// 1. If compress is non-null, IPv6-multiple-compression validation error, return failure.
|
||||
if (compress.has_value()) {
|
||||
report_validation_error();
|
||||
return {};
|
||||
}
|
||||
|
||||
// 2. Increase pointer and pieceIndex by 1, set compress to pieceIndex, and then continue.
|
||||
++pointer;
|
||||
++piece_index;
|
||||
compress = piece_index;
|
||||
continue;
|
||||
}
|
||||
|
||||
// 3. Let value and length be 0.
|
||||
u32 value = 0;
|
||||
size_t length = 0;
|
||||
|
||||
// 4. While length is less than 4 and c is an ASCII hex digit,
|
||||
// set value to value × 0x10 + c interpreted as hexadecimal number,
|
||||
// and increase pointer and length by 1.
|
||||
while (length < 4 && is_ascii_hex_digit(c())) {
|
||||
value = value * 0x10 + parse_ascii_hex_digit(c());
|
||||
++pointer;
|
||||
++length;
|
||||
}
|
||||
|
||||
// 5. If c is U+002E (.), then:
|
||||
if (c() == '.') {
|
||||
// 1. If length is 0, IPv4-in-IPv6-invalid-code-point validation error, return failure.
|
||||
if (length == 0) {
|
||||
report_validation_error();
|
||||
return {};
|
||||
}
|
||||
|
||||
// 2. Decrease pointer by length.
|
||||
pointer -= length;
|
||||
|
||||
// 3. If pieceIndex is greater than 6, IPv4-in-IPv6-too-many-pieces validation error, return failure.
|
||||
if (piece_index > 6) {
|
||||
report_validation_error();
|
||||
return {};
|
||||
}
|
||||
|
||||
// 4. Let numbersSeen be 0.
|
||||
size_t numbers_seen = 0;
|
||||
|
||||
// 5. While c is not the EOF code point:
|
||||
while (c() != end_of_file) {
|
||||
// 1. Let ipv4Piece be null.
|
||||
Optional<u32> ipv4_piece;
|
||||
|
||||
// 2. If numbersSeen is greater than 0, then:
|
||||
if (numbers_seen > 0) {
|
||||
// 1. If c is a U+002E (.) and numbersSeen is less than 4, then increase pointer by 1.
|
||||
if (c() == '.' && numbers_seen < 4) {
|
||||
++pointer;
|
||||
}
|
||||
// 2. Otherwise, IPv4-in-IPv6-invalid-code-point validation error, return failure.
|
||||
else {
|
||||
report_validation_error();
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
// 3. If c is not an ASCII digit, IPv4-in-IPv6-invalid-code-point validation error, return failure.
|
||||
if (!is_ascii_digit(c())) {
|
||||
report_validation_error();
|
||||
return {};
|
||||
}
|
||||
|
||||
// 4. While c is an ASCII digit:
|
||||
while (is_ascii_digit(c())) {
|
||||
// 1. Let number be c interpreted as decimal number.
|
||||
u32 number = parse_ascii_digit(c());
|
||||
|
||||
// 2. If ipv4Piece is null, then set ipv4Piece to number.
|
||||
if (!ipv4_piece.has_value()) {
|
||||
ipv4_piece = number;
|
||||
}
|
||||
// Otherwise, if ipv4Piece is 0, IPv4-in-IPv6-invalid-code-point validation error, return failure.
|
||||
else if (ipv4_piece.value() == 0) {
|
||||
report_validation_error();
|
||||
return {};
|
||||
}
|
||||
// Otherwise, set ipv4Piece to ipv4Piece × 10 + number.
|
||||
else {
|
||||
ipv4_piece = ipv4_piece.value() * 10 + number;
|
||||
}
|
||||
|
||||
// 3. If ipv4Piece is greater than 255, IPv4-in-IPv6-out-of-range-part validation error, return failure.
|
||||
if (ipv4_piece.value() > 255) {
|
||||
report_validation_error();
|
||||
return {};
|
||||
}
|
||||
|
||||
// 4. Increase pointer by 1.
|
||||
++pointer;
|
||||
}
|
||||
// 5. Set address[pieceIndex] to address[pieceIndex] × 0x100 + ipv4Piece.
|
||||
address[piece_index] = address[piece_index] * 0x100 + ipv4_piece.value();
|
||||
|
||||
// 6. Increase numbersSeen by 1.
|
||||
++numbers_seen;
|
||||
|
||||
// 7. If numbersSeen is 2 or 4, then increase pieceIndex by 1.
|
||||
if (numbers_seen == 2 || numbers_seen == 4)
|
||||
++piece_index;
|
||||
}
|
||||
|
||||
// 6. If numbersSeen is not 4, IPv4-in-IPv6-too-few-parts validation error, return failure.
|
||||
if (numbers_seen != 4) {
|
||||
report_validation_error();
|
||||
return {};
|
||||
}
|
||||
|
||||
// 7. Break.
|
||||
break;
|
||||
}
|
||||
// 6. Otherwise, if c is U+003A (:):
|
||||
else if (c() == ':') {
|
||||
// 1. Increase pointer by 1.
|
||||
++pointer;
|
||||
|
||||
// 2. If c is the EOF code point, IPv6-invalid-code-point validation error, return failure.
|
||||
if (c() == end_of_file) {
|
||||
report_validation_error();
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
// 7. Otherwise, if c is not the EOF code point, IPv6-invalid-code-point validation error, return failure.
|
||||
else if (c() != end_of_file) {
|
||||
report_validation_error();
|
||||
return {};
|
||||
}
|
||||
|
||||
// 8. Set address[pieceIndex] to value.
|
||||
address[piece_index] = value;
|
||||
|
||||
// 9. Increase pieceIndex by 1.
|
||||
++piece_index;
|
||||
}
|
||||
|
||||
// 7. If compress is non-null, then:
|
||||
if (compress.has_value()) {
|
||||
// 1. Let swaps be pieceIndex − compress.
|
||||
size_t swaps = piece_index - compress.value();
|
||||
|
||||
// 2. Set pieceIndex to 7.
|
||||
piece_index = 7;
|
||||
|
||||
// 3. While pieceIndex is not 0 and swaps is greater than 0,
|
||||
// swap address[pieceIndex] with address[compress + swaps − 1],
|
||||
// and then decrease both pieceIndex and swaps by 1.
|
||||
while (piece_index != 0 && swaps > 0) {
|
||||
swap(address[piece_index], address[compress.value() + swaps - 1]);
|
||||
--piece_index;
|
||||
--swaps;
|
||||
}
|
||||
}
|
||||
|
||||
// 8. Otherwise, if compress is null and pieceIndex is not 8, IPv6-too-few-pieces validation error, return failure.
|
||||
else if (!compress.has_value() && piece_index != 8) {
|
||||
report_validation_error();
|
||||
return {};
|
||||
}
|
||||
|
||||
// 9. Return address.
|
||||
return address;
|
||||
}
|
||||
|
||||
// https://url.spec.whatwg.org/#concept-host-parser
|
||||
// NOTE: This is a very bare-bones implementation.
|
||||
static Optional<DeprecatedString> parse_host(StringView input, bool is_not_special = false)
|
||||
{
|
||||
// 1. If input starts with U+005B ([), then:
|
||||
if (input.starts_with('[')) {
|
||||
// 1. If input does not end with U+005D (]), IPv6-unclosed validation error, return failure.
|
||||
if (!input.ends_with(']')) {
|
||||
report_validation_error();
|
||||
return {};
|
||||
}
|
||||
// FIXME: Return the result of IPv6 parsing input with its leading U+005B ([) and trailing U+005D (]) removed.
|
||||
TODO();
|
||||
|
||||
// 2. Return the result of IPv6 parsing input with its leading U+005B ([) and trailing U+005D (]) removed.
|
||||
auto address = parse_ipv6_address(input.substring_view(1, input.length() - 2));
|
||||
if (!address.has_value())
|
||||
return {};
|
||||
|
||||
auto result = serialize_ipv6_address(*address);
|
||||
if (result.is_error())
|
||||
return {};
|
||||
return result.release_value().to_deprecated_string();
|
||||
}
|
||||
|
||||
if (is_not_special)
|
||||
|
@ -428,3 +428,33 @@ TEST_CASE(google_street_view)
|
||||
URL url(streetview_url);
|
||||
EXPECT_EQ(url.serialize(), streetview_url);
|
||||
}
|
||||
|
||||
// Checks URLs whose host is a bracketed IPv6 literal: three well-formed hosts must parse
// and compare equal to their source text, and a malformed host must yield an invalid URL.
TEST_CASE(ipv6_address)
{
    // Shared checks for every well-formed input.
    auto expect_valid_and_equal = [](auto ipv6_url) {
        URL url(ipv6_url);
        EXPECT(url.is_valid());
        EXPECT_EQ(url, ipv6_url);
    };

    expect_valid_and_equal("http://[::1]/index.html"sv);
    expect_valid_and_equal("http://[0:f:0:0:f:f:0:0]/index.html"sv);
    expect_valid_and_equal("https://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]/index.html"sv);

    // "oops" is not an IPv6 address, so the URL as a whole must be rejected.
    constexpr auto bad_ipv6_url = "https://[oops]/index.html"sv;
    URL rejected(bad_ipv6_url);
    EXPECT_EQ(rejected.is_valid(), false);
}
|
||||
|
Loading…
Reference in New Issue
Block a user