ladybird/AK/URLParser.cpp

/*
 * Copyright (c) 2021, Max Wipfli <mail@maxwipfli.ch>
 * Copyright (c) 2023, Shannon Booth <shannon@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <AK/CharacterTypes.h>
#include <AK/Debug.h>
#include <AK/DeprecatedString.h>
#include <AK/IntegralMath.h>
#include <AK/Optional.h>
#include <AK/SourceLocation.h>
#include <AK/StringBuilder.h>
#include <AK/StringUtils.h>
#include <AK/URLParser.h>
#include <AK/Utf8View.h>

namespace AK {

// NOTE: This is similar to the LibC macro EOF = -1.
//       The parsers in this file return this sentinel from their "current code point" accessor once the
//       pointer has moved past the end of the input. The value is larger than U+10FFFF, the largest
//       Unicode code point.
constexpr u32 end_of_file = 0xFFFFFFFF;

// https://url.spec.whatwg.org/#url-code-points
// Returns true if code_point is a URL code point: an ASCII alphanumeric, one of "!$&'()*+,-./:;=?@_~",
// or a code point in the range U+00A0 to U+10FFFD, inclusive, excluding surrogates and noncharacters.
static bool is_url_code_point(u32 code_point)
{
    // Below U+00A0 only ASCII alphanumerics and the listed punctuation qualify.
    if (code_point < 0xA0)
        return is_ascii_alphanumeric(code_point) || "!$&'()*+,-./:;=?@_~"sv.contains(code_point);

    // Upper end of the allowed range.
    // NOTE: This also rejects the end_of_file sentinel (0xFFFFFFFF), which is not a code point at all.
    if (code_point > 0x10FFFD)
        return false;

    // Surrogates: U+D800 to U+DFFF, inclusive.
    if (code_point >= 0xD800 && code_point <= 0xDFFF)
        return false;

    // Noncharacters: U+FDD0 to U+FDEF, inclusive, and the last two code points of every plane (U+xFFFE and U+xFFFF).
    if ((code_point >= 0xFDD0 && code_point <= 0xFDEF) || (code_point & 0xFFFE) == 0xFFFE)
        return false;

    return true;
}

// Logs a validation error together with a source location; the message is only emitted when URL_PARSER_DEBUG is enabled.
// NOTE: The location defaults to SourceLocation::current(). Default arguments are evaluated at the call site,
//       so callers are expected to invoke this without arguments to have their own location logged.
// NOTE: This only reports. It does not abort parsing; callers that must fail return failure themselves.
static void report_validation_error(SourceLocation const& location = SourceLocation::current())
{
    dbgln_if(URL_PARSER_DEBUG, "URLParser::basic_parse: Validation error! {}", location);
}

// Parses the host of a non-special URL: rejects input containing a forbidden host code point and
// otherwise returns the input percent-encoded with the C0 control percent-encode set.
static Optional<DeprecatedString> parse_opaque_host(StringView input)
{
    // NOTE: U+0025 (%) is deliberately absent from this list, as opaque hosts may contain percent-encoded bytes.
    auto const forbidden_host_code_points = "\0\t\n\r #/:<>?@[\\]^|"sv;

    // Fail as soon as any byte of the input turns out to be forbidden.
    for (auto byte : input) {
        if (!forbidden_host_code_points.contains(byte))
            continue;

        report_validation_error();
        return {};
    }

    // FIXME: If input contains a code point that is not a URL code point and not U+0025 (%), validation error.
    // FIXME: If input contains a U+0025 (%) and the two code points following it are not ASCII hex digits, validation error.
    return URL::percent_encode(input, URL::PercentEncodeSet::C0Control);
}

// Result of the IPv4 number parser below, i.e. the spec's (output, validationError) tuple.
// NOTE: Member order matters, since the parser builds this with positional aggregate initialization.
struct ParsedIPv4Number {
    // The parsed numeric value of one dot-separated part.
    u32 number { 0 };
    // True if the part was written in a non-decimal notation (hexadecimal or octal prefix).
    bool validation_error { false };
};

// https://url.spec.whatwg.org/#ipv4-number-parser
// Parses a single dot-separated part of an IPv4 address written in decimal, octal ("0" prefix) or
// hexadecimal ("0x"/"0X" prefix) notation.
// Returns an empty Optional on failure: empty input, a digit that is invalid for the detected radix,
// or a value that does not fit into a u32.
static Optional<ParsedIPv4Number> parse_ipv4_number(StringView input)
{
    // 1. If input is the empty string, then return failure.
    if (input.is_empty())
        return {};

    // 2. Let validationError be false.
    bool validation_error = false;

    // 3. Let R be 10.
    u8 radix = 10;

    // 4. If input contains at least two code points and the first two code points are either "0X" or "0x", then:
    if (input.length() >= 2 && (input.starts_with("0X"sv) || input.starts_with("0x"sv))) {
        // 1. Set validationError to true.
        validation_error = true;

        // 2. Remove the first two code points from input.
        input = input.substring_view(2);

        // 3. Set R to 16.
        radix = 16;
    }
    // 5. Otherwise, if input contains at least two code points and the first code point is U+0030 (0), then:
    else if (input.length() >= 2 && input[0] == '0') {
        // 1. Set validationError to true.
        validation_error = true;

        // 2. Remove the first code point from input.
        input = input.substring_view(1);

        // 3. Set R to 8.
        radix = 8;
    }

    // 6. If input is the empty string, then return (0, true).
    if (input.is_empty())
        return ParsedIPv4Number { 0, true };

    // 7. If input contains a code point that is not a radix-R digit, then return failure.
    if (radix == 8) {
        if (!all_of(input, [](auto character) { return is_ascii_octal_digit(character); }))
            return {};
    } else if (radix == 10) {
        if (!all_of(input, [](auto character) { return is_ascii_digit(character); }))
            return {};
    } else if (radix == 16) {
        if (!all_of(input, [](auto character) { return is_ascii_hex_digit(character); }))
            return {};
    } else {
        VERIFY_NOT_REACHED();
    }

    // 8. Let output be the mathematical integer value that is represented by input in radix-R notation, using ASCII hex digits for digits with values 0 through 15.
    Optional<u32> maybe_output;
    if (radix == 8)
        maybe_output = StringUtils::convert_to_uint_from_octal(input);
    else if (radix == 10)
        maybe_output = input.to_uint();
    else if (radix == 16)
        maybe_output = StringUtils::convert_to_uint_from_hex(input);
    else
        VERIFY_NOT_REACHED();

    // NOTE: All digits were validated in step 7, so the only way for the conversion to come back empty is a
    //       value that does not fit into a u32 (e.g. "0xFFFFFFFFFF"). Such a value can never be part of a
    //       valid IPv4 address, so report failure instead of unwrapping an empty Optional.
    if (!maybe_output.has_value())
        return {};

    // 9. Return (output, validationError).
    return ParsedIPv4Number { maybe_output.value(), validation_error };
}

// https://url.spec.whatwg.org/#concept-ipv4-parser
// Parses a dotted IPv4 address with one to four parts (e.g. "127.0.0.1", "127.1" or "0x7f000001")
// into its 32-bit numeric value. Returns an empty Optional on failure.
static Optional<u32> parse_ipv4_address(StringView input)
{
    // 1. Let parts be the result of strictly splitting input on U+002E (.).
    auto parts = input.split_view("."sv, SplitBehavior::KeepEmpty);

    // NOTE: Guard parts.last() below in case splitting produced no parts at all. Per the spec an empty
    //       input splits into a single empty part, which fails to parse as a number, so this is a failure.
    if (parts.is_empty()) {
        report_validation_error();
        return {};
    }

    // 2. If the last item in parts is the empty string, then:
    if (parts.last().is_empty()) {
        // 1. IPv4-empty-part validation error.
        report_validation_error();

        // 2. If parts’s size is greater than 1, then remove the last item from parts.
        if (parts.size() > 1)
            parts.take_last();
    }

    // 3. If parts’s size is greater than 4, IPv4-too-many-parts validation error, return failure.
    if (parts.size() > 4) {
        report_validation_error();
        return {};
    }

    // 4. Let numbers be an empty list.
    Vector<u32, 4> numbers;

    // 5. For each part of parts:
    for (auto const& part : parts) {
        // 1. Let result be the result of parsing part.
        auto const result = parse_ipv4_number(part);

        // 2. If result is failure, IPv4-non-numeric-part validation error, return failure.
        if (!result.has_value()) {
            report_validation_error();
            return {};
        }

        // 3. If result[1] is true, IPv4-non-decimal-part validation error.
        if (result->validation_error)
            report_validation_error();

        // 4. Append result[0] to numbers.
        numbers.append(result->number);
    }

    // 6. If any item in numbers is greater than 255, IPv4-out-of-range-part validation error.
    // 7. If any but the last item in numbers is greater than 255, then return failure.
    for (size_t i = 0; i < numbers.size(); ++i) {
        if (numbers[i] > 255) {
            report_validation_error();
            if (i != numbers.size() - 1)
                return {};
        }
    }

    // 8. If the last item in numbers is greater than or equal to 256^(5 − numbers’s size), then return failure.
    // NOTE: The power is computed as u64 on purpose: for a single part the limit is 256^4 = 2^32, which
    //       does not fit into a 32-bit integer and would wrap around to 0.
    if (numbers.last() >= pow<u64>(256, 5 - numbers.size()))
        return {};

    // 9. Let ipv4 be the last item in numbers.
    auto ipv4 = numbers.last();

    // 10. Remove the last item from numbers.
    numbers.take_last();

    // 11. Let counter be 0.
    u8 counter = 0;

    // 12. For each n of numbers:
    for (u32 n : numbers) {
        // 1. Increment ipv4 by n × 256^(3 − counter).
        // NOTE: n is at most 255 (step 7) and counter at most 2, so the product always fits into a u32.
        ipv4 += static_cast<u32>(n * pow<u64>(256, 3 - counter));

        // 2. Increment counter by 1.
        ++counter;
    }

    // 13. Return ipv4.
    return ipv4;
}

// https://url.spec.whatwg.org/#concept-ipv4-serializer
// Serializes a 32-bit IPv4 address in dotted-decimal notation, most significant octet first.
static ErrorOr<String> serialize_ipv4_address(u32 address)
{
    // NOTE: The spec builds the output back to front, by repeatedly prepending (n % 256) and then dividing n
    //       by 256. Extracting the four octets with shifts yields the same numbers without any prepending.
    auto const octet_at = [address](u32 shift) -> u8 {
        return static_cast<u8>((address >> shift) & 0xFF);
    };

    return String::formatted("{}.{}.{}.{}", octet_at(24), octet_at(16), octet_at(8), octet_at(0));
}

// https://url.spec.whatwg.org/#concept-ipv6-serializer
// Serializes the eight 16-bit pieces of an IPv6 address as lowercase hexadecimal, replacing the first
// longest run of two or more zero pieces with "::" (e.g. all zeroes become "::").
static ErrorOr<String> serialize_ipv6_address(Array<u16, 8> const& address)
{
    // 1. Let output be the empty string.
    StringBuilder output;

    // 2. Let compress be an index to the first IPv6 piece in the first longest sequences of address’s IPv6 pieces that are 0.
    // NOTE: The longest run is updated while a run is still growing, rather than only once it is terminated
    //       by a non-zero piece. Otherwise a run that extends to the last piece would never be recorded.
    size_t longest_sequence_start = 0;
    size_t longest_sequence_length = 0;
    size_t current_sequence_start = 0;
    size_t current_sequence_length = 0;
    for (size_t i = 0; i < 8; ++i) {
        if (address[i] != 0) {
            current_sequence_length = 0;
            continue;
        }

        if (current_sequence_length == 0)
            current_sequence_start = i;
        ++current_sequence_length;

        // Strictly greater, so that the first of several equally long runs wins.
        if (current_sequence_length > longest_sequence_length) {
            longest_sequence_length = current_sequence_length;
            longest_sequence_start = current_sequence_start;
        }
    }

    // 3. If there is no sequence of address’s IPv6 pieces that are 0 that is longer than 1, then set compress to null.
    Optional<size_t> compress;
    if (longest_sequence_length > 1)
        compress = longest_sequence_start;

    // 4. Let ignore0 be false.
    auto ignore0 = false;

    // 5. For each pieceIndex in the range 0 to 7, inclusive:
    for (size_t piece_index = 0; piece_index <= 7; ++piece_index) {
        // 1. If ignore0 is true and address[pieceIndex] is 0, then continue.
        if (ignore0 && address[piece_index] == 0)
            continue;

        // 2. Otherwise, if ignore0 is true, set ignore0 to false.
        if (ignore0)
            ignore0 = false;

        // 3. If compress is pieceIndex, then:
        if (compress == piece_index) {
            // 1. Let separator be "::" if pieceIndex is 0, and U+003A (:) otherwise.
            // NOTE: For any other index the previous piece has already appended one ':'.
            auto separator = piece_index == 0 ? "::"sv : ":"sv;

            // 2. Append separator to output.
            output.append(separator);

            // 3. Set ignore0 to true and continue.
            ignore0 = true;
            continue;
        }

        // 4. Append address[pieceIndex], represented as the shortest possible lowercase hexadecimal number, to output.
        output.appendff("{:x}", address[piece_index]);

        // 5. If pieceIndex is not 7, then append U+003A (:) to output.
        if (piece_index != 7)
            output.append(':');
    }

    // 6. Return output.
    return output.to_string();
}

// https://url.spec.whatwg.org/#concept-ipv6-parser
// Parses the textual form of an IPv6 address (without the surrounding brackets) into its eight 16-bit
// pieces. Handles "::" compression and a trailing dotted-decimal IPv4 part.
// Returns an empty Optional on failure; every failure path reports a validation error first.
// The numbered comments below follow the steps of the spec algorithm.
static Optional<Array<u16, 8>> parse_ipv6_address(StringView input)
{
    // 1. Let address be a new IPv6 address whose IPv6 pieces are all 0.
    Array<u16, 8> address {};

    // 2. Let pieceIndex be 0.
    size_t piece_index = 0;

    // 3. Let compress be null.
    Optional<size_t> compress;

    // NOTE: The input is decoded into code points up front, so that pointer below is a plain index that
    //       can be advanced and also moved backwards (see "Decrease pointer by length").
    Vector<u32> code_points;
    code_points.ensure_capacity(input.length());
    for (auto code_point : Utf8View { input }) {
        code_points.append(code_point);
    }

    // 4. Let pointer be a pointer for input.
    size_t pointer = 0;
    // c is the code point at pointer, or end_of_file once pointer has moved past the last code point.
    auto c = [&]() -> u32 {
        if (pointer >= code_points.size())
            return end_of_file;
        return code_points[pointer];
    };

    // remaining is everything after c; it is empty if c is the last code point or pointer is past the end.
    auto remaining = [&]() -> ReadonlySpan<u32> {
        if ((pointer + 1) >= code_points.size())
            return {};
        return code_points.span().slice(pointer + 1);
    };

    // 5. If c is U+003A (:), then:
    if (c() == ':') {
        // 1. If remaining does not start with U+003A (:), IPv6-invalid-compression validation error, return failure.
        if (remaining().is_empty() || remaining()[0] != ':') {
            report_validation_error();
            return {};
        }

        // 2. Increase pointer by 2.
        pointer += 2;

        // 3. Increase pieceIndex by 1 and then set compress to pieceIndex.
        ++piece_index;
        compress = piece_index;
    }

    // 6. While c is not the EOF code point:
    while (c() != end_of_file) {
        // 1. If pieceIndex is 8, IPv6-too-many-pieces validation error, return failure.
        // NOTE: This check is also what keeps the writes to address[piece_index] further down in bounds.
        if (piece_index == 8) {
            report_validation_error();
            return {};
        }

        // 2. If c is U+003A (:), then:
        if (c() == ':') {
            // 1. If compress is non-null, IPv6-multiple-compression validation error, return failure.
            if (compress.has_value()) {
                report_validation_error();
                return {};
            }

            // 2. Increase pointer and pieceIndex by 1, set compress to pieceIndex, and then continue.
            ++pointer;
            ++piece_index;
            compress = piece_index;
            continue;
        }

        // 3. Let value and length be 0.
        // NOTE: At most four hex digits are accumulated below, so value never exceeds 0xFFFF.
        u32 value = 0;
        size_t length = 0;

        // 4. While length is less than 4 and c is an ASCII hex digit,
        //    set value to value × 0x10 + c interpreted as hexadecimal number,
        //    and increase pointer and length by 1.
        while (length < 4 && is_ascii_hex_digit(c())) {
            value = value * 0x10 + parse_ascii_hex_digit(c());
            ++pointer;
            ++length;
        }

        // 5. If c is U+002E (.), then:
        // NOTE: A dot means the digits just consumed were the start of an embedded IPv4 address, so they
        //       are re-read as decimal below.
        if (c() == '.') {
            // 1. If length is 0, IPv4-in-IPv6-invalid-code-point validation error, return failure.
            if (length == 0) {
                report_validation_error();
                return {};
            }

            // 2. Decrease pointer by length.
            pointer -= length;

            // 3. If pieceIndex is greater than 6, IPv4-in-IPv6-too-many-pieces validation error, return failure.
            // NOTE: The IPv4 part occupies two pieces, so it has to start at piece 6 at the latest.
            if (piece_index > 6) {
                report_validation_error();
                return {};
            }

            // 4. Let numbersSeen be 0.
            size_t numbers_seen = 0;

            // 5. While c is not the EOF code point:
            while (c() != end_of_file) {
                // 1. Let ipv4Piece be null.
                Optional<u32> ipv4_piece;

                // 2. If numbersSeen is greater than 0, then:
                if (numbers_seen > 0) {
                    // 1. If c is a U+002E (.) and numbersSeen is less than 4, then increase pointer by 1.
                    if (c() == '.' && numbers_seen < 4) {
                        ++pointer;
                    }
                    // 2. Otherwise, IPv4-in-IPv6-invalid-code-point validation error, return failure.
                    else {
                        report_validation_error();
                        return {};
                    }
                }

                // 3. If c is not an ASCII digit, IPv4-in-IPv6-invalid-code-point validation error, return failure.
                if (!is_ascii_digit(c())) {
                    report_validation_error();
                    return {};
                }

                // 4. While c is an ASCII digit:
                while (is_ascii_digit(c())) {
                    // 1. Let number be c interpreted as decimal number.
                    u32 number = parse_ascii_digit(c());

                    // 2. If ipv4Piece is null, then set ipv4Piece to number.
                    if (!ipv4_piece.has_value()) {
                        ipv4_piece = number;
                    }
                    // Otherwise, if ipv4Piece is 0, IPv4-in-IPv6-invalid-code-point validation error, return failure.
                    // NOTE: This rejects leading zeroes such as "01".
                    else if (ipv4_piece.value() == 0) {
                        report_validation_error();
                        return {};
                    }
                    // Otherwise, set ipv4Piece to ipv4Piece × 10 + number.
                    else {
                        ipv4_piece = ipv4_piece.value() * 10 + number;
                    }

                    // 3. If ipv4Piece is greater than 255, IPv4-in-IPv6-out-of-range-part validation error, return failure.
                    if (ipv4_piece.value() > 255) {
                        report_validation_error();
                        return {};
                    }

                    // 4. Increase pointer by 1.
                    ++pointer;
                }
                // 5. Set address[pieceIndex] to address[pieceIndex] × 0x100 + ipv4Piece.
                // NOTE: Two consecutive IPv4 numbers are packed into one 16-bit piece, high byte first.
                address[piece_index] = address[piece_index] * 0x100 + ipv4_piece.value();

                // 6. Increase numbersSeen by 1.
                ++numbers_seen;

                // 7. If numbersSeen is 2 or 4, then increase pieceIndex by 1.
                if (numbers_seen == 2 || numbers_seen == 4)
                    ++piece_index;
            }

            // 6. If numbersSeen is not 4, IPv4-in-IPv6-too-few-parts validation error, return failure.
            if (numbers_seen != 4) {
                report_validation_error();
                return {};
            }

            // 7. Break.
            break;
        }
        // 6. Otherwise, if c is U+003A (:):
        else if (c() == ':') {
            // 1. Increase pointer by 1.
            ++pointer;

            // 2. If c is the EOF code point, IPv6-invalid-code-point validation error, return failure.
            if (c() == end_of_file) {
                report_validation_error();
                return {};
            }
        }

        // 7. Otherwise, if c is not the EOF code point, IPv6-invalid-code-point validation error, return failure.
        else if (c() != end_of_file) {
            report_validation_error();
            return {};
        }

        // 8. Set address[pieceIndex] to value.
        address[piece_index] = value;

        // 9. Increase pieceIndex by 1.
        ++piece_index;
    }

    // 7. If compress is non-null, then:
    // NOTE: The pieces parsed after the "::" are moved to the end of the address; the zeroes they are
    //       swapped with fill the gap that the "::" stands for.
    if (compress.has_value()) {
        // 1. Let swaps be pieceIndex − compress.
        size_t swaps = piece_index - compress.value();

        // 2. Set pieceIndex to 7.
        piece_index = 7;

        // 3. While pieceIndex is not 0 and swaps is greater than 0,
        //    swap address[pieceIndex] with address[compress + swaps − 1],
        //    and then decrease both pieceIndex and swaps by 1.
        while (piece_index != 0 && swaps > 0) {
            swap(address[piece_index], address[compress.value() + swaps - 1]);
            --piece_index;
            --swaps;
        }
    }

    // 8. Otherwise, if compress is null and pieceIndex is not 8, IPv6-too-few-pieces validation error, return failure.
    else if (!compress.has_value() && piece_index != 8) {
        report_validation_error();
        return {};
    }

    // 9. Return address.
    return address;
}

// https://url.spec.whatwg.org/#ends-in-a-number-checker
// Returns true if the last non-empty dot-separated part of input looks like a number, in which case the
// host has to be parsed as an IPv4 address rather than be treated as a domain.
// NOTE: This is a purely syntactic check; whether the number is in range is decided by the IPv4 parser.
static bool ends_in_a_number_checker(StringView input)
{
    // 1. Let parts be the result of strictly splitting input on U+002E (.).
    auto parts = input.split_view("."sv, SplitBehavior::KeepEmpty);

    // NOTE: Guard parts.last() below in case splitting produced no parts at all. Per the spec an empty
    //       input splits into a single empty part, for which step 2.1 returns false.
    if (parts.is_empty())
        return false;

    // 2. If the last item in parts is the empty string, then:
    if (parts.last().is_empty()) {
        // 1. If parts’s size is 1, then return false.
        if (parts.size() == 1)
            return false;

        // 2. Remove the last item from parts.
        parts.take_last();
    }

    // 3. Let last be the last item in parts.
    auto last = parts.last();

    // 4. If last is non-empty and contains only ASCII digits, then return true.
    if (!last.is_empty() && all_of(last, is_ascii_digit))
        return true;

    // 5. If parsing last as an IPv4 number does not return failure, then return true.
    // NOTE: This is equivalent to checking that last is "0X" or "0x", followed by zero or more ASCII hex digits.
    //       Decimal and octal inputs consist of ASCII digits only and were already accepted by step 4.
    //       Checking the syntax directly means an arbitrarily long hex number is still recognized as a number.
    if (last.starts_with("0X"sv) || last.starts_with("0x"sv)) {
        if (all_of(last.substring_view(2), [](auto character) { return is_ascii_hex_digit(character); }))
            return true;
    }

    // 6. Return false.
    return false;
}

// https://url.spec.whatwg.org/#concept-host-parser
// NOTE: This is a very bare-bones implementation.
// Parses input as a host and returns its serialization, or an empty Optional on failure:
//   - "[...]" is parsed as an IPv6 address and returned in serialized form, including the brackets' content only.
//   - For non-special URLs (is_not_special), the input is parsed as an opaque host.
//   - Otherwise the input is percent-decoded and treated as a domain, or as an IPv4 address if it ends in a number.
static Optional<DeprecatedString> parse_host(StringView input, bool is_not_special = false)
{
    // 1. If input starts with U+005B ([), then:
    if (input.starts_with('[')) {
        // 1. If input does not end with U+005D (]), IPv6-unclosed validation error, return failure.
        if (!input.ends_with(']')) {
            report_validation_error();
            return {};
        }

        // 2. Return the result of IPv6 parsing input with its leading U+005B ([) and trailing U+005D (]) removed.
        auto address = parse_ipv6_address(input.substring_view(1, input.length() - 2));
        if (!address.has_value())
            return {};

        auto result = serialize_ipv6_address(*address);
        if (result.is_error())
            return {};
        return result.release_value().to_deprecated_string();
    }

    // 2. If isNotSpecial is true, then return the result of opaque-host parsing input.
    if (is_not_special)
        return parse_opaque_host(input);

    // 3. Assert: input is not the empty string.
    VERIFY(!input.is_empty());

    // FIXME: 4. Let domain be the result of running UTF-8 decode without BOM on the percent-decoding of input.
    auto domain = URL::percent_decode(input);

    // FIXME: 5. Let asciiDomain be the result of running domain to ASCII on domain.
    // FIXME: 6. If asciiDomain is failure, then return failure.
    auto& ascii_domain = domain;

    // 7. If asciiDomain contains a forbidden domain code point, domain-invalid-code-point validation error, return failure.
    // NOTE: A forbidden domain code point is a forbidden host code point, a C0 control, U+0025 (%), or U+007F DELETE.
    //       The string below lists the forbidden host code points plus U+0025 (%); the remaining C0 controls
    //       and U+007F are covered by the range check.
    auto forbidden_host_code_points_and_percent = "\0\t\n\r #%/:<>?@[\\]^|"sv;
    for (auto character : ascii_domain.view()) {
        // NOTE: Compare as an unsigned byte, so that bytes >= 0x80 are not mistaken for control characters
        //       where char is signed.
        auto const byte = static_cast<u8>(character);
        bool const is_c0_control_or_delete = byte < 0x20 || byte == 0x7F;

        if (is_c0_control_or_delete || forbidden_host_code_points_and_percent.contains(character)) {
            report_validation_error();
            return {};
        }
    }

    // 8. If asciiDomain ends in a number, then return the result of IPv4 parsing asciiDomain.
    if (ends_in_a_number_checker(ascii_domain)) {
        auto ipv4_host = parse_ipv4_address(ascii_domain);
        if (!ipv4_host.has_value())
            return {};

        auto result = serialize_ipv4_address(*ipv4_host);
        if (result.is_error())
            return {};

        return result.release_value().to_deprecated_string();
    }

    // 9. Return asciiDomain.
    return ascii_domain;
}

// https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
// True if input begins with an ASCII letter followed by ':' or '|', and that pair is either the whole
// input or is directly followed by one of '/', '\', '?' or '#'.
constexpr bool starts_with_windows_drive_letter(StringView input)
{
    auto const length = input.length();
    if (length < 2)
        return false;

    bool const has_drive_letter = is_ascii_alpha(input[0]);
    bool const has_drive_separator = input[1] == ':' || input[1] == '|';
    if (!(has_drive_letter && has_drive_separator))
        return false;

    // Either nothing follows the drive letter, or it is followed by a delimiter.
    return length == 2 || "/\\?#"sv.contains(input[2]);
}

// True if input is exactly two code points long: an ASCII letter followed by either ':' or '|'.
constexpr bool is_windows_drive_letter(StringView input)
{
    if (input.length() != 2)
        return false;
    if (!is_ascii_alpha(input[0]))
        return false;
    return input[1] == ':' || input[1] == '|';
}

// True if input is exactly two code points long: an ASCII letter followed by ':' (the '|' form is not normalized).
constexpr bool is_normalized_windows_drive_letter(StringView input)
{
    if (input.length() != 2)
        return false;
    if (!is_ascii_alpha(input[0]))
        return false;
    return input[1] == ':';
}

// True if input is "." or an ASCII case-insensitive match for its percent-encoded form "%2e".
constexpr bool is_single_dot_path_segment(StringView input)
{
    if (input == "."sv)
        return true;
    return input.equals_ignoring_ascii_case("%2e"sv);
}

// True if input is ".." or an ASCII case-insensitive match for a form of it in which
// either dot, or both, are percent-encoded as "%2e".
constexpr bool is_double_dot_path_segment(StringView input)
{
    if (input == ".."sv)
        return true;
    if (input.equals_ignoring_ascii_case(".%2e"sv))
        return true;
    if (input.equals_ignoring_ascii_case("%2e."sv))
        return true;
    return input.equals_ignoring_ascii_case("%2e%2e"sv);
}

// https://url.spec.whatwg.org/#string-percent-encode-after-encoding
// Percent-encodes every byte of input that is in percent_encode_set and copies all other bytes verbatim.
// If space_as_plus is true, U+0020 SPACE is emitted as U+002B (+) instead.
// The input bytes are used as-is, i.e. the encoding is assumed to be UTF-8 (see the NOTE below).
DeprecatedString URLParser::percent_encode_after_encoding(StringView input, URL::PercentEncodeSet percent_encode_set, bool space_as_plus)
{
    // NOTE: This is written somewhat ad-hoc since we don't yet implement the Encoding spec.

    StringBuilder output;

    // 3. For each byte of encodeOutput converted to a byte sequence:
    // NOTE: The byte is explicitly taken as u8. Where char is signed, iterating as char would sign-extend
    //       bytes >= 0x80 when widening to a code point, and hand a negative value to the hex formatter.
    for (u8 byte : input) {
        // 1. If spaceAsPlus is true and byte is 0x20 (SP), then append U+002B (+) to output and continue.
        if (space_as_plus && byte == ' ') {
            output.append('+');
            continue;
        }

        // 2. Let isomorph be a code point whose value is byte’s value.
        u32 isomorph = byte;

        // 3. Assert: percentEncodeSet includes all non-ASCII code points.

        // 4. If isomorph is not in percentEncodeSet, then append isomorph to output.
        if (!URL::code_point_is_in_percent_encode_set(isomorph, percent_encode_set)) {
            output.append_code_point(isomorph);
        }

        // 5. Otherwise, percent-encode byte and append the result to output.
        else {
            output.appendff("%{:02X}", byte);
        }
    }

    // 6. Return output.
    return output.to_deprecated_string();
}

// https://fetch.spec.whatwg.org/#data-urls
// FIXME: This only loosely follows the spec, as we use the same class for "regular" and data URLs, unlike the spec.
// Splits "data:<mime type>[;base64],<body>" into its MIME type, its percent-decoded body and a base64 flag.
// Returns an empty Optional if there is no comma separating the MIME type from the body.
// The caller must have verified that raw_input starts with "data:".
Optional<URL> URLParser::parse_data_url(StringView raw_input)
{
    dbgln_if(URL_PARSER_DEBUG, "URLParser::parse_data_url: Parsing '{}'.", raw_input);
    VERIFY(raw_input.starts_with("data:"sv));

    // Everything after the "data:" prefix is split at the first comma.
    auto const after_scheme = raw_input.substring_view(5);
    auto const maybe_comma_index = after_scheme.find(',');
    if (!maybe_comma_index.has_value())
        return {};
    auto const comma_index = maybe_comma_index.value();

    auto mime_type = StringUtils::trim(after_scheme.substring_view(0, comma_index), "\t\n\f\r "sv, TrimMode::Both);
    auto body = URL::percent_decode(after_scheme.substring_view(comma_index + 1));

    // A trailing "base64" (in any case) only counts as the base64 marker if, ignoring spaces in between,
    // it is preceded by a ';'. In that case, the marker and the ';' are stripped from the MIME type.
    bool is_base64_encoded = false;
    if (mime_type.ends_with("base64"sv, CaseSensitivity::CaseInsensitive)) {
        auto const before_marker = StringUtils::trim(mime_type.substring_view(0, mime_type.length() - 6), " "sv, TrimMode::Right);
        if (before_marker.ends_with(';')) {
            is_base64_encoded = true;
            mime_type = before_marker.substring_view(0, before_marker.length() - 1);
        }
    }

    // A missing MIME type (empty, or consisting of parameters only) defaults to "text/plain".
    // NOTE: mime_type may end up viewing into this builder, so the builder has to outlive its last use below.
    StringBuilder defaulted_mime_type;
    if (mime_type.is_empty() || mime_type.starts_with(";"sv)) {
        defaulted_mime_type.append("text/plain"sv);
        defaulted_mime_type.append(mime_type);
        mime_type = defaulted_mime_type.string_view();
    }

    // FIXME: Parse the MIME type's components according to https://mimesniff.spec.whatwg.org/#parse-a-mime-type
    URL url { StringUtils::trim(mime_type, "\n\r\t "sv, TrimMode::Both), move(body), is_base64_encoded };
    dbgln_if(URL_PARSER_DEBUG, "URLParser::parse_data_url: Parsed data URL to be '{}'.", url.serialize());
    return url;
}

// https://url.spec.whatwg.org/#concept-basic-url-parser
// NOTE: This parser assumes a UTF-8 encoding.
// NOTE: Refrain from using the URL class's setters inside this algorithm. Rather, set the values directly. This bypasses the setters' built-in
//       validation, which is strictly unnecessary since we set m_valid=true at the end anyway. Furthermore, this algorithm may be used in the
//       future for validation of URLs, which would then lead to infinite recursion.
//       The same goes for base_url, because e.g. the port() getter does not always return m_port, and we are interested in the underlying member
//       variables' values here, not what the URL class presents to its users.
URL URLParser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Optional<URL> url, Optional<State> state_override)
{
    dbgln_if(URL_PARSER_DEBUG, "URLParser::parse: Parsing '{}'", raw_input);
    if (raw_input.is_empty())
        return base_url.has_value() ? *base_url : URL {};

    if (raw_input.starts_with("data:"sv)) {
        auto maybe_url = parse_data_url(raw_input);
        if (!maybe_url.has_value())
            return {};
        return maybe_url.release_value();
    }

    size_t start_index = 0;
    size_t end_index = raw_input.length();

    // 1. If url is not given:
    if (!url.has_value()) {
        // 1. Set url to a new URL.
        url = URL();

        // 2. If input contains any leading or trailing C0 control or space, invalid-URL-unit validation error.
        // 3. Remove any leading and trailing C0 control or space from input.
        //
        // FIXME: We aren't checking exactly for 'trailing C0 control or space' here.

        bool has_validation_error = false;
        for (size_t i = 0; i < raw_input.length(); ++i) {
            i8 ch = raw_input[i];
            if (0 <= ch && ch <= 0x20) {
                ++start_index;
                has_validation_error = true;
            } else {
                break;
            }
        }
        for (ssize_t i = raw_input.length() - 1; i >= 0; --i) {
            i8 ch = raw_input[i];
            if (0 <= ch && ch <= 0x20) {
                --end_index;
                has_validation_error = true;
            } else {
                break;
            }
        }
        if (has_validation_error)
            report_validation_error();
    }
    if (start_index >= end_index)
        return {};

    DeprecatedString processed_input = raw_input.substring_view(start_index, end_index - start_index);

    // 2. If input contains any ASCII tab or newline, invalid-URL-unit validation error.
    // 3. Remove all ASCII tab or newline from input.
    if (processed_input.contains("\t"sv) || processed_input.contains("\n"sv)) {
        report_validation_error();
        processed_input = processed_input.replace("\t"sv, ""sv, ReplaceMode::All).replace("\n"sv, ""sv, ReplaceMode::All);
    }

    // 4. Let state be state override if given, or scheme start state otherwise.
    State state = state_override.value_or(State::SchemeStart);

    // FIXME: 5. Set encoding to the result of getting an output encoding from encoding.

    // 6. Let buffer be the empty string.
    StringBuilder buffer;

    // 7. Let atSignSeen, insideBrackets, and passwordTokenSeen be false.
    bool at_sign_seen = false;
    bool inside_brackets = false;
    bool password_token_seen = false;

    Utf8View input(processed_input);

    // 8. Let pointer be a pointer for input.
    Utf8CodePointIterator iterator = input.begin();

    auto get_remaining = [&input, &iterator] {
        return input.substring_view(iterator - input.begin() + iterator.underlying_code_point_length_in_bytes()).as_string();
    };

    // 9. Keep running the following state machine by switching on state. If after a run pointer points to the EOF code point, go to the next step. Otherwise, increase pointer by 1 and continue with the state machine.
    // NOTE: "continue" should only be used to prevent incrementing the iterator, as this is done at the end of the loop.
    //       ++iterator : "increase pointer by 1"
    //       continue   : "decrease pointer by 1"
    for (;;) {
        u32 code_point = end_of_file;
        if (!iterator.done())
            code_point = *iterator;

        if constexpr (URL_PARSER_DEBUG) {
            if (code_point == end_of_file)
                dbgln("URLParser::basic_parse: {} state with EOF.", state_name(state));
            else if (is_ascii_printable(code_point))
                dbgln("URLParser::basic_parse: {} state with code point U+{:04X} ({:c}).", state_name(state), code_point, code_point);
            else
                dbgln("URLParser::basic_parse: {} state with code point U+{:04X}.", state_name(state), code_point);
        }

        switch (state) {
        // -> scheme start state, https://url.spec.whatwg.org/#scheme-start-state
        case State::SchemeStart:
            // 1. If c is an ASCII alpha, append c, lowercased, to buffer, and set state to scheme state.
            if (is_ascii_alpha(code_point)) {
                buffer.append_as_lowercase(code_point);
                state = State::Scheme;
            }
            // 2. Otherwise, if state override is not given, set state to no scheme state and decrease pointer by 1.
            else if (!state_override.has_value()) {
                state = State::NoScheme;
                continue;
            }
            // 3. Otherwise, return failure.
            else {
                return {};
            }
            break;
        // -> scheme state, https://url.spec.whatwg.org/#scheme-state
        case State::Scheme:
            // 1. If c is an ASCII alphanumeric, U+002B (+), U+002D (-), or U+002E (.), append c, lowercased, to buffer.
            if (is_ascii_alphanumeric(code_point) || code_point == '+' || code_point == '-' || code_point == '.') {
                buffer.append_as_lowercase(code_point);
            }
            // 2. Otherwise, if c is U+003A (:), then:
            else if (code_point == ':') {
                // 1. If state override is given, then:
                if (state_override.has_value()) {
                    // 1. If url’s scheme is a special scheme and buffer is not a special scheme, then return.
                    if (URL::is_special_scheme(url->scheme()) && !URL::is_special_scheme(buffer.string_view()))
                        return *url;

                    // 2. If url’s scheme is not a special scheme and buffer is a special scheme, then return.
                    if (!URL::is_special_scheme(url->scheme()) && URL::is_special_scheme(buffer.string_view()))
                        return *url;

                    // 3. If url includes credentials or has a non-null port, and buffer is "file", then return.
                    if ((url->includes_credentials() || url->port().has_value()) && buffer.string_view() == "file"sv)
                        return *url;

                    // 4. If url’s scheme is "file" and its host is an empty host, then return.
                    if (url->scheme() == "file"sv && url->host().is_empty())
                        return *url;
                }

                // 2. Set url’s scheme to buffer.
                url->m_scheme = buffer.to_deprecated_string();

                // 3. If state override is given, then:
                if (state_override.has_value()) {
                    // 1. If url’s port is url’s scheme’s default port, then set url’s port to null.
                    if (url->port() == URL::default_port_for_scheme(url->scheme()))
                        url->m_port = {};

                    // 2. Return.
                    return *url;
                }

                // 4. Set buffer to the empty string.
                buffer.clear();

                // 5. If url’s scheme is "file", then:
                if (url->scheme() == "file") {
                    // 1. If remaining does not start with "//", special-scheme-missing-following-solidus validation error.
                    if (!get_remaining().starts_with("//"sv)) {
                        report_validation_error();
                    }
                    // 2. Set state to file state.
                    state = State::File;
                }
                // 6. Otherwise, if url is special, base is non-null, and base’s scheme is url’s scheme:
                // 7. Otherwise, if url is special, set state to special authority slashes state.
                // FIXME: Write this block closer to spec text.
                else if (url->is_special()) {
                    // FIXME: 1. Assert: base is is special (and therefore does not have an opaque path).

                    // 2. Set state to special relative or authority state.
                    if (base_url.has_value() && base_url->m_scheme == url->m_scheme)
                        state = State::SpecialRelativeOrAuthority;
                    else
                        state = State::SpecialAuthoritySlashes;
                }

                // 8. Otherwise, if remaining starts with an U+002F (/), set state to path or authority state and increase pointer by 1.
                else if (get_remaining().starts_with("/"sv)) {
                    state = State::PathOrAuthority;
                    ++iterator;
                }
                // 9. Otherwise, set url’s path to the empty string and set state to opaque path state.
                else {
                    url->m_cannot_be_a_base_url = true;
                    url->append_slash();
                    state = State::CannotBeABaseUrlPath;
                }
            }
            // 3. Otherwise, if state override is not given, set buffer to the empty string, state to no scheme state, and start over (from the first code point in input).
            else if (!state_override.has_value()) {
                buffer.clear();
                state = State::NoScheme;
                iterator = input.begin();
                continue;
            }
            // 4. Otherwise, return failure.
            else {
                return {};
            }
            break;
        // -> no scheme state, https://url.spec.whatwg.org/#no-scheme-state
        case State::NoScheme:
            // 1. If base is null, or base has an opaque path and c is not U+0023 (#), missing-scheme-non-relative-URL validation error, return failure.
            if (!base_url.has_value() || (base_url->m_cannot_be_a_base_url && code_point != '#')) {
                report_validation_error();
                return {};
            }
            // 2. Otherwise, if base has an opaque path and c is U+0023 (#), set url’s scheme to base’s scheme, url’s path to base’s path, url’s query
            //    to base’s query,url’s fragment to the empty string, and set state to fragment state.
            else if (base_url->m_cannot_be_a_base_url && code_point == '#') {
                url->m_scheme = base_url->m_scheme;
                url->m_paths = base_url->m_paths;
                url->m_query = base_url->m_query;
                url->m_fragment = "";
                url->m_cannot_be_a_base_url = true;
                state = State::Fragment;
            }
            // 3. Otherwise, if base’s scheme is not "file", set state to relative state and decrease pointer by 1.
            else if (base_url->m_scheme != "file") {
                state = State::Relative;
                continue;
            }
            // 4. Otherwise, set state to file state and decrease pointer by 1.
            else {
                state = State::File;
                continue;
            }
            break;
        // -> special relative or authority state, https://url.spec.whatwg.org/#special-relative-or-authority-state
        case State::SpecialRelativeOrAuthority:
            // 1. If c is U+002F (/) and remaining starts with U+002F (/), then set state to special authority ignore slashes state and increase pointer by 1.
            if (code_point == '/' && get_remaining().starts_with("/"sv)) {
                state = State::SpecialAuthorityIgnoreSlashes;
                ++iterator;
            }
            // 2. Otherwise, special-scheme-missing-following-solidus validation error, set state to relative state and decrease pointer by 1.
            else {
                report_validation_error();
                state = State::Relative;
                continue;
            }
            break;
        // -> path or authority state, https://url.spec.whatwg.org/#path-or-authority-state
        case State::PathOrAuthority:
            // 1. If c is U+002F (/), then set state to authority state.
            if (code_point == '/') {
                state = State::Authority;
            }
            // 2. Otherwise, set state to path state, and decrease pointer by 1.
            else {
                state = State::Path;
                continue;
            }
            break;
        // -> relative state, https://url.spec.whatwg.org/#relative-state
        case State::Relative:
            // 1. Assert: base’s scheme is not "file".
            VERIFY(base_url->scheme() != "file");

            // 2. Set url’s scheme to base’s scheme.
            url->m_scheme = base_url->m_scheme;

            // 3. If c is U+002F (/), then set state to relative slash state.
            if (code_point == '/') {
                state = State::RelativeSlash;
            }
            // 4. Otherwise, if url is special and c is U+005C (\), invalid-reverse-solidus validation error, set state to relative slash state.
            else if (url->is_special() && code_point == '\\') {
                report_validation_error();
                state = State::RelativeSlash;
            }
            // 5. Otherwise:
            else {
                // 1. Set url’s username to base’s username, url’s password to base’s password, url’s host to base’s host, url’s port to base’s port, url’s path to a clone of base’s path, and url’s query to base’s query.
                url->m_username = base_url->m_username;
                url->m_password = base_url->m_password;
                url->m_host = base_url->m_host;
                url->m_port = base_url->m_port;
                url->m_paths = base_url->m_paths;
                url->m_query = base_url->m_query;

                // 2. If c is U+003F (?), then set url’s query to the empty string, and state to query state.
                if (code_point == '?') {
                    url->m_query = "";
                    state = State::Query;
                }
                // 3. Otherwise, if c is U+0023 (#), set url’s fragment to the empty string and state to fragment state.
                else if (code_point == '#') {
                    url->m_fragment = "";
                    state = State::Fragment;
                }
                // 4. Otherwise, if c is not the EOF code point:
                else if (code_point != end_of_file) {
                    // 1. Set url’s query to null.
                    url->m_query = {};

                    // 2. Shorten url’s path.
                    if (url->m_paths.size())
                        url->m_paths.remove(url->m_paths.size() - 1);

                    // 3. Set state to path state and decrease pointer by 1.
                    state = State::Path;
                    continue;
                }
            }
            break;
        // -> relative slash state, https://url.spec.whatwg.org/#relative-slash-state
        case State::RelativeSlash:
            // 1. If url is special and c is U+002F (/) or U+005C (\), then:
            if (url->is_special() && (code_point == '/' || code_point == '\\')) {
                // 1. If c is U+005C (\), invalid-reverse-solidus validation error.
                if (code_point == '\\')
                    report_validation_error();

                // 2. Set state to special authority ignore slashes state.
                state = State::SpecialAuthorityIgnoreSlashes;
            }
            // 2. Otherwise, if c is U+002F (/), then set state to authority state.
            else if (code_point == '/') {
                state = State::Authority;
            }
            // 3. Otherwise, set url’s username to base’s username, url’s password to base’s password, url’s host to base’s host, url’s port to base’s port, state to path state, and then, decrease pointer by 1.
            else {
                url->m_username = base_url->m_username;
                url->m_password = base_url->m_password;
                url->m_host = base_url->m_host;
                url->m_port = base_url->m_port;
                state = State::Path;
                continue;
            }
            break;
        // -> special authority slashes state, https://url.spec.whatwg.org/#special-authority-slashes-state
        case State::SpecialAuthoritySlashes:
            // 1. If c is U+002F (/) and remaining starts with U+002F (/), then set state to special authority ignore slashes state and increase pointer by 1.
            if (code_point == '/' && get_remaining().starts_with("/"sv)) {
                state = State::SpecialAuthorityIgnoreSlashes;
                ++iterator;
            }
            // 2. Otherwise, special-scheme-missing-following-solidus validation error, set state to special authority ignore slashes state and decrease pointer by 1.
            else {
                report_validation_error();
                state = State::SpecialAuthorityIgnoreSlashes;
                continue;
            }
            break;
        // -> special authority ignore slashes state, https://url.spec.whatwg.org/#special-authority-ignore-slashes-state
        case State::SpecialAuthorityIgnoreSlashes:
            // 1. If c is neither U+002F (/) nor U+005C (\), then set state to authority state and decrease pointer by 1.
            if (code_point != '/' && code_point != '\\') {
                state = State::Authority;
                continue;
            }
            // 2. Otherwise, special-scheme-missing-following-solidus validation error.
            else {
                report_validation_error();
            }
            break;
        // -> authority state, https://url.spec.whatwg.org/#authority-state
        case State::Authority:
            // 1. If c is U+0040 (@), then:
            if (code_point == '@') {
                // 1. Invalid-credentials validation error.
                report_validation_error();

                // 2. If atSignSeen is true, then prepend "%40" to buffer.
                if (at_sign_seen) {
                    auto content = buffer.to_deprecated_string();
                    buffer.clear();
                    buffer.append("%40"sv);
                    buffer.append(content);
                }

                // 3. Set atSignSeen to true.
                at_sign_seen = true;

                StringBuilder builder;

                // 4. For each codePoint in buffer:
                for (auto c : Utf8View(buffer.string_view())) {
                    // 1. If codePoint is U+003A (:) and passwordTokenSeen is false, then set passwordTokenSeen to true and continue.
                    if (c == ':' && !password_token_seen) {
                        password_token_seen = true;
                        continue;
                    }

                    // 2. Let encodedCodePoints be the result of running UTF-8 percent-encode codePoint using the userinfo percent-encode set.
                    // NOTE: This is done inside of step 3 and 4 implementation

                    builder.clear();
                    // 3. If passwordTokenSeen is true, then append encodedCodePoints to url’s password.
                    if (password_token_seen) {
                        builder.append(url->password());
                        URL::append_percent_encoded_if_necessary(builder, c, URL::PercentEncodeSet::Userinfo);
                        url->m_password = builder.string_view();
                    }
                    // 4. Otherwise, append encodedCodePoints to url’s username.
                    else {
                        builder.append(url->username());
                        URL::append_percent_encoded_if_necessary(builder, c, URL::PercentEncodeSet::Userinfo);
                        url->m_username = builder.string_view();
                    }
                }

                // 5. Set buffer to the empty string.
                buffer.clear();

            }
            // 2. Otherwise, if one of the following is true:
            //    * c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#)
            //    * url is special and c is U+005C (\)
            else if ((code_point == end_of_file || code_point == '/' || code_point == '?' || code_point == '#')
                || (url->is_special() && code_point == '\\')) {
                // then:

                // 1. If atSignSeen is true and buffer is the empty string, invalid-credentials validation error, return failure.
                if (at_sign_seen && buffer.is_empty()) {
                    report_validation_error();
                    return {};
                }

                // 2. Decrease pointer by buffer’s code point length + 1, set buffer to the empty string, and set state to host state.
                iterator = input.iterator_at_byte_offset(iterator - input.begin() - buffer.length() - 1);
                buffer.clear();
                state = State::Host;
            }
            // 3. Otherwise, append c to buffer.
            else {
                buffer.append_code_point(code_point);
            }
            break;
        // -> host state, https://url.spec.whatwg.org/#host-state
        // -> hostname state, https://url.spec.whatwg.org/#hostname-state
        case State::Host:
        case State::Hostname:
            // 1. If state override is given and url’s scheme is "file", then decrease pointer by 1 and set state to file host state.
            if (state_override.has_value() && url->scheme() == "file") {
                state = State::FileHost;
                continue;
            }

            // 2. Otherwise, if c is U+003A (:) and insideBrackets is false, then:
            if (code_point == ':' && !inside_brackets) {
                // 1. If buffer is the empty string, host-missing validation error, return failure.
                if (buffer.is_empty()) {
                    report_validation_error();
                    return {};
                }

                // 2. If state override is given and state override is hostname state, then return.
                if (state_override.has_value() && *state_override == State::Hostname)
                    return *url;

                // 3. Let host be the result of host parsing buffer with url is not special.
                auto host = parse_host(buffer.string_view(), !url->is_special());

                // 4. If host is failure, then return failure.
                if (!host.has_value())
                    return {};

                // 5. Set url’s host to host, buffer to the empty string, and state to port state.
                url->m_host = host.release_value();
                buffer.clear();
                state = State::Port;
            }
            // 3. Otherwise, if one of the following is true:
            //    * c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#)
            //    * url is special and c is U+005C (\)
            else if ((code_point == end_of_file || code_point == '/' || code_point == '?' || code_point == '#')
                || (url->is_special() && code_point == '\\')) {
                // then decrease pointer by 1, and then:
                // NOTE: pointer decrement is done by the continue below

                // 1. If url is special and buffer is the empty string, host-missing validation error, return failure.
                if (url->is_special() && buffer.is_empty()) {
                    report_validation_error();
                    return {};
                }

                // 2. Otherwise, if state override is given, buffer is the empty string, and either url includes credentials or url’s port is non-null, return.
                if (state_override.has_value() && buffer.is_empty() && (url->includes_credentials() || url->port().has_value()))
                    return *url;

                // 3. Let host be the result of host parsing buffer with url is not special.
                auto host = parse_host(buffer.string_view(), !url->is_special());

                // 4. If host is failure, then return failure.
                if (!host.has_value())
                    return {};

                // 5. Set url’s host to host, buffer to the empty string, and state to path start state.
                url->m_host = host.value();
                buffer.clear();
                state = State::Port;

                // 6. If state override is given, then return.
                if (state_override.has_value())
                    return *url;

                continue;

            }
            // 4. Otherwise:
            else {
                // 1. If c is U+005B ([), then set insideBrackets to true.
                if (code_point == '[') {
                    inside_brackets = true;
                }
                // 2. If c is U+005D (]), then set insideBrackets to false.
                else if (code_point == ']') {
                    inside_brackets = false;
                }

                // 3. Append c to buffer.
                buffer.append_code_point(code_point);
            }
            break;
        // -> port state, https://url.spec.whatwg.org/#port-state
        case State::Port:
            // 1. If c is an ASCII digit, append c to buffer.
            if (is_ascii_digit(code_point)) {
                buffer.append_code_point(code_point);
            }

            // 2. Otherwise, if one of the following is true:
            //    * c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#)
            //    * url is special and c is U+005C (\)
            //    * state override is given
            else if ((code_point == end_of_file || code_point == '/' || code_point == '?' || code_point == '#')
                || (url->is_special() && code_point == '\\')
                || state_override.has_value()) {
                // then:

                // 1. If buffer is not the empty string, then:
                if (!buffer.is_empty()) {
                    // 1. Let port be the mathematical integer value that is represented by buffer in radix-10 using ASCII digits for digits with values 0 through 9.
                    auto port = buffer.string_view().to_uint();

                    // 2. If port is greater than 2^16 − 1, port-out-of-range validation error, return failure.
                    if (!port.has_value() || port.value() > 65535) {
                        report_validation_error();
                        return {};
                    }

                    // 3. Set url’s port to null, if port is url’s scheme’s default port; otherwise to port.
                    if (port.value() == URL::default_port_for_scheme(url->scheme()))
                        url->m_port = {};
                    else
                        url->m_port = port.value();

                    // 4. Set buffer to the empty string.
                    buffer.clear();
                }

                // 2. If state override is given, then return.
                if (state_override.has_value())
                    return *url;

                // 3. Set state to path start state and decrease pointer by 1.
                state = State::PathStart;
                continue;
            }
            // 3. Otherwise, port-invalid validation error, return failure.
            else {
                report_validation_error();
                return {};
            }
            break;
        // -> file state, https://url.spec.whatwg.org/#file-state
        case State::File:
            // 1. Set url’s scheme to "file".
            url->m_scheme = "file";

            // 2. Set url’s host to the empty string.
            url->m_host = "";

            // 3. If c is U+002F (/) or U+005C (\), then:
            if (code_point == '/' || code_point == '\\') {
                // 1. If c is U+005C (\), invalid-reverse-solidus validation error.
                if (code_point == '\\')
                    report_validation_error();

                // 2. Set state to file slash state.
                state = State::FileSlash;
            }
            // 4. Otherwise, if base is non-null and base’s scheme is "file":
            else if (base_url.has_value() && base_url->m_scheme == "file") {
                // 1. Set url’s host to base’s host, url’s path to a clone of base’s path, and url’s query to base’s query.
                url->m_host = base_url->m_host;
                url->m_paths = base_url->m_paths;
                url->m_query = base_url->m_query;

                // 2. If c is U+003F (?), then set url’s query to the empty string and state to query state.
                if (code_point == '?') {
                    url->m_query = "";
                    state = State::Query;
                }
                // 3. Otherwise, if c is U+0023 (#), set url’s fragment to the empty string and state to fragment state.
                else if (code_point == '#') {
                    url->m_fragment = "";
                    state = State::Fragment;
                }
                // 4. Otherwise, if c is not the EOF code point:
                else if (code_point != end_of_file) {
                    // 1. Set url’s query to null.
                    url->m_query = {};

                    // 2. If the code point substring from pointer to the end of input does not start with a Windows drive letter, then shorten url’s path.
                    auto substring_from_pointer = input.substring_view(iterator - input.begin()).as_string();
                    if (!starts_with_windows_drive_letter(substring_from_pointer)) {
                        if (!url->m_paths.is_empty() && !(url->scheme() == "file" && url->m_paths.size() == 1 && is_normalized_windows_drive_letter(url->m_paths[0])))
                            url->m_paths.remove(url->m_paths.size() - 1);
                    }
                    // 3. Otherwise:
                    else {
                        // 1. File-invalid-Windows-drive-letter validation error.
                        report_validation_error();

                        // 2. Set url’s path to « ».
                        url->m_paths.clear();
                    }

                    // 4. Set state to path state and decrease pointer by 1.
                    state = State::Path;
                    continue;
                }
            }
            // 5. Otherwise, set state to path state, and decrease pointer by 1.
            else {
                state = State::Path;
                continue;
            }

            break;
        // -> file slash state, https://url.spec.whatwg.org/#file-slash-state
        case State::FileSlash:
            // 1. If c is U+002F (/) or U+005C (\), then:
            if (code_point == '/' || code_point == '\\') {
                // 1. If c is U+005C (\), invalid-reverse-solidus validation error.
                if (code_point == '\\')
                    report_validation_error();

                // 2. Set state to file host state.
                state = State::FileHost;
            }
            // 2. Otherwise:
            else {
                // 1. If base is non-null and base’s scheme is "file", then:
                if (base_url.has_value() && base_url->m_scheme == "file") {
                    // 1. Set url’s host to base’s host.
                    url->m_paths = base_url->m_paths;
                    url->m_paths.remove(url->m_paths.size() - 1);

                    // 2. If the code point substring from pointer to the end of input does not start with a Windows drive letter and base’s path[0] is a normalized Windows drive letter, then append base’s path[0] to url’s path.
                    auto substring_from_pointer = input.substring_view(iterator - input.begin()).as_string();
                    if (!starts_with_windows_drive_letter(substring_from_pointer) && is_normalized_windows_drive_letter(base_url->m_paths[0]))
                        url->append_path(base_url->m_paths[0], URL::ApplyPercentEncoding::No);
                }

                // 2. Set state to path state, and decrease pointer by 1.
                state = State::Path;
                continue;
            }
            break;
        // -> file host state, https://url.spec.whatwg.org/#file-host-state
        case State::FileHost:
            // 1. If c is the EOF code point, U+002F (/), U+005C (\), U+003F (?), or U+0023 (#), then decrease pointer by 1 and then:
            //    NOTE: decreasing the pointer is done at the bottom of this block.
            if (code_point == end_of_file || code_point == '/' || code_point == '\\' || code_point == '?' || code_point == '#') {
                // 1. If state override is not given and buffer is a Windows drive letter, file-invalid-Windows-drive-letter-host validation error, set state to path state.
                if (!state_override.has_value() && is_windows_drive_letter(buffer.string_view())) {
                    report_validation_error();
                    state = State::Path;
                }
                // 2. Otherwise, if buffer is the empty string, then:
                else if (buffer.is_empty()) {
                    // 1. Set url’s host to the empty string.
                    url->m_host = "";

                    // 2. If state override is given, then return.
                    if (state_override.has_value())
                        return *url;

                    // 3. Set state to path start state.
                    state = State::PathStart;
                }
                // 3. Otherwise, run these steps:
                else {
                    // 1. Let host be the result of host parsing buffer with url is not special.
                    // FIXME: It seems we are not passing through url is not special through here
                    auto host = parse_host(buffer.string_view(), true);

                    // 2. If host is failure, then return failure.
                    if (!host.has_value())
                        return {};

                    // 3. If host is "localhost", then set host to the empty string.
                    if (host.value() == "localhost")
                        host = "";

                    // 4. Set url’s host to host.
                    url->m_host = host.release_value();

                    // 5. If state override is given, then return.
                    if (state_override.has_value())
                        return *url;

                    // 6. Set buffer to the empty string and state to path start state.
                    buffer.clear();
                    state = State::PathStart;
                }

                // NOTE: Decrement specified at the top of this 'if' statement.
                continue;
            } else {
                buffer.append_code_point(code_point);
            }
            break;
        // -> path start state, https://url.spec.whatwg.org/#path-start-state
        case State::PathStart:
            // 1. If url is special, then:
            if (url->is_special()) {
                // 1. If c is U+005C (\), invalid-reverse-solidus validation error.
                if (code_point == '\\')
                    report_validation_error();

                // 2. Set state to path state.
                state = State::Path;

                // 3. If c is neither U+002F (/) nor U+005C (\), then decrease pointer by 1.
                if (code_point != '/' && code_point != '\\')
                    continue;
            }
            // 2. Otherwise, if state override is not given and c is U+003F (?), set url’s query to the empty string and state to query state.
            else if (!state_override.has_value() && code_point == '?') {
                url->m_query = "";
                state = State::Query;
            }
            // 3. Otherwise, if state override is not given and c is U+0023 (#), set url’s fragment to the empty string and state to fragment state.
            else if (!state_override.has_value() && code_point == '#') {
                url->m_fragment = "";
                state = State::Fragment;
            }
            // 4. Otherwise, if c is not the EOF code point:
            else if (code_point != end_of_file) {
                // 1. Set state to path state.
                state = State::Path;

                // 2. If c is not U+002F (/), then decrease pointer by 1.
                if (code_point != '/')
                    continue;
            }
            // 5. Otherwise, if state override is given and url’s host is null, append the empty string to url’s path.
            else if (state_override.has_value() && url->host().is_empty()) {
                url->append_slash();
            }
            break;
        // -> path state, https://url.spec.whatwg.org/#path-state
        case State::Path:
            // 1. If one of the following is true:
            //    * c is the EOF code point or U+002F (/)
            //    * url is special and c is U+005C (\)
            //    * state override is not given and c is U+003F (?) or U+0023 (#)
            if ((code_point == end_of_file || code_point == '/')
                || (url->is_special() && code_point == '\\')
                || (!state_override.has_value() && (code_point == '?' || code_point == '#'))) {
                // then:

                // 1. If url is special and c is U+005C (\), invalid-reverse-solidus validation error.
                if (url->is_special() && code_point == '\\')
                    report_validation_error();

                // 2. If buffer is a double-dot URL path segment, then:
                if (is_double_dot_path_segment(buffer.string_view())) {
                    // 1. Shorten url’s path.
                    if (!url->m_paths.is_empty())
                        url->m_paths.remove(url->m_paths.size() - 1);

                    // 2. If neither c is U+002F (/), nor url is special and c is U+005C (\), append the empty string to url’s path.
                    if (code_point != '/' && !(url->is_special() && code_point == '\\'))
                        url->append_slash();
                }
                // 3. Otherwise, if buffer is a single-dot URL path segment and if neither c is U+002F (/), nor url is special and c is U+005C (\), append the empty string to url’s path.
                else if (is_single_dot_path_segment(buffer.string_view()) && code_point != '/' && !(url->is_special() && code_point == '\\')) {
                    url->append_slash();
                }
                // 4. Otherwise, if buffer is not a single-dot URL path segment, then:
                else if (!is_single_dot_path_segment(buffer.string_view())) {
                    // 1. If url’s scheme is "file", url’s path is empty, and buffer is a Windows drive letter, then replace the second code point in buffer with U+003A (:).
                    if (url->m_scheme == "file" && url->m_paths.is_empty() && is_windows_drive_letter(buffer.string_view())) {
                        auto drive_letter = buffer.string_view()[0];
                        buffer.clear();
                        buffer.append(drive_letter);
                        buffer.append(':');
                    }
                    // 2. Append buffer to url’s path.
                    //    FIXME: It would be nicer (and closer to spec) if URLParser could just directly append the path.
                    url->append_path(buffer.string_view(), URL::ApplyPercentEncoding::No);
                }

                // 5. Set buffer to the empty string.
                buffer.clear();

                // 6. If c is U+003F (?), then set url’s query to the empty string and state to query state.
                if (code_point == '?') {
                    url->m_query = "";
                    state = State::Query;
                }
                // 7. If c is U+0023 (#), then set url’s fragment to the empty string and state to fragment state.
                else if (code_point == '#') {
                    url->m_fragment = "";
                    state = State::Fragment;
                }
            }
            // 2. Otherwise, run these steps
            else {
                // 1. If c is not a URL code point and not U+0025 (%), invalid-URL-unit validation error.
                if (!is_url_code_point(code_point) && code_point != '%')
                    report_validation_error();

                // FIXME: 2. If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.

                // 3. UTF-8 percent-encode c using the path percent-encode set and append the result to buffer.
                URL::append_percent_encoded_if_necessary(buffer, code_point, URL::PercentEncodeSet::Path);
            }
            break;
        // -> opaque path state, https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state
        case State::CannotBeABaseUrlPath:
            // NOTE: This does not follow the spec exactly but rather uses the buffer and only sets the path on EOF.
            // NOTE: Verify that the assumptions required for this simplification are correct.
            VERIFY(url->m_paths.size() == 1 && url->m_paths[0].is_empty());

            // 1. If c is U+003F (?), then set url’s query to the empty string and state to query state.
            if (code_point == '?') {
                url->m_paths[0] = buffer.string_view();
                url->m_query = "";
                state = State::Query;
            }
            // 2. Otherwise, if c is U+0023 (#), then set url’s fragment to the empty string and state to fragment state.
            else if (code_point == '#') {
                // NOTE: This needs to be percent decoded since the member variables contain decoded data.
                url->m_paths[0] = buffer.string_view();
                url->m_fragment = "";
                state = State::Fragment;
            }
            // 3. Otherwise:
            else {
                // 1. If c is not the EOF code point, not a URL code point, and not U+0025 (%), invalid-URL-unit validation error.
                if (code_point != end_of_file && !is_url_code_point(code_point) && code_point != '%')
                    report_validation_error();

                // FIXME: 2. If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.

                // 3. If c is not the EOF code point, UTF-8 percent-encode c using the C0 control percent-encode set and append the result to url’s path.
                if (code_point != end_of_file) {
                    URL::append_percent_encoded_if_necessary(buffer, code_point, URL::PercentEncodeSet::C0Control);
                } else {
                    url->m_paths[0] = buffer.string_view();
                }
            }
            break;
        // -> query state, https://url.spec.whatwg.org/#query-state
        case State::Query:
            // FIXME: 1. If encoding is not UTF-8 and one of the following is true:
            //           * url is not special
            //           * url’s scheme is "ws" or "wss"
            //        then set encoding to UTF-8.

            // 2. If one of the following is true:
            //    * state override is not given and c is U+0023 (#)
            //    * c is the EOF code point
            if ((!state_override.has_value() && code_point == '#')
                || code_point == end_of_file) {
                VERIFY(url->m_query == "");
                // then:

                // 1. Let queryPercentEncodeSet be the special-query percent-encode set if url is special; otherwise the query percent-encode set.
                auto query_percent_encode_set = url->is_special() ? URL::PercentEncodeSet::SpecialQuery : URL::PercentEncodeSet::Query;

                // 2. Percent-encode after encoding, with encoding, buffer, and queryPercentEncodeSet, and append the result to url’s query.
                url->m_query = percent_encode_after_encoding(buffer.string_view(), query_percent_encode_set);

                // 3. Set buffer to the empty string.
                buffer.clear();

                // 4. If c is U+0023 (#), then set url’s fragment to the empty string and state to fragment state.
                if (code_point == '#') {
                    url->m_fragment = "";
                    state = State::Fragment;
                }
            }
            // 3. Otherwise, if c is not the EOF code point:
            else if (code_point != end_of_file) {
                // 1. If c is not a URL code point and not U+0025 (%), invalid-URL-unit validation error.
                if (!is_url_code_point(code_point) && code_point != '%')
                    report_validation_error();

                // FIXME: 2. If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.

                // 3. Append c to buffer.
                buffer.append_code_point(code_point);
            }
            break;
        // -> fragment state, https://url.spec.whatwg.org/#fragment-state
        case State::Fragment:
            // NOTE: This does not follow the spec exactly but rather uses the buffer and only sets the fragment on EOF.
            // 1. If c is not the EOF code point, then:
            if (code_point != end_of_file) {
                // 1. If c is not a URL code point and not U+0025 (%), invalid-URL-unit validation error.
                if (!is_url_code_point(code_point) && code_point != '%')
                    report_validation_error();

                // FIXME: 2. If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.

                // FIXME: 3. UTF-8 percent-encode c using the fragment percent-encode set and append the result to url’s fragment.
                buffer.append_code_point(code_point);
            } else {
                url->m_fragment = buffer.string_view();
                buffer.clear();
            }
            break;
        default:
            VERIFY_NOT_REACHED();
        }

        if (iterator.done())
            break;
        ++iterator;
    }

    url->m_valid = true;
    dbgln_if(URL_PARSER_DEBUG, "URLParser::parse: Parsed URL to be '{}'.", url->serialize());

    // 10. Return url.
    return url.release_value();
}

}
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								/*
 								 * Copyright (c) 2021, Max Wipfli <mail@maxwipfli.ch>
-												AK: Implement IPV4 host URL parsing to specification

This implements both the parsing and serialization IPV4 parts from
the URL spec.

											
										
										
											2023-07-23 12:09:29 +03:00
+								 * Copyright (c) 2023, Shannon Booth <shannon@serenityos.org>
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								 *
 								 * SPDX-License-Identifier: BSD-2-Clause
 								 */
-												Everywhere: Replace ctype.h to avoid narrowing conversions

This replaces ctype.h with CharacterType.h everywhere I could find
issues with narrowing conversions. While using it will probably make
sense almost everywhere in the future, the most critical places should
have been addressed.

											
										
										
											2021-06-01 22:18:08 +03:00
+								#include <AK/CharacterTypes.h>
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								#include <AK/Debug.h>
-												AK+Everywhere: Rename String to DeprecatedString

We have a new, improved string type coming up in AK (OOM aware, no null
state), and while it's going to use UTF-8, the name UTF8String is a
mouthful - so let's free up the String name by renaming the existing
class.
Making the old one have an annoying name will hopefully also help with
quick adoption :^)

											
										
										
											2022-12-04 21:02:33 +03:00
+								#include <AK/DeprecatedString.h>
-												AK: Implement IPV4 host URL parsing to specification

This implements both the parsing and serialization IPV4 parts from
the URL spec.

											
										
										
											2023-07-23 12:09:29 +03:00
+								#include <AK/IntegralMath.h>
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								#include <AK/Optional.h>
 								#include <AK/SourceLocation.h>
 								#include <AK/StringBuilder.h>
 								#include <AK/StringUtils.h>
 								#include <AK/URLParser.h>
 								#include <AK/Utf8View.h>
 								namespace AK {
-												AK: Stop using U+0000 as end of file code point in URL parser

This changes URL parser to use the 0xFFFFFFFF constant instead of 0 to
indicate end of file. This fixes a bug where inputs containing null
bytes would terminate the parser early, because they were interpreted
as end of file.

											
										
										
											2021-06-03 13:43:08 +03:00
+								// NOTE: This is similar to the LibC macro EOF = -1.
 								constexpr u32 end_of_file = 0xFFFFFFFF;
-												AK: Fix 'constexpr' attribute on non-constexpr function

is_url_code_point invokes StringView::contains, which never was and
cannot become constexpr.

											
										
										
											2022-09-12 19:32:52 +03:00
+								static bool is_url_code_point(u32 code_point)
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								{
 								    // FIXME: [...] and code points in the range U+00A0 to U+10FFFD, inclusive, excluding surrogates and noncharacters.
 								    return is_ascii_alphanumeric(code_point) || code_point >= 0xA0 || "!$&'()*+,-./:;=?@_~"sv.contains(code_point);
 								}
-												AK: Update URLParser.{cpp,h} to use east const

											
										
										
											2021-06-03 13:03:56 +03:00
+								static void report_validation_error(SourceLocation const& location = SourceLocation::current())
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								{
-												AK: Rename URLParser::parse to URLParser::basic_parse

To make it more clear that this function implements
'concept-basic-url-parser' instead of 'concept-url-parser'.

											
										
										
											2023-07-15 05:29:20 +03:00
+								    dbgln_if(URL_PARSER_DEBUG, "URLParser::basic_parse: Validation error! {}", location);
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								}
-												AK+Everywhere: Rename String to DeprecatedString

We have a new, improved string type coming up in AK (OOM aware, no null
state), and while it's going to use UTF-8, the name UTF8String is a
mouthful - so let's free up the String name by renaming the existing
class.
Making the old one have an annoying name will hopefully also help with
quick adoption :^)

											
										
										
											2022-12-04 21:02:33 +03:00
+								static Optional<DeprecatedString> parse_opaque_host(StringView input)
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								{
-												AK+Everywhere: Fix data corruption due to code-point-to-char conversion

In particular, StringView::contains(char) is often used with a u32
code point. When this is done, the compiler will for some reason allow
data corruption to occur silently.

In fact, this is one of two reasons for the following OSS Fuzz issue:
https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=49184
This is probably a very old bug.

In the particular case of URLParser, AK::is_url_code_point got confused:
    return /* ... */ || "!$&'()*+,-./:;=?@_~"sv.contains(code_point);
If code_point is a large code point that happens to have the correct
lower bytes, AK::is_url_code_point is then convinced that the given
code point is okay, even if it is actually problematic.

This commit fixes *only* the silent data corruption due to the erroneous
conversion, and does not fully resolve OSS-Fuzz#49184.

											
										
										
											2022-09-12 17:31:16 +03:00
+								    auto forbidden_host_characters_excluding_percent = "\0\t\n\r #/:<>?@[\\]^|"sv;
 								    for (auto character : forbidden_host_characters_excluding_percent) {
 								        if (input.contains(character)) {
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								            report_validation_error();
 								            return {};
 								        }
 								    }
 								    // FIXME: If input contains a code point that is not a URL code point and not U+0025 (%), validation error.
 								    // FIXME: If input contains a U+0025 (%) and the two code points following it are not ASCII hex digits, validation error.
 								    return URL::percent_encode(input, URL::PercentEncodeSet::C0Control);
 								}
-												AK: Implement IPV4 host URL parsing to specification

This implements both the parsing and serialization IPV4 parts from
the URL spec.

											
										
										
											2023-07-23 12:09:29 +03:00
+								struct ParsedIPv4Number {
 								    // Value of the parsed part; parse_ipv4_number() fills this with the spec's "output" (result[0]).
 								    u32 number { 0 };
 								    // The spec's "validationError" flag (result[1]); parse_ipv4_number() sets it when the part had a hexadecimal ("0x"/"0X") or octal (leading "0") prefix.
 								    bool validation_error { false };
 								};
 								// https://url.spec.whatwg.org/#ipv4-number-parser
 								static Optional<ParsedIPv4Number> parse_ipv4_number(StringView input)
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								{
-												AK: Implement IPV4 host URL parsing to specification

This implements both the parsing and serialization IPV4 parts from
the URL spec.

											
										
										
											2023-07-23 12:09:29 +03:00
+								    // 1. If input is the empty string, then return failure.
 								    if (input.is_empty())
 								        return {};
 								    // 2. Let validationError be false.
 								    bool validation_error = false;
 								    // 3. Let R be 10.
 								    u8 radix = 10;
 								    // 4. If input contains at least two code points and the first two code points are either "0X" or "0x", then:
 								    if (input.length() >= 2 && (input.starts_with("0X"sv) || input.starts_with("0x"sv))) {
 								        // 1. Set validationError to true.
 								        validation_error = true;
 								        // 2. Remove the first two code points from input.
 								        input = input.substring_view(2);
 								        // 3. Set R to 16.
 								        radix = 16;
 								    }
 								    // 5. Otherwise, if input contains at least two code points and the first code point is U+0030 (0), then:
 								    else if (input.length() >= 2 && input[0] == '0') {
 								        // 1. Set validationError to true.
 								        validation_error = true;
 								        // 2. Remove the first code point from input.
 								        input = input.substring_view(1);
 								        // 3. Set R to 8.
 								        radix = 8;
 								    }
 								    // 6. If input is the empty string, then return (0, true).
 								    if (input.is_empty())
 								        return ParsedIPv4Number { 0, true };
 								    // 7. If input contains a code point that is not a radix-R digit, then return failure.
 								    if (radix == 8) {
 								        if (!all_of(input, [](auto character) { return is_ascii_octal_digit(character); }))
 								            return {};
 								    } else if (radix == 10) {
 								        if (!all_of(input, [](auto character) { return is_ascii_digit(character); }))
 								            return {};
 								    } else if (radix == 16) {
 								        if (!all_of(input, [](auto character) { return is_ascii_hex_digit(character); }))
 								            return {};
 								    } else {
 								        VERIFY_NOT_REACHED();
 								    }
 								    // 8. Let output be the mathematical integer value that is represented by input in radix-R notation, using ASCII hex digits for digits with values 0 through 15.
 								    u32 output;
 								    if (radix == 8)
 								        output = StringUtils::convert_to_uint_from_octal(input).release_value();
 								    else if (radix == 10)
 								        output = input.to_uint().release_value();
 								    else if (radix == 16)
 								        output = StringUtils::convert_to_uint_from_hex(input).release_value();
 								    else
 								        VERIFY_NOT_REACHED();
 								    // 9. Return (output, validationError).
 								    return ParsedIPv4Number { output, validation_error };
 								}
 								// https://url.spec.whatwg.org/#concept-ipv4-parser
 								// Parses a dotted IPv4 host string into its 32-bit numeric form; an empty Optional signals failure.
 								static Optional<u32> parse_ipv4_address(StringView input)
 								{
 								    // 1. Let parts be the result of strictly splitting input on U+002E (.).
 								    auto segments = input.split_view("."sv, SplitBehavior::KeepEmpty);
 								    // 2. If the last item in parts is the empty string, then:
 								    bool const has_trailing_empty_segment = segments.last().is_empty();
 								    if (has_trailing_empty_segment) {
 								        // 1. IPv4-empty-part validation error.
 								        report_validation_error();
 								        // 2. If parts’s size is greater than 1, then remove the last item from parts.
 								        if (segments.size() > 1)
 								            segments.take_last();
 								    }
 								    // 3. If parts’s size is greater than 4, IPv4-too-many-parts validation error, return failure.
 								    if (segments.size() > 4) {
 								        report_validation_error();
 								        return {};
 								    }
 								    // 4. Let numbers be an empty list.
 								    Vector<u32, 4> values;
 								    // 5. For each part of parts:
 								    for (auto const& segment : segments) {
 								        // 1. Let result be the result of parsing part.
 								        auto const parsed = parse_ipv4_number(segment);
 								        // 2. If result is failure, IPv4-non-numeric-part validation error, return failure.
 								        if (!parsed.has_value()) {
 								            report_validation_error();
 								            return {};
 								        }
 								        // 3. If result[1] is true, IPv4-non-decimal-part validation error.
 								        if (parsed->validation_error)
 								            report_validation_error();
 								        // 4. Append result[0] to numbers.
 								        values.append(parsed->number);
 								    }
 								    // 6. If any item in numbers is greater than 255, IPv4-out-of-range-part validation error.
 								    // 7. If any but the last item in numbers is greater than 255, then return failure.
 								    auto const last_index = values.size() - 1;
 								    for (size_t index = 0; index < values.size(); ++index) {
 								        if (values[index] <= 255)
 								            continue;
 								        report_validation_error();
 								        if (index != last_index)
 								            return {};
 								    }
 								    // 8. If the last item in numbers is greater than or equal to 256^(5 − numbers’s size), then return failure.
 								    auto const last_value_limit = pow<size_t>(256, 5 - values.size());
 								    if (values.last() >= last_value_limit)
 								        return {};
 								    // 9. Let ipv4 be the last item in numbers.
 								    // 10. Remove the last item from numbers.
 								    u32 ipv4 = values.take_last();
 								    // 11. Let counter be 0.
 								    // 12. For each n of numbers:
 								    for (size_t counter = 0; counter < values.size(); ++counter) {
 								        // 1. Increment ipv4 by n × 256^(3 − counter).
 								        // 2. Increment counter by 1 (done by the loop header).
 								        ipv4 += values[counter] * pow<size_t>(256, 3 - counter);
 								    }
 								    // 13. Return ipv4.
 								    return ipv4;
 								}
 								// https://url.spec.whatwg.org/#concept-ipv4-serializer
 								static ErrorOr<String> serialize_ipv4_address(u32 address)
 								{
 								    // 1. Let output be the empty string.
 								    // NOTE: Array to avoid prepend.
 								    Array<u8, 4> output;
 								    // 2. Let n be the value of address.
 								    u32 n = address;
 								    // 3. For each i in the range 1 to 4, inclusive:
 								    for (size_t i = 0; i <= 3; ++i) {
 								        // 1. Prepend n % 256, serialized, to output.
 								        output[3 - i] = n % 256;
 								        // 2. If i is not 4, then prepend U+002E (.) to output.
 								        // NOTE: done at end
 								        // 3. Set n to floor(n / 256).
 								        n /= 256;
 								    }
 								    // 4. Return output.
 								    return String::formatted("{}.{}.{}.{}", output[0], output[1], output[2], output[3]);
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								}
-												AK: Implement IPv6 host parsing in URLParser

This is just a straight (and fairly inefficient) implementation of IPv6
parsing and serialization from the URL spec.

Note that we don't use AK::IPv6Address here because the URL spec
requires a specific serialization behavior.

											
										
										
											2023-07-17 07:52:29 +03:00
+								// https://url.spec.whatwg.org/#concept-ipv6-serializer
 								// Serializes the eight 16-bit pieces of an IPv6 address as lowercase hexadecimal, replacing the first
 								// longest run of two or more zero pieces with "::". Returns whatever StringBuilder::to_string() yields.
 								static ErrorOr<String> serialize_ipv6_address(Array<u16, 8> const& address)
 								{
 								    // 1. Let output be the empty string.
 								    StringBuilder output;
 								    // 2. Let compress be an index to the first IPv6 piece in the first longest sequences of address’s IPv6 pieces that are 0.
 								    // 3. If there is no sequence of address’s IPv6 pieces that are 0 that is longer than 1, then set compress to null.
 								    // NOTE: The best length starts at 1, so only runs of two or more zero pieces can be selected (step 3),
 								    //       and the strict comparison keeps the first of several equally long runs (step 2).
 								    Optional<size_t> compress;
 								    size_t longest_sequence_length = 1;
 								    size_t current_sequence_length = 0;
 								    size_t current_sequence_start = 0;
 								    for (size_t i = 0; i < 8; ++i) {
 								        if (address[i] != 0) {
 								            current_sequence_length = 0;
 								            continue;
 								        }
 								        if (current_sequence_length == 0)
 								            current_sequence_start = i;
 								        ++current_sequence_length;
 								        // NOTE: The run is compared while it grows, so a run that extends to the last piece is not missed.
 								        if (current_sequence_length > longest_sequence_length) {
 								            longest_sequence_length = current_sequence_length;
 								            compress = current_sequence_start;
 								        }
 								    }
 								    // 4. Let ignore0 be false.
 								    auto ignore0 = false;
 								    // 5. For each pieceIndex in the range 0 to 7, inclusive:
 								    for (size_t piece_index = 0; piece_index <= 7; ++piece_index) {
 								        // 1. If ignore0 is true and address[pieceIndex] is 0, then continue.
 								        if (ignore0 && address[piece_index] == 0)
 								            continue;
 								        // 2. Otherwise, if ignore0 is true, set ignore0 to false.
 								        if (ignore0)
 								            ignore0 = false;
 								        // 3. If compress is pieceIndex, then:
 								        if (compress == piece_index) {
 								            // 1. Let separator be "::" if pieceIndex is 0, and U+003A (:) otherwise.
 								            // NOTE: For a non-zero index the previous piece already appended one ':', so a single ':' completes the "::".
 								            auto separator = piece_index == 0 ? "::"sv : ":"sv;
 								            // 2. Append separator to output.
 								            output.append(separator);
 								            // 3. Set ignore0 to true and continue.
 								            ignore0 = true;
 								            continue;
 								        }
 								        // 4. Append address[pieceIndex], represented as the shortest possible lowercase hexadecimal number, to output.
 								        output.appendff("{:x}", address[piece_index]);
 								        // 5. If pieceIndex is not 7, then append U+003A (:) to output.
 								        if (piece_index != 7)
 								            output.append(':');
 								    }
 								    // 6. Return output.
 								    return output.to_string();
 								}
 								// https://url.spec.whatwg.org/#concept-ipv6-parser
 								// Parses `input` (the text between the brackets of an IPv6 host) into its eight 16-bit pieces,
 								// following the numbered steps of the URL specification's IPv6 parser.
 								// Returns the pieces on success. On any validation error, report_validation_error() is called
 								// and an empty Optional is returned.
 								static Optional<Array<u16, 8>> parse_ipv6_address(StringView input)
 								{
 								    // 1. Let address be a new IPv6 address whose IPv6 pieces are all 0.
 								    Array<u16, 8> address {};
 								    // 2. Let pieceIndex be 0.
 								    size_t piece_index = 0;
 								    // 3. Let compress be null.
 								    Optional<size_t> compress;
 								    // Decode the input into a vector of code points up front, so that `pointer` below is a plain
 								    // index that can be moved forward and backward.
 								    Vector<u32> code_points;
 								    code_points.ensure_capacity(input.length());
 								    for (auto code_point : Utf8View { input }) {
 								        code_points.append(code_point);
 								    }
 								    // 4. Let pointer be a pointer for input.
 								    size_t pointer = 0;
 								    // c: the code point at pointer, or end_of_file once pointer is at or past the end.
 								    auto c = [&]() -> u32 {
 								        if (pointer >= code_points.size())
 								            return end_of_file;
 								        return code_points[pointer];
 								    };
 								    // remaining: the code points after the one at pointer (empty when there are none).
 								    auto remaining = [&]() -> ReadonlySpan<u32> {
 								        if ((pointer + 1) >= code_points.size())
 								            return {};
 								        return code_points.span().slice(pointer + 1);
 								    };
 								    // 5. If c is U+003A (:), then:
 								    if (c() == ':') {
 								        // 1. If remaining does not start with U+003A (:), IPv6-invalid-compression validation error, return failure.
 								        if (remaining().is_empty() || remaining()[0] != ':') {
 								            report_validation_error();
 								            return {};
 								        }
 								        // 2. Increase pointer by 2.
 								        pointer += 2;
 								        // 3. Increase pieceIndex by 1 and then set compress to pieceIndex.
 								        ++piece_index;
 								        compress = piece_index;
 								    }
 								    // 6. While c is not the EOF code point:
 								    while (c() != end_of_file) {
 								        // 1. If pieceIndex is 8, IPv6-too-many-pieces validation error, return failure.
 								        if (piece_index == 8) {
 								            report_validation_error();
 								            return {};
 								        }
 								        // 2. If c is U+003A (:), then:
 								        if (c() == ':') {
 								            // 1. If compress is non-null, IPv6-multiple-compression validation error, return failure.
 								            if (compress.has_value()) {
 								                report_validation_error();
 								                return {};
 								            }
 								            // 2. Increase pointer and pieceIndex by 1, set compress to pieceIndex, and then continue.
 								            ++pointer;
 								            ++piece_index;
 								            compress = piece_index;
 								            continue;
 								        }
 								        // 3. Let value and length be 0.
 								        u32 value = 0;
 								        size_t length = 0;
 								        // 4. While length is less than 4 and c is an ASCII hex digit,
 								        //    set value to value × 0x10 + c interpreted as hexadecimal number,
 								        //    and increase pointer and length by 1.
 								        while (length < 4 && is_ascii_hex_digit(c())) {
 								            value = value * 0x10 + parse_ascii_hex_digit(c());
 								            ++pointer;
 								            ++length;
 								        }
 								        // 5. If c is U+002E (.), then:
 								        if (c() == '.') {
 								            // 1. If length is 0, IPv4-in-IPv6-invalid-code-point validation error, return failure.
 								            if (length == 0) {
 								                report_validation_error();
 								                return {};
 								            }
 								            // 2. Decrease pointer by length.
 								            //    (The digits just consumed as hexadecimal are re-read below as decimal.)
 								            pointer -= length;
 								            // 3. If pieceIndex is greater than 6, IPv4-in-IPv6-too-many-pieces validation error, return failure.
 								            if (piece_index > 6) {
 								                report_validation_error();
 								                return {};
 								            }
 								            // 4. Let numbersSeen be 0.
 								            size_t numbers_seen = 0;
 								            // 5. While c is not the EOF code point:
 								            while (c() != end_of_file) {
 								                // 1. Let ipv4Piece be null.
 								                Optional<u32> ipv4_piece;
 								                // 2. If numbersSeen is greater than 0, then:
 								                if (numbers_seen > 0) {
 								                    // 1. If c is a U+002E (.) and numbersSeen is less than 4, then increase pointer by 1.
 								                    if (c() == '.' && numbers_seen < 4) {
 								                        ++pointer;
 								                    }
 								                    // 2. Otherwise, IPv4-in-IPv6-invalid-code-point validation error, return failure.
 								                    else {
 								                        report_validation_error();
 								                        return {};
 								                    }
 								                }
 								                // 3. If c is not an ASCII digit, IPv4-in-IPv6-invalid-code-point validation error, return failure.
 								                if (!is_ascii_digit(c())) {
 								                    report_validation_error();
 								                    return {};
 								                }
 								                // 4. While c is an ASCII digit:
 								                while (is_ascii_digit(c())) {
 								                    // 1. Let number be c interpreted as decimal number.
 								                    u32 number = parse_ascii_digit(c());
 								                    // 2. If ipv4Piece is null, then set ipv4Piece to number.
 								                    if (!ipv4_piece.has_value()) {
 								                        ipv4_piece = number;
 								                    }
 								                    // Otherwise, if ipv4Piece is 0, IPv4-in-IPv6-invalid-code-point validation error, return failure.
 								                    // (This rejects a leading zero followed by further digits.)
 								                    else if (ipv4_piece.value() == 0) {
 								                        report_validation_error();
 								                        return {};
 								                    }
 								                    // Otherwise, set ipv4Piece to ipv4Piece × 10 + number.
 								                    else {
 								                        ipv4_piece = ipv4_piece.value() * 10 + number;
 								                    }
 								                    // 3. If ipv4Piece is greater than 255, IPv4-in-IPv6-out-of-range-part validation error, return failure.
 								                    if (ipv4_piece.value() > 255) {
 								                        report_validation_error();
 								                        return {};
 								                    }
 								                    // 4. Increase pointer by 1.
 								                    ++pointer;
 								                }
 								                // 5. Set address[pieceIndex] to address[pieceIndex] × 0x100 + ipv4Piece.
 								                //    (Two decimal parts are packed into each 16-bit piece.)
 								                address[piece_index] = address[piece_index] * 0x100 + ipv4_piece.value();
 								                // 6. Increase numbersSeen by 1.
 								                ++numbers_seen;
 								                // 7. If numbersSeen is 2 or 4, then increase pieceIndex by 1.
 								                if (numbers_seen == 2 || numbers_seen == 4)
 								                    ++piece_index;
 								            }
 								            // 6. If numbersSeen is not 4, IPv4-in-IPv6-too-few-parts validation error, return failure.
 								            if (numbers_seen != 4) {
 								                report_validation_error();
 								                return {};
 								            }
 								            // 7. Break.
 								            break;
 								        }
 								        // 6. Otherwise, if c is U+003A (:):
 								        else if (c() == ':') {
 								            // 1. Increase pointer by 1.
 								            ++pointer;
 								            // 2. If c is the EOF code point, IPv6-invalid-code-point validation error, return failure.
 								            if (c() == end_of_file) {
 								                report_validation_error();
 								                return {};
 								            }
 								        }
 								        // 7. Otherwise, if c is not the EOF code point, IPv6-invalid-code-point validation error, return failure.
 								        else if (c() != end_of_file) {
 								            report_validation_error();
 								            return {};
 								        }
 								        // 8. Set address[pieceIndex] to value.
 								        address[piece_index] = value;
 								        // 9. Increase pieceIndex by 1.
 								        ++piece_index;
 								    }
 								    // 7. If compress is non-null, then:
 								    if (compress.has_value()) {
 								        // 1. Let swaps be pieceIndex − compress.
 								        size_t swaps = piece_index - compress.value();
 								        // 2. Set pieceIndex to 7.
 								        piece_index = 7;
 								        // 3. While pieceIndex is not 0 and swaps is greater than 0,
 								        //    swap address[pieceIndex] with address[compress + swaps − 1],
 								        //    and then decrease both pieceIndex and swaps by 1.
 								        //    (This moves the pieces parsed after the compression point to the end of the address.)
 								        while (piece_index != 0 && swaps > 0) {
 								            swap(address[piece_index], address[compress.value() + swaps - 1]);
 								            --piece_index;
 								            --swaps;
 								        }
 								    }
 								    // 8. Otherwise, if compress is null and pieceIndex is not 8, IPv6-too-few-pieces validation error, return failure.
 								    else if (!compress.has_value() && piece_index != 8) {
 								        report_validation_error();
 								        return {};
 								    }
 								    // 9. Return address.
 								    return address;
 								}
-												AK: Fix url host parsing check for 'ends in a number'

I misunderstood the spec step for checking whether the host 'ends with a
number'. We can't simply check for it if ends with a number, this check
is actually an algorithm which is required to avoid detecting hosts that
end with a number from an IPv4 host.

Implement this missing step, and add a test to cover this.

											
										
										
											2023-07-25 10:43:00 +03:00
+								// https://url.spec.whatwg.org/#ends-in-a-number-checker
 								// Decides whether the last dot-separated label of `input` (ignoring one trailing empty label)
 								// looks like a number: either all ASCII digits, or accepted by parse_ipv4_number().
 								static bool ends_in_a_number_checker(StringView input)
 								{
 								    // Steps 1-2: strictly split on '.', then drop a single trailing empty label.
 								    auto labels = input.split_view("."sv, SplitBehavior::KeepEmpty);
 								    if (labels.last().is_empty()) {
 								        // A lone empty label means there is nothing that could be a number.
 								        if (labels.size() == 1)
 								            return false;
 								        labels.take_last();
 								    }
 								    // Step 3: the label under test.
 								    auto final_label = labels.last();
 								    // Step 4: a non-empty run of ASCII digits counts as a number.
 								    bool const is_all_digits = !final_label.is_empty() && all_of(final_label, is_ascii_digit);
 								    // Steps 5-6: otherwise it is a number only if the IPv4 number parser accepts it.
 								    return is_all_digits || parse_ipv4_number(final_label).has_value();
 								}
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								// https://url.spec.whatwg.org/#concept-host-parser
 								// NOTE: This is a very bare-bones implementation.
-												AK+Everywhere: Rename String to DeprecatedString

We have a new, improved string type coming up in AK (OOM aware, no null
state), and while it's going to use UTF-8, the name UTF8String is a
mouthful - so let's free up the String name by renaming the existing
class.
Making the old one have an annoying name will hopefully also help with
quick adoption :^)

											
										
										
											2022-12-04 21:02:33 +03:00
 								// Parses `input` as a host and returns its serialized form, or an empty Optional on failure.
 								// - Input starting with '[' must end with ']'; the text in between is parsed by parse_ipv6_address()
 								//   and the result of serialize_ipv6_address() is returned.
 								// - Otherwise, when `is_not_special` is true, the result of parse_opaque_host() is returned.
 								// - Otherwise the input is percent-decoded, rejected if it contains a forbidden host character,
 								//   and returned either as a serialized IPv4 address (when it ends in a number) or as-is.
+								static Optional<DeprecatedString> parse_host(StringView input, bool is_not_special = false)
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								{
-												AK: Implement IPv6 host parsing in URLParser

This is just a straight (and fairly inefficient) implementation of IPv6
parsing and serialization from the URL spec.

Note that we don't use AK::IPv6Address here because the URL spec
requires a specific serialization behavior.

											
										
										
											2023-07-17 07:52:29 +03:00
+								    // 1. If input starts with U+005B ([), then:
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								    if (input.starts_with('[')) {
-												AK: Implement IPv6 host parsing in URLParser

This is just a straight (and fairly inefficient) implementation of IPv6
parsing and serialization from the URL spec.

Note that we don't use AK::IPv6Address here because the URL spec
requires a specific serialization behavior.

											
										
										
											2023-07-17 07:52:29 +03:00
+								        // 1. If input does not end with U+005D (]), IPv6-unclosed validation error, return failure.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								        if (!input.ends_with(']')) {
 								            report_validation_error();
 								            return {};
 								        }
-												AK: Implement IPv6 host parsing in URLParser

This is just a straight (and fairly inefficient) implementation of IPv6
parsing and serialization from the URL spec.

Note that we don't use AK::IPv6Address here because the URL spec
requires a specific serialization behavior.

											
										
										
											2023-07-17 07:52:29 +03:00
 								        // 2. Return the result of IPv6 parsing input with its leading U+005B ([) and trailing U+005D (]) removed.
 								        auto address = parse_ipv6_address(input.substring_view(1, input.length() - 2));
 								        if (!address.has_value())
 								            return {};
 								        auto result = serialize_ipv6_address(*address);
 								        if (result.is_error())
 								            return {};
 								        return result.release_value().to_deprecated_string();
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								    }
-												AK: Implement IPV4 host URL parsing to specification

This implements both the parsing and serialization IPV4 parts from
the URL spec.

											
										
										
											2023-07-23 12:09:29 +03:00
+								    // 2. If isNotSpecial is true, then return the result of opaque-host parsing input.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								    if (is_not_special)
 								        return parse_opaque_host(input);
-												AK: Implement IPV4 host URL parsing to specification

This implements both the parsing and serialization IPV4 parts from
the URL spec.

											
										
										
											2023-07-23 12:09:29 +03:00
 								    // 3. Assert: input is not the empty string.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								    VERIFY(!input.is_empty());
-												AK: Implement IPV4 host URL parsing to specification

This implements both the parsing and serialization IPV4 parts from
the URL spec.

											
										
										
											2023-07-23 12:09:29 +03:00
+								    // FIXME: 4. Let domain be the result of running UTF-8 decode without BOM on the percent-decoding of input.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								    auto domain = URL::percent_decode(input);
-												AK: Implement IPV4 host URL parsing to specification

This implements both the parsing and serialization IPV4 parts from
the URL spec.

											
										
										
											2023-07-23 12:09:29 +03:00
 								    // FIXME: 5. Let asciiDomain be the result of running domain to ASCII on domain.
 								    // FIXME: 6. If asciiDomain is failure, then return failure.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
 								    // NOTE: Until steps 5 and 6 are implemented, asciiDomain is only an alias for the percent-decoded input.
+								    auto& ascii_domain = domain;
-												AK: Implement IPV4 host URL parsing to specification

This implements both the parsing and serialization IPV4 parts from
the URL spec.

											
										
										
											2023-07-23 12:09:29 +03:00
+								    // 7. If asciiDomain contains a forbidden domain code point, domain-invalid-code-point validation error, return failure.
-												AK+Everywhere: Fix data corruption due to code-point-to-char conversion

In particular, StringView::contains(char) is often used with a u32
code point. When this is done, the compiler will for some reason allow
data corruption to occur silently.

In fact, this is one of two reasons for the following OSS Fuzz issue:
https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=49184
This is probably a very old bug.

In the particular case of URLParser, AK::is_url_code_point got confused:
    return /* ... */ || "!$&'()*+,-./:;=?@_~"sv.contains(code_point);
If code_point is a large code point that happens to have the correct
lower bytes, AK::is_url_code_point is then convinced that the given
code point is okay, even if it is actually problematic.

This commit fixes *only* the silent data corruption due to the erroneous
conversion, and does not fully resolve OSS-Fuzz#49184.

											
										
										
											2022-09-12 17:31:16 +03:00
+								    auto forbidden_host_characters = "\0\t\n\r #%/:<>?@[\\]^|"sv;
 								    for (auto character : forbidden_host_characters) {
 								        if (ascii_domain.view().contains(character)) {
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								            report_validation_error();
 								            return {};
 								        }
 								    }
-												AK: Implement IPV4 host URL parsing to specification

This implements both the parsing and serialization IPV4 parts from
the URL spec.

											
										
										
											2023-07-23 12:09:29 +03:00
+								    // 8. If asciiDomain ends in a number, then return the result of IPv4 parsing asciiDomain.
-												AK: Fix url host parsing check for 'ends in a number'

I misunderstood the spec step for checking whether the host 'ends with a
number'. We can't simply check for it if ends with a number, this check
is actually an algorithm which is required to avoid detecting hosts that
end with a number from an IPv4 host.

Implement this missing step, and add a test to cover this.

											
										
										
											2023-07-25 10:43:00 +03:00
+								    if (ends_in_a_number_checker(ascii_domain)) {
-												AK: Implement IPV4 host URL parsing to specification

This implements both the parsing and serialization IPV4 parts from
the URL spec.

											
										
										
											2023-07-23 12:09:29 +03:00
+								        auto ipv4_host = parse_ipv4_address(ascii_domain);
 								        if (!ipv4_host.has_value())
 								            return {};
 								        auto result = serialize_ipv4_address(*ipv4_host);
 								        if (result.is_error())
 								            return {};
 								        return result.release_value().to_deprecated_string();
 								    }
 								    // 9. Return asciiDomain.
 								    return ascii_domain;
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								}
-												AK: Fix bad parsing of some file:/// URLs with base URL

We were dropping the base URL path components in the resulting URL due
to mistakenly determining the input URL to start with a Windows drive
letter. Fix this, add a spec link, and a test.

											
										
										
											2022-09-20 16:38:53 +03:00
+								// https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
-												Everywhere: Pass AK::StringView by value

											
										
										
											2021-11-11 02:55:02 +03:00
 								// Returns true when `input` begins with an ASCII letter followed by ':' or '|', and either
 								// has exactly two characters or has '/', '\', '?' or '#' as its third character.
+								constexpr bool starts_with_windows_drive_letter(StringView input)
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								{
 								    // Fewer than two characters cannot hold a drive letter and its separator.
 								    if (input.length() < 2)
 								        return false;
-												AK: Fix bad parsing of some file:/// URLs with base URL

We were dropping the base URL path components in the resulting URL due
to mistakenly determining the input URL to start with a Windows drive
letter. Fix this, add a spec link, and a test.

											
										
										
											2022-09-20 16:38:53 +03:00
+								    if (!is_ascii_alpha(input[0]) || !(input[1] == ':' || input[1] == '|'))
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								        return false;
 								    if (input.length() == 2)
 								        return true;
 								    // With more input, the drive letter must be followed by one of these delimiters.
 								    return "/\\?#"sv.contains(input[2]);
 								}
-												Everywhere: Pass AK::StringView by value

											
										
										
											2021-11-11 02:55:02 +03:00
 								// Returns true when `input` is exactly two characters: an ASCII letter followed by ':' or '|'.
+								constexpr bool is_windows_drive_letter(StringView input)
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								{
 								    return input.length() == 2 && is_ascii_alpha(input[0]) && (input[1] == ':' || input[1] == '|');
 								}
-												Everywhere: Pass AK::StringView by value

											
										
										
											2021-11-11 02:55:02 +03:00
 								// Returns true when `input` is exactly two characters: an ASCII letter followed by ':'
 								// (the '|' form is not accepted here).
+								constexpr bool is_normalized_windows_drive_letter(StringView input)
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								{
 								    return input.length() == 2 && is_ascii_alpha(input[0]) && input[1] == ':';
 								}
-												Everywhere: Pass AK::StringView by value

											
										
										
											2021-11-11 02:55:02 +03:00
 								// Returns true when `input` is "." or its percent-encoded form "%2e" (compared ignoring ASCII case).
+								constexpr bool is_single_dot_path_segment(StringView input)
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								{
-												Everywhere: Rename equals_ignoring_case => equals_ignoring_ascii_case

Let's make it clear that these functions deal with ASCII case only.

											
										
										
											2023-03-10 10:48:54 +03:00
+								    return input == "."sv || input.equals_ignoring_ascii_case("%2e"sv);
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								}
-												Everywhere: Pass AK::StringView by value

											
										
										
											2021-11-11 02:55:02 +03:00
 								// Returns true when `input` is ".." with either dot optionally written as "%2e"
 								// (the percent-encoded forms are compared ignoring ASCII case).
+								constexpr bool is_double_dot_path_segment(StringView input)
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								{
-												Everywhere: Rename equals_ignoring_case => equals_ignoring_ascii_case

Let's make it clear that these functions deal with ASCII case only.

											
										
										
											2023-03-10 10:48:54 +03:00
+								    return input == ".."sv || input.equals_ignoring_ascii_case(".%2e"sv) || input.equals_ignoring_ascii_case("%2e."sv) || input.equals_ignoring_ascii_case("%2e%2e"sv);
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								}
-												AK: Don't destructively re-encode query strings in the URL parser

We were decoding and then re-encoding the query string in URLs.
This round-trip caused us to lose information about plus ('+')
ASCII characters encoded as "%2B".

											
										
										
											2022-04-10 01:48:15 +03:00
+								// https://url.spec.whatwg.org/#string-percent-encode-after-encoding
-												AK: Expose URLParser::percent_encode_after_encoding

This function is useful in places outside of the URLParser in LibWeb.

											
										
										
											2023-06-25 05:11:34 +03:00
+								DeprecatedString URLParser::percent_encode_after_encoding(StringView input, URL::PercentEncodeSet percent_encode_set, bool space_as_plus)
-												AK: Don't destructively re-encode query strings in the URL parser

We were decoding and then re-encoding the query string in URLs.
This round-trip caused us to lose information about plus ('+')
ASCII characters encoded as "%2B".

											
										
										
											2022-04-10 01:48:15 +03:00
+								{
 								    // NOTE: This is written somewhat ad-hoc since we don't yet implement the Encoding spec.
 								    StringBuilder output;
 								    // 3. For each byte of encodeOutput converted to a byte sequence:
 								    for (auto byte : input) {
 								        // 1. If spaceAsPlus is true and byte is 0x20 (SP), then append U+002B (+) to output and continue.
 								        if (space_as_plus && byte == ' ') {
 								            output.append('+');
 								            continue;
 								        }
 								        // 2. Let isomorph be a code point whose value is byte’s value.
 								        u32 isomorph = byte;
 								        // 3. Assert: percentEncodeSet includes all non-ASCII code points.
 								        // 4. If isomorphic is not in percentEncodeSet, then append isomorph to output.
 								        if (!URL::code_point_is_in_percent_encode_set(isomorph, percent_encode_set)) {
 								            output.append_code_point(isomorph);
 								        }
 								        // 5. Otherwise, percent-encode byte and append the result to output.
 								        else {
 								            output.appendff("%{:02X}", byte);
 								        }
 								    }
 								    // 6. Return output.
-												Everywhere: Rename to_{string => deprecated_string}() where applicable

This will make it easier to support both string types at the same time
while we convert code, and tracking down remaining uses.

One big exception is Value::to_string() in LibJS, where the name is
dictated by the ToString AO.

											
										
										
											2022-12-06 04:12:49 +03:00
+								    return output.to_deprecated_string();
-												AK: Don't destructively re-encode query strings in the URL parser

We were decoding and then re-encoding the query string in URLs.
This round-trip caused us to lose information about plus ('+')
ASCII characters encoded as "%2B".

											
										
										
											2022-04-10 01:48:15 +03:00
+								}
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								// https://fetch.spec.whatwg.org/#data-urls
-												AK: Improve the parsing of data urls

Improve the parsing of data urls in URLParser to bring it more up-to-
spec. At the moment, we cannot parse the components of the MIME type
since it is represented as a string, but the spec requires it to be
parsed as a "MIME type record".

											
										
										
											2021-08-04 19:29:06 +03:00
+								// FIXME: This only loosely follows the spec, as we use the same class for "regular" and data URLs, unlike the spec.
-												Everywhere: Pass AK::StringView by value

											
										
										
											2021-11-11 02:55:02 +03:00
+								Optional<URL> URLParser::parse_data_url(StringView raw_input)
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								{
 								    dbgln_if(URL_PARSER_DEBUG, "URLParser::parse_data_url: Parsing '{}'.", raw_input);
-												Everywhere: Add sv suffix to strings relying on StringView(char const*)

Each of these strings would previously rely on StringView's char const*
constructor overload, which would call __builtin_strlen on the string.
Since we now have operator ""sv, we can replace these with much simpler
versions. This opens the door to being able to remove
StringView(char const*).

No functional changes.

											
										
										
											2022-07-11 20:32:29 +03:00
+								    VERIFY(raw_input.starts_with("data:"sv));
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								    auto input = raw_input.substring_view(5);
 								    auto comma_offset = input.find(',');
 								    if (!comma_offset.has_value())
 								        return {};
-												Everywhere: Add sv suffix to strings relying on StringView(char const*)

Each of these strings would previously rely on StringView's char const*
constructor overload, which would call __builtin_strlen on the string.
Since we now have operator ""sv, we can replace these with much simpler
versions. This opens the door to being able to remove
StringView(char const*).

No functional changes.

											
										
										
											2022-07-11 20:32:29 +03:00
+								    auto mime_type = StringUtils::trim(input.substring_view(0, comma_offset.value()), "\t\n\f\r "sv, TrimMode::Both);
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								    auto encoded_body = input.substring_view(comma_offset.value() + 1);
 								    auto body = URL::percent_decode(encoded_body);
-												AK: Improve the parsing of data urls

Improve the parsing of data urls in URLParser to bring it more up-to-
spec. At the moment, we cannot parse the components of the MIME type
since it is represented as a string, but the spec requires it to be
parsed as a "MIME type record".

											
										
										
											2021-08-04 19:29:06 +03:00
+								    bool is_base64_encoded = false;
-												Everywhere: Add sv suffix to strings relying on StringView(char const*)

Each of these strings would previously rely on StringView's char const*
constructor overload, which would call __builtin_strlen on the string.
Since we now have operator ""sv, we can replace these with much simpler
versions. This opens the door to being able to remove
StringView(char const*).

No functional changes.

											
										
										
											2022-07-11 20:32:29 +03:00
+								    if (mime_type.ends_with("base64"sv, CaseSensitivity::CaseInsensitive)) {
-												AK: Improve the parsing of data urls

Improve the parsing of data urls in URLParser to bring it more up-to-
spec. At the moment, we cannot parse the components of the MIME type
since it is represented as a string, but the spec requires it to be
parsed as a "MIME type record".

											
										
										
											2021-08-04 19:29:06 +03:00
+								        auto substring_view = mime_type.substring_view(0, mime_type.length() - 6);
-												Everywhere: Add sv suffix to strings relying on StringView(char const*)

Each of these strings would previously rely on StringView's char const*
constructor overload, which would call __builtin_strlen on the string.
Since we now have operator ""sv, we can replace these with much simpler
versions. This opens the door to being able to remove
StringView(char const*).

No functional changes.

											
										
										
											2022-07-11 20:32:29 +03:00
+								        auto trimmed_substring_view = StringUtils::trim(substring_view, " "sv, TrimMode::Right);
-												AK: Improve the parsing of data urls

Improve the parsing of data urls in URLParser to bring it more up-to-
spec. At the moment, we cannot parse the components of the MIME type
since it is represented as a string, but the spec requires it to be
parsed as a "MIME type record".

											
										
										
											2021-08-04 19:29:06 +03:00
+								        if (trimmed_substring_view.ends_with(';')) {
 								            is_base64_encoded = true;
 								            mime_type = trimmed_substring_view.substring_view(0, trimmed_substring_view.length() - 1);
 								        }
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								    }
-												AK: Improve the parsing of data urls

Improve the parsing of data urls in URLParser to bring it more up-to-
spec. At the moment, we cannot parse the components of the MIME type
since it is represented as a string, but the spec requires it to be
parsed as a "MIME type record".

											
										
										
											2021-08-04 19:29:06 +03:00
+								    StringBuilder builder;
-												Everywhere: Add sv suffix to strings relying on StringView(char const*)

Each of these strings would previously rely on StringView's char const*
constructor overload, which would call __builtin_strlen on the string.
Since we now have operator ""sv, we can replace these with much simpler
versions. This opens the door to being able to remove
StringView(char const*).

No functional changes.

											
										
										
											2022-07-11 20:32:29 +03:00
+								    if (mime_type.starts_with(";"sv) || mime_type.is_empty()) {
 								        builder.append("text/plain"sv);
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								        builder.append(mime_type);
-												AK: Improve the parsing of data urls

Improve the parsing of data urls in URLParser to bring it more up-to-
spec. At the moment, we cannot parse the components of the MIME type
since it is represented as a string, but the spec requires it to be
parsed as a "MIME type record".

											
										
										
											2021-08-04 19:29:06 +03:00
+								        mime_type = builder.string_view();
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								    }
-												AK: Improve the parsing of data urls

Improve the parsing of data urls in URLParser to bring it more up-to-
spec. At the moment, we cannot parse the components of the MIME type
since it is represented as a string, but the spec requires it to be
parsed as a "MIME type record".

											
										
										
											2021-08-04 19:29:06 +03:00
+								    // FIXME: Parse the MIME type's components according to https://mimesniff.spec.whatwg.org/#parse-a-mime-type
-												Everywhere: Add sv suffix to strings relying on StringView(char const*)

Each of these strings would previously rely on StringView's char const*
constructor overload, which would call __builtin_strlen on the string.
Since we now have operator ""sv, we can replace these with much simpler
versions. This opens the door to being able to remove
StringView(char const*).

No functional changes.

											
										
										
											2022-07-11 20:32:29 +03:00
+								    URL url { StringUtils::trim(mime_type, "\n\r\t "sv, TrimMode::Both), move(body), is_base64_encoded };
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								    dbgln_if(URL_PARSER_DEBUG, "URLParser::parse_data_url: Parsed data URL to be '{}'.", url.serialize());
 								    return url;
 								}
 								// https://url.spec.whatwg.org/#concept-basic-url-parser
 								// NOTE: This parser assumes a UTF-8 encoding.
 								// NOTE: Refrain from using the URL class's setters inside this algorithm. Rather, set the values directly. This bypasses the setters' built-in
 								//       validation, which is strictly unnecessary since we set m_valid=true at the end anyways. Furthermore, this algorithm may be used in the
 								//       future for validation of URLs, which would then lead to infinite recursion.
 								//       The same goes for base_url, because e.g. the port() getter does not always return m_port, and we are interested in the underlying member
 								//       variables' values here, not what the URL class presents to its users.
-												AK: Rename URLParser::parse to URLParser::basic_parse

To make it more clear that this function implements
'concept-basic-url-parser' instead of 'concept-url-parser'.

											
										
										
											2023-07-15 05:29:20 +03:00
+								URL URLParser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Optional<URL> url, Optional<State> state_override)
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								{
 								    dbgln_if(URL_PARSER_DEBUG, "URLParser::parse: Parsing '{}'", raw_input);
 								    if (raw_input.is_empty())
-												AK+Everywhere: Use Optional for URLParser::parse's base_url parameter

											
										
										
											2023-04-11 15:53:40 +03:00
+								        return base_url.has_value() ? *base_url : URL {};
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
-												Everywhere: Add sv suffix to strings relying on StringView(char const*)

Each of these strings would previously rely on StringView's char const*
constructor overload, which would call __builtin_strlen on the string.
Since we now have operator ""sv, we can replace these with much simpler
versions. This opens the door to being able to remove
StringView(char const*).

No functional changes.

											
										
										
											2022-07-11 20:32:29 +03:00
+								    if (raw_input.starts_with("data:"sv)) {
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								        auto maybe_url = parse_data_url(raw_input);
 								        if (!maybe_url.has_value())
 								            return {};
 								        return maybe_url.release_value();
 								    }
 								    size_t start_index = 0;
 								    size_t end_index = raw_input.length();
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								    // 1. If url is not given:
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								    if (!url.has_value()) {
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								        // 1. Set url to a new URL.
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								        url = URL();
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								        // 2. If input contains any leading or trailing C0 control or space, invalid-URL-unit validation error.
 								        // 3. Remove any leading and trailing C0 control or space from input.
 								        //
 								        // FIXME: We aren't checking exactly for 'trailing C0 control or space' here.
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								        bool has_validation_error = false;
 								        for (size_t i = 0; i < raw_input.length(); ++i) {
 								            i8 ch = raw_input[i];
 								            if (0 <= ch && ch <= 0x20) {
 								                ++start_index;
 								                has_validation_error = true;
 								            } else {
 								                break;
 								            }
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								        }
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								        for (ssize_t i = raw_input.length() - 1; i >= 0; --i) {
 								            i8 ch = raw_input[i];
 								            if (0 <= ch && ch <= 0x20) {
 								                --end_index;
 								                has_validation_error = true;
 								            } else {
 								                break;
 								            }
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								        }
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								        if (has_validation_error)
 								            report_validation_error();
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								    }
 								    if (start_index >= end_index)
 								        return {};
-												AK+Everywhere: Rename String to DeprecatedString

We have a new, improved string type coming up in AK (OOM aware, no null
state), and while it's going to use UTF-8, the name UTF8String is a
mouthful - so let's free up the String name by renaming the existing
class.
Making the old one have an annoying name will hopefully also help with
quick adoption :^)

											
										
										
											2022-12-04 21:02:33 +03:00
+								    DeprecatedString processed_input = raw_input.substring_view(start_index, end_index - start_index);
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								    // 2. If input contains any ASCII tab or newline, invalid-URL-unit validation error.
 								    // 3. Remove all ASCII tab or newline from input.
-												Everywhere: Add sv suffix to strings relying on StringView(char const*)

Each of these strings would previously rely on StringView's char const*
constructor overload, which would call __builtin_strlen on the string.
Since we now have operator ""sv, we can replace these with much simpler
versions. This opens the door to being able to remove
StringView(char const*).

No functional changes.

											
										
										
											2022-07-11 20:32:29 +03:00
+								    if (processed_input.contains("\t"sv) || processed_input.contains("\n"sv)) {
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								        report_validation_error();
-												Everywhere: Add sv suffix to strings relying on StringView(char const*)

Each of these strings would previously rely on StringView's char const*
constructor overload, which would call __builtin_strlen on the string.
Since we now have operator ""sv, we can replace these with much simpler
versions. This opens the door to being able to remove
StringView(char const*).

No functional changes.

											
										
										
											2022-07-11 20:32:29 +03:00
+								        processed_input = processed_input.replace("\t"sv, ""sv, ReplaceMode::All).replace("\n"sv, ""sv, ReplaceMode::All);
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								    }
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								    // 4. Let state be state override if given, or scheme start state otherwise.
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								    State state = state_override.value_or(State::SchemeStart);
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								    // FIXME: 5. Set encoding to the result of getting an output encoding from encoding.
 								    // 6. Let buffer be the empty string.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								    StringBuilder buffer;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								    // 7. Let atSignSeen, insideBrackets, and passwordTokenSeen be false.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								    bool at_sign_seen = false;
 								    bool inside_brackets = false;
 								    bool password_token_seen = false;
 								    Utf8View input(processed_input);
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								    // 8. Let pointer be a pointer for input.
-												AK: Rename Utf8CodepointIterator => Utf8CodePointIterator

											
										
										
											2021-06-01 10:45:52 +03:00
+								    Utf8CodePointIterator iterator = input.begin();
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
 								    auto get_remaining = [&input, &iterator] {
-												AK: Do not VERIFY on invalid code point bytes in UTF8View

The previous behavior was to always VERIFY that the UTF-8 bytes were
valid when iterating over the code points of an UTF8View. This change
makes it so we instead output the 0xFFFD 'REPLACEMENT CHARACTER'
code point when encountering invalid bytes, and keep iterating the
view after skipping one byte.

Leaving the decision to the consumer would break symmetry with the
UTF32View API, which would in turn require heavy refactoring and/or
code duplication in generic code such as the one found in
Gfx::Painter and the Shell.

To make it easier for the consumers to detect the original bytes, we
provide a new method on the iterator that returns a Span over the
data that has been decoded. This method is immediately used in the
TextNode::compute_text_for_rendering method, which previously did
this in an ad-hoc way.

This also adds tests for the new behavior in TestUtf8.cpp, as well
as reinforcements to the existing tests to check if the underlying
bytes match up with their expected values.

											
										
										
											2021-05-30 19:52:24 +03:00
+								        return input.substring_view(iterator - input.begin() + iterator.underlying_code_point_length_in_bytes()).as_string();
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								    };
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								    // 9. Keep running the following state machine by switching on state. If after a run pointer points to the EOF code point, go to the next step. Otherwise, increase pointer by 1 and continue with the state machine.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								    // NOTE: "continue" should only be used to prevent incrementing the iterator, as this is done at the end of the loop.
 								    //       ++iterator : "increase pointer by 1"
 								    //       continue   : "decrease pointer by 1"
 								    for (;;) {
-												AK: Stop using U+0000 as end of file code point in URL parser

This changes URL parser to use the 0xFFFFFFFF constant instead of 0 to
indicate end of file. This fixes a bug where inputs containing null
bytes would terminate the parser early, because they were interpreted
as end of file.

											
										
										
											2021-06-03 13:43:08 +03:00
+								        u32 code_point = end_of_file;
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								        if (!iterator.done())
 								            code_point = *iterator;
 								        if constexpr (URL_PARSER_DEBUG) {
-												AK: Stop using U+0000 as end of file code point in URL parser

This changes URL parser to use the 0xFFFFFFFF constant instead of 0 to
indicate end of file. This fixes a bug where inputs containing null
bytes would terminate the parser early, because they were interpreted
as end of file.

											
										
										
											2021-06-03 13:43:08 +03:00
+								            if (code_point == end_of_file)
-												AK: Rename URLParser::parse to URLParser::basic_parse

To make it more clear that this function implements
'concept-basic-url-parser' instead of 'concept-url-parser'.

											
										
										
											2023-07-15 05:29:20 +03:00
+								                dbgln("URLParser::basic_parse: {} state with EOF.", state_name(state));
-												AK: Make debugging URLParser easier

This patch adds a state_name method to URLParser to convert a state to a
string. With this, the debugging statements now display the state names.

Furthermore, this fixes a bug where non-ASCII code points were
formatted as characters, which fails an assertion in the formatting
system.

											
										
										
											2021-06-03 13:40:04 +03:00
+								            else if (is_ascii_printable(code_point))
-												AK: Rename URLParser::parse to URLParser::basic_parse

To make it more clear that this function implements
'concept-basic-url-parser' instead of 'concept-url-parser'.

											
										
										
											2023-07-15 05:29:20 +03:00
+								                dbgln("URLParser::basic_parse: {} state with code point U+{:04X} ({:c}).", state_name(state), code_point, code_point);
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								            else
-												AK: Rename URLParser::parse to URLParser::basic_parse

To make it more clear that this function implements
'concept-basic-url-parser' instead of 'concept-url-parser'.

											
										
										
											2023-07-15 05:29:20 +03:00
+								                dbgln("URLParser::basic_parse: {} state with code point U+{:04X}.", state_name(state), code_point);
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								        }
 								        switch (state) {
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								        // -> scheme start state, https://url.spec.whatwg.org/#scheme-start-state
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								        case State::SchemeStart:
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            // 1. If c is an ASCII alpha, append c, lowercased, to buffer, and set state to scheme state.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								            if (is_ascii_alpha(code_point)) {
 								                buffer.append_as_lowercase(code_point);
 								                state = State::Scheme;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            }
-												AK: Check for state override in more places for basic URL parsing

											
										
										
											2023-07-04 11:34:00 +03:00
+								            // 2. Otherwise, if state override is not given, set state to no scheme state and decrease pointer by 1.
 								            else if (!state_override.has_value()) {
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                state = State::NoScheme;
 								                continue;
 								            }
-												AK: Check for state override in more places for basic URL parsing

											
										
										
											2023-07-04 11:34:00 +03:00
+								            // 3. Otherwise, return failure.
 								            else {
 								                return {};
 								            }
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								            break;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								        // -> scheme state, https://url.spec.whatwg.org/#scheme-state
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								        case State::Scheme:
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            // 1. If c is an ASCII alphanumeric, U+002B (+), U+002D (-), or U+002E (.), append c, lowercased, to buffer.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								            if (is_ascii_alphanumeric(code_point) || code_point == '+' || code_point == '-' || code_point == '.') {
 								                buffer.append_as_lowercase(code_point);
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            }
 								            // 2. Otherwise, if c is U+003A (:), then:
 								            else if (code_point == ':') {
-												AK: Implement steps for state override in URL parser

											
										
										
											2023-07-14 03:58:16 +03:00
+								                // 1. If state override is given, then:
 								                if (state_override.has_value()) {
 								                    // 1. If url’s scheme is a special scheme and buffer is not a special scheme, then return.
 								                    if (URL::is_special_scheme(url->scheme()) && !URL::is_special_scheme(buffer.string_view()))
 								                        return *url;
 								                    // 2. If url’s scheme is not a special scheme and buffer is a special scheme, then return.
 								                    if (!URL::is_special_scheme(url->scheme()) && URL::is_special_scheme(buffer.string_view()))
 								                        return *url;
 								                    // 3. If url includes credentials or has a non-null port, and buffer is "file", then return.
 								                    if ((url->includes_credentials() || url->port().has_value()) && buffer.string_view() == "file"sv)
 								                        return *url;
 								                    // 4. If url’s scheme is "file" and its host is an empty host, then return.
 								                    if (url->scheme() == "file"sv && url->host().is_empty())
 								                        return *url;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                }
 								                // 2. Set url’s scheme to buffer.
-												Everywhere: Rename to_{string => deprecated_string}() where applicable

This will make it easier to support both string types at the same time
while we convert code, and tracking down remaining uses.

One big exception is Value::to_string() in LibJS, where the name is
dictated by the ToString AO.

											
										
										
											2022-12-06 04:12:49 +03:00
+								                url->m_scheme = buffer.to_deprecated_string();
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
-												AK: Implement steps for state override in URL parser

											
										
										
											2023-07-14 03:58:16 +03:00
+								                // 3. If state override is given, then:
 								                if (state_override.has_value()) {
 								                    // 1. If url’s port is url’s scheme’s default port, then set url’s port to null.
 								                    if (url->port() == URL::default_port_for_scheme(url->scheme()))
 								                        url->m_port = {};
 								                    // 2. Return.
 								                    return *url;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                }
 								                // 4. Set buffer to the empty string.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                buffer.clear();
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                // 5. If url’s scheme is "file", then:
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in. This commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                if (url->scheme() == "file") {
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                    // 1. If remaining does not start with "//", special-scheme-missing-following-solidus validation error.
-												Everywhere: Add sv suffix to strings relying on StringView(char const*)

Each of these strings would previously rely on StringView's char const*
constructor overload, which would call __builtin_strlen on the string.
Since we now have operator ""sv, we can replace these with much simpler
versions. This opens the door to being able to remove
StringView(char const*).

No functional changes.

											
										
										
											2022-07-11 20:32:29 +03:00
+								                    if (!get_remaining().starts_with("//"sv)) {
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                        report_validation_error();
 								                    }
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                    // 2. Set state to file state.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                    state = State::File;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                }
 								                // 6. Otherwise, if url is special, base is non-null, and base’s scheme is url’s scheme:
 								                // 7. Otherwise, if url is special, set state to special authority slashes state.
 								                // FIXME: Write this block closer to spec text.
 								                else if (url->is_special()) {
 								                    // FIXME: 1. Assert: base is special (and therefore does not have an opaque path).
 								                    // 2. Set state to special relative or authority state.
-												AK+Everywhere: Use Optional for URLParser::parse's base_url parameter

											
										
										
											2023-04-11 15:53:40 +03:00
+								                    if (base_url.has_value() && base_url->m_scheme == url->m_scheme)
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                        state = State::SpecialRelativeOrAuthority;
 								                    else
 								                        state = State::SpecialAuthoritySlashes;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                }
 								                // 8. Otherwise, if remaining starts with an U+002F (/), set state to path or authority state and increase pointer by 1.
 								                else if (get_remaining().starts_with("/"sv)) {
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                    state = State::PathOrAuthority;
 								                    ++iterator;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                }
 								                // 9. Otherwise, set url’s path to the empty string and set state to opaque path state.
 								                else {
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in. This commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                    url->m_cannot_be_a_base_url = true;
-												AK: Don't store parts of URLs percent decoded

As noted in several comments, doing this goes against the W3C spec,
and breaks parsing then re-serializing URLs that contain percent
encoded data that was not encoded using the same character set as
the serializer.

For example, previously if you had a URL like:

https://foo.com/what%2F%2F (the path is what + '//' percent encoded)

Creating URL("https://foo.com/what%2F%2F").serialize() would return:

https://foo.com/what//

Which is incorrect and not the same as the URL we passed. This is
because the re-serializing uses the PercentEncodeSet::Path which
does not include '/'.

Only doing the percent encoding in the setters fixes this, which
is required to navigate to Google Street View (which includes a
percent encoded URL in its URL).

Seems to fix #13477 too

											
										
										
											2023-04-09 16:21:00 +03:00
+								                    url->append_slash();
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                    state = State::CannotBeABaseUrlPath;
 								                }
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            }
-												AK: Check for state override in more places for basic URL parsing

											
										
										
											2023-07-04 11:34:00 +03:00
+								            // 3. Otherwise, if state override is not given, set buffer to the empty string, state to no scheme state, and start over (from the first code point in input).
 								            else if (!state_override.has_value()) {
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                buffer.clear();
 								                state = State::NoScheme;
 								                iterator = input.begin();
 								                continue;
 								            }
-												AK: Check for state override in more places for basic URL parsing

											
										
										
											2023-07-04 11:34:00 +03:00
+								            // 4. Otherwise, return failure.
 								            else {
 								                return {};
 								            }
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								            break;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								        // -> no scheme state, https://url.spec.whatwg.org/#no-scheme-state
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								        case State::NoScheme:
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            // 1. If base is null, or base has an opaque path and c is not U+0023 (#), missing-scheme-non-relative-URL validation error, return failure.
-												AK+Everywhere: Use Optional for URLParser::parse's base_url parameter

											
										
										
											2023-04-11 15:53:40 +03:00
+								            if (!base_url.has_value() || (base_url->m_cannot_be_a_base_url && code_point != '#')) {
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                report_validation_error();
 								                return {};
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            }
 								            // 2. Otherwise, if base has an opaque path and c is U+0023 (#), set url’s scheme to base’s scheme, url’s path to base’s path, url’s query
 								            //    to base’s query, url’s fragment to the empty string, and set state to fragment state.
 								            else if (base_url->m_cannot_be_a_base_url && code_point == '#') {
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in. This commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                url->m_scheme = base_url->m_scheme;
 								                url->m_paths = base_url->m_paths;
 								                url->m_query = base_url->m_query;
 								                url->m_fragment = "";
 								                url->m_cannot_be_a_base_url = true;
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                state = State::Fragment;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            }
 								            // 3. Otherwise, if base’s scheme is not "file", set state to relative state and decrease pointer by 1.
 								            else if (base_url->m_scheme != "file") {
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                state = State::Relative;
 								                continue;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            }
 								            // 4. Otherwise, set state to file state and decrease pointer by 1.
 								            else {
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                state = State::File;
 								                continue;
 								            }
 								            break;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								        // -> special relative or authority state, https://url.spec.whatwg.org/#special-relative-or-authority-state
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								        case State::SpecialRelativeOrAuthority:
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            // 1. If c is U+002F (/) and remaining starts with U+002F (/), then set state to special authority ignore slashes state and increase pointer by 1.
-												Everywhere: Add sv suffix to strings relying on StringView(char const*)

Each of these strings would previously rely on StringView's char const*
constructor overload, which would call __builtin_strlen on the string.
Since we now have operator ""sv, we can replace these with much simpler
versions. This opens the door to being able to remove
StringView(char const*).

No functional changes.

											
										
										
											2022-07-11 20:32:29 +03:00
+								            if (code_point == '/' && get_remaining().starts_with("/"sv)) {
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                state = State::SpecialAuthorityIgnoreSlashes;
 								                ++iterator;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            }
 								            // 2. Otherwise, special-scheme-missing-following-solidus validation error, set state to relative state and decrease pointer by 1.
 								            else {
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                report_validation_error();
 								                state = State::Relative;
 								                continue;
 								            }
 								            break;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								        // -> path or authority state, https://url.spec.whatwg.org/#path-or-authority-state
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								        case State::PathOrAuthority:
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            // 1. If c is U+002F (/), then set state to authority state.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								            if (code_point == '/') {
 								                state = State::Authority;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            }
 								            // 2. Otherwise, set state to path state, and decrease pointer by 1.
 								            else {
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                state = State::Path;
 								                continue;
 								            }
 								            break;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								        // -> relative state, https://url.spec.whatwg.org/#relative-state
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								        case State::Relative:
-												AK: Add missing spec assert in relative state basic URL parsing

											
										
										
											2023-07-04 12:11:42 +03:00
+								            // 1. Assert: base’s scheme is not "file".
 								            VERIFY(base_url->scheme() != "file");
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								            // 2. Set url’s scheme to base’s scheme.
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								            url->m_scheme = base_url->m_scheme;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								            // 3. If c is U+002F (/), then set state to relative slash state.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								            if (code_point == '/') {
 								                state = State::RelativeSlash;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            }
 								            // 4. Otherwise, if url is special and c is U+005C (\), invalid-reverse-solidus validation error, set state to relative slash state.
 								            else if (url->is_special() && code_point == '\\') {
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                report_validation_error();
 								                state = State::RelativeSlash;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            }
 								            // 5. Otherwise:
 								            else {
 								                // 1. Set url’s username to base’s username, url’s password to base’s password, url’s host to base’s host, url’s port to base’s port, url’s path to a clone of base’s path, and url’s query to base’s query.
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                url->m_username = base_url->m_username;
 								                url->m_password = base_url->m_password;
 								                url->m_host = base_url->m_host;
 								                url->m_port = base_url->m_port;
 								                url->m_paths = base_url->m_paths;
 								                url->m_query = base_url->m_query;
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                // 2. If c is U+003F (?), then set url’s query to the empty string, and state to query state.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                if (code_point == '?') {
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                    url->m_query = "";
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                    state = State::Query;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                }
 								                // 3. Otherwise, if c is U+0023 (#), set url’s fragment to the empty string and state to fragment state.
 								                else if (code_point == '#') {
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                    url->m_fragment = "";
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                    state = State::Fragment;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                }
 								                // 4. Otherwise, if c is not the EOF code point:
 								                else if (code_point != end_of_file) {
 								                    // 1. Set url’s query to null.
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                    url->m_query = {};
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                    // 2. Shorten url’s path.
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                    if (url->m_paths.size())
 								                        url->m_paths.remove(url->m_paths.size() - 1);
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                    // 3. Set state to path state and decrease pointer by 1.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                    state = State::Path;
 								                    continue;
 								                }
 								            }
 								            break;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								        // -> relative slash state, https://url.spec.whatwg.org/#relative-slash-state
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								        case State::RelativeSlash:
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            // 1. If url is special and c is U+002F (/) or U+005C (\), then:
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								            if (url->is_special() && (code_point == '/' || code_point == '\\')) {
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                // 1. If c is U+005C (\), invalid-reverse-solidus validation error.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                if (code_point == '\\')
 								                    report_validation_error();
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                // 2. Set state to special authority ignore slashes state.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                state = State::SpecialAuthorityIgnoreSlashes;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            }
 								            // 2. Otherwise, if c is U+002F (/), then set state to authority state.
 								            else if (code_point == '/') {
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                state = State::Authority;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            }
 								            // 3. Otherwise, set url’s username to base’s username, url’s password to base’s password, url’s host to base’s host, url’s port to base’s port, state to path state, and then, decrease pointer by 1.
 								            else {
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                url->m_username = base_url->m_username;
 								                url->m_password = base_url->m_password;
 								                url->m_host = base_url->m_host;
 								                url->m_port = base_url->m_port;
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                state = State::Path;
 								                continue;
 								            }
 								            break;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								        // -> special authority slashes state, https://url.spec.whatwg.org/#special-authority-slashes-state
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								        case State::SpecialAuthoritySlashes:
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            // 1. If c is U+002F (/) and remaining starts with U+002F (/), then set state to special authority ignore slashes state and increase pointer by 1.
-												Everywhere: Add sv suffix to strings relying on StringView(char const*)

Each of these strings would previously rely on StringView's char const*
constructor overload, which would call __builtin_strlen on the string.
Since we now have operator ""sv, we can replace these with much simpler
versions. This opens the door to being able to remove
StringView(char const*).

No functional changes.

											
										
										
											2022-07-11 20:32:29 +03:00
+								            if (code_point == '/' && get_remaining().starts_with("/"sv)) {
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                state = State::SpecialAuthorityIgnoreSlashes;
 								                ++iterator;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            }
 								            // 2. Otherwise, special-scheme-missing-following-solidus validation error, set state to special authority ignore slashes state and decrease pointer by 1.
 								            else {
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                report_validation_error();
 								                state = State::SpecialAuthorityIgnoreSlashes;
 								                continue;
 								            }
 								            break;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								        // -> special authority ignore slashes state, https://url.spec.whatwg.org/#special-authority-ignore-slashes-state
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								        case State::SpecialAuthorityIgnoreSlashes:
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            // 1. If c is neither U+002F (/) nor U+005C (\), then set state to authority state and decrease pointer by 1.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								            if (code_point != '/' && code_point != '\\') {
 								                state = State::Authority;
 								                continue;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            }
 								            // 2. Otherwise, special-scheme-missing-following-solidus validation error.
 								            else {
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                report_validation_error();
 								            }
 								            break;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								        // -> authority state, https://url.spec.whatwg.org/#authority-state
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								        case State::Authority:
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            // 1. If c is U+0040 (@), then:
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								            if (code_point == '@') {
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                // 1. Invalid-credentials validation error.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                report_validation_error();
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                // 2. If atSignSeen is true, then prepend "%40" to buffer.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                if (at_sign_seen) {
-												Everywhere: Rename to_{string => deprecated_string}() where applicable

This will make it easier to support both string types at the same time
while we convert code, and tracking down remaining uses.

One big exception is Value::to_string() in LibJS, where the name is
dictated by the ToString AO.

											
										
										
											2022-12-06 04:12:49 +03:00
+								                    auto content = buffer.to_deprecated_string();
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                    buffer.clear();
-												Everywhere: Add sv suffix to strings relying on StringView(char const*)

Each of these strings would previously rely on StringView's char const*
constructor overload, which would call __builtin_strlen on the string.
Since we now have operator ""sv, we can replace these with much simpler
versions. This opens the door to being able to remove
StringView(char const*).

No functional changes.

											
										
										
											2022-07-11 20:32:29 +03:00
+								                    buffer.append("%40"sv);
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                    buffer.append(content);
 								                }
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                // 3. Set atSignSeen to true.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                at_sign_seen = true;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                StringBuilder builder;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
-												AK: Fix typo in URL basic parse authority state

We weren't actually ever iterating over the buffer, and only what we
were intending to append to (which is empty!).

											
										
										
											2023-07-04 13:22:01 +03:00
+								                // 4. For each codePoint in buffer:
 								                for (auto c : Utf8View(buffer.string_view())) {
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                    // 1. If codePoint is U+003A (:) and passwordTokenSeen is false, then set passwordTokenSeen to true and continue.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                    if (c == ':' && !password_token_seen) {
 								                        password_token_seen = true;
 								                        continue;
 								                    }
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                    // 2. Let encodedCodePoints be the result of running UTF-8 percent-encode codePoint using the userinfo percent-encode set.
 								                    // NOTE: This is done inside of step 3 and 4 implementation
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                    builder.clear();
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                    // 3. If passwordTokenSeen is true, then append encodedCodePoints to url’s password.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                    if (password_token_seen) {
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                        builder.append(url->password());
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                        URL::append_percent_encoded_if_necessary(builder, c, URL::PercentEncodeSet::Userinfo);
-												AK: Don't store parts of URLs percent decoded

As noted in several comments, doing this goes against the W3C spec,
and breaks parsing then re-serializing URLs that contain percent
encoded data that was not encoded using the same character set as
the serializer.

For example, previously if you had a URL like:

https://foo.com/what%2F%2F (the path is what + '//' percent encoded)

Creating URL("https://foo.com/what%2F%2F").serialize() would return:

https://foo.com/what//

Which is incorrect and not the same as the URL we passed. This is
because the re-serializing uses the PercentEncodeSet::Path which
does not include '/'.

Only doing the percent encoding in the setters fixes this, which
is required to navigate to Google Street View (which includes a
percent encoded URL in its URL).

Seems to fix #13477 too

											
										
										
											2023-04-09 16:21:00 +03:00
+								                        url->m_password = builder.string_view();
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                    }
 								                    // 4. Otherwise, append encodedCodePoints to url’s username.
 								                    else {
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                        builder.append(url->username());
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                        URL::append_percent_encoded_if_necessary(builder, c, URL::PercentEncodeSet::Userinfo);
-												AK: Don't store parts of URLs percent decoded

As noted in several comments, doing this goes against the W3C spec,
and breaks parsing then re-serializing URLs that contain percent
encoded data that was not encoded using the same character set as
the serializer.

For example, previously if you had a URL like:

https://foo.com/what%2F%2F (the path is what + '//' percent encoded)

Creating URL("https://foo.com/what%2F%2F").serialize() would return:

https://foo.com/what//

Which is incorrect and not the same as the URL we passed. This is
because the re-serializing uses the PercentEncodeSet::Path which
does not include '/'.

Only doing the percent encoding in the setters fixes this, which
is required to navigate to Google Street View (which includes a
percent encoded URL in its URL).

Seems to fix #13477 too

											
										
										
											2023-04-09 16:21:00 +03:00
+								                        url->m_username = builder.string_view();
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                    }
 								                }
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                // 5. Set buffer to the empty string.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                buffer.clear();
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								            }
 								            // 2. Otherwise, if one of the following is true:
 								            //    * c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#)
 								            //    * url is special and c is U+005C (\)
 								            else if ((code_point == end_of_file || code_point == '/' || code_point == '?' || code_point == '#')
 								                || (url->is_special() && code_point == '\\')) {
 								                // then:
 								                // 1. If atSignSeen is true and buffer is the empty string, invalid-credentials validation error, return failure.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                if (at_sign_seen && buffer.is_empty()) {
 								                    report_validation_error();
 								                    return {};
 								                }
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                // 2. Decrease pointer by buffer’s code point length + 1, set buffer to the empty string, and set state to host state.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                iterator = input.iterator_at_byte_offset(iterator - input.begin() - buffer.length() - 1);
 								                buffer.clear();
 								                state = State::Host;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            }
 								            // 3. Otherwise, append c to buffer.
 								            else {
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                buffer.append_code_point(code_point);
 								            }
 								            break;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								        // -> host state, https://url.spec.whatwg.org/#host-state
 								        // -> hostname state, https://url.spec.whatwg.org/#hostname-state
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								        case State::Host:
 								        case State::Hostname:
-												AK: Check for state override in more places for basic URL parsing

											
										
										
											2023-07-04 11:34:00 +03:00
+								            // 1. If state override is given and url’s scheme is "file", then decrease pointer by 1 and set state to file host state.
 								            if (state_override.has_value() && url->scheme() == "file") {
 								                state = State::FileHost;
 								                continue;
 								            }
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								            // 2. Otherwise, if c is U+003A (:) and insideBrackets is false, then:
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								            if (code_point == ':' && !inside_brackets) {
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                // 1. If buffer is the empty string, host-missing validation error, return failure.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                if (buffer.is_empty()) {
 								                    report_validation_error();
 								                    return {};
 								                }
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
-												AK: Implement steps for state override in URL parser

											
										
										
											2023-07-14 03:58:16 +03:00
+								                // 2. If state override is given and state override is hostname state, then return.
 								                if (state_override.has_value() && *state_override == State::Hostname)
 								                    return *url;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                // 3. Let host be the result of host parsing buffer with url is not special.
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                auto host = parse_host(buffer.string_view(), !url->is_special());
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                // 4. If host is failure, then return failure.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                if (!host.has_value())
 								                    return {};
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                // 5. Set url’s host to host, buffer to the empty string, and state to port state.
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                url->m_host = host.release_value();
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                buffer.clear();
 								                state = State::Port;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            }
 								            // 3. Otherwise, if one of the following is true:
 								            //    * c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#)
 								            //    * url is special and c is U+005C (\)
 								            else if ((code_point == end_of_file || code_point == '/' || code_point == '?' || code_point == '#')
 								                || (url->is_special() && code_point == '\\')) {
 								                // then decrease pointer by 1, and then:
 								                // NOTE: pointer decrement is done by the continue below
 								                // 1. If url is special and buffer is the empty string, host-missing validation error, return failure.
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                if (url->is_special() && buffer.is_empty()) {
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                    report_validation_error();
 								                    return {};
 								                }
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
-												AK: Implement steps for state override in URL parser

											
										
										
											2023-07-14 03:58:16 +03:00
+								                // 2. Otherwise, if state override is given, buffer is the empty string, and either url includes credentials or url’s port is non-null, return.
 								                if (state_override.has_value() && buffer.is_empty() && (url->includes_credentials() || url->port().has_value()))
 								                    return *url;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                // 3. Let host be the result of host parsing buffer with url is not special.
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                auto host = parse_host(buffer.string_view(), !url->is_special());
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                // 4. If host is failure, then return failure.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                if (!host.has_value())
 								                    return {};
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                // 5. Set url’s host to host, buffer to the empty string, and state to path start state.
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                url->m_host = host.value();
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                buffer.clear();
 								                state = State::Port;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
-												AK: Implement steps for state override in URL parser

											
										
										
											2023-07-14 03:58:16 +03:00
+								                // 6. If state override is given, then return.
 								                if (state_override.has_value())
 								                    return *url;
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                continue;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								            }
 								            // 4. Otherwise:
 								            else {
-												AK: Correct faulty logic for host state in basic URL parse

The '[' and ']' code points were not being appended to the buffer for
this case.

											
										
										
											2023-07-04 12:57:05 +03:00
+								                // 1. If c is U+005B ([), then set insideBrackets to true.
 								                if (code_point == '[') {
 								                    inside_brackets = true;
 								                }
 								                // 2. If c is U+005D (]), then set insideBrackets to false.
 								                else if (code_point == ']') {
 								                    inside_brackets = false;
 								                }
 								                // 3. Append c to buffer.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                buffer.append_code_point(code_point);
 								            }
 								            break;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								        // -> port state, https://url.spec.whatwg.org/#port-state
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								        case State::Port:
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            // 1. If c is an ASCII digit, append c to buffer.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								            if (is_ascii_digit(code_point)) {
 								                buffer.append_code_point(code_point);
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            }
 								            // 2. Otherwise, if one of the following is true:
 								            //    * c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#)
 								            //    * url is special and c is U+005C (\)
-												AK: Check for state override in more places for basic URL parsing

											
										
										
											2023-07-04 11:34:00 +03:00
+								            //    * state override is given
 								            else if ((code_point == end_of_file || code_point == '/' || code_point == '?' || code_point == '#')
 								                || (url->is_special() && code_point == '\\')
 								                || state_override.has_value()) {
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                // then:
 								                // 1. If buffer is not the empty string, then:
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                if (!buffer.is_empty()) {
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                    // 1. Let port be the mathematical integer value that is represented by buffer in radix-10 using ASCII digits for digits with values 0 through 9.
-												AK: Don't create Utf8View from temporary String in URLParser

This fixes a bug where a Utf8View was created with data from a temporary
string, which was immediately deleted. This led to a use-after-free
issue. This also changes most occurrences of StringBuilder::to_string in
URLParser to use ::string_view(), as the value is passed as StringView
const& most of the time anyway.

This fixes oss-fuzz issue 34973.

											
										
										
											2021-06-08 16:22:02 +03:00
+								                    auto port = buffer.string_view().to_uint();
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                    // 2. If port is greater than 2^16 − 1, port-out-of-range validation error, return failure.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                    if (!port.has_value() || port.value() > 65535) {
 								                        report_validation_error();
 								                        return {};
 								                    }
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                    // 3. Set url’s port to null, if port is url’s scheme’s default port; otherwise to port.
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                    if (port.value() == URL::default_port_for_scheme(url->scheme()))
-												AK: Make URL::m_port an Optional<u16>, Expose raw port getter

Our current way of signalling a missing port with m_port == 0 was
lacking, as 0 is a valid port number in URLs.

											
										
										
											2021-09-13 23:12:16 +03:00
+								                        url->m_port = {};
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                    else
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                        url->m_port = port.value();
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                    // 4. Set buffer to the empty string.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                    buffer.clear();
 								                }
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
-												AK: Implement steps for state override in URL parser

											
										
										
											2023-07-14 03:58:16 +03:00
+								                // 2. If state override is given, then return.
 								                if (state_override.has_value())
 								                    return *url;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                // 3. Set state to path start state and decrease pointer by 1.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                state = State::PathStart;
 								                continue;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            }
 								            // 3. Otherwise, port-invalid validation error, return failure.
 								            else {
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                report_validation_error();
 								                return {};
 								            }
 								            break;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								        // -> file state, https://url.spec.whatwg.org/#file-state
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								        case State::File:
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            // 1. Set url’s scheme to "file".
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								            url->m_scheme = "file";
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								            // 2. Set url’s host to the empty string.
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								            url->m_host = "";
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								            // 3. If c is U+002F (/) or U+005C (\), then:
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								            if (code_point == '/' || code_point == '\\') {
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                // 1. If c is U+005C (\), invalid-reverse-solidus validation error.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                if (code_point == '\\')
 								                    report_validation_error();
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                // 2. Set state to file slash state.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                state = State::FileSlash;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            }
 								            // 4. Otherwise, if base is non-null and base’s scheme is "file":
 								            else if (base_url.has_value() && base_url->m_scheme == "file") {
 								                // 1. Set url’s host to base’s host, url’s path to a clone of base’s path, and url’s query to base’s query.
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                url->m_host = base_url->m_host;
 								                url->m_paths = base_url->m_paths;
 								                url->m_query = base_url->m_query;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                // 2. If c is U+003F (?), then set url’s query to the empty string and state to query state.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                if (code_point == '?') {
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                    url->m_query = "";
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                    state = State::Query;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                }
 								                // 3. Otherwise, if c is U+0023 (#), set url’s fragment to the empty string and state to fragment state.
 								                else if (code_point == '#') {
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                    url->m_fragment = "";
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                    state = State::Fragment;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                }
 								                // 4. Otherwise, if c is not the EOF code point:
 								                else if (code_point != end_of_file) {
 								                    // 1. Set url’s query to null.
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                    url->m_query = {};
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                    // 2. If the code point substring from pointer to the end of input does not start with a Windows drive letter, then shorten url’s path.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                    auto substring_from_pointer = input.substring_view(iterator - input.begin()).as_string();
 								                    if (!starts_with_windows_drive_letter(substring_from_pointer)) {
-												AK+Everywhere: Replace URL::paths() with path_segment_at_index()

This allows accessing and looping over the path segments in a URL
without necessarily allocating a new vector if you want them percent
decoded too (which path_segment_at_index() has an option for).

											
										
										
											2023-04-14 01:29:51 +03:00
+								                        if (!url->m_paths.is_empty() && !(url->scheme() == "file" && url->m_paths.size() == 1 && is_normalized_windows_drive_letter(url->m_paths[0])))
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                            url->m_paths.remove(url->m_paths.size() - 1);
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                    }
 								                    // 3. Otherwise:
 								                    else {
 								                        // 1. File-invalid-Windows-drive-letter validation error.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                        report_validation_error();
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                        // 2. Set url’s path to « ».
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                        url->m_paths.clear();
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                    }
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                    // 4. Set state to path state and decrease pointer by 1.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                    state = State::Path;
 								                    continue;
 								                }
 								            }
-												AK: Correct logic in file state decrementing a path in URL basic parsing

											
										
										
											2023-07-04 12:06:58 +03:00
+								            // 5. Otherwise, set state to path state, and decrease pointer by 1.
 								            else {
 								                state = State::Path;
 								                continue;
 								            }
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								            break;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								        // -> file slash state, https://url.spec.whatwg.org/#file-slash-state
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								        case State::FileSlash:
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            // 1. If c is U+002F (/) or U+005C (\), then:
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								            if (code_point == '/' || code_point == '\\') {
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                // 1. If c is U+005C (\), invalid-reverse-solidus validation error.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                if (code_point == '\\')
 								                    report_validation_error();
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                // 2. Set state to file host state.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                state = State::FileHost;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            }
 								            // 2. Otherwise:
-												AK: Correct faulty logic in file slash state in basic URL parsing

We were not correctly decrementing the pointer in the case that either
the base URL was non-null or the base URL's scheme was not a file.

											
										
										
											2023-07-04 12:12:33 +03:00
+								            else {
 								                // 1. If base is non-null and base’s scheme is "file", then:
 								                if (base_url.has_value() && base_url->m_scheme == "file") {
 								                    // 1. Set url’s host to base’s host.
 								                    url->m_paths = base_url->m_paths;
 								                    url->m_paths.remove(url->m_paths.size() - 1);
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
-												AK: Correct faulty logic in file slash state in basic URL parsing

We were not correctly decrementing the pointer in the case that either
the base URL was non-null or the base URL's scheme was not a file.

											
										
										
											2023-07-04 12:12:33 +03:00
+								                    // 2. If the code point substring from pointer to the end of input does not start with a Windows drive letter and base’s path[0] is a normalized Windows drive letter, then append base’s path[0] to url’s path.
 								                    auto substring_from_pointer = input.substring_view(iterator - input.begin()).as_string();
 								                    if (!starts_with_windows_drive_letter(substring_from_pointer) && is_normalized_windows_drive_letter(base_url->m_paths[0]))
 								                        url->append_path(base_url->m_paths[0], URL::ApplyPercentEncoding::No);
 								                }
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
-												AK: Correct faulty logic in file slash state in basic URL parsing

We were not correctly decrementing the pointer in the case that either
the base URL was non-null or the base URL's scheme was not a file.

											
										
										
											2023-07-04 12:12:33 +03:00
+								                // 2. Set state to path state, and decrease pointer by 1.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                state = State::Path;
 								                continue;
 								            }
 								            break;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								        // -> file host state, https://url.spec.whatwg.org/#file-host-state
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								        case State::FileHost:
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            // 1. If c is the EOF code point, U+002F (/), U+005C (\), U+003F (?), or U+0023 (#), then decrease pointer by 1 and then:
 								            //    NOTE: decreasing the pointer is done at the bottom of this block.
-												AK: Stop using U+0000 as end of file code point in URL parser

This changes URL parser to use the 0xFFFFFFFF constant instead of 0 to
indicate end of file. This fixes a bug where inputs containing null
bytes would terminate the parser early, because they were interpreted
as end of file.

											
										
										
											2021-06-03 13:43:08 +03:00
+								            if (code_point == end_of_file || code_point == '/' || code_point == '\\' || code_point == '?' || code_point == '#') {
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                // 1. If state override is not given and buffer is a Windows drive letter, file-invalid-Windows-drive-letter-host validation error, set state to path state.
-												AK: Check for state override in more places for basic URL parsing

											
										
										
											2023-07-04 11:34:00 +03:00
+								                if (!state_override.has_value() && is_windows_drive_letter(buffer.string_view())) {
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                    report_validation_error();
 								                    state = State::Path;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                }
 								                // 2. Otherwise, if buffer is the empty string, then:
 								                else if (buffer.is_empty()) {
 								                    // 1. Set url’s host to the empty string.
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                    url->m_host = "";
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
-												AK: Implement steps for state override in URL parser

											
										
										
											2023-07-14 03:58:16 +03:00
+								                    // 2. If state override is given, then return.
 								                    if (state_override.has_value())
 								                        return *url;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                    // 3. Set state to path start state.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                    state = State::PathStart;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                }
 								                // 3. Otherwise, run these steps:
 								                else {
 								                    // 1. Let host be the result of host parsing buffer with url is not special.
 								                    // FIXME: It seems we are not passing through url is not special through here
-												AK: Don't create Utf8View from temporary String in URLParser

This fixes a bug where a Utf8View was created with data from a temporary
string, which was immediately deleted. This led to a use-after-free
issue. This also changes most occurrences of StringBuilder::to_string in
URLParser to use ::string_view(), as the value is passed as StringView
const& most of the time anyways.

This fixes oss-fuzz issue 34973.

											
										
										
											2021-06-08 16:22:02 +03:00
+								                    auto host = parse_host(buffer.string_view(), true);
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                    // 2. If host is failure, then return failure.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                    if (!host.has_value())
 								                        return {};
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                    // 3. If host is "localhost", then set host to the empty string.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                    if (host.value() == "localhost")
 								                        host = "";
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                    // 4. Set url’s host to host.
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                    url->m_host = host.release_value();
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
-												AK: Implement steps for state override in URL parser

											
										
										
											2023-07-14 03:58:16 +03:00
+								                    // 5. If state override is given, then return.
 								                    if (state_override.has_value())
 								                        return *url;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                    // 6. Set buffer to the empty string and state to path start state.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                    buffer.clear();
 								                    state = State::PathStart;
 								                }
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                // NOTE: Decrement specified at the top of this 'if' statement.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                continue;
 								            } else {
 								                buffer.append_code_point(code_point);
 								            }
 								            break;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								        // -> path start state, https://url.spec.whatwg.org/#path-start-state
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								        case State::PathStart:
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            // 1. If url is special, then:
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								            if (url->is_special()) {
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                // 1. If c is U+005C (\), invalid-reverse-solidus validation error.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                if (code_point == '\\')
 								                    report_validation_error();
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                // 2. Set state to path state.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                state = State::Path;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                // 3. If c is neither U+002F (/) nor U+005C (\), then decrease pointer by 1.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                if (code_point != '/' && code_point != '\\')
 								                    continue;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            }
 								            // 2. Otherwise, if state override is not given and c is U+003F (?), set url’s query to the empty string and state to query state.
-												AK: Check for state override in more places for basic URL parsing

											
										
										
											2023-07-04 11:34:00 +03:00
+								            else if (!state_override.has_value() && code_point == '?') {
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                url->m_query = "";
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                state = State::Query;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            }
 								            // 3. Otherwise, if state override is not given and c is U+0023 (#), set url’s fragment to the empty string and state to fragment state.
-												AK: Check for state override in more places for basic URL parsing

											
										
										
											2023-07-04 11:34:00 +03:00
+								            else if (!state_override.has_value() && code_point == '#') {
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                url->m_fragment = "";
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                state = State::Fragment;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            }
 								            // 4. Otherwise, if c is not the EOF code point:
 								            else if (code_point != end_of_file) {
 								                // 1. Set state to path state.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                state = State::Path;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                // 2. If c is not U+002F (/), then decrease pointer by 1.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                if (code_point != '/')
 								                    continue;
 								            }
-												AK: Check for state override in more places for basic URL parsing

											
										
										
											2023-07-04 11:34:00 +03:00
+								            // 5. Otherwise, if state override is given and url’s host is null, append the empty string to url’s path.
 								            else if (state_override.has_value() && url->host().is_empty()) {
 								                url->append_slash();
 								            }
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								            break;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								        // -> path state, https://url.spec.whatwg.org/#path-state
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								        case State::Path:
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            // 1. If one of the following is true:
 								            //    * c is the EOF code point or U+002F (/)
 								            //    * url is special and c is U+005C (\)
-												AK: Check for state override in more places for basic URL parsing

											
										
										
											2023-07-04 11:34:00 +03:00
+								            //    * state override is not given and c is U+003F (?) or U+0023 (#)
 								            if ((code_point == end_of_file || code_point == '/')
 								                || (url->is_special() && code_point == '\\')
 								                || (!state_override.has_value() && (code_point == '?' || code_point == '#'))) {
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                // then:
 								                // 1. If url is special and c is U+005C (\), invalid-reverse-solidus validation error.
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                if (url->is_special() && code_point == '\\')
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                    report_validation_error();
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                // 2. If buffer is a double-dot URL path segment, then:
-												AK: Don't create Utf8View from temporary String in URLParser

This fixes a bug where a Utf8View was created with data from a temporary
string, which was immediately deleted. This led to a use-after-free
issue. This also changes most occurrences of StringBuilder::to_string in
URLParser to use ::string_view(), as the value is passed as StringView
const& most of the time anyways.

This fixes oss-fuzz issue 34973.

											
										
										
											2021-06-08 16:22:02 +03:00
+								                if (is_double_dot_path_segment(buffer.string_view())) {
-												AK: Remove superfluous check for file state in URL basic parse

The spec does not mention any of the other checks we were doing.

											
										
										
											2023-07-04 12:21:33 +03:00
+								                    // 1. Shorten url’s path.
 								                    if (!url->m_paths.is_empty())
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                        url->m_paths.remove(url->m_paths.size() - 1);
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                    // 2. If neither c is U+002F (/), nor url is special and c is U+005C (\), append the empty string to url’s path.
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                    if (code_point != '/' && !(url->is_special() && code_point == '\\'))
-												AK: Don't store parts of URLs percent decoded

As noted in several comments, doing this goes against the W3C spec,
and breaks parsing then re-serializing URLs that contain percent
encoded data, that was not encoded using the same character set as
the serializer.

For example, previously if you had a URL like:

https:://foo.com/what%2F%2F (the path is what + '//' percent encoded)

Creating URL("https:://foo.com/what%2F%2F").serialize() would return:

https://foo.com/what//

Which is incorrect and not the same as the URL we passed. This is
because the re-serializing uses the PercentEncodeSet::Path which
does not include '/'.

Only doing the percent encoding in the setters fixes this, which
is required to navigate to Google Street View (which includes a
percent encoded URL in its URL).

Seems to fix #13477 too

											
										
										
											2023-04-09 16:21:00 +03:00
+								                        url->append_slash();
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                }
 								                // 3. Otherwise, if buffer is a single-dot URL path segment and if neither c is U+002F (/), nor url is special and c is U+005C (\), append the empty string to url’s path.
 								                else if (is_single_dot_path_segment(buffer.string_view()) && code_point != '/' && !(url->is_special() && code_point == '\\')) {
-												AK: Don't store parts of URLs percent decoded

As noted in several comments, doing this goes against the W3C spec,
and breaks parsing then re-serializing URLs that contain percent
encoded data, that was not encoded using the same character set as
the serializer.

For example, previously if you had a URL like:

https:://foo.com/what%2F%2F (the path is what + '//' percent encoded)

Creating URL("https:://foo.com/what%2F%2F").serialize() would return:

https://foo.com/what//

Which is incorrect and not the same as the URL we passed. This is
because the re-serializing uses the PercentEncodeSet::Path which
does not include '/'.

Only doing the percent encoding in the setters fixes this, which
is required to navigate to Google Street View (which includes a
percent encoded URL in its URL).

Seems to fix #13477 too

											
										
										
											2023-04-09 16:21:00 +03:00
+								                    url->append_slash();
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                }
 								                // 4. Otherwise, if buffer is not a single-dot URL path segment, then:
 								                else if (!is_single_dot_path_segment(buffer.string_view())) {
 								                    // 1. If url’s scheme is "file", url’s path is empty, and buffer is a Windows drive letter, then replace the second code point in buffer with U+003A (:).
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                    if (url->m_scheme == "file" && url->m_paths.is_empty() && is_windows_drive_letter(buffer.string_view())) {
-												AK: Don't create Utf8View from temporary String in URLParser

This fixes a bug where a Utf8View was created with data from a temporary
string, which was immediately deleted. This led to a use-after-free
issue. This also changes most occurrences of StringBuilder::to_string in
URLParser to use ::string_view(), as the value is passed as StringView
const& most of the time anyway.

This fixes oss-fuzz issue 34973.

											
										
										
											2021-06-08 16:22:02 +03:00
+								                        auto drive_letter = buffer.string_view()[0];
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                        buffer.clear();
 								                        buffer.append(drive_letter);
 								                        buffer.append(':');
 								                    }
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                    // 2. Append buffer to url’s path.
 								                    //    FIXME: It would be nicer (and closer to spec) if URLParser could just directly append the path.
-												AK: Don't store parts of URLs percent decoded

As noted in several comments, doing this goes against the W3C spec,
and breaks parsing then re-serializing URLs that contain percent
encoded data that was not encoded using the same character set as
the serializer.

For example, previously if you had a URL like:

https://foo.com/what%2F%2F (the path is what + '//' percent encoded)

Creating URL("https://foo.com/what%2F%2F").serialize() would return:

https://foo.com/what//

Which is incorrect and not the same as the URL we passed. This is
because the re-serializing uses the PercentEncodeSet::Path which
does not include '/'.

Only doing the percent encoding in the setters fixes this, which
is required to navigate to Google Street View (which includes a
percent encoded URL in its URL).

Seems to fix #13477 too

											
										
										
											2023-04-09 16:21:00 +03:00
+								                    url->append_path(buffer.string_view(), URL::ApplyPercentEncoding::No);
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                }
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                // 5. Set buffer to the empty string.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                buffer.clear();
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                // 6. If c is U+003F (?), then set url’s query to the empty string and state to query state.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                if (code_point == '?') {
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                    url->m_query = "";
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                    state = State::Query;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                }
 								                // 7. If c is U+0023 (#), then set url’s fragment to the empty string and state to fragment state.
 								                else if (code_point == '#') {
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                    url->m_fragment = "";
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                    state = State::Fragment;
 								                }
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            }
 								            // 2. Otherwise, run these steps
 								            else {
 								                // 1. If c is not a URL code point and not U+0025 (%), invalid-URL-unit validation error.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                if (!is_url_code_point(code_point) && code_point != '%')
 								                    report_validation_error();
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                // FIXME: 2. If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
 								                // 3. UTF-8 percent-encode c using the path percent-encode set and append the result to buffer.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                URL::append_percent_encoded_if_necessary(buffer, code_point, URL::PercentEncodeSet::Path);
 								            }
 								            break;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								        // -> opaque path state, https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								        case State::CannotBeABaseUrlPath:
 								            // NOTE: This does not follow the spec exactly but rather uses the buffer and only sets the path on EOF.
 								            // NOTE: Verify that the assumptions required for this simplification are correct.
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								            VERIFY(url->m_paths.size() == 1 && url->m_paths[0].is_empty());
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								            // 1. If c is U+003F (?), then set url’s query to the empty string and state to query state.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								            if (code_point == '?') {
-												AK: Don't store parts of URLs percent decoded

As noted in several comments, doing this goes against the W3C spec,
and breaks parsing then re-serializing URLs that contain percent
encoded data that was not encoded using the same character set as
the serializer.

For example, previously if you had a URL like:

https://foo.com/what%2F%2F (the path is what + '//' percent encoded)

Creating URL("https://foo.com/what%2F%2F").serialize() would return:

https://foo.com/what//

Which is incorrect and not the same as the URL we passed. This is
because the re-serializing uses the PercentEncodeSet::Path which
does not include '/'.

Only doing the percent encoding in the setters fixes this, which
is required to navigate to Google Street View (which includes a
percent encoded URL in its URL).

Seems to fix #13477 too

											
										
										
											2023-04-09 16:21:00 +03:00
+								                url->m_paths[0] = buffer.string_view();
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                url->m_query = "";
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                state = State::Query;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            }
 								            // 2. Otherwise, if c is U+0023 (#), then set url’s fragment to the empty string and state to fragment state.
 								            else if (code_point == '#') {
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                // NOTE: This needs to be percent decoded since the member variables contain decoded data.
-												AK: Don't store parts of URLs percent decoded

As noted in several comments, doing this goes against the W3C spec,
and breaks parsing then re-serializing URLs that contain percent
encoded data that was not encoded using the same character set as
the serializer.

For example, previously if you had a URL like:

https://foo.com/what%2F%2F (the path is what + '//' percent encoded)

Creating URL("https://foo.com/what%2F%2F").serialize() would return:

https://foo.com/what//

Which is incorrect and not the same as the URL we passed. This is
because the re-serializing uses the PercentEncodeSet::Path which
does not include '/'.

Only doing the percent encoding in the setters fixes this, which
is required to navigate to Google Street View (which includes a
percent encoded URL in its URL).

Seems to fix #13477 too

											
										
										
											2023-04-09 16:21:00 +03:00
+								                url->m_paths[0] = buffer.string_view();
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                url->m_fragment = "";
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                state = State::Fragment;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            }
 								            // 3. Otherwise:
 								            else {
 								                // 1. If c is not the EOF code point, not a URL code point, and not U+0025 (%), invalid-URL-unit validation error.
-												AK: Stop using U+0000 as end of file code point in URL parser

This changes URL parser to use the 0xFFFFFFFF constant instead of 0 to
indicate end of file. This fixes a bug where inputs containing null
bytes would terminate the parser early, because they were interpreted
as end of file.

											
										
										
											2021-06-03 13:43:08 +03:00
+								                if (code_point != end_of_file && !is_url_code_point(code_point) && code_point != '%')
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                    report_validation_error();
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                // FIXME: 2. If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
 								                // 3. If c is not the EOF code point, UTF-8 percent-encode c using the C0 control percent-encode set and append the result to url’s path.
-												AK: Stop using U+0000 as end of file code point in URL parser

This changes URL parser to use the 0xFFFFFFFF constant instead of 0 to
indicate end of file. This fixes a bug where inputs containing null
bytes would terminate the parser early, because they were interpreted
as end of file.

											
										
										
											2021-06-03 13:43:08 +03:00
+								                if (code_point != end_of_file) {
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                    URL::append_percent_encoded_if_necessary(buffer, code_point, URL::PercentEncodeSet::C0Control);
 								                } else {
-												AK: Don't store parts of URLs percent decoded

As noted in several comments, doing this goes against the W3C spec,
and breaks parsing then re-serializing URLs that contain percent
encoded data that was not encoded using the same character set as
the serializer.

For example, previously if you had a URL like:

https://foo.com/what%2F%2F (the path is what + '//' percent encoded)

Creating URL("https://foo.com/what%2F%2F").serialize() would return:

https://foo.com/what//

Which is incorrect and not the same as the URL we passed. This is
because the re-serializing uses the PercentEncodeSet::Path which
does not include '/'.

Only doing the percent encoding in the setters fixes this, which
is required to navigate to Google Street View (which includes a
percent encoded URL in its URL).

Seems to fix #13477 too

											
										
										
											2023-04-09 16:21:00 +03:00
+								                    url->m_paths[0] = buffer.string_view();
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                }
 								            }
 								            break;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								        // -> query state, https://url.spec.whatwg.org/#query-state
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								        case State::Query:
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            // FIXME: 1. If encoding is not UTF-8 and one of the following is true:
 								            //           * url is not special
 								            //           * url’s scheme is "ws" or "wss"
 								            //        then set encoding to UTF-8.
 								            // 2. If one of the following is true:
-												AK: Check for state override in more places for basic URL parsing

											
										
										
											2023-07-04 11:34:00 +03:00
+								            //    * state override is not given and c is U+0023 (#)
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            //    * c is the EOF code point
-												AK: Check for state override in more places for basic URL parsing

											
										
										
											2023-07-04 11:34:00 +03:00
+								            if ((!state_override.has_value() && code_point == '#')
 								                || code_point == end_of_file) {
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                VERIFY(url->m_query == "");
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                // then:
 								                // 1. Let queryPercentEncodeSet be the special-query percent-encode set if url is special; otherwise the query percent-encode set.
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                auto query_percent_encode_set = url->is_special() ? URL::PercentEncodeSet::SpecialQuery : URL::PercentEncodeSet::Query;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                // 2. Percent-encode after encoding, with encoding, buffer, and queryPercentEncodeSet, and append the result to url’s query.
-												AK: Don't destructively re-encode query strings in the URL parser

We were decoding and then re-encoding the query string in URLs.
This round-trip caused us to lose information about plus ('+')
ASCII characters encoded as "%2B".

											
										
										
											2022-04-10 01:48:15 +03:00
+								                url->m_query = percent_encode_after_encoding(buffer.string_view(), query_percent_encode_set);
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                // 3. Set buffer to the empty string.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                buffer.clear();
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                // 4. If c is U+0023 (#), then set url’s fragment to the empty string and state to fragment state.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                if (code_point == '#') {
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in, this commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly like the
web's URL built-in to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								                    url->m_fragment = "";
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                    state = State::Fragment;
 								                }
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            }
 								            // 3. Otherwise, if c is not the EOF code point:
 								            else if (code_point != end_of_file) {
 								                // 1. If c is not a URL code point and not U+0025 (%), invalid-URL-unit validation error.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                if (!is_url_code_point(code_point) && code_point != '%')
 								                    report_validation_error();
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                // FIXME: 2. If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
 								                // 3. Append c to buffer.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                buffer.append_code_point(code_point);
 								            }
 								            break;
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								        // -> fragment state, https://url.spec.whatwg.org/#fragment-state
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								        case State::Fragment:
 								            // NOTE: This does not follow the spec exactly but rather uses the buffer and only sets the fragment on EOF.
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								            // 1. If c is not the EOF code point, then:
-												AK: Stop using U+0000 as end of file code point in URL parser

This changes URL parser to use the 0xFFFFFFFF constant instead of 0 to
indicate end of file. This fixes a bug where inputs containing null
bytes would terminate the parser early, because they were interpreted
as end of file.

											
										
										
											2021-06-03 13:43:08 +03:00
+								            if (code_point != end_of_file) {
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
+								                // 1. If c is not a URL code point and not U+0025 (%), invalid-URL-unit validation error.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                if (!is_url_code_point(code_point) && code_point != '%')
 								                    report_validation_error();
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								                // FIXME: 2. If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
 								                // FIXME: 3. UTF-8 percent-encode c using the fragment percent-encode set and append the result to url’s fragment.
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                buffer.append_code_point(code_point);
 								            } else {
-												AK: Don't store parts of URLs percent decoded

As noted in several comments, doing this goes against the W3C spec
and breaks parsing then re-serializing URLs that contain percent
encoded data that was not encoded using the same character set as
the serializer.

For example, previously if you had a URL like:

https://foo.com/what%2F%2F (the path is what + '//' percent encoded)

Creating URL("https://foo.com/what%2F%2F").serialize() would return:

https://foo.com/what//

Which is incorrect and not the same as the URL we passed. This is
because the re-serializing uses the PercentEncodeSet::Path which
does not include '/'.

Only doing the percent encoding in the setters fixes this, which
is required to navigate to Google Street View (which includes a
percent encoded URL in its URL).

Seems to fix #13477 too

											
										
										
											2023-04-09 16:21:00 +03:00
+								                url->m_fragment = buffer.string_view();
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								                buffer.clear();
 								            }
 								            break;
 								        default:
 								            VERIFY_NOT_REACHED();
 								        }
 								        if (iterator.done())
 								            break;
 								        ++iterator;
 								    }
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in. This commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly, like the
web's URL built-in, to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								    url->m_valid = true;
 								    dbgln_if(URL_PARSER_DEBUG, "URLParser::parse: Parsed URL to be '{}'.", url->serialize());
-												AK: Add spec comments for 'basic URL parser'

By golly, this is a lot more spec comments than I originally thought
I would need to do! This has exposed some bugs in the implementation,
as well as a whole lot of things which we are yet to implement.

No functional changes intended in this commit (already pretty large
as is!).

											
										
										
											2023-07-03 13:52:08 +03:00
 								    // 10. Return url.
-												AK: Accept optional url and state override parameters in URLParser

These are required in the specification and used by the web's URL
built-in. This commit also removes the Badge<AK::URL> from URLParser
to allow other classes that need to call the parser directly, like the
web's URL built-in, to do so.

											
										
										
											2021-09-13 22:34:14 +03:00
+								    return url.release_value();
-												AK: Add a new, spec-compliant URLParser

This adds a new URL parser, which aims to be compliant with the URL
specification (https://url.spec.whatwg.org/). It also contains a
rudimentary data URL parser.

											
										
										
											2021-05-25 23:13:15 +03:00
+								}
 								}