ladybird/AK/URLParser.h
Karol Kosek eb41f0144b AK: Decode data URLs to separate class (and parse like every other URL)
Parsing 'data:' URLs took its own route. It never set standard URL
fields like path, query or fragment (except for scheme) and instead
gave us separate methods called `data_payload()`, `data_mime_type()`,
and `data_payload_is_base64()`.

Because parsing 'data:' didn't use standard fields, running the
following JS code:

    new URL('#a', 'data:text/plain,hello').toString()

not only cleared the path, as URLParser doesn't check for data from the
data_payload() function (making the result 'data:#a'), but also
crashed the program, because we forbid having an empty MIME type when we
serialize to string.

With this change, 'data:' URLs will be parsed like every other URL.
To decode the 'data:' URL contents, one needs to call process_data_url()
on a URL, which will return a struct containing MIME type with already
decoded data! :^)
2023-08-01 14:19:05 +02:00

75 lines
2.1 KiB
C++

/*
* Copyright (c) 2021, Max Wipfli <mail@maxwipfli.ch>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Optional.h>
#include <AK/StringView.h>
#include <AK/URL.h>
namespace AK {
// X-macro listing every state of the URL parser, one STATE(...) entry per state.
// Users define STATE(name) before expanding ENUMERATE_STATES to generate one
// item per entry (an enumerator, a switch case, ...), then #undef STATE again.
// The names presumably mirror the states of the WHATWG basic URL parser - confirm
// against the spec before adding or reordering entries.
// ENUMERATE_STATES itself is #undef'd at the end of this header.
#define ENUMERATE_STATES \
STATE(SchemeStart) \
STATE(Scheme) \
STATE(NoScheme) \
STATE(SpecialRelativeOrAuthority) \
STATE(PathOrAuthority) \
STATE(Relative) \
STATE(RelativeSlash) \
STATE(SpecialAuthoritySlashes) \
STATE(SpecialAuthorityIgnoreSlashes) \
STATE(Authority) \
STATE(Host) \
STATE(Hostname) \
STATE(Port) \
STATE(File) \
STATE(FileSlash) \
STATE(FileHost) \
STATE(PathStart) \
STATE(Path) \
STATE(CannotBeABaseUrlPath) \
STATE(Query) \
STATE(Fragment)
// Groups the URL parsing entry points; every member declared here is static.
class URLParser {
public:
    // Parser states: one enumerator per ENUMERATE_STATES entry, in the same order.
    enum class State {
#define STATE(name) name,
        ENUMERATE_STATES
#undef STATE
    };

    // Returns the identifier of `state` as a C string (e.g. "SchemeStart").
    // A value that does not correspond to any enumerator hits VERIFY_NOT_REACHED().
    static char const* state_name(State const& state)
    {
        // Stringified enumerator names, indexed by the enumerator's numeric value.
        // Generated from the same X-macro as the enum, so the two cannot drift apart.
        static constexpr char const* names[] = {
#define STATE(name) #name,
            ENUMERATE_STATES
#undef STATE
        };

        auto const index = static_cast<unsigned>(state);
        if (index < sizeof(names) / sizeof(names[0]))
            return names[index];
        VERIFY_NOT_REACHED();
    }

    // Entry point for the spec's "basic URL parser".
    // NOTE(review): `base_url`, `url` and `state_override` presumably map onto the
    // spec's optional base, url and state override arguments - confirm in the .cpp.
    // https://url.spec.whatwg.org/#concept-basic-url-parser
    static URL basic_parse(StringView input, Optional<URL> const& base_url = {}, Optional<URL> url = {}, Optional<State> state_override = {});

    // Percent-encodes `input` using `percent_encode_set`; `space_as_plus` defaults to false.
    // https://url.spec.whatwg.org/#string-percent-encode-after-encoding
    static DeprecatedString percent_encode_after_encoding(StringView input, URL::PercentEncodeSet percent_encode_set, bool space_as_plus = false);

    // Serializes a host to a String, or returns an error.
    // https://url.spec.whatwg.org/#concept-host-serializer
    static ErrorOr<String> serialize_host(URL::Host const&);
};
#undef ENUMERATE_STATES
}
// Make URLParser usable without the AK:: qualifier when USING_AK_GLOBALLY is enabled.
#if USING_AK_GLOBALLY
using AK::URLParser;
#endif