mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-12-28 05:35:52 +03:00
AK: Implement more conforming URL percent encode/decode mechanism
This adds a few new functions to percent encode/decode strings according to the URL specification. The functions allow specifying a PercentEncodeSet, which is defined by the specification. They will be used to replace the current urlencode() and urldecode() functions in a later commit. This commit adds a few duplicate helper functions in the URL class, such as is_digit() and is_ascii_digit(). These will be cleaned up as soon as the upcoming new URL parser replaces the current one.
This commit is contained in:
parent
0e4f7aa8e8
commit
2a6c9bc5f7
Notes:
sideshowbarker
2024-07-18 17:04:44 +09:00
Author: https://github.com/MaxWipfli Commit: https://github.com/SerenityOS/serenity/commit/2a6c9bc5f77 Pull-request: https://github.com/SerenityOS/serenity/pull/7478 Reviewed-by: https://github.com/awesomekling
114
AK/URL.cpp
114
AK/URL.cpp
@ -9,9 +9,30 @@
|
||||
#include <AK/StringBuilder.h>
|
||||
#include <AK/URL.h>
|
||||
#include <AK/URLParser.h>
|
||||
#include <AK/Utf8View.h>
|
||||
|
||||
namespace AK {
|
||||
|
||||
// Returns true if code_point is an ASCII letter, i.e. lies in 'a'..'z' or 'A'..'Z'.
constexpr bool is_ascii_alpha(u32 code_point)
{
    const bool is_lowercase = code_point >= 'a' && code_point <= 'z';
    const bool is_uppercase = code_point >= 'A' && code_point <= 'Z';
    return is_lowercase || is_uppercase;
}
|
||||
|
||||
// Returns true if code_point is an ASCII decimal digit, i.e. lies in '0'..'9'.
constexpr bool is_ascii_digit(u32 code_point)
{
    if (code_point < '0')
        return false;
    return code_point <= '9';
}
|
||||
|
||||
// Returns true if code_point is an ASCII letter or an ASCII decimal digit.
constexpr bool is_ascii_alphanumeric(u32 code_point)
{
    if (is_ascii_digit(code_point))
        return true;
    return is_ascii_alpha(code_point);
}
|
||||
|
||||
// Returns true if code_point is an ASCII hexadecimal digit: '0'..'9', 'a'..'f' or 'A'..'F'.
constexpr bool is_ascii_hex_digit(u32 code_point)
{
    if (is_ascii_digit(code_point))
        return true;

    const bool is_lowercase_hex = 'a' <= code_point && code_point <= 'f';
    const bool is_uppercase_hex = 'A' <= code_point && code_point <= 'F';
    return is_lowercase_hex || is_uppercase_hex;
}
|
||||
|
||||
static inline bool is_valid_scheme_character(char ch)
|
||||
{
|
||||
return ch >= 'a' && ch <= 'z';
|
||||
@ -467,4 +488,97 @@ String URL::basename() const
|
||||
return LexicalPath(m_path).basename();
|
||||
}
|
||||
|
||||
// Appends the UTF-8 encoding of code_point to builder, writing every byte as '%' followed
// by two uppercase hexadecimal digits. One to four bytes are emitted depending on the
// magnitude of code_point; values above 0x10ffff hit VERIFY_NOT_REACHED().
void URL::append_percent_encoded(StringBuilder& builder, u32 code_point)
{
    // Builds a UTF-8 continuation byte (0b10xxxxxx) from the six bits starting at `shift`.
    const auto continuation_byte = [code_point](unsigned shift) -> u32 {
        return ((code_point >> shift) & 0x3f) | 0x80;
    };

    // Single byte: the code point itself.
    if (code_point <= 0x7f) {
        builder.appendff("%{:02X}", code_point);
        return;
    }

    // Two bytes: 0b110xxxxx lead byte + one continuation byte.
    if (code_point <= 0x07ff) {
        const u32 lead_byte = ((code_point >> 6) & 0x1f) | 0xc0;
        builder.appendff("%{:02X}%{:02X}", lead_byte, continuation_byte(0));
        return;
    }

    // Three bytes: 0b1110xxxx lead byte + two continuation bytes.
    if (code_point <= 0xffff) {
        const u32 lead_byte = ((code_point >> 12) & 0x0f) | 0xe0;
        builder.appendff("%{:02X}%{:02X}%{:02X}", lead_byte, continuation_byte(6), continuation_byte(0));
        return;
    }

    // Four bytes: 0b11110xxx lead byte + three continuation bytes.
    if (code_point <= 0x10ffff) {
        const u32 lead_byte = ((code_point >> 18) & 0x07) | 0xf0;
        builder.appendff("%{:02X}%{:02X}%{:02X}%{:02X}", lead_byte, continuation_byte(12), continuation_byte(6), continuation_byte(0));
        return;
    }

    VERIFY_NOT_REACHED();
}
|
||||
|
||||
// https://url.spec.whatwg.org/#c0-control-percent-encode-set
|
||||
constexpr bool code_point_is_in_percent_encode_set(u32 code_point, URL::PercentEncodeSet set)
|
||||
{
|
||||
switch (set) {
|
||||
case URL::PercentEncodeSet::C0Control:
|
||||
return code_point < 0x20 || code_point > 0x7E;
|
||||
case URL::PercentEncodeSet::Fragment:
|
||||
return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::C0Control) || " \"<>`"sv.contains(code_point);
|
||||
case URL::PercentEncodeSet::Query:
|
||||
return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::C0Control) || " \"#<>"sv.contains(code_point);
|
||||
case URL::PercentEncodeSet::SpecialQuery:
|
||||
return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::Query) || code_point == '\'';
|
||||
case URL::PercentEncodeSet::Path:
|
||||
return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::Query) || "?`{}"sv.contains(code_point);
|
||||
case URL::PercentEncodeSet::Userinfo:
|
||||
return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::Path) || "/:;=@[\\]^|"sv.contains(code_point);
|
||||
case URL::PercentEncodeSet::Component:
|
||||
return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::Userinfo) || "$%&+,"sv.contains(code_point);
|
||||
case URL::PercentEncodeSet::ApplicationXWWWFormUrlencoded:
|
||||
return code_point >= 0x7E || !(is_ascii_alphanumeric(code_point) || "!'()~"sv.contains(code_point));
|
||||
case URL::PercentEncodeSet::EncodeURI:
|
||||
// NOTE: This is the same percent encode set that JS encodeURI() uses.
|
||||
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURI
|
||||
return code_point >= 0x7E || (!is_ascii_alphanumeric(code_point) && !";,/?:@&=+$-_.!~*'()#"sv.contains(code_point));
|
||||
default:
|
||||
VERIFY_NOT_REACHED();
|
||||
}
|
||||
}
|
||||
|
||||
// Appends code_point to builder: percent-encoded if it is a member of the given
// percent-encode set, and as a plain code point otherwise.
void URL::append_percent_encoded_if_necessary(StringBuilder& builder, u32 code_point, URL::PercentEncodeSet set)
{
    const bool must_encode = code_point_is_in_percent_encode_set(code_point, set);
    if (!must_encode) {
        builder.append_code_point(code_point);
        return;
    }
    append_percent_encoded(builder, code_point);
}
|
||||
|
||||
// Returns a copy of input in which every code point that belongs to the given
// percent-encode set has been replaced by its percent-encoded form.
// The input is iterated code point by code point through a Utf8View.
String URL::percent_encode(const StringView& input, URL::PercentEncodeSet set)
{
    StringBuilder encoded;
    Utf8View code_points(input);
    for (auto code_point : code_points)
        append_percent_encoded_if_necessary(encoded, code_point, set);
    return encoded.to_string();
}
|
||||
|
||||
// Converts a single ASCII hexadecimal digit ('0'..'9', 'a'..'f', 'A'..'F') to its numeric
// value (0..15). Any other input hits VERIFY_NOT_REACHED().
constexpr u8 parse_hex_digit(u8 digit)
{
    const bool is_decimal = '0' <= digit && digit <= '9';
    if (is_decimal)
        return digit - '0';

    const bool is_lowercase_hex = 'a' <= digit && digit <= 'f';
    if (is_lowercase_hex)
        return 10 + (digit - 'a');

    const bool is_uppercase_hex = 'A' <= digit && digit <= 'F';
    if (is_uppercase_hex)
        return 10 + (digit - 'A');

    VERIFY_NOT_REACHED();
}
|
||||
|
||||
// Returns a copy of input in which every "%XX" sequence (XX being two ASCII hexadecimal
// digits) has been replaced by the single byte it denotes. A '%' that is not followed by
// two hexadecimal digits is copied through unchanged, as is every other code point.
String URL::percent_decode(const StringView& input)
{
    // Without any '%' there is nothing to decode.
    if (!input.contains('%'))
        return input;

    StringBuilder decoded;
    Utf8View code_points(input);
    auto it = code_points.begin();
    while (!it.done()) {
        const bool is_escape_sequence = *it == '%'
            && is_ascii_hex_digit(it.peek(1).value_or(0))
            && is_ascii_hex_digit(it.peek(2).value_or(0));

        if (!is_escape_sequence) {
            decoded.append_code_point(*it);
            ++it;
            continue;
        }

        // Step over the '%' and consume the two hex digits that follow it.
        ++it;
        const u8 high_nibble = parse_hex_digit(*it);
        ++it;
        const u8 low_nibble = parse_hex_digit(*it);

        u8 byte = (high_nibble << 4) + low_nibble;
        decoded.append(byte);
        ++it;
    }
    return decoded.to_string();
}
|
||||
|
||||
}
|
||||
|
18
AK/URL.h
18
AK/URL.h
@ -17,6 +17,18 @@ namespace AK {
|
||||
|
||||
class URL {
|
||||
public:
|
||||
// Identifies which set of code points gets percent-encoded by percent_encode().
// NOTE: The enumerator order is kept as is, so the underlying values stay unchanged.
enum class PercentEncodeSet {
    // C0 control percent-encode set.
    C0Control,
    // Fragment percent-encode set.
    Fragment,
    // Query percent-encode set.
    Query,
    // Special-query percent-encode set.
    SpecialQuery,
    // Path percent-encode set.
    Path,
    // Userinfo percent-encode set.
    Userinfo,
    // Component percent-encode set.
    Component,
    // application/x-www-form-urlencoded percent-encode set.
    ApplicationXWWWFormUrlencoded,
    // The set used by JavaScript's encodeURI().
    EncodeURI,
};
|
||||
|
||||
URL() = default;
|
||||
URL(const StringView&);
|
||||
URL(const char* string)
|
||||
@ -67,6 +79,9 @@ public:
|
||||
static bool scheme_requires_port(const StringView&);
|
||||
static u16 default_port_for_scheme(const StringView&);
|
||||
|
||||
static String percent_encode(const StringView& input, PercentEncodeSet set = PercentEncodeSet::Userinfo);
|
||||
static String percent_decode(const StringView& input);
|
||||
|
||||
bool operator==(const URL& other) const
|
||||
{
|
||||
if (this == &other)
|
||||
@ -78,6 +93,9 @@ private:
|
||||
bool parse(const StringView&);
|
||||
bool compute_validity() const;
|
||||
|
||||
static void append_percent_encoded_if_necessary(StringBuilder&, u32 code_point, PercentEncodeSet set = PercentEncodeSet::Userinfo);
|
||||
static void append_percent_encoded(StringBuilder&, u32 code_point);
|
||||
|
||||
bool m_valid { false };
|
||||
u16 m_port { 0 };
|
||||
bool m_data_payload_is_base64 { false };
|
||||
|
Loading…
Reference in New Issue
Block a user