2020-01-18 11:38:21 +03:00
/*
* Copyright ( c ) 2018 - 2020 , Andreas Kling < kling @ serenityos . org >
2021-05-24 00:31:16 +03:00
* Copyright ( c ) 2021 , Max Wipfli < mail @ maxwipfli . ch >
2020-01-18 11:38:21 +03:00
*
2021-04-22 11:24:48 +03:00
* SPDX - License - Identifier : BSD - 2 - Clause
2020-01-18 11:38:21 +03:00
*/
2021-06-01 22:18:08 +03:00
# include <AK/CharacterTypes.h>
2021-05-27 22:05:07 +03:00
# include <AK/Debug.h>
2020-05-26 14:52:44 +03:00
# include <AK/LexicalPath.h>
2019-08-10 18:27:56 +03:00
# include <AK/StringBuilder.h>
# include <AK/URL.h>
2021-05-27 22:05:07 +03:00
# include <AK/URLParser.h>
2021-05-25 14:50:03 +03:00
# include <AK/Utf8View.h>
2019-08-10 18:27:56 +03:00
namespace AK {
2021-05-27 22:05:07 +03:00
// FIXME: It could make sense to force users of URL to use URLParser::parse() explicitly instead of using a constructor.
2021-11-11 02:55:02 +03:00
// Builds a URL from its textual form by delegating to URLParser::parse().
// When URL_PARSER_DEBUG is enabled, the parse result (or the fact that it is invalid) is logged.
URL::URL(StringView string)
    : URL(URLParser::parse(string))
{
    if constexpr (URL_PARSER_DEBUG) {
        if (!m_valid) {
            dbgln("URL constructor: Parsed URL to be invalid.");
            return;
        }
        dbgln("URL constructor: Parsed URL to be '{}'.", serialize());
    }
}
2023-02-13 20:42:27 +03:00
// Parses `relative_url` with this URL passed to the parser as the base.
// If this URL is not valid, an empty (default-constructed) URL is returned instead.
URL URL::complete_url(StringView relative_url) const
{
    if (is_valid())
        return URLParser::parse(relative_url, *this);
    return {};
}
2023-04-14 01:06:58 +03:00
// Returns the stored username, percent-decoded when `apply_percent_decoding` is Yes.
DeprecatedString URL::username(ApplyPercentDecoding apply_percent_decoding) const
{
    if (apply_percent_decoding == ApplyPercentDecoding::Yes)
        return percent_decode(m_username);
    return m_username;
}
// Returns the stored password, percent-decoded when `apply_percent_decoding` is Yes.
DeprecatedString URL::password(ApplyPercentDecoding apply_percent_decoding) const
{
    if (apply_percent_decoding == ApplyPercentDecoding::Yes)
        return percent_decode(m_password);
    return m_password;
}
2023-04-14 01:29:51 +03:00
// Returns the path segment at `index`, percent-decoded when `apply_percent_decoding` is Yes.
// `index` must be smaller than path_segment_count(); this is enforced with VERIFY.
DeprecatedString URL::path_segment_at_index(size_t index, ApplyPercentDecoding apply_percent_decoding) const
{
    VERIFY(index < path_segment_count());
    auto const& segment = m_paths[index];
    if (apply_percent_decoding == ApplyPercentDecoding::Yes)
        return percent_decode(segment);
    return segment;
}
2023-04-14 01:06:58 +03:00
// Returns the last path segment, percent-decoded when `apply_percent_decoding` is Yes.
// An invalid URL or a URL without any path segments yields an empty result.
DeprecatedString URL::basename(ApplyPercentDecoding apply_percent_decoding) const
{
    if (!m_valid || m_paths.is_empty())
        return {};

    auto const& final_segment = m_paths.last();
    if (apply_percent_decoding == ApplyPercentDecoding::Yes)
        return percent_decode(final_segment);
    return final_segment;
}
// Returns the stored query, percent-decoded when `apply_percent_decoding` is Yes.
DeprecatedString URL::query(ApplyPercentDecoding apply_percent_decoding) const
{
    if (apply_percent_decoding == ApplyPercentDecoding::Yes)
        return percent_decode(m_query);
    return m_query;
}
// Returns the stored fragment, percent-decoded when `apply_percent_decoding` is Yes.
DeprecatedString URL::fragment(ApplyPercentDecoding apply_percent_decoding) const
{
    if (apply_percent_decoding == ApplyPercentDecoding::Yes)
        return percent_decode(m_fragment);
    return m_fragment;
}
2023-04-09 16:21:00 +03:00
// Percent-encodes `input` with the given encode set, passing null and empty strings through untouched.
// NOTE: This only exists for compatibility with the existing URL tests which check for both .is_null() and .is_empty().
//       Returning the input as-is keeps a null string null and an empty string empty.
static DeprecatedString deprecated_string_percent_encode(DeprecatedString const& input, URL::PercentEncodeSet set = URL::PercentEncodeSet::Userinfo, URL::SpaceAsPlus space_as_plus = URL::SpaceAsPlus::No)
{
    if (!input.is_null() && !input.is_empty())
        return URL::percent_encode(input.view(), set, space_as_plus);
    return input;
}
2022-12-04 21:02:33 +03:00
void URL : : set_scheme ( DeprecatedString scheme )
2020-04-12 00:07:23 +03:00
{
2021-06-01 11:58:27 +03:00
m_scheme = move ( scheme ) ;
2020-04-12 00:07:23 +03:00
m_valid = compute_validity ( ) ;
}
2023-04-09 16:21:00 +03:00
void URL : : set_username ( DeprecatedString username , ApplyPercentEncoding apply_percent_encoding )
2021-05-25 22:32:20 +03:00
{
2023-04-09 16:21:00 +03:00
if ( apply_percent_encoding = = ApplyPercentEncoding : : Yes )
username = deprecated_string_percent_encode ( username , PercentEncodeSet : : Userinfo ) ;
2021-06-01 11:58:27 +03:00
m_username = move ( username ) ;
2021-05-25 22:32:20 +03:00
m_valid = compute_validity ( ) ;
}
2023-04-09 16:21:00 +03:00
void URL : : set_password ( DeprecatedString password , ApplyPercentEncoding apply_percent_encoding )
2021-05-25 22:32:20 +03:00
{
2023-04-09 16:21:00 +03:00
if ( apply_percent_encoding = = ApplyPercentEncoding : : Yes )
password = deprecated_string_percent_encode ( password , PercentEncodeSet : : Userinfo ) ;
2021-06-01 11:58:27 +03:00
m_password = move ( password ) ;
2021-05-25 22:32:20 +03:00
m_valid = compute_validity ( ) ;
}
2022-12-04 21:02:33 +03:00
void URL : : set_host ( DeprecatedString host )
2020-04-12 00:07:23 +03:00
{
2021-06-01 11:58:27 +03:00
m_host = move ( host ) ;
2020-04-12 00:07:23 +03:00
m_valid = compute_validity ( ) ;
}
2021-09-13 23:12:16 +03:00
void URL : : set_port ( Optional < u16 > port )
2020-11-04 09:20:20 +03:00
{
2021-05-25 22:32:20 +03:00
if ( port = = default_port_for_scheme ( m_scheme ) ) {
2021-09-13 23:12:16 +03:00
m_port = { } ;
2021-05-25 22:32:20 +03:00
return ;
}
2021-09-13 23:12:16 +03:00
m_port = move ( port ) ;
2020-11-04 09:20:20 +03:00
m_valid = compute_validity ( ) ;
}
2023-04-09 16:21:00 +03:00
void URL : : set_paths ( Vector < DeprecatedString > paths , ApplyPercentEncoding apply_percent_encoding )
2021-05-25 22:32:20 +03:00
{
2023-04-09 16:21:00 +03:00
if ( apply_percent_encoding = = ApplyPercentEncoding : : Yes ) {
Vector < DeprecatedString > encoded_paths ;
encoded_paths . ensure_capacity ( paths . size ( ) ) ;
for ( auto & segment : paths )
encoded_paths . unchecked_append ( deprecated_string_percent_encode ( segment , PercentEncodeSet : : Path ) ) ;
m_paths = move ( encoded_paths ) ;
} else {
m_paths = move ( paths ) ;
}
2021-05-25 22:32:20 +03:00
m_valid = compute_validity ( ) ;
}
2023-04-09 16:21:00 +03:00
void URL : : append_path ( DeprecatedString path , ApplyPercentEncoding apply_percent_encoding )
{
if ( apply_percent_encoding = = ApplyPercentEncoding : : Yes )
path = deprecated_string_percent_encode ( path , PercentEncodeSet : : Path ) ;
m_paths . append ( path ) ;
}
void URL : : set_query ( DeprecatedString query , ApplyPercentEncoding apply_percent_encoding )
2020-04-12 00:07:23 +03:00
{
2023-04-09 16:21:00 +03:00
if ( apply_percent_encoding = = ApplyPercentEncoding : : Yes )
query = deprecated_string_percent_encode ( query , is_special ( ) ? PercentEncodeSet : : SpecialQuery : PercentEncodeSet : : Query ) ;
2021-06-01 11:58:27 +03:00
m_query = move ( query ) ;
2020-04-12 00:07:23 +03:00
}
2023-04-09 16:21:00 +03:00
void URL : : set_fragment ( DeprecatedString fragment , ApplyPercentEncoding apply_percent_encoding )
2020-04-12 01:38:13 +03:00
{
2023-04-09 16:21:00 +03:00
if ( apply_percent_encoding = = ApplyPercentEncoding : : Yes )
fragment = deprecated_string_percent_encode ( fragment , PercentEncodeSet : : Fragment ) ;
2021-06-01 11:58:27 +03:00
m_fragment = move ( fragment ) ;
2020-04-12 01:38:13 +03:00
}
2021-05-29 21:46:49 +03:00
// FIXME: This is by no means complete.
// NOTE: This relies on some assumptions about how the spec-defined URL parser works that may turn out to be wrong.
2020-04-12 00:07:23 +03:00
bool URL : : compute_validity ( ) const
{
2021-05-24 00:31:16 +03:00
if ( m_scheme . is_empty ( ) )
2020-04-12 00:07:23 +03:00
return false ;
2020-11-04 09:20:20 +03:00
2021-05-24 00:31:16 +03:00
if ( m_scheme = = " data " ) {
2020-11-04 09:20:20 +03:00
if ( m_data_mime_type . is_empty ( ) )
2020-04-19 11:36:56 +03:00
return false ;
2021-05-29 21:46:49 +03:00
if ( m_data_payload_is_base64 ) {
if ( m_data_payload . length ( ) % 4 ! = 0 )
return false ;
for ( auto character : m_data_payload ) {
if ( ! is_ascii_alphanumeric ( character ) | | character = = ' + ' | | character = = ' / ' | | character = = ' = ' )
return false ;
}
}
} else if ( m_cannot_be_a_base_url ) {
if ( m_paths . size ( ) ! = 1 )
return false ;
if ( m_paths [ 0 ] . is_empty ( ) )
return false ;
} else {
if ( m_scheme . is_one_of ( " about " , " mailto " ) )
return false ;
// NOTE: Maybe it is allowed to have a zero-segment path.
if ( m_paths . size ( ) = = 0 )
return false ;
2020-04-12 00:07:23 +03:00
}
2020-11-04 09:20:20 +03:00
2021-05-29 21:46:49 +03:00
// NOTE: A file URL's host should be the empty string for localhost, not null.
if ( m_scheme = = " file " & & m_host . is_null ( ) )
2020-11-04 09:20:20 +03:00
return false ;
2020-04-12 00:07:23 +03:00
return true ;
}
2021-11-11 02:55:02 +03:00
bool URL : : scheme_requires_port ( StringView scheme )
2020-11-04 09:20:20 +03:00
{
2021-05-24 00:31:16 +03:00
return ( default_port_for_scheme ( scheme ) ! = 0 ) ;
2020-11-04 09:20:20 +03:00
}
2021-11-11 02:55:02 +03:00
// Maps a scheme to its default port. Schemes without a known default port yield 0.
u16 URL::default_port_for_scheme(StringView scheme)
{
    // Schemes sharing a port are grouped together; the comparisons are mutually exclusive,
    // so the grouping does not affect the result.
    if (scheme == "http" || scheme == "ws")
        return 80;
    if (scheme == "https" || scheme == "wss")
        return 443;
    if (scheme == "gemini")
        return 1965;
    if (scheme == "irc")
        return 6667;
    if (scheme == "ircs")
        return 6697;
    return 0;
}
2022-12-04 21:02:33 +03:00
// Builds a "file" URL from a filesystem path, a fragment and a hostname.
// A path that is not absolute yields an empty (default-constructed) URL.
// A trailing '/' on `path` is preserved via append_slash().
URL URL::create_with_file_scheme(DeprecatedString const& path, DeprecatedString const& fragment, DeprecatedString const& hostname)
{
    LexicalPath parsed_path(path);
    if (!parsed_path.is_absolute())
        return {};

    URL file_url;
    file_url.set_scheme("file");

    // NOTE: If the hostname is localhost (or null, which implies localhost), it should be set to the empty string.
    //       This is because a file URL always needs a non-null hostname.
    bool const refers_to_localhost = hostname.is_null() || hostname == "localhost";
    file_url.set_host(refers_to_localhost ? DeprecatedString::empty() : hostname);

    file_url.set_paths(parsed_path.parts());
    if (path.ends_with('/'))
        file_url.append_slash();

    file_url.set_fragment(fragment);
    return file_url;
}
2022-12-04 21:02:33 +03:00
// Builds a "help" URL from a path, a fragment and a hostname.
// No check is made that `path` is absolute. A trailing '/' on `path` is preserved via append_slash().
URL URL::create_with_help_scheme(DeprecatedString const& path, DeprecatedString const& fragment, DeprecatedString const& hostname)
{
    LexicalPath parsed_path(path);

    URL help_url;
    help_url.set_scheme("help");

    // NOTE: If the hostname is localhost (or null, which implies localhost), it is set to the empty string,
    //       so the resulting URL always carries a non-null host.
    bool const refers_to_localhost = hostname.is_null() || hostname == "localhost";
    help_url.set_host(refers_to_localhost ? DeprecatedString::empty() : hostname);

    help_url.set_paths(parsed_path.parts());
    if (path.ends_with('/'))
        help_url.append_slash();

    help_url.set_fragment(fragment);
    return help_url;
}
2022-12-04 21:02:33 +03:00
// Interprets the input as a URL first; if that does not produce a valid URL, the input is treated as a
// filesystem path, canonicalized, and turned into a "file" URL.
URL URL::create_with_url_or_path(DeprecatedString const& url_or_path)
{
    URL parsed_url = url_or_path;
    if (!parsed_url.is_valid()) {
        DeprecatedString canonical_path = LexicalPath::canonicalized_path(url_or_path);
        return URL::create_with_file_scheme(canonical_path);
    }
    return parsed_url;
}
2021-05-25 23:05:01 +03:00
// https://url.spec.whatwg.org/#special-scheme
2021-11-11 02:55:02 +03:00
bool URL : : is_special_scheme ( StringView scheme )
2021-05-25 23:05:01 +03:00
{
return scheme . is_one_of ( " ftp " , " file " , " http " , " https " , " ws " , " wss " ) ;
}
2023-04-14 22:12:03 +03:00
// Serializes the path as a sequence of "/segment" pieces, percent-decoding each segment when requested.
// NOTE: For a URL that cannot be a base URL, the first path entry is returned verbatim and
//       `apply_percent_decoding` is not consulted.
DeprecatedString URL::serialize_path(ApplyPercentDecoding apply_percent_decoding) const
{
    if (cannot_be_a_base_url())
        return m_paths[0];

    bool const should_decode = apply_percent_decoding == ApplyPercentDecoding::Yes;
    StringBuilder output;
    for (auto& segment : m_paths) {
        output.append('/');
        if (should_decode)
            output.append(percent_decode(segment));
        else
            output.append(segment);
    }
    return output.to_deprecated_string();
}
2022-12-04 21:02:33 +03:00
// Serializes a data URL as "<scheme>:<mime type>[;base64],<payload>".
// Must only be called on "data" URLs whose MIME type and payload are non-null (enforced with VERIFY).
DeprecatedString URL::serialize_data_url() const
{
    VERIFY(m_scheme == "data");
    VERIFY(!m_data_mime_type.is_null());
    VERIFY(!m_data_payload.is_null());

    StringBuilder output;
    output.append(m_scheme);
    output.append(':');
    output.append(m_data_mime_type);
    if (m_data_payload_is_base64)
        output.append(";base64"sv);
    output.append(',');

    // NOTE: The specification does not say anything about encoding this, but we should encode at least control and non-ASCII
    //       characters (since this is also a valid representation of the same data URL).
    auto const encoded_payload = URL::percent_encode(m_data_payload, PercentEncodeSet::C0Control);
    output.append(encoded_payload);

    return output.to_deprecated_string();
}
// https://url.spec.whatwg.org/#concept-url-serializer
2022-12-04 21:02:33 +03:00
// Serializes the URL into its string form; "data" URLs are handed off to serialize_data_url().
// The fragment is emitted only when `exclude_fragment` is No and the fragment is non-null.
// All components are written as stored; no additional percent-encoding happens here.
DeprecatedString URL::serialize(ExcludeFragment exclude_fragment) const
{
    if (m_scheme == "data")
        return serialize_data_url();

    StringBuilder output;
    output.append(m_scheme);
    output.append(':');

    // Authority: "//[username[:password]@]host[:port]", present only for a non-null host.
    bool const has_host = !m_host.is_null();
    if (has_host) {
        output.append("//"sv);
        if (includes_credentials()) {
            output.append(m_username);
            if (!m_password.is_empty()) {
                output.append(':');
                output.append(m_password);
            }
            output.append('@');
        }
        output.append(m_host);
        if (m_port.has_value())
            output.appendff(":{}", *m_port);
    }

    // Path.
    if (cannot_be_a_base_url()) {
        output.append(m_paths[0]);
    } else {
        // Without a host, a path starting with an empty segment would serialize as "//..." and read like an
        // authority; the "/." prefix prevents that.
        bool const path_would_look_like_authority = !has_host && m_paths.size() > 1 && m_paths[0].is_empty();
        if (path_would_look_like_authority)
            output.append("/."sv);
        for (auto& segment : m_paths) {
            output.append('/');
            output.append(segment);
        }
    }

    if (!m_query.is_null()) {
        output.append('?');
        output.append(m_query);
    }

    bool const include_fragment = exclude_fragment == ExcludeFragment::No && !m_fragment.is_null();
    if (include_fragment) {
        output.append('#');
        output.append(m_fragment);
    }

    return output.to_deprecated_string();
}
// https://url.spec.whatwg.org/#url-rendering
// NOTE: This does not, for example, display credentials.
// FIXME: Parts of the URL other than the host should have their sequences of percent-encoded bytes replaced with code points
// resulting from percent-decoding those sequences converted to bytes, unless that renders those sequences invisible.
2022-12-04 21:02:33 +03:00
// Serializes the URL for presentation: scheme, host, port, path, query and fragment are emitted,
// while username and password are left out. The URL must be valid (enforced with VERIFY).
// "data" URLs are handed off to serialize_data_url().
DeprecatedString URL::serialize_for_display() const
{
    VERIFY(m_valid);

    if (m_scheme == "data")
        return serialize_data_url();

    StringBuilder output;
    output.append(m_scheme);
    output.append(':');

    // Authority without credentials: "//host[:port]", present only for a non-null host.
    bool const has_host = !m_host.is_null();
    if (has_host) {
        output.append("//"sv);
        output.append(m_host);
        if (m_port.has_value())
            output.appendff(":{}", *m_port);
    }

    // Path.
    if (cannot_be_a_base_url()) {
        output.append(m_paths[0]);
    } else {
        // Without a host, a path starting with an empty segment would serialize as "//..." and read like an
        // authority; the "/." prefix prevents that.
        bool const path_would_look_like_authority = !has_host && m_paths.size() > 1 && m_paths[0].is_empty();
        if (path_would_look_like_authority)
            output.append("/."sv);
        for (auto& segment : m_paths) {
            output.append('/');
            output.append(segment);
        }
    }

    if (!m_query.is_null()) {
        output.append('?');
        output.append(m_query);
    }

    if (!m_fragment.is_null()) {
        output.append('#');
        output.append(m_fragment);
    }

    return output.to_deprecated_string();
}
2021-09-13 22:18:14 +03:00
// https://html.spec.whatwg.org/multipage/origin.html#ascii-serialisation-of-an-origin
// https://url.spec.whatwg.org/#concept-url-origin
2022-12-04 21:02:33 +03:00
// Serializes the origin of this URL as "scheme://host[:port]", or "null" for an opaque origin.
// The URL must be valid (enforced with VERIFY).
DeprecatedString URL::serialize_origin() const
{
    VERIFY(m_valid);

    if (m_scheme == "blob"sv) {
        // TODO: 1. If URL's blob URL entry is non-null, then return URL's blob URL entry's environment's origin.
        // 2. Let url be the result of parsing URL's path[0].
        VERIFY(!m_paths.is_empty());
        URL inner_url = m_paths[0];
        // 3. Return a new opaque origin, if url is failure, and url's origin otherwise.
        if (inner_url.is_valid())
            return inner_url.serialize_origin();
        return "null";
    }

    // file: "Unfortunate as it is, this is left as an exercise to the reader. When in doubt, return a new opaque origin."
    bool const has_tuple_origin = m_scheme.is_one_of("ftp"sv, "http"sv, "https"sv, "ws"sv, "wss"sv);
    if (!has_tuple_origin)
        return "null";

    StringBuilder output;
    output.append(m_scheme);
    output.append("://"sv);
    output.append(m_host);
    if (m_port.has_value())
        output.appendff(":{}", *m_port);
    return output.to_deprecated_string();
}
2021-06-01 11:58:27 +03:00
bool URL : : equals ( URL const & other , ExcludeFragment exclude_fragments ) const
2021-05-27 22:38:16 +03:00
{
2021-06-01 12:14:30 +03:00
if ( this = = & other )
return true ;
2021-05-27 22:38:16 +03:00
if ( ! m_valid | | ! other . m_valid )
return false ;
return serialize ( exclude_fragments ) = = other . serialize ( exclude_fragments ) ;
}
2021-05-25 14:50:03 +03:00
void URL : : append_percent_encoded ( StringBuilder & builder , u32 code_point )
{
if ( code_point < = 0x7f )
builder . appendff ( " %{:02X} " , code_point ) ;
else if ( code_point < = 0x07ff )
builder . appendff ( " %{:02X}%{:02X} " , ( ( code_point > > 6 ) & 0x1f ) | 0xc0 , ( code_point & 0x3f ) | 0x80 ) ;
else if ( code_point < = 0xffff )
builder . appendff ( " %{:02X}%{:02X}%{:02X} " , ( ( code_point > > 12 ) & 0x0f ) | 0xe0 , ( ( code_point > > 6 ) & 0x3f ) | 0x80 , ( code_point & 0x3f ) | 0x80 ) ;
else if ( code_point < = 0x10ffff )
builder . appendff ( " %{:02X}%{:02X}%{:02X}%{:02X} " , ( ( code_point > > 18 ) & 0x07 ) | 0xf0 , ( ( code_point > > 12 ) & 0x3f ) | 0x80 , ( ( code_point > > 6 ) & 0x3f ) | 0x80 , ( code_point & 0x3f ) | 0x80 ) ;
else
VERIFY_NOT_REACHED ( ) ;
}
// https://url.spec.whatwg.org/#c0-control-percent-encode-set
2022-04-10 01:48:15 +03:00
// Returns whether `code_point` has to be percent-encoded under the given percent-encode set.
// Apart from C0Control and EncodeURI, every set is expressed as "a smaller set, plus a few extra ASCII characters",
// forming the chain C0Control -> Query -> Path -> Userinfo -> Component -> ApplicationXWWWFormUrlencoded
// (Fragment extends C0Control, SpecialQuery extends Query).
// An unknown `set` value trips VERIFY_NOT_REACHED().
// NOTE(review): The string literals below look whitespace-garbled in this copy of the file (stray spaces inside the
//               literals, `sv` detached from its literal). Confirm the exact character lists, in particular whether
//               U+0020 SPACE is a member, against the upstream file and the URL specification.
bool URL : : code_point_is_in_percent_encode_set ( u32 code_point , URL : : PercentEncodeSet set )
2021-05-25 14:50:03 +03:00
{
switch ( set ) {
case URL : : PercentEncodeSet : : C0Control :
// Everything below 0x20 and everything above '~' (0x7E), i.e. DEL and all non-ASCII code points.
return code_point < 0x20 | | code_point > 0x7E ;
case URL : : PercentEncodeSet : : Fragment :
return code_point_is_in_percent_encode_set ( code_point , URL : : PercentEncodeSet : : C0Control ) | | " \" <>` " sv . contains ( code_point ) ;
case URL : : PercentEncodeSet : : Query :
return code_point_is_in_percent_encode_set ( code_point , URL : : PercentEncodeSet : : C0Control ) | | " \" #<> " sv . contains ( code_point ) ;
case URL : : PercentEncodeSet : : SpecialQuery :
// The query set plus the single quote.
return code_point_is_in_percent_encode_set ( code_point , URL : : PercentEncodeSet : : Query ) | | code_point = = ' \' ' ;
case URL : : PercentEncodeSet : : Path :
return code_point_is_in_percent_encode_set ( code_point , URL : : PercentEncodeSet : : Query ) | | " ?` { } " sv.contains(code_point);
case URL : : PercentEncodeSet : : Userinfo :
return code_point_is_in_percent_encode_set ( code_point , URL : : PercentEncodeSet : : Path ) | | " /: ; = @ [ \ \ ] ^ | " sv.contains(code_point);
case URL : : PercentEncodeSet : : Component :
return code_point_is_in_percent_encode_set ( code_point , URL : : PercentEncodeSet : : Userinfo ) | | " $%&+, " sv . contains ( code_point ) ;
case URL : : PercentEncodeSet : : ApplicationXWWWFormUrlencoded :
2022-06-10 21:39:08 +03:00
return code_point_is_in_percent_encode_set ( code_point , URL : : PercentEncodeSet : : Component ) | | " !'()~ " sv . contains ( code_point ) ;
2021-05-25 14:50:03 +03:00
case URL : : PercentEncodeSet : : EncodeURI :
// NOTE: This is the same percent encode set that JS encodeURI() uses.
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURI
// Encodes everything above '~', plus every character that is neither ASCII alphanumeric nor in the listed punctuation.
2022-12-25 22:25:34 +03:00
return code_point > 0x7E | | ( ! is_ascii_alphanumeric ( code_point ) & & ! " ;,/?:@&=+$-_.!~*'()# " sv . contains ( static_cast < char > ( code_point ) ) ) ;
2021-05-25 14:50:03 +03:00
default :
VERIFY_NOT_REACHED ( ) ;
}
}
2022-04-08 16:20:30 +03:00
void URL : : append_percent_encoded_if_necessary ( StringBuilder & builder , u32 code_point , URL : : PercentEncodeSet set )
2021-05-25 14:50:03 +03:00
{
2022-04-08 16:20:30 +03:00
if ( code_point_is_in_percent_encode_set ( code_point , set ) )
2021-05-25 14:50:03 +03:00
append_percent_encoded ( builder , code_point ) ;
else
builder . append_code_point ( code_point ) ;
}
2022-12-04 21:02:33 +03:00
// Percent-encodes `input`, iterated as UTF-8 code points, according to `set`.
// When `space_as_plus` is Yes, a space is written as '+' instead of going through the encode set.
DeprecatedString URL::percent_encode(StringView input, URL::PercentEncodeSet set, SpaceAsPlus space_as_plus)
{
    bool const plus_for_space = space_as_plus == SpaceAsPlus::Yes;

    StringBuilder output;
    for (auto code_point : Utf8View(input)) {
        if (plus_for_space && code_point == ' ') {
            output.append('+');
            continue;
        }
        append_percent_encoded_if_necessary(output, code_point, set);
    }
    return output.to_deprecated_string();
}
2022-12-04 21:02:33 +03:00
// Replaces every "%XX" escape (XX being two ASCII hex digits) in `input` with the byte it denotes.
// A '%' that is not followed by two hex digits is copied through unchanged, as is every other code point.
DeprecatedString URL::percent_decode(StringView input)
{
    // Fast path: nothing to decode.
    if (!input.contains('%'))
        return input;

    StringBuilder output;
    Utf8View code_points(input);
    for (auto it = code_points.begin(); !it.done(); ++it) {
        bool const starts_escape = *it == '%'
            && is_ascii_hex_digit(it.peek(1).value_or(0))
            && is_ascii_hex_digit(it.peek(2).value_or(0));
        if (!starts_escape) {
            output.append_code_point(*it);
            continue;
        }

        // Consume the two hex digits; the loop increment then steps past the second one.
        ++it;
        u8 decoded_byte = parse_ascii_hex_digit(*it) << 4;
        ++it;
        decoded_byte += parse_ascii_hex_digit(*it);
        output.append(decoded_byte);
    }
    return output.to_deprecated_string();
}
2019-08-10 18:27:56 +03:00
}