2020-01-18 11:38:21 +03:00
|
|
|
/*
 * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
 * Copyright (c) 2021, Max Wipfli <mail@maxwipfli.ch>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
|
|
|
|
|
2020-05-26 14:52:44 +03:00
|
|
|
#include <AK/LexicalPath.h>
|
2019-08-10 18:27:56 +03:00
|
|
|
#include <AK/StringBuilder.h>
|
|
|
|
#include <AK/URL.h>
|
2021-05-25 14:50:03 +03:00
|
|
|
#include <AK/Utf8View.h>
|
2019-08-10 18:27:56 +03:00
|
|
|
|
|
|
|
namespace AK {
|
|
|
|
|
2021-05-25 14:50:03 +03:00
|
|
|
// Returns true when code_point is an ASCII letter ('a'-'z' or 'A'-'Z').
constexpr bool is_ascii_alpha(u32 code_point)
{
    bool const is_lowercase = code_point >= 'a' && code_point <= 'z';
    bool const is_uppercase = code_point >= 'A' && code_point <= 'Z';
    return is_lowercase || is_uppercase;
}
|
|
|
|
|
|
|
|
// Returns true when code_point is an ASCII decimal digit ('0'-'9').
constexpr bool is_ascii_digit(u32 code_point)
{
    return !(code_point < '0' || code_point > '9');
}
|
|
|
|
|
|
|
|
// Returns true when code_point is an ASCII letter or an ASCII decimal digit.
constexpr bool is_ascii_alphanumeric(u32 code_point)
{
    if (is_ascii_alpha(code_point))
        return true;
    return is_ascii_digit(code_point);
}
|
|
|
|
|
|
|
|
// Returns true when code_point is an ASCII hexadecimal digit ('0'-'9', 'a'-'f' or 'A'-'F').
constexpr bool is_ascii_hex_digit(u32 code_point)
{
    if (is_ascii_digit(code_point))
        return true;
    bool const is_lowercase_hex = 'a' <= code_point && code_point <= 'f';
    bool const is_uppercase_hex = 'A' <= code_point && code_point <= 'F';
    return is_lowercase_hex || is_uppercase_hex;
}
|
|
|
|
|
2021-05-24 00:31:16 +03:00
|
|
|
// Returns true when ch may appear in a scheme as accepted by URL::parse().
// Only lowercase ASCII letters qualify.
static inline bool is_valid_scheme_character(char ch)
{
    return !(ch < 'a' || ch > 'z');
}
|
|
|
|
|
|
|
|
// Returns true when ch may appear in a hostname as accepted by URL::parse().
// Every character is accepted except NUL, '/' and ':'.
static inline bool is_valid_hostname_character(char ch)
{
    switch (ch) {
    case '\0':
    case '/':
    case ':':
        return false;
    default:
        return true;
    }
}
|
|
|
|
|
|
|
|
// Returns true when ch is an ASCII decimal digit ('0'-'9').
static inline bool is_digit(char ch)
{
    return !(ch < '0' || ch > '9');
}
|
|
|
|
|
|
|
|
bool URL::parse(const StringView& string)
|
|
|
|
{
|
2020-06-07 19:23:33 +03:00
|
|
|
if (string.is_null())
|
|
|
|
return false;
|
|
|
|
|
2019-08-10 18:27:56 +03:00
|
|
|
enum class State {
|
2021-05-24 00:31:16 +03:00
|
|
|
InScheme,
|
2019-08-10 18:27:56 +03:00
|
|
|
InHostname,
|
|
|
|
InPort,
|
|
|
|
InPath,
|
2020-04-12 01:38:28 +03:00
|
|
|
InQuery,
|
|
|
|
InFragment,
|
2020-04-26 23:48:54 +03:00
|
|
|
InDataMimeType,
|
|
|
|
InDataPayload,
|
2019-08-10 18:27:56 +03:00
|
|
|
};
|
|
|
|
|
|
|
|
Vector<char, 256> buffer;
|
2021-05-24 00:31:16 +03:00
|
|
|
State state { State::InScheme };
|
2019-08-10 18:27:56 +03:00
|
|
|
|
2019-12-09 19:45:40 +03:00
|
|
|
size_t index = 0;
|
2019-08-10 18:27:56 +03:00
|
|
|
|
|
|
|
auto peek = [&] {
|
|
|
|
if (index >= string.length())
|
|
|
|
return '\0';
|
|
|
|
return string[index];
|
|
|
|
};
|
|
|
|
|
|
|
|
auto consume = [&] {
|
|
|
|
if (index >= string.length())
|
|
|
|
return '\0';
|
|
|
|
return string[index++];
|
|
|
|
};
|
|
|
|
|
|
|
|
while (index < string.length()) {
|
|
|
|
switch (state) {
|
2021-05-24 00:31:16 +03:00
|
|
|
case State::InScheme: {
|
|
|
|
if (is_valid_scheme_character(peek())) {
|
2019-08-10 18:27:56 +03:00
|
|
|
buffer.append(consume());
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (consume() != ':')
|
|
|
|
return false;
|
2020-04-26 23:48:54 +03:00
|
|
|
|
2021-05-24 00:31:16 +03:00
|
|
|
m_scheme = String::copy(buffer);
|
2020-04-26 23:48:54 +03:00
|
|
|
|
2021-05-24 00:31:16 +03:00
|
|
|
if (m_scheme == "data") {
|
2020-04-26 23:48:54 +03:00
|
|
|
buffer.clear();
|
2020-11-04 09:20:20 +03:00
|
|
|
m_host = "";
|
2020-04-26 23:48:54 +03:00
|
|
|
state = State::InDataMimeType;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2021-05-24 00:31:16 +03:00
|
|
|
if (m_scheme == "about") {
|
2020-05-10 12:11:48 +03:00
|
|
|
buffer.clear();
|
2020-11-04 09:20:20 +03:00
|
|
|
m_host = "";
|
2020-05-10 12:11:48 +03:00
|
|
|
state = State::InPath;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2019-08-10 18:27:56 +03:00
|
|
|
if (consume() != '/')
|
|
|
|
return false;
|
|
|
|
if (consume() != '/')
|
|
|
|
return false;
|
|
|
|
if (buffer.is_empty())
|
|
|
|
return false;
|
2020-05-09 17:14:37 +03:00
|
|
|
state = State::InHostname;
|
2020-04-26 23:48:54 +03:00
|
|
|
buffer.clear();
|
2019-08-10 18:27:56 +03:00
|
|
|
continue;
|
2020-04-26 23:48:54 +03:00
|
|
|
}
|
2019-08-10 18:27:56 +03:00
|
|
|
case State::InHostname:
|
|
|
|
if (is_valid_hostname_character(peek())) {
|
|
|
|
buffer.append(consume());
|
|
|
|
continue;
|
|
|
|
}
|
2020-05-09 17:47:05 +03:00
|
|
|
if (buffer.is_empty()) {
|
2021-05-24 00:31:16 +03:00
|
|
|
if (m_scheme == "file") {
|
2020-05-09 17:47:05 +03:00
|
|
|
m_host = "";
|
|
|
|
state = State::InPath;
|
|
|
|
continue;
|
|
|
|
}
|
2019-08-10 18:27:56 +03:00
|
|
|
return false;
|
2020-05-09 17:47:05 +03:00
|
|
|
}
|
2019-08-10 18:27:56 +03:00
|
|
|
m_host = String::copy(buffer);
|
|
|
|
buffer.clear();
|
|
|
|
if (peek() == ':') {
|
|
|
|
consume();
|
|
|
|
state = State::InPort;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (peek() == '/') {
|
|
|
|
state = State::InPath;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
case State::InPort:
|
|
|
|
if (is_digit(peek())) {
|
|
|
|
buffer.append(consume());
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (buffer.is_empty())
|
|
|
|
return false;
|
|
|
|
{
|
2020-06-12 22:07:52 +03:00
|
|
|
auto port_opt = String::copy(buffer).to_uint();
|
2019-08-10 18:27:56 +03:00
|
|
|
buffer.clear();
|
2020-06-12 22:07:52 +03:00
|
|
|
if (!port_opt.has_value())
|
2019-08-10 18:27:56 +03:00
|
|
|
return false;
|
2020-06-12 22:07:52 +03:00
|
|
|
m_port = port_opt.value();
|
2019-08-10 18:27:56 +03:00
|
|
|
}
|
|
|
|
if (peek() == '/') {
|
|
|
|
state = State::InPath;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
case State::InPath:
|
2020-04-12 01:38:28 +03:00
|
|
|
if (peek() == '?' || peek() == '#') {
|
|
|
|
m_path = String::copy(buffer);
|
|
|
|
buffer.clear();
|
|
|
|
state = peek() == '?' ? State::InQuery : State::InFragment;
|
|
|
|
consume();
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
buffer.append(consume());
|
|
|
|
continue;
|
|
|
|
case State::InQuery:
|
|
|
|
if (peek() == '#') {
|
|
|
|
m_query = String::copy(buffer);
|
|
|
|
buffer.clear();
|
|
|
|
consume();
|
|
|
|
state = State::InFragment;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
buffer.append(consume());
|
|
|
|
continue;
|
|
|
|
case State::InFragment:
|
2019-08-10 18:27:56 +03:00
|
|
|
buffer.append(consume());
|
|
|
|
continue;
|
2020-04-26 23:48:54 +03:00
|
|
|
case State::InDataMimeType: {
|
|
|
|
if (peek() != ';' && peek() != ',') {
|
|
|
|
buffer.append(consume());
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
m_data_mime_type = String::copy(buffer);
|
|
|
|
buffer.clear();
|
|
|
|
|
|
|
|
if (peek() == ';') {
|
|
|
|
consume();
|
|
|
|
if (consume() != 'b')
|
|
|
|
return false;
|
|
|
|
if (consume() != 'a')
|
|
|
|
return false;
|
|
|
|
if (consume() != 's')
|
|
|
|
return false;
|
|
|
|
if (consume() != 'e')
|
|
|
|
return false;
|
|
|
|
if (consume() != '6')
|
|
|
|
return false;
|
|
|
|
if (consume() != '4')
|
|
|
|
return false;
|
|
|
|
m_data_payload_is_base64 = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (consume() != ',')
|
|
|
|
return false;
|
|
|
|
|
|
|
|
state = State::InDataPayload;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case State::InDataPayload:
|
|
|
|
buffer.append(consume());
|
|
|
|
break;
|
2019-08-10 18:27:56 +03:00
|
|
|
}
|
|
|
|
}
|
2019-10-17 21:21:57 +03:00
|
|
|
if (state == State::InHostname) {
|
|
|
|
// We're still in the hostname, so e.g "http://serenityos.org"
|
|
|
|
if (buffer.is_empty())
|
|
|
|
return false;
|
|
|
|
m_host = String::copy(buffer);
|
|
|
|
m_path = "/";
|
|
|
|
}
|
2021-05-24 00:31:16 +03:00
|
|
|
if (state == State::InScheme)
|
2020-04-19 11:36:56 +03:00
|
|
|
return false;
|
2020-04-12 01:38:28 +03:00
|
|
|
if (state == State::InPath)
|
|
|
|
m_path = String::copy(buffer);
|
|
|
|
if (state == State::InQuery)
|
|
|
|
m_query = String::copy(buffer);
|
|
|
|
if (state == State::InFragment)
|
|
|
|
m_fragment = String::copy(buffer);
|
2020-04-26 23:48:54 +03:00
|
|
|
if (state == State::InDataPayload)
|
2021-05-25 15:06:03 +03:00
|
|
|
m_data_payload = URL::percent_decode(String::copy(buffer));
|
2020-12-12 19:24:19 +03:00
|
|
|
if (state == State::InPort) {
|
|
|
|
auto port_opt = String::copy(buffer).to_uint();
|
|
|
|
if (port_opt.has_value())
|
|
|
|
m_port = port_opt.value();
|
|
|
|
}
|
|
|
|
|
2020-04-12 01:38:28 +03:00
|
|
|
if (m_query.is_null())
|
|
|
|
m_query = "";
|
|
|
|
if (m_fragment.is_null())
|
|
|
|
m_fragment = "";
|
2020-11-04 09:20:20 +03:00
|
|
|
|
2021-05-24 00:31:16 +03:00
|
|
|
if (!m_port && scheme_requires_port(m_scheme))
|
|
|
|
set_port(default_port_for_scheme(m_scheme));
|
2020-11-04 09:20:20 +03:00
|
|
|
|
|
|
|
return compute_validity();
|
2019-08-10 18:27:56 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
// Constructs a URL by parsing `string`; m_valid records whether parsing succeeded.
URL::URL(const StringView& string)
{
    bool const parsed_successfully = parse(string);
    m_valid = parsed_successfully;
}
|
|
|
|
|
2021-05-25 22:32:20 +03:00
|
|
|
// Returns the path of this URL as a single string.
//
// - When cannot_be_a_base_url() is true, the first entry of paths() is returned.
// - Otherwise, when m_path has been set (is not null), it is returned as-is.
// - Otherwise the path is assembled from m_paths, each segment prefixed with '/'.
String URL::path() const
{
    if (cannot_be_a_base_url())
        return paths()[0];

    if (!m_path.is_null())
        return m_path;

    StringBuilder joined;
    for (size_t i = 0; i < m_paths.size(); ++i) {
        joined.append('/');
        joined.append(m_paths[i]);
    }
    return joined.to_string();
}
|
|
|
|
|
2019-08-10 18:27:56 +03:00
|
|
|
// Serializes this URL back into a string.
//
// - "about" URLs become "about:<path>".
// - "data" URLs become "data:<mime-type>[;base64],<payload>".
// - Everything else becomes "<scheme>://<host>[:<port>]<path>[?<query>][#<fragment>]",
//   where the port is written only when port() differs from the scheme's default port,
//   and query/fragment are written only when non-empty.
String URL::to_string() const
{
    StringBuilder serialized;
    serialized.append(m_scheme);

    bool const is_about_url = m_scheme == "about";
    if (is_about_url) {
        serialized.append(':');
        serialized.append(m_path);
        return serialized.to_string();
    }

    bool const is_data_url = m_scheme == "data";
    if (is_data_url) {
        serialized.append(':');
        serialized.append(m_data_mime_type);
        if (m_data_payload_is_base64)
            serialized.append(";base64");
        serialized.append(',');
        serialized.append(m_data_payload);
        return serialized.to_string();
    }

    serialized.append("://");
    serialized.append(m_host);

    bool const has_non_default_port = port() != default_port_for_scheme(scheme());
    if (has_non_default_port) {
        serialized.append(':');
        serialized.append(String::number(m_port));
    }

    serialized.append(path());

    bool const has_query = !m_query.is_empty();
    if (has_query) {
        serialized.append('?');
        serialized.append(m_query);
    }

    bool const has_fragment = !m_fragment.is_empty();
    if (has_fragment) {
        serialized.append('#');
        serialized.append(m_fragment);
    }

    return serialized.to_string();
}
|
|
|
|
|
2019-11-19 00:04:39 +03:00
|
|
|
// Resolves `string` against this URL and returns the resulting URL.
//
// Resolution order:
//   1. If this URL is invalid, an empty URL is returned.
//   2. If `string` parses as a valid URL on its own, that URL is returned.
//   3. If this URL uses the "data" scheme, an empty URL is returned.
//   4. "//host/..." is completed with this URL's scheme (when the result is valid).
//   5. "/..." replaces this URL's path.
//   6. "#..." replaces this URL's fragment.
//   7. Anything else is treated as a relative path: it is appended to the directory
//      part of this URL's path and the result is canonicalized through LexicalPath.
URL URL::complete_url(const String& string) const
{
    if (!is_valid())
        return {};

    URL url(string);
    if (url.is_valid())
        return url;

    if (scheme() == "data")
        return {};

    if (string.starts_with("//")) {
        URL scheme_relative_url(String::formatted("{}:{}", m_scheme, string));
        if (scheme_relative_url.is_valid())
            return scheme_relative_url;
    }

    if (string.starts_with("/")) {
        url = *this;
        url.set_path(string);
        return url;
    }

    if (string.starts_with("#")) {
        url = *this;
        url.set_fragment(string.substring(1, string.length() - 1));
        return url;
    }

    StringBuilder builder;
    auto const base_path = path();
    LexicalPath lexical_path(base_path);
    builder.append('/');

    // ends_with() copes with an empty base path, whereas indexing the last
    // character (length() - 1) would read out of bounds in that case.
    bool const document_url_ends_in_slash = base_path.ends_with('/');

    size_t const part_count = lexical_path.parts().size();
    for (size_t i = 0; i < part_count; ++i) {
        // Unless the base path names a directory (trailing slash), its last part is
        // dropped so that `string` replaces it.
        if (i == part_count - 1 && !document_url_ends_in_slash)
            break;
        builder.append(lexical_path.parts()[i]);
        builder.append('/');
    }
    builder.append(string);
    auto built = builder.to_string();
    lexical_path = LexicalPath(built);

    built = lexical_path.string();
    // Put back a trailing slash from `string` if canonicalization removed it.
    if (string.ends_with('/') && !built.ends_with('/')) {
        builder.clear();
        builder.append(built);
        builder.append('/');
        built = builder.to_string();
    }

    url = *this;
    url.set_path(built);
    return url;
}
|
|
|
|
|
2021-05-24 00:31:16 +03:00
|
|
|
void URL::set_scheme(const String& scheme)
|
2020-04-12 00:07:23 +03:00
|
|
|
{
|
2021-05-24 00:31:16 +03:00
|
|
|
m_scheme = scheme;
|
2020-04-12 00:07:23 +03:00
|
|
|
m_valid = compute_validity();
|
|
|
|
}
|
|
|
|
|
2021-05-25 22:32:20 +03:00
|
|
|
void URL::set_username(const String& username)
|
|
|
|
{
|
|
|
|
m_username = username;
|
|
|
|
m_valid = compute_validity();
|
|
|
|
}
|
|
|
|
|
|
|
|
void URL::set_password(const String& password)
|
|
|
|
{
|
|
|
|
m_password = password;
|
|
|
|
m_valid = compute_validity();
|
|
|
|
}
|
|
|
|
|
2020-04-12 00:07:23 +03:00
|
|
|
void URL::set_host(const String& host)
|
|
|
|
{
|
|
|
|
m_host = host;
|
|
|
|
m_valid = compute_validity();
|
|
|
|
}
|
|
|
|
|
2021-05-31 12:44:13 +03:00
|
|
|
void URL::set_port(const u16 port)
|
2020-11-04 09:20:20 +03:00
|
|
|
{
|
2021-05-25 22:32:20 +03:00
|
|
|
if (port == default_port_for_scheme(m_scheme)) {
|
|
|
|
m_port = 0;
|
|
|
|
return;
|
|
|
|
}
|
2020-11-04 09:20:20 +03:00
|
|
|
m_port = port;
|
|
|
|
m_valid = compute_validity();
|
|
|
|
}
|
|
|
|
|
2020-04-12 00:07:23 +03:00
|
|
|
void URL::set_path(const String& path)
|
|
|
|
{
|
|
|
|
m_path = path;
|
|
|
|
m_valid = compute_validity();
|
|
|
|
}
|
|
|
|
|
2021-05-25 22:32:20 +03:00
|
|
|
void URL::set_paths(const Vector<String>& paths)
|
|
|
|
{
|
|
|
|
m_paths = paths;
|
|
|
|
m_valid = compute_validity();
|
|
|
|
}
|
|
|
|
|
2020-04-12 00:07:23 +03:00
|
|
|
void URL::set_query(const String& query)
|
|
|
|
{
|
|
|
|
m_query = query;
|
|
|
|
}
|
|
|
|
|
2020-04-12 01:38:13 +03:00
|
|
|
void URL::set_fragment(const String& fragment)
|
|
|
|
{
|
|
|
|
m_fragment = fragment;
|
|
|
|
}
|
|
|
|
|
2020-04-12 00:07:23 +03:00
|
|
|
bool URL::compute_validity() const
|
|
|
|
{
|
|
|
|
// FIXME: This is by no means complete.
|
2021-05-24 00:31:16 +03:00
|
|
|
if (m_scheme.is_empty())
|
2020-04-12 00:07:23 +03:00
|
|
|
return false;
|
2020-11-04 09:20:20 +03:00
|
|
|
|
2021-05-24 00:31:16 +03:00
|
|
|
if (m_scheme == "about") {
|
2021-05-25 22:32:20 +03:00
|
|
|
if (path().is_empty())
|
2020-11-04 09:20:20 +03:00
|
|
|
return false;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2021-05-24 00:31:16 +03:00
|
|
|
if (m_scheme == "file") {
|
2021-05-25 22:32:20 +03:00
|
|
|
if (path().is_empty())
|
2020-04-12 00:07:23 +03:00
|
|
|
return false;
|
2020-11-04 09:20:20 +03:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2021-05-24 00:31:16 +03:00
|
|
|
if (m_scheme == "data") {
|
2020-11-04 09:20:20 +03:00
|
|
|
if (m_data_mime_type.is_empty())
|
2020-04-19 11:36:56 +03:00
|
|
|
return false;
|
2020-11-04 09:20:20 +03:00
|
|
|
return true;
|
2020-04-12 00:07:23 +03:00
|
|
|
}
|
2020-11-04 09:20:20 +03:00
|
|
|
|
|
|
|
if (m_host.is_empty())
|
|
|
|
return false;
|
|
|
|
|
2020-04-12 00:07:23 +03:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2021-05-24 00:31:16 +03:00
|
|
|
bool URL::scheme_requires_port(const StringView& scheme)
|
2020-11-04 09:20:20 +03:00
|
|
|
{
|
2021-05-24 00:31:16 +03:00
|
|
|
return (default_port_for_scheme(scheme) != 0);
|
2020-11-04 09:20:20 +03:00
|
|
|
}
|
|
|
|
|
2021-05-24 00:31:16 +03:00
|
|
|
// Returns the default port for `scheme`, or 0 when the scheme has none known here.
u16 URL::default_port_for_scheme(const StringView& scheme)
{
    // Plain-text web protocols.
    if (scheme == "http" || scheme == "ws")
        return 80;

    // TLS-secured web protocols.
    if (scheme == "https" || scheme == "wss")
        return 443;

    if (scheme == "gemini")
        return 1965;

    if (scheme == "irc")
        return 6667;

    if (scheme == "ircs")
        return 6697;

    return 0;
}
|
|
|
|
|
2021-05-24 00:31:16 +03:00
|
|
|
// Builds a "file" URL from a path and a fragment.
URL URL::create_with_file_scheme(const String& path, const String& fragment)
{
    URL file_url;

    // The setters are applied in this order: scheme, then path, then fragment.
    file_url.set_scheme("file");
    file_url.set_path(path);
    file_url.set_fragment(fragment);

    return file_url;
}
|
|
|
|
|
2020-04-19 11:55:59 +03:00
|
|
|
// Interprets `url_or_path` as a URL if it parses as a valid one; otherwise treats it
// as a filesystem path, canonicalizes it and wraps it in a "file" URL.
URL URL::create_with_url_or_path(const String& url_or_path)
{
    URL parsed_url = url_or_path;
    if (!parsed_url.is_valid()) {
        String canonical_path = LexicalPath::canonicalized_path(url_or_path);
        return URL::create_with_file_scheme(canonical_path);
    }
    return parsed_url;
}
|
|
|
|
|
2020-08-24 11:40:45 +03:00
|
|
|
// Builds a "data" URL from a MIME type and a payload.
// The resulting URL is unconditionally marked valid.
URL URL::create_with_data(const StringView& mime_type, const StringView& payload, bool is_base64)
{
    URL data_url;
    data_url.set_scheme("data");

    // set_scheme() re-evaluates validity before the MIME type is assigned below,
    // so the flag is set explicitly afterwards.
    data_url.m_valid = true;

    data_url.m_data_payload = payload;
    data_url.m_data_mime_type = mime_type;
    data_url.m_data_payload_is_base64 = is_base64;

    return data_url;
}
|
|
|
|
|
2020-05-06 00:56:35 +03:00
|
|
|
// Returns the last component of this URL's path.
//
// An invalid URL yields a null String. When m_path is set, its basename is computed
// through LexicalPath; otherwise the last entry of m_paths is returned, or a null
// String when there are no entries.
String URL::basename() const
{
    if (!m_valid)
        return {};

    // FIXME: Temporary m_path hack
    bool const has_path_string = !m_path.is_null();
    if (has_path_string)
        return LexicalPath(m_path).basename();

    if (!m_paths.is_empty())
        return m_paths.last();

    return {};
}
|
|
|
|
|
2021-05-25 14:50:03 +03:00
|
|
|
// Appends `code_point` to `builder` as the percent-encoded bytes of its UTF-8 encoding
// ("%XX" per byte, uppercase hex). Code points above U+10FFFF trip VERIFY_NOT_REACHED().
void URL::append_percent_encoded(StringBuilder& builder, u32 code_point)
{
    // One byte: U+0000..U+007F.
    if (code_point <= 0x7f) {
        builder.appendff("%{:02X}", code_point);
        return;
    }

    // Two bytes: U+0080..U+07FF.
    if (code_point <= 0x07ff) {
        builder.appendff("%{:02X}%{:02X}", ((code_point >> 6) & 0x1f) | 0xc0, (code_point & 0x3f) | 0x80);
        return;
    }

    // Three bytes: U+0800..U+FFFF.
    if (code_point <= 0xffff) {
        builder.appendff("%{:02X}%{:02X}%{:02X}", ((code_point >> 12) & 0x0f) | 0xe0, ((code_point >> 6) & 0x3f) | 0x80, (code_point & 0x3f) | 0x80);
        return;
    }

    // Four bytes: U+10000..U+10FFFF.
    if (code_point <= 0x10ffff) {
        builder.appendff("%{:02X}%{:02X}%{:02X}%{:02X}", ((code_point >> 18) & 0x07) | 0xf0, ((code_point >> 12) & 0x3f) | 0x80, ((code_point >> 6) & 0x3f) | 0x80, (code_point & 0x3f) | 0x80);
        return;
    }

    VERIFY_NOT_REACHED();
}
|
|
|
|
|
|
|
|
// https://url.spec.whatwg.org/#c0-control-percent-encode-set
|
|
|
|
constexpr bool code_point_is_in_percent_encode_set(u32 code_point, URL::PercentEncodeSet set)
|
|
|
|
{
|
|
|
|
switch (set) {
|
|
|
|
case URL::PercentEncodeSet::C0Control:
|
|
|
|
return code_point < 0x20 || code_point > 0x7E;
|
|
|
|
case URL::PercentEncodeSet::Fragment:
|
|
|
|
return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::C0Control) || " \"<>`"sv.contains(code_point);
|
|
|
|
case URL::PercentEncodeSet::Query:
|
|
|
|
return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::C0Control) || " \"#<>"sv.contains(code_point);
|
|
|
|
case URL::PercentEncodeSet::SpecialQuery:
|
|
|
|
return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::Query) || code_point == '\'';
|
|
|
|
case URL::PercentEncodeSet::Path:
|
|
|
|
return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::Query) || "?`{}"sv.contains(code_point);
|
|
|
|
case URL::PercentEncodeSet::Userinfo:
|
|
|
|
return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::Path) || "/:;=@[\\]^|"sv.contains(code_point);
|
|
|
|
case URL::PercentEncodeSet::Component:
|
|
|
|
return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::Userinfo) || "$%&+,"sv.contains(code_point);
|
|
|
|
case URL::PercentEncodeSet::ApplicationXWWWFormUrlencoded:
|
|
|
|
return code_point >= 0x7E || !(is_ascii_alphanumeric(code_point) || "!'()~"sv.contains(code_point));
|
|
|
|
case URL::PercentEncodeSet::EncodeURI:
|
|
|
|
// NOTE: This is the same percent encode set that JS encodeURI() uses.
|
|
|
|
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURI
|
|
|
|
return code_point >= 0x7E || (!is_ascii_alphanumeric(code_point) && !";,/?:@&=+$-_.!~*'()#"sv.contains(code_point));
|
|
|
|
default:
|
|
|
|
VERIFY_NOT_REACHED();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Appends `code_point` to `builder`, percent-encoding it only when it belongs to `set`.
void URL::append_percent_encoded_if_necessary(StringBuilder& builder, u32 code_point, URL::PercentEncodeSet set)
{
    bool const needs_encoding = code_point_is_in_percent_encode_set(code_point, set);
    if (!needs_encoding) {
        builder.append_code_point(code_point);
        return;
    }
    append_percent_encoded(builder, code_point);
}
|
|
|
|
|
|
|
|
// Returns a copy of `input` in which every code point belonging to `set` has been
// percent-encoded. The input is iterated code point by code point through Utf8View.
String URL::percent_encode(const StringView& input, URL::PercentEncodeSet set)
{
    StringBuilder encoded;
    Utf8View code_points(input);
    for (auto code_point : code_points)
        append_percent_encoded_if_necessary(encoded, code_point, set);
    return encoded.to_string();
}
|
|
|
|
|
|
|
|
// Converts one ASCII hexadecimal digit to its numeric value (0-15).
// Any other input trips VERIFY_NOT_REACHED(), so callers must validate the digit first.
constexpr u8 parse_hex_digit(u8 digit)
{
    bool const is_decimal = '0' <= digit && digit <= '9';
    if (is_decimal)
        return digit - '0';

    bool const is_lowercase_hex = 'a' <= digit && digit <= 'f';
    if (is_lowercase_hex)
        return digit - 'a' + 10;

    bool const is_uppercase_hex = 'A' <= digit && digit <= 'F';
    if (is_uppercase_hex)
        return digit - 'A' + 10;

    VERIFY_NOT_REACHED();
}
|
|
|
|
|
|
|
|
// Returns `input` with every "%XX" escape (XX being two hex digits) replaced by the
// byte it denotes. A '%' that is not followed by two hex digits is copied through
// unchanged. Input without any '%' is returned as-is.
String URL::percent_decode(const StringView& input)
{
    if (!input.contains('%'))
        return input;

    StringBuilder decoded;
    Utf8View code_points(input);
    for (auto it = code_points.begin(); !it.done(); ++it) {
        // An escape is a '%' immediately followed by two hexadecimal digits.
        bool const starts_escape = *it == '%'
            && is_ascii_hex_digit(it.peek(1).value_or(0))
            && is_ascii_hex_digit(it.peek(2).value_or(0));

        if (!starts_escape) {
            decoded.append_code_point(*it);
            continue;
        }

        // Step over the '%' and fold the two hex digits into a single byte.
        ++it;
        u8 byte = parse_hex_digit(*it) << 4;
        ++it;
        byte += parse_hex_digit(*it);
        decoded.append(byte);
    }
    return decoded.to_string();
}
|
|
|
|
|
2019-08-10 18:27:56 +03:00
|
|
|
}
|