diff --git a/Meta/gn/secondary/Tests/LibWeb/BUILD.gn b/Meta/gn/secondary/Tests/LibWeb/BUILD.gn index aada6eab0bd..c2b69538ffe 100644 --- a/Meta/gn/secondary/Tests/LibWeb/BUILD.gn +++ b/Meta/gn/secondary/Tests/LibWeb/BUILD.gn @@ -12,6 +12,15 @@ unittest("TestCSSPixels") { deps = [ "//Userland/Libraries/LibWeb" ] } +unittest("TestFetchURL") { + include_dirs = [ "//Userland/Libraries" ] + sources = [ "TestFetchURL.cpp" ] + deps = [ + "//Userland/Libraries/LibURL", + "//Userland/Libraries/LibWeb", + ] +} + unittest("TestHTMLTokenizer") { include_dirs = [ "//Userland/Libraries" ] sources = [ "TestHTMLTokenizer.cpp" ] @@ -41,6 +50,7 @@ group("LibWeb") { deps = [ ":TestCSSIDSpeed", ":TestCSSPixels", + ":TestFetchURL", ":TestHTMLTokenizer", ":TestMicrosyntax", ":TestMimeSniff", diff --git a/Tests/LibURL/TestURL.cpp b/Tests/LibURL/TestURL.cpp index 77a0fd64b75..89cc014d273 100644 --- a/Tests/LibURL/TestURL.cpp +++ b/Tests/LibURL/TestURL.cpp @@ -238,109 +238,6 @@ TEST_CASE(mailto_url_with_subject) EXPECT_EQ(url.serialize(), "mailto:mail@example.com?subject=test"); } -TEST_CASE(data_url) -{ - URL::URL url("data:text/html,test"sv); - EXPECT(url.is_valid()); - EXPECT_EQ(url.scheme(), "data"); - EXPECT(url.host().has()); - EXPECT_EQ(url.serialize(), "data:text/html,test"); - - auto data_url = TRY_OR_FAIL(url.process_data_url()); - EXPECT_EQ(data_url.mime_type, "text/html"); - EXPECT_EQ(StringView(data_url.body.bytes()), "test"sv); -} - -TEST_CASE(data_url_default_mime_type) -{ - URL::URL url("data:,test"sv); - EXPECT(url.is_valid()); - EXPECT_EQ(url.scheme(), "data"); - EXPECT(url.host().has()); - EXPECT_EQ(url.serialize(), "data:,test"); - - auto data_url = TRY_OR_FAIL(url.process_data_url()); - EXPECT_EQ(data_url.mime_type, "text/plain;charset=US-ASCII"); - EXPECT_EQ(StringView(data_url.body.bytes()), "test"sv); -} - -TEST_CASE(data_url_encoded) -{ - URL::URL url("data:text/html,Hello%20friends%2C%0X%X0"sv); - EXPECT(url.is_valid()); - EXPECT_EQ(url.scheme(), "data"); - EXPECT(url.host().has()); - EXPECT_EQ(url.serialize(), "data:text/html,Hello%20friends%2C%0X%X0"); - - auto data_url = TRY_OR_FAIL(url.process_data_url()); - EXPECT_EQ(data_url.mime_type, "text/html"); - EXPECT_EQ(StringView(data_url.body.bytes()), "Hello friends,%0X%X0"sv); -} - -TEST_CASE(data_url_base64_encoded) -{ - URL::URL url("data:text/html;base64,dGVzdA=="sv); - EXPECT(url.is_valid()); - EXPECT_EQ(url.scheme(), "data"); - EXPECT(url.host().has()); - EXPECT_EQ(url.serialize(), "data:text/html;base64,dGVzdA=="); - - auto data_url = TRY_OR_FAIL(url.process_data_url()); - EXPECT_EQ(data_url.mime_type, "text/html"); - EXPECT_EQ(StringView(data_url.body.bytes()), "test"sv); -} - -TEST_CASE(data_url_base64_encoded_default_mime_type) -{ - URL::URL url("data:;base64,dGVzdA=="sv); - EXPECT(url.is_valid()); - EXPECT_EQ(url.scheme(), "data"); - EXPECT(url.host().has()); - EXPECT_EQ(url.serialize(), "data:;base64,dGVzdA=="); - - auto data_url = TRY_OR_FAIL(url.process_data_url()); - EXPECT_EQ(data_url.mime_type, "text/plain;charset=US-ASCII"); - EXPECT_EQ(StringView(data_url.body.bytes()), "test"sv); -} - -TEST_CASE(data_url_base64_encoded_with_whitespace) -{ - URL::URL url("data: text/html ; bAsE64 , dGVz dA== "sv); - EXPECT(url.is_valid()); - EXPECT_EQ(url.scheme(), "data"); - EXPECT(url.host().has()); - EXPECT_EQ(url.serialize(), "data: text/html ; bAsE64 , dGVz dA=="); - - auto data_url = TRY_OR_FAIL(url.process_data_url()); - EXPECT_EQ(data_url.mime_type, "text/html"); - EXPECT_EQ(StringView(data_url.body.bytes()), "test"); -} - -TEST_CASE(data_url_base64_encoded_with_inline_whitespace) -{ - URL::URL url("data:text/javascript;base64,%20ZD%20Qg%0D%0APS%20An%20Zm91cic%0D%0A%207%20"sv); - EXPECT(url.is_valid()); - EXPECT_EQ(url.scheme(), "data"); - EXPECT(url.host().has()); - - auto data_url = TRY_OR_FAIL(url.process_data_url()); - EXPECT_EQ(data_url.mime_type, "text/javascript"); - EXPECT_EQ(StringView(data_url.body.bytes()), "d4 = 'four';"sv); -} - -TEST_CASE(data_url_completed_with_fragment) -{ - auto url = URL::URL("data:text/plain,test"sv).complete_url("#a"sv); - EXPECT(url.is_valid()); - EXPECT_EQ(url.scheme(), "data"); - EXPECT_EQ(url.fragment(), "a"); - EXPECT(url.host().has()); - - auto data_url = TRY_OR_FAIL(url.process_data_url()); - EXPECT_EQ(data_url.mime_type, "text/plain"); - EXPECT_EQ(StringView(data_url.body.bytes()), "test"sv); -} - TEST_CASE(trailing_slash_with_complete_url) { EXPECT_EQ(URL::URL("http://a/b/"sv).complete_url("c/"sv).serialize(), "http://a/b/c/"); diff --git a/Tests/LibWeb/CMakeLists.txt b/Tests/LibWeb/CMakeLists.txt index 440c01dba4c..9253df290b1 100644 --- a/Tests/LibWeb/CMakeLists.txt +++ b/Tests/LibWeb/CMakeLists.txt @@ -1,6 +1,7 @@ set(TEST_SOURCES TestCSSIDSpeed.cpp TestCSSPixels.cpp + TestFetchURL.cpp TestHTMLTokenizer.cpp TestMicrosyntax.cpp TestMimeSniff.cpp @@ -11,4 +12,6 @@ foreach(source IN LISTS TEST_SOURCES) serenity_test("${source}" LibWeb LIBS LibWeb) endforeach() +target_link_libraries(TestFetchURL PRIVATE LibURL) + install(FILES tokenizer-test.html DESTINATION usr/Tests/LibWeb) diff --git a/Tests/LibWeb/TestFetchURL.cpp b/Tests/LibWeb/TestFetchURL.cpp new file mode 100644 index 00000000000..590fe9fd7ef --- /dev/null +++ b/Tests/LibWeb/TestFetchURL.cpp @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2023, Karol Kosek + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include + +#include +#include + +TEST_CASE(data_url) +{ + URL::URL url("data:text/html,test"sv); + EXPECT(url.is_valid()); + EXPECT_EQ(url.scheme(), "data"); + EXPECT(url.host().has()); + EXPECT_EQ(url.serialize(), "data:text/html,test"); + + auto data_url = TRY_OR_FAIL(Web::Fetch::Infrastructure::process_data_url(url)); + EXPECT_EQ(data_url.mime_type, "text/html"); + EXPECT_EQ(StringView(data_url.body.bytes()), "test"sv); +} + +TEST_CASE(data_url_default_mime_type) +{ + URL::URL url("data:,test"sv); + EXPECT(url.is_valid()); + EXPECT_EQ(url.scheme(), "data"); + EXPECT(url.host().has()); + EXPECT_EQ(url.serialize(), "data:,test"); + + auto data_url = TRY_OR_FAIL(Web::Fetch::Infrastructure::process_data_url(url)); + EXPECT_EQ(data_url.mime_type, "text/plain;charset=US-ASCII"); + EXPECT_EQ(StringView(data_url.body.bytes()), "test"sv); +} + +TEST_CASE(data_url_encoded) +{ + URL::URL url("data:text/html,Hello%20friends%2C%0X%X0"sv); + EXPECT(url.is_valid()); + EXPECT_EQ(url.scheme(), "data"); + EXPECT(url.host().has()); + EXPECT_EQ(url.serialize(), "data:text/html,Hello%20friends%2C%0X%X0"); + + auto data_url = TRY_OR_FAIL(Web::Fetch::Infrastructure::process_data_url(url)); + EXPECT_EQ(data_url.mime_type, "text/html"); + EXPECT_EQ(StringView(data_url.body.bytes()), "Hello friends,%0X%X0"sv); +} + +TEST_CASE(data_url_base64_encoded) +{ + URL::URL url("data:text/html;base64,dGVzdA=="sv); + EXPECT(url.is_valid()); + EXPECT_EQ(url.scheme(), "data"); + EXPECT(url.host().has()); + EXPECT_EQ(url.serialize(), "data:text/html;base64,dGVzdA=="); + + auto data_url = TRY_OR_FAIL(Web::Fetch::Infrastructure::process_data_url(url)); + EXPECT_EQ(data_url.mime_type, "text/html"); + EXPECT_EQ(StringView(data_url.body.bytes()), "test"sv); +} + +TEST_CASE(data_url_base64_encoded_default_mime_type) +{ + URL::URL url("data:;base64,dGVzdA=="sv); + EXPECT(url.is_valid()); + EXPECT_EQ(url.scheme(), "data"); + EXPECT(url.host().has()); + EXPECT_EQ(url.serialize(), "data:;base64,dGVzdA=="); + + auto data_url = TRY_OR_FAIL(Web::Fetch::Infrastructure::process_data_url(url)); + EXPECT_EQ(data_url.mime_type, "text/plain;charset=US-ASCII"); + EXPECT_EQ(StringView(data_url.body.bytes()), "test"sv); +} + +TEST_CASE(data_url_base64_encoded_with_whitespace) +{ + URL::URL url("data: text/html ; bAsE64 , dGVz dA== "sv); + EXPECT(url.is_valid()); + EXPECT_EQ(url.scheme(), "data"); + EXPECT(url.host().has()); + EXPECT_EQ(url.serialize(), "data: text/html ; bAsE64 , dGVz dA=="); + + auto data_url = TRY_OR_FAIL(Web::Fetch::Infrastructure::process_data_url(url)); + EXPECT_EQ(data_url.mime_type, "text/html"); + EXPECT_EQ(StringView(data_url.body.bytes()), "test"); +} + +TEST_CASE(data_url_base64_encoded_with_inline_whitespace) +{ + URL::URL url("data:text/javascript;base64,%20ZD%20Qg%0D%0APS%20An%20Zm91cic%0D%0A%207%20"sv); + EXPECT(url.is_valid()); + EXPECT_EQ(url.scheme(), "data"); + EXPECT(url.host().has()); + + auto data_url = TRY_OR_FAIL(Web::Fetch::Infrastructure::process_data_url(url)); + EXPECT_EQ(data_url.mime_type, "text/javascript"); + EXPECT_EQ(StringView(data_url.body.bytes()), "d4 = 'four';"sv); +} + +TEST_CASE(data_url_completed_with_fragment) +{ + auto url = URL::URL("data:text/plain,test"sv).complete_url("#a"sv); + EXPECT(url.is_valid()); + EXPECT_EQ(url.scheme(), "data"); + EXPECT_EQ(url.fragment(), "a"); + EXPECT(url.host().has()); + + auto data_url = TRY_OR_FAIL(Web::Fetch::Infrastructure::process_data_url(url)); + EXPECT_EQ(data_url.mime_type, "text/plain"); + EXPECT_EQ(StringView(data_url.body.bytes()), "test"sv); +} diff --git a/Userland/Libraries/LibURL/URL.cpp b/Userland/Libraries/LibURL/URL.cpp index 37dc8075d7e..b69a6f32993 100644 --- a/Userland/Libraries/LibURL/URL.cpp +++ b/Userland/Libraries/LibURL/URL.cpp @@ -419,80 +419,6 @@ bool URL::equals(URL const& other, ExcludeFragment exclude_fragments) const return serialize(exclude_fragments) == other.serialize(exclude_fragments); } -// https://fetch.spec.whatwg.org/#data-url-processor -ErrorOr URL::process_data_url() const -{ - // 1. Assert: dataURL’s scheme is "data". - VERIFY(scheme() == "data"); - - // 2. Let input be the result of running the URL serializer on dataURL with exclude fragment set to true. - auto input = serialize(ExcludeFragment::Yes); - - // 3. Remove the leading "data:" from input. - input = input.substring("data:"sv.length()); - - // 4. Let position point at the start of input. - - // 5. Let mimeType be the result of collecting a sequence of code points that are not equal to U+002C (,), given position. - auto position = input.find(','); - auto mime_type = input.substring_view(0, position.value_or(input.length())); - - // 6. Strip leading and trailing ASCII whitespace from mimeType. - mime_type = mime_type.trim_whitespace(TrimMode::Both); - - // 7. If position is past the end of input, then return failure. - if (!position.has_value()) - return Error::from_string_literal("Missing a comma character"); - - // 8. Advance position by 1. - position = position.value() + 1; - - // 9. Let encodedBody be the remainder of input. - auto encoded_body = input.substring_view(position.value()); - - // 10. Let body be the percent-decoding of encodedBody. - auto body = percent_decode(encoded_body).to_byte_buffer(); - - // 11. If mimeType ends with U+003B (;), followed by zero or more U+0020 SPACE, followed by an ASCII case-insensitive match for "base64", then: - if (mime_type.ends_with("base64"sv, CaseSensitivity::CaseInsensitive)) { - auto trimmed_substring_view = mime_type.substring_view(0, mime_type.length() - 6); - trimmed_substring_view = trimmed_substring_view.trim(" "sv, TrimMode::Right); - if (trimmed_substring_view.ends_with(';')) { - // 1. Let stringBody be the isomorphic decode of body. - auto string_body = StringView(body); - - // 2. Set body to the forgiving-base64 decode of stringBody. - // FIXME: Check if it's really forgiving. - // 3. If body is failure, then return failure. - body = TRY(decode_base64(string_body)); - - // 4. Remove the last 6 code points from mimeType. - // 5. Remove trailing U+0020 SPACE code points from mimeType, if any. - // 6. Remove the last U+003B (;) from mimeType. - mime_type = trimmed_substring_view.substring_view(0, trimmed_substring_view.length() - 1); - } - } - - // 12. If mimeType starts with ";", then prepend "text/plain" to mimeType. - StringBuilder builder; - if (mime_type.starts_with(';')) { - builder.append("text/plain"sv); - builder.append(mime_type); - mime_type = builder.string_view(); - } - - // FIXME: Parse the MIME type's components according to https://mimesniff.spec.whatwg.org/#parse-a-mime-type - // FIXME: 13. Let mimeTypeRecord be the result of parsing mimeType. - auto mime_type_record = mime_type.trim("\n\r\t "sv, TrimMode::Both); - - // 14. If mimeTypeRecord is failure, then set mimeTypeRecord to text/plain;charset=US-ASCII. - if (mime_type_record.is_empty()) - mime_type_record = "text/plain;charset=US-ASCII"sv; - - // 15. Return a new data: URL struct whose MIME type is mimeTypeRecord and body is body. - return DataURL { TRY(String::from_utf8(mime_type_record)), body }; -} - void append_percent_encoded(StringBuilder& builder, u32 code_point) { if (code_point <= 0x7f) diff --git a/Userland/Libraries/LibURL/URL.h b/Userland/Libraries/LibURL/URL.h index 98492f95f92..8809a237157 100644 --- a/Userland/Libraries/LibURL/URL.h +++ b/Userland/Libraries/LibURL/URL.h @@ -58,11 +58,6 @@ enum class ApplyPercentDecoding { No }; -struct DataURL { - String mime_type; - ByteBuffer body; -}; - void append_percent_encoded_if_necessary(StringBuilder&, u32 code_point, PercentEncodeSet set = PercentEncodeSet::Userinfo); void append_percent_encoded(StringBuilder&, u32 code_point); bool code_point_is_in_percent_encode_set(u32 code_point, PercentEncodeSet); @@ -143,8 +138,6 @@ public: URL complete_url(StringView) const; - ErrorOr process_data_url() const; - bool operator==(URL const& other) const { return equals(other, ExcludeFragment::No); } String const& raw_username() const { return m_username; } diff --git a/Userland/Libraries/LibWeb/Fetch/Fetching/Fetching.cpp b/Userland/Libraries/LibWeb/Fetch/Fetching/Fetching.cpp index 2201b763ff9..9b8b2095156 100644 --- a/Userland/Libraries/LibWeb/Fetch/Fetching/Fetching.cpp +++ b/Userland/Libraries/LibWeb/Fetch/Fetching/Fetching.cpp @@ -805,7 +805,7 @@ WebIDL::ExceptionOr> scheme_fetch(JS::Realm& r // -> "data" else if (request->current_url().scheme() == "data"sv) { // 1. Let dataURLStruct be the result of running the data: URL processor on request’s current URL. - auto data_url_struct = request->current_url().process_data_url(); + auto data_url_struct = Infrastructure::process_data_url(request->current_url()); // 2. If dataURLStruct is failure, then return a network error. if (data_url_struct.is_error()) diff --git a/Userland/Libraries/LibWeb/Fetch/Infrastructure/URL.cpp b/Userland/Libraries/LibWeb/Fetch/Infrastructure/URL.cpp index d789fb6fe3e..a7872e2f415 100644 --- a/Userland/Libraries/LibWeb/Fetch/Infrastructure/URL.cpp +++ b/Userland/Libraries/LibWeb/Fetch/Infrastructure/URL.cpp @@ -1,10 +1,13 @@ /* * Copyright (c) 2022, Linus Groh * Copyright (c) 2022, Andreas Kling + * Copyright (c) 2023, Karol Kosek * * SPDX-License-Identifier: BSD-2-Clause */ +#include +#include #include namespace Web::Fetch::Infrastructure { @@ -30,4 +33,79 @@ bool is_http_or_https_scheme(StringView scheme) return any_of(HTTP_SCHEMES, [&](auto http_scheme) { return scheme == http_scheme; }); } +// https://fetch.spec.whatwg.org/#data-url-processor +ErrorOr process_data_url(URL::URL const& data_url) +{ + // 1. Assert: dataURL’s scheme is "data". + VERIFY(data_url.scheme() == "data"); + + // 2. Let input be the result of running the URL serializer on dataURL with exclude fragment set to true. + auto input_serialized = data_url.serialize(URL::ExcludeFragment::Yes); + StringView input = input_serialized; + + // 3. Remove the leading "data:" from input. + input = input.substring_view("data:"sv.length()); + + // 4. Let position point at the start of input. + + // 5. Let mimeType be the result of collecting a sequence of code points that are not equal to U+002C (,), given position. + auto position = input.find(','); + auto mime_type = input.substring_view(0, position.value_or(input.length())); + + // 6. Strip leading and trailing ASCII whitespace from mimeType. + mime_type = mime_type.trim_whitespace(TrimMode::Both); + + // 7. If position is past the end of input, then return failure. + if (!position.has_value()) + return Error::from_string_literal("Missing a comma character"); + + // 8. Advance position by 1. + position = position.value() + 1; + + // 9. Let encodedBody be the remainder of input. + auto encoded_body = input.substring_view(position.value()); + + // 10. Let body be the percent-decoding of encodedBody. + auto body = URL::percent_decode(encoded_body).to_byte_buffer(); + + // 11. If mimeType ends with U+003B (;), followed by zero or more U+0020 SPACE, followed by an ASCII case-insensitive match for "base64", then: + if (mime_type.ends_with("base64"sv, CaseSensitivity::CaseInsensitive)) { + auto trimmed_substring_view = mime_type.substring_view(0, mime_type.length() - 6); + trimmed_substring_view = trimmed_substring_view.trim(" "sv, TrimMode::Right); + if (trimmed_substring_view.ends_with(';')) { + // 1. Let stringBody be the isomorphic decode of body. + auto string_body = StringView(body); + + // 2. Set body to the forgiving-base64 decode of stringBody. + // FIXME: Check if it's really forgiving. + // 3. If body is failure, then return failure. + body = TRY(decode_base64(string_body)); + + // 4. Remove the last 6 code points from mimeType. + // 5. Remove trailing U+0020 SPACE code points from mimeType, if any. + // 6. Remove the last U+003B (;) from mimeType. + mime_type = trimmed_substring_view.substring_view(0, trimmed_substring_view.length() - 1); + } + } + + // 12. If mimeType starts with ";", then prepend "text/plain" to mimeType. + StringBuilder builder; + if (mime_type.starts_with(';')) { + builder.append("text/plain"sv); + builder.append(mime_type); + mime_type = builder.string_view(); + } + + // FIXME: Parse the MIME type's components according to https://mimesniff.spec.whatwg.org/#parse-a-mime-type + // FIXME: 13. Let mimeTypeRecord be the result of parsing mimeType. + auto mime_type_record = mime_type.trim("\n\r\t "sv, TrimMode::Both); + + // 14. If mimeTypeRecord is failure, then set mimeTypeRecord to text/plain;charset=US-ASCII. + if (mime_type_record.is_empty()) + mime_type_record = "text/plain;charset=US-ASCII"sv; + + // 15. Return a new data: URL struct whose MIME type is mimeTypeRecord and body is body. + return DataURL { TRY(String::from_utf8(mime_type_record)), body }; +} + } diff --git a/Userland/Libraries/LibWeb/Fetch/Infrastructure/URL.h b/Userland/Libraries/LibWeb/Fetch/Infrastructure/URL.h index a8fc1fd83e7..0b6ea077318 100644 --- a/Userland/Libraries/LibWeb/Fetch/Infrastructure/URL.h +++ b/Userland/Libraries/LibWeb/Fetch/Infrastructure/URL.h @@ -8,7 +8,11 @@ #pragma once #include -#include +#include +#include +#include +#include +#include namespace Web::Fetch::Infrastructure { @@ -33,8 +37,15 @@ inline constexpr Array FETCH_SCHEMES = { "resource"sv }; +// https://fetch.spec.whatwg.org/#data-url-struct +struct DataURL { + String mime_type; + ByteBuffer body; +}; + [[nodiscard]] bool is_local_url(URL::URL const&); [[nodiscard]] bool is_fetch_scheme(StringView); [[nodiscard]] bool is_http_or_https_scheme(StringView); +ErrorOr process_data_url(URL::URL const&); } diff --git a/Userland/Libraries/LibWeb/Loader/ResourceLoader.cpp b/Userland/Libraries/LibWeb/Loader/ResourceLoader.cpp index 816dcda32fa..52b75e7a1c2 100644 --- a/Userland/Libraries/LibWeb/Loader/ResourceLoader.cpp +++ b/Userland/Libraries/LibWeb/Loader/ResourceLoader.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -251,7 +252,7 @@ void ResourceLoader::load(LoadRequest& request, SuccessCallback success_callback } if (url.scheme() == "data") { - auto data_url_or_error = url.process_data_url(); + auto data_url_or_error = Fetch::Infrastructure::process_data_url(url); if (data_url_or_error.is_error()) { auto error_message = data_url_or_error.error().string_literal(); log_failure(request, error_message);