diff --git a/AK/Base64.cpp b/AK/Base64.cpp index aa887b9eabe..7b48de7eb23 100644 --- a/AK/Base64.cpp +++ b/AK/Base64.cpp @@ -121,102 +121,4 @@ ErrorOr encode_base64(ReadonlyBytes input) return output.to_string(); } -// https://infra.spec.whatwg.org/#forgiving-base64 -ErrorOr decode_forgiving_base64(StringView input) -{ - // 1. Remove all ASCII whitespace from data. - auto data = input.trim_whitespace(); - - // 2. If data’s code point length divides by 4 leaving no remainder, then: - if (data.length() % 4 == 0) { - // If data ends with one or two U+003D (=) code points, then remove them from data. - if (data.ends_with("=="sv)) - data = data.substring_view(0, data.length() - 2); - else if (data.ends_with('=')) - data = data.substring_view(0, data.length() - 1); - } - - // 3. If data’s code point length divides by 4 leaving a remainder of 1, then return failure. - if (data.length() % 4 == 1) - return Error::from_string_literal("Invalid input length in forgiving base64 decode"); - - // 4. If data contains a code point that is not one of - // U+002B (+), U+002F (/), ASCII alphanumeric - // then return failure. - for (auto point : data) { - if (point != '+' && point != '/' && !is_ascii_alphanumeric(point)) - return Error::from_string_literal("Invalid character in forgiving base64 decode"); - } - - // 5. Let output be an empty byte sequence. - // 6. Let buffer be an empty buffer that can have bits appended to it. - Vector output; - u32 buffer = 0; - auto accumulated_bits = 0; - - auto add_to_buffer = [&](u8 number) { - VERIFY(number < 64); - u32 buffer_mask = number; - - if (accumulated_bits == 0) - buffer_mask <<= 18; - else if (accumulated_bits == 6) - buffer_mask <<= 12; - else if (accumulated_bits == 12) - buffer_mask <<= 6; - else if (accumulated_bits == 18) - buffer_mask <<= 0; - - buffer |= buffer_mask; - - accumulated_bits += 6; - }; - - auto append_bytes = [&]() { - output.append(static_cast((buffer & 0xff0000) >> 16)); - output.append(static_cast((buffer & 0xff00) >> 8)); - output.append(static_cast(buffer & 0xff)); - - buffer = 0; - accumulated_bits = 0; - }; - - auto alphabet_lookup_table = base64_lookup_table(); - - // 7. Let position be a position variable for data, initially pointing at the start of data. - // 8. While position does not point past the end of data: - for (auto point : data) { - // 1. Find the code point pointed to by position in the second column of Table 1: The Base 64 Alphabet of RFC 4648. - // Let n be the number given in the first cell of the same row. [RFC4648] - auto n = alphabet_lookup_table[point]; - VERIFY(n >= 0); - - // 2. Append the six bits corresponding to n, most significant bit first, to buffer. - add_to_buffer(static_cast(n)); - - // 3. buffer has accumulated 24 bits, - if (accumulated_bits == 24) { - // interpret them as three 8-bit big-endian numbers. - // Append three bytes with values equal to those numbers to output, in the same order, and then empty buffer - append_bytes(); - } - } - - // 9. If buffer is not empty, it contains either 12 or 18 bits. - VERIFY(accumulated_bits == 0 || accumulated_bits == 12 || accumulated_bits == 18); - - // If it contains 12 bits, then discard the last four and interpret the remaining eight as an 8-bit big-endian number. - if (accumulated_bits == 12) - output.append(static_cast((buffer & 0xff0000) >> 16)); - - // If it contains 18 bits, then discard the last two and interpret the remaining 16 as two 8-bit big-endian numbers. - // Append the one or two bytes with values equal to those one or two numbers to output, in the same order. - if (accumulated_bits == 18) { - output.append(static_cast((buffer & 0xff0000) >> 16)); - output.append(static_cast((buffer & 0xff00) >> 8)); - } - - return ByteBuffer::copy(output); -} - } diff --git a/AK/Base64.h b/AK/Base64.h index 5e2592a659a..3dbf8e3385d 100644 --- a/AK/Base64.h +++ b/AK/Base64.h @@ -40,8 +40,6 @@ consteval auto base64_lookup_table() [[nodiscard]] ErrorOr decode_base64(StringView); -[[nodiscard]] ErrorOr decode_forgiving_base64(StringView); - [[nodiscard]] ErrorOr encode_base64(ReadonlyBytes); } diff --git a/Userland/Libraries/LibWeb/CMakeLists.txt b/Userland/Libraries/LibWeb/CMakeLists.txt index 12c274163af..1dd25027319 100644 --- a/Userland/Libraries/LibWeb/CMakeLists.txt +++ b/Userland/Libraries/LibWeb/CMakeLists.txt @@ -302,6 +302,7 @@ set(SOURCES HTML/WorkerNavigator.cpp HighResolutionTime/Performance.cpp HighResolutionTime/TimeOrigin.cpp + Infra/Base64.cpp Infra/ByteSequences.cpp Infra/JSON.cpp Infra/Strings.cpp diff --git a/Userland/Libraries/LibWeb/HTML/Window.cpp b/Userland/Libraries/LibWeb/HTML/Window.cpp index 661b92b887e..5b24f477cff 100644 --- a/Userland/Libraries/LibWeb/HTML/Window.cpp +++ b/Userland/Libraries/LibWeb/HTML/Window.cpp @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -1410,7 +1411,7 @@ JS_DEFINE_NATIVE_FUNCTION(Window::atob) // Otherwise, the user agent must convert data to a byte sequence whose nth byte is the eight-bit representation of the nth code point of data // and then must apply forgiving-base64 encode to that byte sequence and return the result. - auto decoded = AK::decode_forgiving_base64(StringView(deprecated_string)); + auto decoded = Infra::decode_forgiving_base64(StringView(deprecated_string)); if (decoded.is_error()) return vm.throw_completion(JS::ErrorType::InvalidFormat, "Base64"); diff --git a/Userland/Libraries/LibWeb/Infra/Base64.cpp b/Userland/Libraries/LibWeb/Infra/Base64.cpp new file mode 100644 index 00000000000..7944b5e4271 --- /dev/null +++ b/Userland/Libraries/LibWeb/Infra/Base64.cpp @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2022-2023, the SerenityOS developers. + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include +#include +#include +#include +#include +#include + +namespace Web::Infra { + +// https://infra.spec.whatwg.org/#forgiving-base64 +ErrorOr decode_forgiving_base64(StringView input) +{ + // 1. Remove all ASCII whitespace from data. + auto data = input.trim_whitespace(); + + // 2. If data’s code point length divides by 4 leaving no remainder, then: + if (data.length() % 4 == 0) { + // If data ends with one or two U+003D (=) code points, then remove them from data. + if (data.ends_with("=="sv)) + data = data.substring_view(0, data.length() - 2); + else if (data.ends_with('=')) + data = data.substring_view(0, data.length() - 1); + } + + // 3. If data’s code point length divides by 4 leaving a remainder of 1, then return failure. + if (data.length() % 4 == 1) + return Error::from_string_literal("Invalid input length in forgiving base64 decode"); + + // 4. If data contains a code point that is not one of + // U+002B (+), U+002F (/), ASCII alphanumeric + // then return failure. + for (auto point : data) { + if (point != '+' && point != '/' && !is_ascii_alphanumeric(point)) + return Error::from_string_literal("Invalid character in forgiving base64 decode"); + } + + // 5. Let output be an empty byte sequence. + // 6. Let buffer be an empty buffer that can have bits appended to it. + Vector output; + u32 buffer = 0; + auto accumulated_bits = 0; + + auto add_to_buffer = [&](u8 number) { + VERIFY(number < 64); + u32 buffer_mask = number; + + if (accumulated_bits == 0) + buffer_mask <<= 18; + else if (accumulated_bits == 6) + buffer_mask <<= 12; + else if (accumulated_bits == 12) + buffer_mask <<= 6; + else if (accumulated_bits == 18) + buffer_mask <<= 0; + + buffer |= buffer_mask; + + accumulated_bits += 6; + }; + + auto append_bytes = [&]() { + output.append(static_cast((buffer & 0xff0000) >> 16)); + output.append(static_cast((buffer & 0xff00) >> 8)); + output.append(static_cast(buffer & 0xff)); + + buffer = 0; + accumulated_bits = 0; + }; + + auto alphabet_lookup_table = AK::base64_lookup_table(); + + // 7. Let position be a position variable for data, initially pointing at the start of data. + // 8. While position does not point past the end of data: + for (auto point : data) { + // 1. Find the code point pointed to by position in the second column of Table 1: The Base 64 Alphabet of RFC 4648. + // Let n be the number given in the first cell of the same row. [RFC4648] + auto n = alphabet_lookup_table[point]; + VERIFY(n >= 0); + + // 2. Append the six bits corresponding to n, most significant bit first, to buffer. + add_to_buffer(static_cast(n)); + + // 3. buffer has accumulated 24 bits, + if (accumulated_bits == 24) { + // interpret them as three 8-bit big-endian numbers. + // Append three bytes with values equal to those numbers to output, in the same order, and then empty buffer + append_bytes(); + } + } + + // 9. If buffer is not empty, it contains either 12 or 18 bits. + VERIFY(accumulated_bits == 0 || accumulated_bits == 12 || accumulated_bits == 18); + + // If it contains 12 bits, then discard the last four and interpret the remaining eight as an 8-bit big-endian number. + if (accumulated_bits == 12) + output.append(static_cast((buffer & 0xff0000) >> 16)); + + // If it contains 18 bits, then discard the last two and interpret the remaining 16 as two 8-bit big-endian numbers. + // Append the one or two bytes with values equal to those one or two numbers to output, in the same order. + if (accumulated_bits == 18) { + output.append(static_cast((buffer & 0xff0000) >> 16)); + output.append(static_cast((buffer & 0xff00) >> 8)); + } + + return ByteBuffer::copy(output); +} + +} diff --git a/Userland/Libraries/LibWeb/Infra/Base64.h b/Userland/Libraries/LibWeb/Infra/Base64.h new file mode 100644 index 00000000000..381fbebb75d --- /dev/null +++ b/Userland/Libraries/LibWeb/Infra/Base64.h @@ -0,0 +1,15 @@ +/* + * Copyright (c) 2022-2023, the SerenityOS developers. + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include + +namespace Web::Infra { + +[[nodiscard]] ErrorOr decode_forgiving_base64(StringView); + +}