From 84722ae2ef4ca766c65b68a42e29d68d6ca37929 Mon Sep 17 00:00:00 2001
From: Kenneth Myhra
Date: Sun, 2 Apr 2023 21:32:23 +0200
Subject: [PATCH] LibWeb: Implement multipart/form-data encoding algorithm

---
 Userland/Libraries/LibWeb/Forward.h           |  1 +
 .../LibWeb/HTML/FormControlInfrastructure.cpp | 88 +++++++++++++++++++
 .../LibWeb/HTML/FormControlInfrastructure.h   |  6 ++
 3 files changed, 95 insertions(+)

diff --git a/Userland/Libraries/LibWeb/Forward.h b/Userland/Libraries/LibWeb/Forward.h
index 692ee6bca4e..8f7b9d0af00 100644
--- a/Userland/Libraries/LibWeb/Forward.h
+++ b/Userland/Libraries/LibWeb/Forward.h
@@ -348,6 +348,7 @@ struct PolicyContainer;
 class PromiseRejectionEvent;
 class WorkerDebugConsoleClient;
 struct SandboxingFlagSet;
+struct SerializedFormData;
 class Storage;
 class SubmitEvent;
 class TextMetrics;
diff --git a/Userland/Libraries/LibWeb/HTML/FormControlInfrastructure.cpp b/Userland/Libraries/LibWeb/HTML/FormControlInfrastructure.cpp
index bd28b553aa2..22e7e9c4ccb 100644
--- a/Userland/Libraries/LibWeb/HTML/FormControlInfrastructure.cpp
+++ b/Userland/Libraries/LibWeb/HTML/FormControlInfrastructure.cpp
@@ -4,6 +4,7 @@
  * SPDX-License-Identifier: BSD-2-Clause
  */
 
+#include
 #include
 #include
 #include
@@ -175,4 +176,91 @@ WebIDL::ExceptionOr>> construct_entry_list(J
     return entry_list;
 }
 
+// https://html.spec.whatwg.org/multipage/form-control-infrastructure.html#multipart-form-data
+// Serializes entry_list into a multipart/form-data body and returns it together with the boundary
+// string that delimits its parts. Errors from the string and buffer operations are propagated via TRY.
+ErrorOr serialize_to_multipart_form_data(Vector const& entry_list)
+{
+    // Rewrites every line break in value (lone CR, lone LF or CRLF) as exactly one CRLF.
+    auto normalize_line_breaks = [](StringView value) -> ErrorOr {
+        StringBuilder builder;
+        GenericLexer lexer { value };
+        while (!lexer.is_eof()) {
+            TRY(builder.try_append(lexer.consume_until(is_any_of("\r\n"sv))));
+            if (lexer.is_eof())
+                break;
+            // An existing CRLF pair must be kept as a single CRLF, so step over both of its characters;
+            // a lone CR or LF is one character wide.
+            bool const is_crlf_pair = lexer.peek() == '\r' && lexer.peek(1) == '\n';
+            TRY(builder.try_append("\r\n"sv));
+            lexer.ignore(is_crlf_pair ? 2 : 1);
+        }
+        return builder.to_string();
+    };
+
+    // Percent-encodes CR, LF and the double quote so value can sit inside a quoted header parameter.
+    auto escape_line_feed_carriage_return_double_quote = [](StringView value) -> ErrorOr {
+        StringBuilder builder;
+        GenericLexer lexer { value };
+        while (!lexer.is_eof()) {
+            TRY(builder.try_append(lexer.consume_until(is_any_of("\r\n\""sv))));
+            switch (lexer.peek()) {
+            case '\r':
+                TRY(builder.try_append("%0D"sv));
+                break;
+            case '\n':
+                TRY(builder.try_append("%0A"sv));
+                break;
+            case '\"':
+                TRY(builder.try_append("%22"sv));
+                break;
+            }
+            lexer.ignore(1);
+        }
+        return builder.to_string();
+    };
+
+    // The boundary used by the user agent in generating the return value of this algorithm is the multipart/form-data boundary string.
+    auto boundary = TRY(String::formatted("---------------------------{}", get_random()));
+    StringBuilder builder;
+    // 1. For each entry of entry list:
+    for (auto const& entry : entry_list) {
+        TRY(builder.try_append(TRY(String::formatted("--{}\r\n"sv, boundary))));
+
+        // Replace every occurrence of U+000D (CR) not followed by U+000A (LF), and every occurrence of U+000A (LF) not preceded by U+000D (CR) by a string consisting of a U+000D (CR) and U+000A (LF).
+        auto normalized_name = TRY(normalize_line_breaks(entry.name));
+        // For field names replace any 0x0A (LF) bytes with the byte sequence `%0A`, 0x0D (CR) with `%0D` and 0x22 (") with `%22`
+        auto escaped_name = TRY(escape_line_feed_carriage_return_double_quote(normalized_name));
+
+        TRY(entry.value.visit(
+            [&](JS::Handle const& file) -> ErrorOr {
+                // For filenames replace any 0x0A (LF) bytes with the byte sequence `%0A`, 0x0D (CR) with `%0D` and 0x22 (") with `%22`
+                auto escaped_filename = TRY(escape_line_feed_carriage_return_double_quote(file->name()));
+                // Add a `Content-Disposition` header with a `name` set to entry's name and `filename` set to entry's filename.
+                TRY(builder.try_append(TRY(String::formatted("Content-Disposition: form-data; name=\"{}\"; filename=\"{}\"\r\n"sv, escaped_name, escaped_filename))));
+                // The parts of the generated multipart/form-data resource that correspond to file fields must have a `Content-Type` header specified.
+                TRY(builder.try_append(TRY(String::formatted("Content-Type: {}\r\n\r\n"sv, file->type()))));
+                // FIXME: Serialize the contents of the file.
+                TRY(builder.try_append("\r\n"sv));
+                return {};
+            },
+            [&](String const& string) -> ErrorOr {
+                // Replace every occurrence of U+000D (CR) not followed by U+000A (LF), and every occurrence of U+000A (LF) not preceded by U+000D (CR) by a string consisting of a U+000D (CR) and U+000A (LF).
+                auto normalized_value = TRY(normalize_line_breaks(string));
+                // Add a `Content-Disposition` header with a `name` set to entry's name.
+                TRY(builder.try_append(TRY(String::formatted("Content-Disposition: form-data; name=\"{}\"\r\n\r\n"sv, escaped_name))));
+                TRY(builder.try_append(TRY(String::formatted("{}\r\n", normalized_value))));
+                return {};
+            }));
+    }
+    TRY(builder.try_append(TRY(String::formatted("--{}--\r\n", boundary))));
+
+    // 2. Return the byte sequence resulting from encoding the entry list using the rules described by RFC 7578, Returning Values from Forms: multipart/form-data, given the following conditions: [RFC7578]
+    auto serialized_data = TRY(builder.to_byte_buffer());
+    return SerializedFormData {
+        .boundary = move(boundary),
+        .serialized_data = move(serialized_data)
+    };
+}
+
 }
diff --git a/Userland/Libraries/LibWeb/HTML/FormControlInfrastructure.h b/Userland/Libraries/LibWeb/HTML/FormControlInfrastructure.h
index 493b5681987..c0d75a4bfce 100644
--- a/Userland/Libraries/LibWeb/HTML/FormControlInfrastructure.h
+++ b/Userland/Libraries/LibWeb/HTML/FormControlInfrastructure.h
@@ -10,7 +10,13 @@
 
 namespace Web::HTML {
 
+struct SerializedFormData {
+    String boundary;
+    ByteBuffer serialized_data;
+};
+
 WebIDL::ExceptionOr create_entry(JS::Realm& realm, String const& name, Variant, String> const& value, Optional const& filename = {});
 WebIDL::ExceptionOr>> construct_entry_list(JS::Realm&, HTMLFormElement&);
+ErrorOr serialize_to_multipart_form_data(Vector const& entry_list);
 
 }