mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-01-06 02:55:49 +03:00
5e1499d104
This commit un-deprecates DeprecatedString, and repurposes it as a byte string. As the null state has already been removed, there are no other particularly hairy blockers in repurposing this type as a byte string (what it _really_ is). This commit is auto-generated: $ xs=$(ack -l \bDeprecatedString\b\|deprecated_string AK Userland \ Meta Ports Ladybird Tests Kernel) $ perl -pie 's/\bDeprecatedString\b/ByteString/g; s/deprecated_string/byte_string/g' $xs $ clang-format --style=file -i \ $(git diff --name-only | grep \.cpp\|\.h) $ gn format $(git ls-files '*.gn' '*.gni')
98 lines
3.5 KiB
C++
98 lines
3.5 KiB
C++
/*
|
|
* Copyright (c) 2023, Valtteri Koskivuori <vkoskiv@gmail.com>
|
|
*
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*/
|
|
|
|
#include "QuotedPrintable.h"
|
|
#include <AK/Base64.h>
|
|
#include <AK/GenericLexer.h>
|
|
#include <AK/StringBuilder.h>
|
|
#include <LibIMAP/MessageHeaderEncoding.h>
|
|
#include <LibTextCodec/Decoder.h>
|
|
|
|
namespace IMAP {
|
|
|
|
ErrorOr<ByteBuffer> decode_rfc2047_encoded_words(StringView input)
|
|
{
|
|
GenericLexer lexer(input);
|
|
StringBuilder output;
|
|
|
|
while (!lexer.is_eof()) {
|
|
auto ascii_view = lexer.consume_until("=?"sv);
|
|
ByteString ascii = ascii_view.replace("\r"sv, " "sv, ReplaceMode::All);
|
|
ascii = ascii.replace("\n"sv, " "sv, ReplaceMode::All);
|
|
TRY(output.try_append(ascii));
|
|
if (lexer.is_eof())
|
|
break;
|
|
lexer.consume_specific("=?"sv);
|
|
auto charset = lexer.consume_until('?');
|
|
lexer.consume();
|
|
auto encoding = lexer.consume_until('?');
|
|
lexer.consume();
|
|
auto encoded_text = lexer.consume_until("?=");
|
|
lexer.consume_specific("?="sv);
|
|
|
|
// RFC 2047 Section 6.2, "...any 'linear-white-space' that separates a pair of adjacent 'encoded-word's is ignored."
|
|
// https://datatracker.ietf.org/doc/html/rfc2047#section-6.2
|
|
bool found_next_start = false;
|
|
int spaces = 0;
|
|
for (size_t i = 0; i < lexer.tell_remaining(); ++i) {
|
|
if (lexer.peek(i) == ' ' || lexer.peek(i) == '\r' || lexer.peek(i) == '\n') {
|
|
spaces++;
|
|
if (lexer.peek(i + 1) == '=' && lexer.peek(i + 2) == '?') {
|
|
found_next_start = true;
|
|
break;
|
|
}
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
if (found_next_start) {
|
|
for (int i = 0; i < spaces; i++) {
|
|
lexer.consume();
|
|
}
|
|
}
|
|
|
|
ByteBuffer first_pass_decoded;
|
|
if (encoding == 'Q' || encoding == 'q') {
|
|
auto maybe_decoded_data = decode_quoted_printable(encoded_text);
|
|
if (maybe_decoded_data.is_error()) {
|
|
dbgln("Failed to decode quoted-printable rfc2047 text, skipping.");
|
|
continue;
|
|
}
|
|
// RFC 2047 Section 4.2.2, https://datatracker.ietf.org/doc/html/rfc2047#section-4.2
|
|
auto decoded_data = maybe_decoded_data.release_value();
|
|
for (auto character : decoded_data.bytes()) {
|
|
if (character == '_')
|
|
first_pass_decoded.append(' ');
|
|
else
|
|
first_pass_decoded.append(character);
|
|
}
|
|
} else if (encoding == 'B' || encoding == 'b') {
|
|
auto maybe_decoded_data = AK::decode_base64(encoded_text);
|
|
if (maybe_decoded_data.is_error()) {
|
|
dbgln("Failed to decode base64-encoded rfc2047 text, skipping.");
|
|
continue;
|
|
}
|
|
first_pass_decoded = maybe_decoded_data.release_value();
|
|
} else {
|
|
dbgln("Unknown encoding \"{}\" found, skipping, original string: \"{}\"", encoding, input);
|
|
continue;
|
|
}
|
|
if (first_pass_decoded.is_empty())
|
|
continue;
|
|
auto maybe_decoder = TextCodec::decoder_for(charset);
|
|
if (!maybe_decoder.has_value()) {
|
|
dbgln("No decoder found for charset \"{}\", skipping.", charset);
|
|
continue;
|
|
}
|
|
auto decoded_text = TRY(maybe_decoder->to_utf8(first_pass_decoded));
|
|
TRY(output.try_append(decoded_text));
|
|
}
|
|
|
|
return output.to_byte_buffer();
|
|
}
|
|
|
|
}
|