LibTextCodec: Add x-user-defined decoder

It's a pretty simple charset: the lower 128 byte values (0x00-0x7F) are
standard ASCII, while the upper 128 byte values (0x80-0xFF) are mapped to
a portion of the Unicode Private Use Area, specifically U+F780-U+F7FF.

This is used by Google Maps for certain blobs.
This commit is contained in:
Luke Wilde 2022-02-11 20:38:44 +00:00 committed by Andreas Kling
parent 835a344337
commit 0e0f98a45e
Notes: sideshowbarker 2024-07-17 18:58:23 +09:00
2 changed files with 30 additions and 0 deletions

View File

@ -20,6 +20,7 @@ CyrillicDecoder s_cyrillic_decoder;
Koi8RDecoder s_koi8r_decoder;
Latin9Decoder s_latin9_decoder;
TurkishDecoder s_turkish_decoder;
XUserDefinedDecoder s_x_user_defined_decoder;
}
Decoder* decoder_for(const String& a_encoding)
@ -44,6 +45,8 @@ Decoder* decoder_for(const String& a_encoding)
return &s_latin9_decoder;
if (encoding.value().equals_ignoring_case("windows-1254"))
return &s_turkish_decoder;
if (encoding.value().equals_ignoring_case("x-user-defined"))
return &s_x_user_defined_decoder;
}
dbgln("TextCodec: No decoder implemented for encoding '{}'", a_encoding);
return nullptr;
@ -466,4 +469,26 @@ void TurkishDecoder::process(StringView input, Function<void(u32)> on_code_point
}
}
// https://encoding.spec.whatwg.org/#x-user-defined-decoder
// Decodes every byte of `input` as x-user-defined and hands each resulting code point to `on_code_point`.
void XUserDefinedDecoder::process(StringView input, Function<void(u32)> on_code_point)
{
    // 1. If byte is end-of-queue, return finished.
    // NOTE: Falling out of the loop below is the end-of-queue case.
    for (auto raw : input) {
        auto const byte = static_cast<u8>(raw);

        // 2. If byte is an ASCII byte, return a code point whose value is byte.
        // https://infra.spec.whatwg.org/#ascii-byte
        // An ASCII byte is a byte in the range 0x00 (NUL) to 0x7F (DEL), inclusive.
        // NOTE: No lower-bound check is needed, since an unsigned byte is never below 0x00.
        u32 code_point = byte;

        // 3. Return a code point whose value is 0xF780 + byte − 0x80.
        // NOTE: This maps 0x80-0xFF onto the Private Use Area range U+F780-U+F7FF.
        if (byte > 0x7f)
            code_point = 0xF780 + byte - 0x80;

        on_code_point(code_point);
    }
}
}

View File

@ -67,6 +67,11 @@ public:
virtual void process(StringView, Function<void(u32)> on_code_point) override;
};
// Decoder for the "x-user-defined" encoding (https://encoding.spec.whatwg.org/#x-user-defined-decoder).
class XUserDefinedDecoder final : public Decoder {
public:
virtual void process(StringView, Function<void(u32)> on_code_point) override;
};
// Returns the decoder matching the given encoding name, or nullptr (after logging a debug message) if no decoder is implemented for it.
Decoder* decoder_for(String const& encoding);
// NOTE(review): Presumably maps an encoding label to its standardized name and yields an empty Optional for unknown labels; the definition is not visible here, so confirm.
Optional<String> get_standardized_encoding(const String& encoding);