AK: Add GenericLexer::{consume_decimal_integer,peek_string}

This commit is contained in:
Dan Klishch 2023-10-30 14:43:24 -04:00 committed by Daniel Bertalan
parent 6b30847120
commit b65d281bbb
Notes: sideshowbarker 2024-07-17 01:11:48 +09:00
3 changed files with 153 additions and 0 deletions

View File

@ -7,6 +7,7 @@
#include <AK/Assertions.h>
#include <AK/CharacterTypes.h>
#include <AK/GenericLexer.h>
#include <AK/ScopeGuard.h>
#include <AK/StringBuilder.h>
#ifndef KERNEL
@ -128,6 +129,62 @@ StringView GenericLexer::consume_quoted_string(char escape_char)
return m_input.substring_view(start, length);
}
// Parses an optionally signed ('+' or '-') run of ASCII decimal digits starting at the
// current position and returns it as a T.
//
// Errors:
//   EINVAL - no digit follows the optional sign.
//   ERANGE - the digits do not fit in the unsigned counterpart of T, or the signed
//            result is outside the range of T (a negative sign on an unsigned T is
//            only accepted for zero).
//
// On every error path the lexer index is restored to where it was on entry.
template<Integral T>
ErrorOr<T> GenericLexer::consume_decimal_integer()
{
    using UnsignedT = MakeUnsigned<T>;

    // Armed by default: unless a success path disarms it, leaving this function
    // puts m_index back to its value on entry.
    ArmedScopeGuard restore_index { [&, saved_index = m_index] {
        m_index = saved_index;
    } };

    bool is_negative = false;
    if (next_is('+') || next_is('-'))
        is_negative = (consume() == '-');

    StringView digits = consume_while(is_ascii_digit);
    if (digits.is_empty())
        return Error::from_errno(EINVAL);

    // The magnitude is parsed as unsigned first; the sign is applied afterwards.
    auto parsed = StringUtils::convert_to_uint<UnsignedT>(digits, TrimWhitespace::No);
    if (!parsed.has_value())
        return Error::from_errno(ERANGE);
    auto magnitude = parsed.value();

    if (is_negative) {
        if constexpr (IsUnsigned<T>) {
            // "-0" is the only negative spelling an unsigned type can represent.
            if (magnitude != 0)
                return Error::from_errno(ERANGE);
            restore_index.disarm();
            return 0;
        } else {
            // The most negative value has a magnitude one larger than the maximum.
            static constexpr UnsignedT negative_limit = static_cast<UnsignedT>(NumericLimits<T>::max()) + 1;
            if (magnitude > negative_limit)
                return Error::from_errno(ERANGE);
            restore_index.disarm();
            return -magnitude;
        }
    }

    // Only a signed T can fail here: an unsigned magnitude always fits its own type.
    if (NumericLimits<T>::max() < magnitude)
        return Error::from_errno(ERANGE);
    restore_index.disarm();
    return magnitude;
}
// Explicit instantiations of consume_decimal_integer for the 8-, 16-, 32- and 64-bit
// signed and unsigned integer types. The template body is defined in this source file
// while the header only declares it, so these are presumably the only instantiations
// other translation units can link against.
template ErrorOr<u8> GenericLexer::consume_decimal_integer<u8>();
template ErrorOr<i8> GenericLexer::consume_decimal_integer<i8>();
template ErrorOr<u16> GenericLexer::consume_decimal_integer<u16>();
template ErrorOr<i16> GenericLexer::consume_decimal_integer<i16>();
template ErrorOr<u32> GenericLexer::consume_decimal_integer<u32>();
template ErrorOr<i32> GenericLexer::consume_decimal_integer<i32>();
template ErrorOr<u64> GenericLexer::consume_decimal_integer<u64>();
template ErrorOr<i64> GenericLexer::consume_decimal_integer<i64>();
#ifndef KERNEL
Optional<DeprecatedString> GenericLexer::consume_and_unescape_string(char escape_char)
{

View File

@ -31,6 +31,13 @@ public:
return (m_index + offset < m_input.length()) ? m_input[m_index + offset] : '\0';
}
// Returns a view of `length` characters starting `offset` characters past the current
// position, without advancing the lexer. Returns an empty Optional when that range is
// not entirely inside the input.
//
// The bounds check is done with subtractions only: adding m_index, offset and length
// together could wrap around size_t for very large arguments and wrongly pass the check.
Optional<StringView> peek_string(size_t length, size_t offset = 0) const
{
    auto const input_length = m_input.length();
    if (m_index > input_length)
        return {};

    auto const remaining = input_length - m_index;
    if (offset > remaining || length > remaining - offset)
        return {};

    return m_input.substring_view(m_index + offset, length);
}
constexpr bool next_is(char expected) const
{
return peek() == expected;
@ -121,6 +128,8 @@ public:
#ifndef KERNEL
Optional<DeprecatedString> consume_and_unescape_string(char escape_char = '\\');
#endif
// Consumes an optionally signed ('+'/'-') decimal integer at the current position and
// returns it as a T. Fails with EINVAL when no digit follows the optional sign and with
// ERANGE when the value does not fit in T; on failure the lexer position is unchanged.
template<Integral T>
ErrorOr<T> consume_decimal_integer();
enum class UnicodeEscapeError {
MalformedUnicodeEscape,

View File

@ -204,3 +204,90 @@ TEST_CASE(consume_escaped_code_point)
test("\\ud83d\\ude00"sv, 0x1f600);
test("\\ud83d\\ude00"sv, 0xd83d, false);
}
// Checks that consume_decimal_integer<T>() accepts in-range inputs for every
// instantiated integer type, including "-0" and the extreme values of each type.
TEST_CASE(consume_decimal_integer_correctly_parses)
{
// CHECK_PARSES_INTEGER(test, expected, type):
//   Lexes the string literal `test` as `type`, requires that parsing succeeds, that the
//   result equals `expected` converted to `type`, and that the whole input was consumed
//   (tell() equals the input length).
#define CHECK_PARSES_INTEGER(test, expected, type) \
do { \
GenericLexer lexer(test##sv); \
auto actual = lexer.consume_decimal_integer<type>(); \
VERIFY(!actual.is_error()); \
EXPECT_EQ(actual.value(), static_cast<type>(expected)); \
EXPECT_EQ(lexer.tell(), test##sv.length()); \
} while (false)
// Unsigned types: zero, negative zero, a small value, and the largest value.
CHECK_PARSES_INTEGER("0", 0, u8);
CHECK_PARSES_INTEGER("-0", -0, u8);
CHECK_PARSES_INTEGER("10", 10, u8);
CHECK_PARSES_INTEGER("255", 255, u8);
CHECK_PARSES_INTEGER("0", 0, u16);
CHECK_PARSES_INTEGER("-0", -0, u16);
CHECK_PARSES_INTEGER("1234", 1234, u16);
CHECK_PARSES_INTEGER("65535", 65535, u16);
CHECK_PARSES_INTEGER("0", 0, u32);
CHECK_PARSES_INTEGER("-0", -0, u32);
CHECK_PARSES_INTEGER("1234", 1234, u32);
CHECK_PARSES_INTEGER("4294967295", 4294967295, u32);
CHECK_PARSES_INTEGER("0", 0, u64);
CHECK_PARSES_INTEGER("-0", -0, u64);
CHECK_PARSES_INTEGER("1234", 1234, u64);
CHECK_PARSES_INTEGER("18446744073709551615", 18446744073709551615ULL, u64);
// Signed types: zero, negative zero, a positive and a negative value, and both extremes.
CHECK_PARSES_INTEGER("0", 0, i8);
CHECK_PARSES_INTEGER("-0", -0, i8);
CHECK_PARSES_INTEGER("10", 10, i8);
CHECK_PARSES_INTEGER("-10", -10, i8);
CHECK_PARSES_INTEGER("127", 127, i8);
CHECK_PARSES_INTEGER("-128", -128, i8);
CHECK_PARSES_INTEGER("0", 0, i16);
CHECK_PARSES_INTEGER("-0", -0, i16);
CHECK_PARSES_INTEGER("1234", 1234, i16);
CHECK_PARSES_INTEGER("-1234", -1234, i16);
CHECK_PARSES_INTEGER("32767", 32767, i16);
CHECK_PARSES_INTEGER("-32768", -32768, i16);
CHECK_PARSES_INTEGER("0", 0, i32);
CHECK_PARSES_INTEGER("-0", -0, i32);
CHECK_PARSES_INTEGER("1234", 1234, i32);
CHECK_PARSES_INTEGER("-1234", -1234, i32);
CHECK_PARSES_INTEGER("2147483647", 2147483647, i32);
CHECK_PARSES_INTEGER("-2147483648", -2147483648, i32);
CHECK_PARSES_INTEGER("0", 0, i64);
CHECK_PARSES_INTEGER("-0", -0, i64);
CHECK_PARSES_INTEGER("1234", 1234, i64);
CHECK_PARSES_INTEGER("-1234", -1234, i64);
CHECK_PARSES_INTEGER("9223372036854775807", 9223372036854775807, i64);
// The expected value is written as an unsigned literal and converted to i64 by the
// macro's static_cast, since the magnitude itself is not a valid signed literal.
CHECK_PARSES_INTEGER("-9223372036854775808", -9223372036854775808ULL, i64);
#undef CHECK_PARSES_INTEGER
}
// Checks that consume_decimal_integer<T>() rejects malformed and out-of-range inputs
// with the expected errno code and leaves the lexer position untouched.
TEST_CASE(consume_decimal_integer_fails_with_correct_error)
{
// CHECK_FAILS_WITH_ERROR(test, type, err):
//   Lexes the string literal `test` as `type`, requires that the result is an errno
//   error with code `err`, and that nothing was consumed (tell() is still 0).
#define CHECK_FAILS_WITH_ERROR(test, type, err) \
do { \
GenericLexer lexer(test##sv); \
auto actual = lexer.consume_decimal_integer<type>(); \
VERIFY(actual.is_error() && actual.error().is_errno()); \
EXPECT_EQ(actual.error().code(), err); \
EXPECT_EQ(lexer.tell(), static_cast<size_t>(0)); \
} while (false)
// No digits at all, with or without a leading sign: EINVAL.
CHECK_FAILS_WITH_ERROR("Well hello GenericLexer!", u64, EINVAL);
CHECK_FAILS_WITH_ERROR("+", u64, EINVAL);
CHECK_FAILS_WITH_ERROR("+WHF", u64, EINVAL);
CHECK_FAILS_WITH_ERROR("-WHF", u64, EINVAL);
// Non-zero negative values for unsigned types: ERANGE.
CHECK_FAILS_WITH_ERROR("-1", u8, ERANGE);
CHECK_FAILS_WITH_ERROR("-100", u8, ERANGE);
CHECK_FAILS_WITH_ERROR("-1", u16, ERANGE);
CHECK_FAILS_WITH_ERROR("-100", u16, ERANGE);
CHECK_FAILS_WITH_ERROR("-1", u32, ERANGE);
CHECK_FAILS_WITH_ERROR("-100", u32, ERANGE);
CHECK_FAILS_WITH_ERROR("-1", u64, ERANGE);
CHECK_FAILS_WITH_ERROR("-100", u64, ERANGE);
// One below the minimum and one above the maximum of each signed type: ERANGE.
CHECK_FAILS_WITH_ERROR("-129", i8, ERANGE);
CHECK_FAILS_WITH_ERROR("128", i8, ERANGE);
CHECK_FAILS_WITH_ERROR("-32769", i16, ERANGE);
CHECK_FAILS_WITH_ERROR("32768", i16, ERANGE);
CHECK_FAILS_WITH_ERROR("-2147483649", i32, ERANGE);
CHECK_FAILS_WITH_ERROR("2147483648", i32, ERANGE);
CHECK_FAILS_WITH_ERROR("-9223372036854775809", i64, ERANGE);
CHECK_FAILS_WITH_ERROR("9223372036854775808", i64, ERANGE);
#undef CHECK_FAILS_WITH_ERROR
}