mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-12-05 01:10:24 +00:00
AK: Templatize GenericLexer for UTF-16 strings
We now define GenericLexer as a template to allow using it with UTF-16 strings. To keep existing users happy, the template is defined in the Detail namespace. AK::GenericLexer is then an alias for the char-based instantiation (over StringView), and AK::Utf16GenericLexer is an alias for the char16_t-based instantiation (over Utf16View).
This commit is contained in:
Notes:
github-actions[bot]
2025-08-13 13:57:56 +00:00
Author: https://github.com/trflynn89 Commit: https://github.com/LadybirdBrowser/ladybird/commit/99d7e08dff0 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5762
@@ -18,6 +18,9 @@ namespace Detail {
|
||||
template<size_t inline_capacity>
|
||||
class ByteBuffer;
|
||||
|
||||
template<typename CharType>
|
||||
class GenericLexer;
|
||||
|
||||
class StringData;
|
||||
class Utf16StringData;
|
||||
|
||||
@@ -36,7 +39,6 @@ class CountingStream;
|
||||
class Duration;
|
||||
class Error;
|
||||
class FlyString;
|
||||
class GenericLexer;
|
||||
class IPv4Address;
|
||||
class IPv6Address;
|
||||
class JsonArray;
|
||||
@@ -63,6 +65,9 @@ class Utf8View;
|
||||
|
||||
using ByteBuffer = Detail::ByteBuffer<32>;
|
||||
|
||||
using GenericLexer = Detail::GenericLexer<char>;
|
||||
using Utf16GenericLexer = Detail::GenericLexer<char16_t>;
|
||||
|
||||
template<typename T>
|
||||
class Span;
|
||||
|
||||
@@ -207,6 +212,7 @@ using AK::TrailingCodePointTransformation;
|
||||
using AK::Traits;
|
||||
using AK::UnixDateTime;
|
||||
using AK::Utf16FlyString;
|
||||
using AK::Utf16GenericLexer;
|
||||
using AK::Utf16String;
|
||||
using AK::Utf16View;
|
||||
using AK::Utf32CodePointIterator;
|
||||
|
||||
@@ -4,151 +4,10 @@
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/Assertions.h>
|
||||
#include <AK/ByteString.h>
|
||||
#include <AK/CharacterTypes.h>
|
||||
#include <AK/GenericLexer.h>
|
||||
#include <AK/ScopeGuard.h>
|
||||
#include <AK/StringBuilder.h>
|
||||
#include <AK/UnicodeUtils.h>
|
||||
|
||||
namespace AK {
|
||||
|
||||
// Consume a number of characters
|
||||
StringView GenericLexer::consume(size_t count)
|
||||
{
|
||||
size_t start = m_index;
|
||||
size_t length = min(count, m_input.length() - m_index);
|
||||
m_index += length;
|
||||
|
||||
return m_input.substring_view(start, length);
|
||||
}
|
||||
|
||||
// Consume the rest of the input
|
||||
StringView GenericLexer::consume_all()
|
||||
{
|
||||
auto rest = m_input.substring_view(m_index, m_input.length() - m_index);
|
||||
m_index = m_input.length();
|
||||
return rest;
|
||||
}
|
||||
|
||||
// Consume until a new line is found
|
||||
StringView GenericLexer::consume_line()
|
||||
{
|
||||
size_t start = m_index;
|
||||
while (!is_eof() && peek() != '\r' && peek() != '\n')
|
||||
m_index++;
|
||||
size_t length = m_index - start;
|
||||
|
||||
consume_specific('\r');
|
||||
consume_specific('\n');
|
||||
|
||||
return m_input.substring_view(start, length);
|
||||
}
|
||||
|
||||
// Consume and return characters until `stop` is peek'd
|
||||
StringView GenericLexer::consume_until(char stop)
|
||||
{
|
||||
size_t start = m_index;
|
||||
while (!is_eof() && peek() != stop)
|
||||
m_index++;
|
||||
size_t length = m_index - start;
|
||||
|
||||
return m_input.substring_view(start, length);
|
||||
}
|
||||
|
||||
// Consume and return characters until the string `stop` is found
|
||||
StringView GenericLexer::consume_until(StringView stop)
|
||||
{
|
||||
size_t start = m_index;
|
||||
while (!is_eof() && !next_is(stop))
|
||||
m_index++;
|
||||
size_t length = m_index - start;
|
||||
|
||||
return m_input.substring_view(start, length);
|
||||
}
|
||||
|
||||
/*
|
||||
* Consume a string surrounded by single or double quotes. The returned
|
||||
* StringView does not include the quotes. An escape character can be provided
|
||||
* to capture the enclosing quotes. Please note that the escape character will
|
||||
* still be in the resulting StringView
|
||||
*/
|
||||
StringView GenericLexer::consume_quoted_string(char escape_char)
|
||||
{
|
||||
if (!next_is(is_quote))
|
||||
return {};
|
||||
|
||||
char quote_char = consume();
|
||||
size_t start = m_index;
|
||||
while (!is_eof()) {
|
||||
if (next_is(escape_char))
|
||||
m_index++;
|
||||
else if (next_is(quote_char))
|
||||
break;
|
||||
m_index++;
|
||||
}
|
||||
size_t length = m_index - start;
|
||||
|
||||
if (peek() != quote_char) {
|
||||
// Restore the index in case the string is unterminated
|
||||
m_index = start - 1;
|
||||
return {};
|
||||
}
|
||||
|
||||
// Ignore closing quote
|
||||
ignore();
|
||||
|
||||
return m_input.substring_view(start, length);
|
||||
}
|
||||
|
||||
template<Integral T>
|
||||
ErrorOr<T> GenericLexer::consume_decimal_integer()
|
||||
{
|
||||
using UnsignedT = MakeUnsigned<T>;
|
||||
|
||||
ArmedScopeGuard rollback { [&, rollback_position = m_index] {
|
||||
m_index = rollback_position;
|
||||
} };
|
||||
|
||||
bool has_minus_sign = false;
|
||||
|
||||
if (next_is('+') || next_is('-'))
|
||||
if (consume() == '-')
|
||||
has_minus_sign = true;
|
||||
|
||||
StringView number_view = consume_while(is_ascii_digit);
|
||||
if (number_view.is_empty())
|
||||
return Error::from_errno(EINVAL);
|
||||
|
||||
auto maybe_number = number_view.to_number<UnsignedT>(TrimWhitespace::No);
|
||||
if (!maybe_number.has_value())
|
||||
return Error::from_errno(ERANGE);
|
||||
auto number = maybe_number.value();
|
||||
|
||||
if (!has_minus_sign) {
|
||||
if (NumericLimits<T>::max() < number) // This is only possible in a signed case.
|
||||
return Error::from_errno(ERANGE);
|
||||
|
||||
rollback.disarm();
|
||||
return number;
|
||||
} else {
|
||||
if constexpr (IsUnsigned<T>) {
|
||||
if (number == 0) {
|
||||
rollback.disarm();
|
||||
return 0;
|
||||
}
|
||||
return Error::from_errno(ERANGE);
|
||||
} else {
|
||||
static constexpr UnsignedT max_value = static_cast<UnsignedT>(NumericLimits<T>::max()) + 1;
|
||||
if (number > max_value)
|
||||
return Error::from_errno(ERANGE);
|
||||
rollback.disarm();
|
||||
return -number;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
LineTrackingLexer::Position LineTrackingLexer::position_for(size_t index) const
|
||||
{
|
||||
// Sad case: we have no idea where the nearest newline is, so we have to
|
||||
@@ -157,8 +16,8 @@ LineTrackingLexer::Position LineTrackingLexer::position_for(size_t index) const
|
||||
auto next_newline = m_input.find('\n', m_largest_known_line_start_position);
|
||||
if (!next_newline.has_value()) {
|
||||
// No more newlines, add the end of the input as a line start to avoid searching again.
|
||||
m_line_start_positions->insert(m_input.length(), m_line_start_positions->size());
|
||||
m_largest_known_line_start_position = m_input.length();
|
||||
m_line_start_positions->insert(input_length(), m_line_start_positions->size());
|
||||
m_largest_known_line_start_position = input_length();
|
||||
break;
|
||||
}
|
||||
m_line_start_positions->insert(next_newline.value() + 1, m_line_start_positions->size());
|
||||
@@ -179,83 +38,4 @@ LineTrackingLexer::Position LineTrackingLexer::position_for(size_t index) const
|
||||
return { index, line, column };
|
||||
}
|
||||
|
||||
// Explicit instantiations of consume_decimal_integer: unsigned types first, then signed.
template ErrorOr<u8> GenericLexer::consume_decimal_integer<u8>();
template ErrorOr<u16> GenericLexer::consume_decimal_integer<u16>();
template ErrorOr<u32> GenericLexer::consume_decimal_integer<u32>();
template ErrorOr<u64> GenericLexer::consume_decimal_integer<u64>();

template ErrorOr<i8> GenericLexer::consume_decimal_integer<i8>();
template ErrorOr<i16> GenericLexer::consume_decimal_integer<i16>();
template ErrorOr<i32> GenericLexer::consume_decimal_integer<i32>();
template ErrorOr<i64> GenericLexer::consume_decimal_integer<i64>();
|
||||
|
||||
// Consume a "\u" escape: either the braced form ("\u{...}") or the four-digit
// form, which may optionally be combined with a following low surrogate escape.
auto GenericLexer::consume_escaped_code_point(bool combine_surrogate_pairs) -> Result<u32, UnicodeEscapeError>
{
    bool const has_escape_prefix = consume_specific("\\u"sv);
    if (!has_escape_prefix)
        return UnicodeEscapeError::MalformedUnicodeEscape;

    return next_is('{')
        ? decode_code_point()
        : decode_single_or_paired_surrogate(combine_surrogate_pairs);
}
|
||||
|
||||
// Decode the "{hex digits}" part of a braced escape. The caller must have
// positioned the lexer on the opening brace.
auto GenericLexer::decode_code_point() -> Result<u32, UnicodeEscapeError>
{
    bool const opened = consume_specific('{');
    VERIFY(opened);

    u32 value = 0;

    // At least one hex digit is required; keep shifting digits in until '}' is consumed.
    do {
        if (!next_is(is_ascii_hex_digit))
            return UnicodeEscapeError::MalformedUnicodeEscape;

        auto const widened = (value << 4u) | parse_ascii_hex_digit(consume());
        if (widened < value)
            return UnicodeEscapeError::UnicodeEscapeOverflow;

        value = widened;
    } while (!consume_specific('}'));

    if (!is_unicode(value))
        return UnicodeEscapeError::UnicodeEscapeOverflow;
    return value;
}
|
||||
|
||||
// Decode the four hex digits after "\u". When `combine_surrogate_pairs` is set and
// the value is a high surrogate directly followed by a "\u" escape holding a low
// surrogate, both are combined into one code point.
auto GenericLexer::decode_single_or_paired_surrogate(bool combine_surrogate_pairs) -> Result<u32, UnicodeEscapeError>
{
    constexpr size_t hex_digits_per_unit = 4;
    // Length of a whole "\uXXXX" escape: the two-character prefix plus the digits.
    constexpr size_t escape_length = 2 + hex_digits_per_unit;

    // Reads exactly four hex digits as one code unit; empty if a non-hex character is met.
    auto read_code_unit = [&]() -> Optional<u16> {
        u16 unit = 0;

        size_t remaining = hex_digits_per_unit;
        while (remaining-- > 0) {
            if (!next_is(is_ascii_hex_digit))
                return {};

            unit = (unit << 4u) | parse_ascii_hex_digit(consume());
        }

        return unit;
    };

    auto first = read_code_unit();
    if (!first.has_value())
        return UnicodeEscapeError::MalformedUnicodeEscape;

    bool const may_pair = combine_surrogate_pairs && UnicodeUtils::is_utf16_high_surrogate(*first);
    if (!may_pair || !consume_specific("\\u"sv))
        return *first;

    auto second = read_code_unit();
    if (!second.has_value())
        return UnicodeEscapeError::MalformedUnicodeEscape;

    if (!UnicodeUtils::is_utf16_low_surrogate(*second)) {
        // Not a pair after all: un-read the second escape and return the first unit alone.
        retreat(escape_length);
        return *first;
    }

    return UnicodeUtils::decode_utf16_surrogate_pair(*first, *second);
}
|
||||
|
||||
}
|
||||
|
||||
@@ -6,51 +6,96 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/Assertions.h>
|
||||
#include <AK/Forward.h>
|
||||
#include <AK/NonnullOwnPtr.h>
|
||||
#include <AK/RedBlackTree.h>
|
||||
#include <AK/Result.h>
|
||||
#include <AK/ScopeGuard.h>
|
||||
#include <AK/StringView.h>
|
||||
#include <AK/Utf16View.h>
|
||||
|
||||
namespace AK {
|
||||
|
||||
constexpr auto is_any_of(StringView values)
|
||||
{
|
||||
return [values](auto c) { return values.contains(c); };
|
||||
}
|
||||
|
||||
constexpr auto is_not_any_of(StringView values)
|
||||
{
|
||||
return [values](auto c) { return !values.contains(c); };
|
||||
}
|
||||
|
||||
constexpr auto is_path_separator = is_any_of("/\\"sv);
|
||||
constexpr auto is_quote = is_any_of("'\""sv);
|
||||
|
||||
// Failure reasons reported by the lexer's "\u" escape decoding
enum class UnicodeEscapeError {
    // The "\u" prefix or a required hex digit was missing
    MalformedUnicodeEscape,
    // The decoded value wrapped around or is not a Unicode code point
    UnicodeEscapeOverflow,
};
|
||||
|
||||
namespace Detail {
|
||||
|
||||
template<typename CharType>
|
||||
class GenericLexer {
|
||||
static_assert(IsOneOf<CharType, char, char16_t>);
|
||||
|
||||
public:
|
||||
constexpr explicit GenericLexer(StringView input)
|
||||
using ViewType = Detail::Conditional<IsSame<CharType, char>, StringView, Utf16View>;
|
||||
|
||||
constexpr explicit GenericLexer(ViewType input)
|
||||
: m_input(input)
|
||||
{
|
||||
}
|
||||
|
||||
constexpr size_t tell() const { return m_index; }
|
||||
constexpr size_t tell_remaining() const { return m_input.length() - m_index; }
|
||||
constexpr size_t tell_remaining() const { return input_length() - m_index; }
|
||||
|
||||
StringView remaining() const { return m_input.substring_view(m_index); }
|
||||
StringView input() const { return m_input; }
|
||||
constexpr ViewType remaining() const { return m_input.substring_view(m_index); }
|
||||
constexpr ViewType input() const { return m_input; }
|
||||
|
||||
constexpr bool is_eof() const { return m_index >= m_input.length(); }
|
||||
constexpr bool is_eof() const { return m_index >= input_length(); }
|
||||
|
||||
constexpr char peek(size_t offset = 0) const
|
||||
constexpr CharType peek(size_t offset = 0) const
|
||||
{
|
||||
return (m_index + offset < m_input.length()) ? m_input[m_index + offset] : '\0';
|
||||
return (m_index + offset < input_length()) ? code_unit_at(m_index + offset) : '\0';
|
||||
}
|
||||
|
||||
Optional<StringView> peek_string(size_t length, size_t offset = 0) const
|
||||
constexpr Optional<ViewType> peek_string(size_t length, size_t offset = 0) const
|
||||
{
|
||||
if (m_index + offset + length > m_input.length())
|
||||
if (m_index + offset + length > input_length())
|
||||
return {};
|
||||
return m_input.substring_view(m_index + offset, length);
|
||||
}
|
||||
|
||||
constexpr bool next_is(char expected) const
|
||||
constexpr bool next_is(CharType expected) const
|
||||
{
|
||||
return peek() == expected;
|
||||
}
|
||||
|
||||
constexpr bool next_is(StringView expected) const
|
||||
constexpr bool next_is(char expected) const
|
||||
requires(IsSame<CharType, char16_t>)
|
||||
{
|
||||
for (size_t i = 0; i < expected.length(); ++i)
|
||||
if (peek(i) != expected[i])
|
||||
return false;
|
||||
return true;
|
||||
return peek() == expected;
|
||||
}
|
||||
|
||||
// Test whether the upcoming input starts with `expected`
constexpr bool next_is(ViewType expected) const
{
    // UTF-16 views are measured in code units; byte views use plain length().
    auto const expected_length = [&]() -> size_t {
        if constexpr (IsSame<CharType, char16_t>)
            return expected.length_in_code_units();
        else
            return expected.length();
    }();

    return peek_string(expected_length) == expected;
}
|
||||
|
||||
// UTF-16 lexers only: test whether the upcoming input starts with the StringView `expected`
constexpr bool next_is(StringView expected) const
    requires(IsSame<CharType, char16_t>)
{
    auto const expected_length = expected.length();
    return peek_string(expected_length) == expected;
}
|
||||
|
||||
constexpr void retreat()
|
||||
@@ -65,13 +110,42 @@ public:
|
||||
m_index -= count;
|
||||
}
|
||||
|
||||
constexpr char consume()
|
||||
constexpr CharType consume()
|
||||
{
|
||||
VERIFY(!is_eof());
|
||||
return m_input[m_index++];
|
||||
return code_unit_at(m_index++);
|
||||
}
|
||||
|
||||
// Consume `next` if it is the upcoming character; returns whether it was consumed
constexpr bool consume_specific(CharType next)
{
    bool const matched = next_is(next);
    if (matched)
        ignore();
    return matched;
}
|
||||
|
||||
constexpr bool consume_specific(char next)
|
||||
requires(IsSame<CharType, char16_t>)
|
||||
{
|
||||
return consume_specific(static_cast<char16_t>(next));
|
||||
}
|
||||
|
||||
// Consume `next` if the upcoming input starts with it; returns whether it was consumed
constexpr bool consume_specific(ViewType next)
{
    if (next_is(next)) {
        // Skip exactly as many units as the matched view holds.
        if constexpr (IsSame<CharType, char16_t>)
            ignore(next.length_in_code_units());
        else
            ignore(next.length());

        return true;
    }

    return false;
}
|
||||
|
||||
constexpr bool consume_specific(StringView next)
|
||||
requires(IsSame<CharType, char16_t>)
|
||||
{
|
||||
if (!next_is(next))
|
||||
return false;
|
||||
@@ -80,16 +154,7 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
constexpr bool consume_specific(char next)
|
||||
{
|
||||
if (!next_is(next))
|
||||
return false;
|
||||
|
||||
ignore(sizeof(next));
|
||||
return true;
|
||||
}
|
||||
|
||||
constexpr char consume_escaped_character(char escape_char = '\\', StringView escape_map = "n\nr\rt\tb\bf\f"sv)
|
||||
constexpr CharType consume_escaped_character(CharType escape_char = '\\', StringView escape_map = "n\nr\rt\tb\bf\f"sv)
|
||||
{
|
||||
if (!consume_specific(escape_char))
|
||||
return consume();
|
||||
@@ -104,44 +169,180 @@ public:
|
||||
return c;
|
||||
}
|
||||
|
||||
StringView consume(size_t count);
|
||||
StringView consume_all();
|
||||
StringView consume_line();
|
||||
StringView consume_until(char);
|
||||
StringView consume_until(StringView);
|
||||
StringView consume_quoted_string(char escape_char = 0);
|
||||
|
||||
template<Integral T>
|
||||
ErrorOr<T> consume_decimal_integer();
|
||||
|
||||
enum class UnicodeEscapeError {
|
||||
MalformedUnicodeEscape,
|
||||
UnicodeEscapeOverflow,
|
||||
};
|
||||
|
||||
Result<u32, UnicodeEscapeError> consume_escaped_code_point(bool combine_surrogate_pairs = true);
|
||||
|
||||
constexpr void ignore(size_t count = 1)
|
||||
// Consume a number of characters
|
||||
constexpr ViewType consume(size_t count)
|
||||
{
|
||||
count = min(count, m_input.length() - m_index);
|
||||
m_index += count;
|
||||
auto start = m_index;
|
||||
auto length = min(count, input_length() - m_index);
|
||||
m_index += length;
|
||||
|
||||
return m_input.substring_view(start, length);
|
||||
}
|
||||
|
||||
constexpr void ignore_until(char stop)
|
||||
// Consume the rest of the input
|
||||
constexpr ViewType consume_all()
|
||||
{
|
||||
while (!is_eof() && peek() != stop) {
|
||||
++m_index;
|
||||
auto rest = m_input.substring_view(m_index, input_length() - m_index);
|
||||
m_index = input_length();
|
||||
return rest;
|
||||
}
|
||||
|
||||
// Consume until a new line is found
|
||||
constexpr ViewType consume_line()
|
||||
{
|
||||
auto start = m_index;
|
||||
while (!is_eof() && peek() != '\r' && peek() != '\n')
|
||||
m_index++;
|
||||
|
||||
auto length = m_index - start;
|
||||
consume_specific('\r');
|
||||
consume_specific('\n');
|
||||
|
||||
return m_input.substring_view(start, length);
|
||||
}
|
||||
|
||||
// Consume and return characters until `stop` is peeked
|
||||
constexpr ViewType consume_until(CharType stop)
|
||||
{
|
||||
auto start = m_index;
|
||||
while (!is_eof() && peek() != stop)
|
||||
m_index++;
|
||||
|
||||
auto length = m_index - start;
|
||||
return m_input.substring_view(start, length);
|
||||
}
|
||||
|
||||
// UTF-16 lexers only: consume and return code units until `stop` is peeked.
// `stop` itself is not consumed.
constexpr ViewType consume_until(char stop)
    requires(IsSame<CharType, char16_t>)
{
    // Widen through unsigned char: where plain `char` is signed, a direct cast would
    // sign-extend bytes >= 0x80 to 0xFF80..0xFFFF, making the result platform-dependent.
    return consume_until(static_cast<char16_t>(static_cast<unsigned char>(stop)));
}
|
||||
|
||||
// Consume and return characters until the string `stop` is found
|
||||
constexpr ViewType consume_until(ViewType stop)
|
||||
{
|
||||
auto start = m_index;
|
||||
while (!is_eof() && !next_is(stop))
|
||||
m_index++;
|
||||
|
||||
auto length = m_index - start;
|
||||
return m_input.substring_view(start, length);
|
||||
}
|
||||
|
||||
// Consume a string surrounded by single or double quotes. The returned ViewType does not include the quotes. An
|
||||
// escape character can be provided to capture the enclosing quotes. Please note that the escape character will
|
||||
// still be in the resulting ViewType.
|
||||
constexpr ViewType consume_quoted_string(CharType escape_char = 0)
|
||||
{
|
||||
if (!next_is(is_quote))
|
||||
return {};
|
||||
|
||||
auto quote_char = consume();
|
||||
auto start = m_index;
|
||||
while (!is_eof()) {
|
||||
if (next_is(escape_char))
|
||||
m_index++;
|
||||
else if (next_is(quote_char))
|
||||
break;
|
||||
m_index++;
|
||||
}
|
||||
auto length = m_index - start;
|
||||
|
||||
if (peek() != quote_char) {
|
||||
// Restore the index in case the string is unterminated
|
||||
m_index = start - 1;
|
||||
return {};
|
||||
}
|
||||
|
||||
// Ignore closing quote
|
||||
ignore();
|
||||
|
||||
return m_input.substring_view(start, length);
|
||||
}
|
||||
|
||||
template<Integral T>
|
||||
ErrorOr<T> consume_decimal_integer()
|
||||
{
|
||||
using UnsignedT = MakeUnsigned<T>;
|
||||
|
||||
ArmedScopeGuard rollback { [&, rollback_position = m_index]() {
|
||||
m_index = rollback_position;
|
||||
} };
|
||||
|
||||
bool has_minus_sign = false;
|
||||
|
||||
if (next_is('+') || next_is('-'))
|
||||
if (consume() == '-')
|
||||
has_minus_sign = true;
|
||||
|
||||
auto number_view = consume_while(is_ascii_digit);
|
||||
if (number_view.is_empty())
|
||||
return Error::from_errno(EINVAL);
|
||||
|
||||
auto maybe_number = number_view.template to_number<UnsignedT>(TrimWhitespace::No);
|
||||
if (!maybe_number.has_value())
|
||||
return Error::from_errno(ERANGE);
|
||||
auto number = maybe_number.value();
|
||||
|
||||
if (!has_minus_sign) {
|
||||
if (NumericLimits<T>::max() < number) // This is only possible in a signed case.
|
||||
return Error::from_errno(ERANGE);
|
||||
|
||||
rollback.disarm();
|
||||
return number;
|
||||
}
|
||||
|
||||
if constexpr (IsUnsigned<T>) {
|
||||
if (number != 0)
|
||||
return Error::from_errno(ERANGE);
|
||||
|
||||
rollback.disarm();
|
||||
return 0;
|
||||
} else {
|
||||
static constexpr UnsignedT max_value = static_cast<UnsignedT>(NumericLimits<T>::max()) + 1;
|
||||
if (number > max_value)
|
||||
return Error::from_errno(ERANGE);
|
||||
|
||||
rollback.disarm();
|
||||
return -number;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Conditions are used to match arbitrary characters. You can use lambdas,
|
||||
* ctype functions, or is_any_of() and its derivatives (see below).
|
||||
* A few examples:
|
||||
* - `if (lexer.next_is(isdigit))`
|
||||
* - `auto name = lexer.consume_while([](char c) { return isalnum(c) || c == '_'; });`
|
||||
* - `lexer.ignore_until(is_any_of("<^>"));`
|
||||
*/
|
||||
// Consume a "\u" escape: either the braced form ("\u{...}") or the four-digit form, which may optionally be
// combined with a following low surrogate escape.
Result<u32, UnicodeEscapeError> consume_escaped_code_point(bool combine_surrogate_pairs = true)
{
    bool const has_escape_prefix = consume_specific("\\u"sv);
    if (!has_escape_prefix)
        return UnicodeEscapeError::MalformedUnicodeEscape;

    return next_is('{')
        ? decode_code_point()
        : decode_single_or_paired_surrogate(combine_surrogate_pairs);
}
|
||||
|
||||
// Skip up to `count` characters, never moving past the end of the input
constexpr void ignore(size_t count = 1)
{
    auto const remaining = input_length() - m_index;
    m_index += min(count, remaining);
}
|
||||
|
||||
// Skip characters until `stop` is peeked (`stop` itself is not skipped)
constexpr void ignore_until(CharType stop)
{
    for (; !is_eof(); ++m_index) {
        if (peek() == stop)
            break;
    }
}
|
||||
|
||||
constexpr void ignore_until(char stop)
|
||||
requires(IsSame<CharType, char16_t>)
|
||||
{
|
||||
return ignore_until(static_cast<char16_t>(stop));
|
||||
}
|
||||
|
||||
// Conditions are used to match arbitrary characters. You can use lambdas, ctype functions, or is_any_of() and its
|
||||
// derivatives (see below).
|
||||
//
|
||||
// A few examples:
|
||||
// - `if (lexer.next_is(isdigit))`
|
||||
// - `auto name = lexer.consume_while([](char c) { return isalnum(c) || c == '_'; });`
|
||||
// - `lexer.ignore_until(is_any_of("<^>"));`
|
||||
|
||||
// Test the next character against a Condition
|
||||
template<typename TPredicate>
|
||||
@@ -152,25 +353,25 @@ public:
|
||||
|
||||
// Consume and return characters while `pred` returns true
|
||||
template<typename TPredicate>
|
||||
StringView consume_while(TPredicate pred)
|
||||
constexpr ViewType consume_while(TPredicate pred)
|
||||
{
|
||||
size_t start = m_index;
|
||||
auto start = m_index;
|
||||
while (!is_eof() && pred(peek()))
|
||||
++m_index;
|
||||
size_t length = m_index - start;
|
||||
|
||||
auto length = m_index - start;
|
||||
return m_input.substring_view(start, length);
|
||||
}
|
||||
|
||||
// Consume and return characters until `pred` returns true
|
||||
template<typename TPredicate>
|
||||
StringView consume_until(TPredicate pred)
|
||||
constexpr ViewType consume_until(TPredicate pred)
|
||||
{
|
||||
size_t start = m_index;
|
||||
auto start = m_index;
|
||||
while (!is_eof() && !pred(peek()))
|
||||
++m_index;
|
||||
size_t length = m_index - start;
|
||||
|
||||
auto length = m_index - start;
|
||||
return m_input.substring_view(start, length);
|
||||
}
|
||||
|
||||
@@ -201,13 +402,88 @@ public:
|
||||
}
|
||||
|
||||
protected:
|
||||
Result<u32, UnicodeEscapeError> decode_code_point();
|
||||
Result<u32, UnicodeEscapeError> decode_single_or_paired_surrogate(bool combine_surrogate_pairs = true);
|
||||
Result<u32, UnicodeEscapeError> decode_code_point()
|
||||
{
|
||||
bool starts_with_open_bracket = consume_specific('{');
|
||||
VERIFY(starts_with_open_bracket);
|
||||
|
||||
StringView m_input;
|
||||
u32 code_point = 0;
|
||||
|
||||
while (true) {
|
||||
if (!next_is(is_ascii_hex_digit))
|
||||
return UnicodeEscapeError::MalformedUnicodeEscape;
|
||||
|
||||
auto new_code_point = (code_point << 4u) | parse_ascii_hex_digit(consume());
|
||||
if (new_code_point < code_point)
|
||||
return UnicodeEscapeError::UnicodeEscapeOverflow;
|
||||
|
||||
code_point = new_code_point;
|
||||
if (consume_specific('}'))
|
||||
break;
|
||||
}
|
||||
|
||||
if (is_unicode(code_point))
|
||||
return code_point;
|
||||
return UnicodeEscapeError::UnicodeEscapeOverflow;
|
||||
}
|
||||
|
||||
// Decode the four hex digits after "\u". When `combine_surrogate_pairs` is set and the value is a high surrogate
// directly followed by a "\u" escape holding a low surrogate, both are combined into one code point.
Result<u32, UnicodeEscapeError> decode_single_or_paired_surrogate(bool combine_surrogate_pairs = true)
{
    constexpr size_t hex_digits_per_unit = 4;
    // Length of a whole "\uXXXX" escape: the two-character prefix plus the digits.
    constexpr size_t escape_length = 2 + hex_digits_per_unit;

    // Reads exactly four hex digits as one code unit; empty if a non-hex character is met.
    auto read_code_unit = [&]() -> Optional<u16> {
        u16 unit = 0;

        size_t remaining = hex_digits_per_unit;
        while (remaining-- > 0) {
            if (!next_is(is_ascii_hex_digit))
                return {};

            unit = (unit << 4u) | parse_ascii_hex_digit(consume());
        }

        return unit;
    };

    auto first = read_code_unit();
    if (!first.has_value())
        return UnicodeEscapeError::MalformedUnicodeEscape;

    bool const may_pair = combine_surrogate_pairs && UnicodeUtils::is_utf16_high_surrogate(*first);
    if (!may_pair || !consume_specific("\\u"sv))
        return *first;

    auto second = read_code_unit();
    if (!second.has_value())
        return UnicodeEscapeError::MalformedUnicodeEscape;

    if (!UnicodeUtils::is_utf16_low_surrogate(*second)) {
        // Not a pair after all: un-read the second escape and return the first unit alone.
        retreat(escape_length);
        return *first;
    }

    return UnicodeUtils::decode_utf16_surrogate_pair(*first, *second);
}
|
||||
|
||||
// Length of the whole input, in units of CharType (code units for UTF-16 input)
constexpr size_t input_length() const
{
    if constexpr (IsSame<CharType, char>)
        return m_input.length();
    else
        return m_input.length_in_code_units();
}
|
||||
|
||||
// Fetch the unit at `index` through whichever accessor the underlying view type provides
constexpr CharType code_unit_at(size_t index) const
{
    if constexpr (IsSame<CharType, char>)
        return m_input[index];
    else
        return m_input.code_unit_at(index);
}
|
||||
|
||||
ViewType m_input;
|
||||
size_t m_index { 0 };
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
class LineTrackingLexer : public GenericLexer {
|
||||
public:
|
||||
struct Position {
|
||||
@@ -241,19 +517,6 @@ protected:
|
||||
mutable size_t m_largest_known_line_start_position { 0 };
|
||||
};
|
||||
|
||||
constexpr auto is_any_of(StringView values)
|
||||
{
|
||||
return [values](auto c) { return values.contains(c); };
|
||||
}
|
||||
|
||||
constexpr auto is_not_any_of(StringView values)
|
||||
{
|
||||
return [values](auto c) { return !values.contains(c); };
|
||||
}
|
||||
|
||||
constexpr auto is_path_separator = is_any_of("/\\"sv);
|
||||
constexpr auto is_quote = is_any_of("'\""sv);
|
||||
|
||||
}
|
||||
|
||||
#if USING_AK_GLOBALLY
|
||||
|
||||
@@ -123,9 +123,9 @@ ByteString Token::string_value(StringValueStatus& status) const
|
||||
|
||||
if (code_point_or_error.is_error()) {
|
||||
switch (code_point_or_error.error()) {
|
||||
case GenericLexer::UnicodeEscapeError::MalformedUnicodeEscape:
|
||||
case AK::UnicodeEscapeError::MalformedUnicodeEscape:
|
||||
return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
|
||||
case GenericLexer::UnicodeEscapeError::UnicodeEscapeOverflow:
|
||||
case AK::UnicodeEscapeError::UnicodeEscapeOverflow:
|
||||
return encoding_failure(StringValueStatus::UnicodeEscapeOverflow);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,120 +7,214 @@
|
||||
#include <LibTest/TestCase.h>
|
||||
|
||||
#include <AK/GenericLexer.h>
|
||||
#include <AK/StringView.h>
|
||||
|
||||
TEST_CASE(should_constexpr_construct_from_empty_string_view)
|
||||
{
|
||||
constexpr GenericLexer sut(StringView {});
|
||||
static_assert(sut.is_eof());
|
||||
{
|
||||
constexpr GenericLexer sut(StringView {});
|
||||
static_assert(sut.is_eof());
|
||||
}
|
||||
{
|
||||
constexpr Utf16GenericLexer sut(Utf16View {});
|
||||
static_assert(sut.is_eof());
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE(should_construct_from_string_view)
|
||||
{
|
||||
constexpr GenericLexer sut("abcdef"sv);
|
||||
static_assert(!sut.is_eof());
|
||||
{
|
||||
constexpr GenericLexer sut("abcdef"sv);
|
||||
static_assert(!sut.is_eof());
|
||||
}
|
||||
{
|
||||
constexpr Utf16GenericLexer sut("abcdef"sv);
|
||||
static_assert(!sut.is_eof());
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE(should_constexpr_tell)
|
||||
{
|
||||
constexpr GenericLexer sut("abcdef"sv);
|
||||
static_assert(sut.tell() == 0);
|
||||
{
|
||||
constexpr GenericLexer sut("abcdef"sv);
|
||||
static_assert(sut.tell() == 0);
|
||||
}
|
||||
{
|
||||
constexpr Utf16GenericLexer sut("abcdef"sv);
|
||||
static_assert(sut.tell() == 0);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE(should_constexpr_tell_remaining)
|
||||
{
|
||||
constexpr GenericLexer sut("abcdef"sv);
|
||||
static_assert(sut.tell_remaining() == 6);
|
||||
{
|
||||
constexpr GenericLexer sut("abcdef"sv);
|
||||
static_assert(sut.tell_remaining() == 6);
|
||||
}
|
||||
{
|
||||
constexpr Utf16GenericLexer sut("abcdef"sv);
|
||||
static_assert(sut.tell_remaining() == 6);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE(should_constexpr_peek)
|
||||
{
|
||||
constexpr GenericLexer sut("abcdef"sv);
|
||||
static_assert(sut.peek() == 'a');
|
||||
static_assert(sut.peek(2) == 'c');
|
||||
static_assert(sut.peek(100) == '\0');
|
||||
{
|
||||
constexpr GenericLexer sut("abcdef"sv);
|
||||
static_assert(sut.peek() == 'a');
|
||||
static_assert(sut.peek(2) == 'c');
|
||||
static_assert(sut.peek(100) == '\0');
|
||||
}
|
||||
{
|
||||
constexpr Utf16GenericLexer sut("abcdef"sv);
|
||||
static_assert(sut.peek() == 'a');
|
||||
static_assert(sut.peek(2) == 'c');
|
||||
static_assert(sut.peek(100) == '\0');
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE(should_constexpr_next_is)
|
||||
{
|
||||
constexpr GenericLexer sut("abcdef"sv);
|
||||
static_assert(sut.next_is('a'));
|
||||
static_assert(sut.next_is("abc"sv));
|
||||
{
|
||||
constexpr GenericLexer sut("abcdef"sv);
|
||||
static_assert(sut.next_is('a'));
|
||||
static_assert(sut.next_is("abc"sv));
|
||||
}
|
||||
{
|
||||
constexpr Utf16GenericLexer sut("abcdef"sv);
|
||||
static_assert(sut.next_is('a'));
|
||||
static_assert(sut.next_is("abc"sv));
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE(should_constexpr_retreat)
|
||||
{
|
||||
constexpr auto sut = [] {
|
||||
GenericLexer sut("abcdef"sv);
|
||||
sut.consume();
|
||||
sut.retreat();
|
||||
return sut;
|
||||
}();
|
||||
static_assert(sut.peek() == 'a');
|
||||
{
|
||||
constexpr auto sut = [] {
|
||||
GenericLexer sut("abcdef"sv);
|
||||
sut.consume();
|
||||
sut.retreat();
|
||||
return sut;
|
||||
}();
|
||||
static_assert(sut.peek() == 'a');
|
||||
}
|
||||
{
|
||||
constexpr auto sut = [] {
|
||||
Utf16GenericLexer sut("abcdef"sv);
|
||||
sut.consume();
|
||||
sut.retreat();
|
||||
return sut;
|
||||
}();
|
||||
static_assert(sut.peek() == 'a');
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE(should_constexpr_consume_1)
|
||||
{
|
||||
constexpr auto sut = [] {
|
||||
GenericLexer sut("abcdef"sv);
|
||||
sut.consume();
|
||||
return sut;
|
||||
}();
|
||||
static_assert(sut.peek() == 'b');
|
||||
{
|
||||
constexpr auto sut = [] {
|
||||
GenericLexer sut("abcdef"sv);
|
||||
sut.consume();
|
||||
return sut;
|
||||
}();
|
||||
static_assert(sut.peek() == 'b');
|
||||
}
|
||||
{
|
||||
constexpr auto sut = [] {
|
||||
Utf16GenericLexer sut("abcdef"sv);
|
||||
sut.consume();
|
||||
return sut;
|
||||
}();
|
||||
static_assert(sut.peek() == 'b');
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE(should_constexpr_consume_specific_char)
|
||||
{
|
||||
constexpr auto sut = [] {
|
||||
GenericLexer sut("abcdef"sv);
|
||||
sut.consume_specific('a');
|
||||
return sut;
|
||||
}();
|
||||
static_assert(sut.peek() == 'b');
|
||||
{
|
||||
constexpr auto sut = [] {
|
||||
GenericLexer sut("abcdef"sv);
|
||||
sut.consume_specific('a');
|
||||
return sut;
|
||||
}();
|
||||
static_assert(sut.peek() == 'b');
|
||||
}
|
||||
{
|
||||
constexpr auto sut = [] {
|
||||
Utf16GenericLexer sut("abcdef"sv);
|
||||
sut.consume_specific('a');
|
||||
return sut;
|
||||
}();
|
||||
static_assert(sut.peek() == 'b');
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE(should_constexpr_consume_specific_string_view)
|
||||
{
|
||||
constexpr auto sut = [] {
|
||||
GenericLexer sut("abcdef"sv);
|
||||
sut.consume_specific("ab"sv);
|
||||
return sut;
|
||||
}();
|
||||
static_assert(sut.peek() == 'c');
|
||||
}
|
||||
{
|
||||
constexpr auto sut = [] {
|
||||
GenericLexer sut("abcdef"sv);
|
||||
VERIFY(sut.consume_specific("ab"sv));
|
||||
return sut;
|
||||
}();
|
||||
static_assert(sut.peek() == 'c');
|
||||
}
|
||||
{
|
||||
constexpr auto sut = [] {
|
||||
Utf16GenericLexer sut("abcdef"sv);
|
||||
VERIFY(sut.consume_specific("abcd"sv));
|
||||
return sut;
|
||||
}();
|
||||
|
||||
TEST_CASE(should_constexpr_consume_specific_cstring)
|
||||
{
|
||||
constexpr auto sut = [] {
|
||||
GenericLexer sut("abcdef"sv);
|
||||
sut.consume_specific("abcd"sv);
|
||||
return sut;
|
||||
}();
|
||||
static_assert(sut.peek() == 'e');
|
||||
static_assert(sut.peek() == 'e');
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE(should_constexpr_consume_specific_with_predicate)
|
||||
{
|
||||
constexpr auto sut = [] {
|
||||
GenericLexer sut("h e l l o !"sv);
|
||||
for (size_t i = 0; i < 100; ++i) {
|
||||
sut.consume_specific_with_predicate([](auto c) {
|
||||
return is_ascii_alpha(c) || is_ascii_space(c);
|
||||
});
|
||||
}
|
||||
return sut;
|
||||
}();
|
||||
static_assert(sut.peek() == '!');
|
||||
{
|
||||
constexpr auto sut = [] {
|
||||
GenericLexer sut("h e l l o !"sv);
|
||||
for (size_t i = 0; i < 100; ++i) {
|
||||
sut.consume_specific_with_predicate([](auto c) {
|
||||
return is_ascii_alpha(c) || is_ascii_space(c);
|
||||
});
|
||||
}
|
||||
return sut;
|
||||
}();
|
||||
static_assert(sut.peek() == '!');
|
||||
}
|
||||
{
|
||||
constexpr auto sut = [] {
|
||||
Utf16GenericLexer sut("h e l l o !"sv);
|
||||
for (size_t i = 0; i < 100; ++i) {
|
||||
sut.consume_specific_with_predicate([](auto c) {
|
||||
return is_ascii_alpha(c) || is_ascii_space(c);
|
||||
});
|
||||
}
|
||||
return sut;
|
||||
}();
|
||||
static_assert(sut.peek() == '!');
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE(should_constexpr_ignore_until)
|
||||
{
|
||||
constexpr auto sut = [] {
|
||||
GenericLexer sut("abcdef"sv);
|
||||
sut.ignore_until('d');
|
||||
return sut;
|
||||
}();
|
||||
static_assert(sut.peek() == 'd');
|
||||
{
|
||||
constexpr auto sut = [] {
|
||||
GenericLexer sut("abcdef"sv);
|
||||
sut.ignore_until('d');
|
||||
return sut;
|
||||
}();
|
||||
static_assert(sut.peek() == 'd');
|
||||
}
|
||||
{
|
||||
constexpr auto sut = [] {
|
||||
Utf16GenericLexer sut("abcdef"sv);
|
||||
sut.ignore_until('d');
|
||||
return sut;
|
||||
}();
|
||||
static_assert(sut.peek() == 'd');
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE(should_constexpr_next_is_pred)
|
||||
@@ -128,41 +222,68 @@ TEST_CASE(should_constexpr_next_is_pred)
|
||||
constexpr auto pred = [](auto c) {
|
||||
return c == 'a';
|
||||
};
|
||||
constexpr GenericLexer sut("abcdef"sv);
|
||||
static_assert(sut.next_is(pred));
|
||||
|
||||
{
|
||||
constexpr GenericLexer sut("abcdef"sv);
|
||||
static_assert(sut.next_is(pred));
|
||||
}
|
||||
{
|
||||
constexpr Utf16GenericLexer sut("abcdef"sv);
|
||||
static_assert(sut.next_is(pred));
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE(should_constexpr_ignore_while_pred)
|
||||
{
|
||||
constexpr auto sut = [] {
|
||||
constexpr auto pred = [](auto c) {
|
||||
return c == 'a';
|
||||
};
|
||||
constexpr auto pred = [](auto c) {
|
||||
return c == 'a';
|
||||
};
|
||||
|
||||
GenericLexer sut("abcdef"sv);
|
||||
sut.ignore_while(pred);
|
||||
return sut;
|
||||
}();
|
||||
static_assert(sut.peek() == 'b');
|
||||
{
|
||||
constexpr auto sut = [&] {
|
||||
GenericLexer sut("abcdef"sv);
|
||||
sut.ignore_while(pred);
|
||||
return sut;
|
||||
}();
|
||||
static_assert(sut.peek() == 'b');
|
||||
}
|
||||
{
|
||||
constexpr auto sut = [&] {
|
||||
Utf16GenericLexer sut("abcdef"sv);
|
||||
sut.ignore_while(pred);
|
||||
return sut;
|
||||
}();
|
||||
static_assert(sut.peek() == 'b');
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE(should_constexpr_ignore_until_pred)
|
||||
{
|
||||
constexpr auto sut = [] {
|
||||
constexpr auto pred = [](auto c) {
|
||||
return c == 'c';
|
||||
};
|
||||
constexpr auto pred = [](auto c) {
|
||||
return c == 'c';
|
||||
};
|
||||
|
||||
GenericLexer sut("abcdef"sv);
|
||||
sut.ignore_until(pred);
|
||||
return sut;
|
||||
}();
|
||||
static_assert(sut.peek() == 'c');
|
||||
{
|
||||
constexpr auto sut = [&] {
|
||||
GenericLexer sut("abcdef"sv);
|
||||
sut.ignore_until(pred);
|
||||
return sut;
|
||||
}();
|
||||
static_assert(sut.peek() == 'c');
|
||||
}
|
||||
{
|
||||
constexpr auto sut = [&] {
|
||||
Utf16GenericLexer sut("abcdef"sv);
|
||||
sut.ignore_until(pred);
|
||||
return sut;
|
||||
}();
|
||||
static_assert(sut.peek() == 'c');
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE(consume_escaped_code_point)
|
||||
{
|
||||
auto test = [](StringView test, Result<u32, GenericLexer::UnicodeEscapeError> expected, bool combine_surrogate_pairs = true) {
|
||||
auto test = [](StringView test, Result<u32, AK::UnicodeEscapeError> expected, bool combine_surrogate_pairs = true) {
|
||||
GenericLexer lexer(test);
|
||||
|
||||
auto actual = lexer.consume_escaped_code_point(combine_surrogate_pairs);
|
||||
@@ -173,39 +294,56 @@ TEST_CASE(consume_escaped_code_point)
|
||||
else
|
||||
EXPECT_EQ(actual.value(), expected.value());
|
||||
};
|
||||
auto test_utf16 = [](Utf16View test, Result<u32, AK::UnicodeEscapeError> expected, bool combine_surrogate_pairs = true) {
|
||||
Utf16GenericLexer lexer(test);
|
||||
|
||||
test("\\u"sv, GenericLexer::UnicodeEscapeError::MalformedUnicodeEscape);
|
||||
test("\\u{"sv, GenericLexer::UnicodeEscapeError::MalformedUnicodeEscape);
|
||||
test("\\u{1"sv, GenericLexer::UnicodeEscapeError::MalformedUnicodeEscape);
|
||||
test("\\u{}"sv, GenericLexer::UnicodeEscapeError::MalformedUnicodeEscape);
|
||||
test("\\u{x}"sv, GenericLexer::UnicodeEscapeError::MalformedUnicodeEscape);
|
||||
auto actual = lexer.consume_escaped_code_point(combine_surrogate_pairs);
|
||||
EXPECT_EQ(actual.is_error(), expected.is_error());
|
||||
|
||||
test("\\u{110000}"sv, GenericLexer::UnicodeEscapeError::UnicodeEscapeOverflow);
|
||||
test("\\u{f00000000}"sv, GenericLexer::UnicodeEscapeError::UnicodeEscapeOverflow);
|
||||
if (actual.is_error() && expected.is_error())
|
||||
EXPECT_EQ(actual.error(), expected.error());
|
||||
else
|
||||
EXPECT_EQ(actual.value(), expected.value());
|
||||
};
|
||||
|
||||
test("\\u{0}"sv, 0);
|
||||
test("\\u{41}"sv, 0x41);
|
||||
test("\\u{ffff}"sv, 0xffff);
|
||||
test("\\u{10ffff}"sv, 0x10ffff);
|
||||
#define CHECK(input, expected, ...) \
|
||||
do { \
|
||||
test(input, expected __VA_OPT__(, ) __VA_ARGS__); \
|
||||
test_utf16(input, expected __VA_OPT__(, ) __VA_ARGS__); \
|
||||
} while (false)
|
||||
|
||||
test("\\u1"sv, GenericLexer::UnicodeEscapeError::MalformedUnicodeEscape);
|
||||
test("\\u11"sv, GenericLexer::UnicodeEscapeError::MalformedUnicodeEscape);
|
||||
test("\\u111"sv, GenericLexer::UnicodeEscapeError::MalformedUnicodeEscape);
|
||||
test("\\u111x"sv, GenericLexer::UnicodeEscapeError::MalformedUnicodeEscape);
|
||||
test("\\ud800\\u"sv, GenericLexer::UnicodeEscapeError::MalformedUnicodeEscape);
|
||||
test("\\ud800\\u1"sv, GenericLexer::UnicodeEscapeError::MalformedUnicodeEscape);
|
||||
test("\\ud800\\u11"sv, GenericLexer::UnicodeEscapeError::MalformedUnicodeEscape);
|
||||
test("\\ud800\\u111"sv, GenericLexer::UnicodeEscapeError::MalformedUnicodeEscape);
|
||||
test("\\ud800\\u111x"sv, GenericLexer::UnicodeEscapeError::MalformedUnicodeEscape);
|
||||
CHECK("\\u"sv, AK::UnicodeEscapeError::MalformedUnicodeEscape);
|
||||
CHECK("\\u{"sv, AK::UnicodeEscapeError::MalformedUnicodeEscape);
|
||||
CHECK("\\u{1"sv, AK::UnicodeEscapeError::MalformedUnicodeEscape);
|
||||
CHECK("\\u{}"sv, AK::UnicodeEscapeError::MalformedUnicodeEscape);
|
||||
CHECK("\\u{x}"sv, AK::UnicodeEscapeError::MalformedUnicodeEscape);
|
||||
|
||||
test("\\u0000"sv, 0x0);
|
||||
test("\\u0041"sv, 0x41);
|
||||
test("\\uffff"sv, 0xffff);
|
||||
CHECK("\\u{110000}"sv, AK::UnicodeEscapeError::UnicodeEscapeOverflow);
|
||||
CHECK("\\u{f00000000}"sv, AK::UnicodeEscapeError::UnicodeEscapeOverflow);
|
||||
|
||||
test("\\ud83d"sv, 0xd83d);
|
||||
test("\\ud83d\\u1111"sv, 0xd83d);
|
||||
test("\\ud83d\\ude00"sv, 0x1f600);
|
||||
test("\\ud83d\\ude00"sv, 0xd83d, false);
|
||||
CHECK("\\u{0}"sv, 0);
|
||||
CHECK("\\u{41}"sv, 0x41);
|
||||
CHECK("\\u{ffff}"sv, 0xffff);
|
||||
CHECK("\\u{10ffff}"sv, 0x10ffff);
|
||||
|
||||
CHECK("\\u1"sv, AK::UnicodeEscapeError::MalformedUnicodeEscape);
|
||||
CHECK("\\u11"sv, AK::UnicodeEscapeError::MalformedUnicodeEscape);
|
||||
CHECK("\\u111"sv, AK::UnicodeEscapeError::MalformedUnicodeEscape);
|
||||
CHECK("\\u111x"sv, AK::UnicodeEscapeError::MalformedUnicodeEscape);
|
||||
CHECK("\\ud800\\u"sv, AK::UnicodeEscapeError::MalformedUnicodeEscape);
|
||||
CHECK("\\ud800\\u1"sv, AK::UnicodeEscapeError::MalformedUnicodeEscape);
|
||||
CHECK("\\ud800\\u11"sv, AK::UnicodeEscapeError::MalformedUnicodeEscape);
|
||||
CHECK("\\ud800\\u111"sv, AK::UnicodeEscapeError::MalformedUnicodeEscape);
|
||||
CHECK("\\ud800\\u111x"sv, AK::UnicodeEscapeError::MalformedUnicodeEscape);
|
||||
|
||||
CHECK("\\u0000"sv, 0x0);
|
||||
CHECK("\\u0041"sv, 0x41);
|
||||
CHECK("\\uffff"sv, 0xffff);
|
||||
|
||||
CHECK("\\ud83d"sv, 0xd83d);
|
||||
CHECK("\\ud83d\\u1111"sv, 0xd83d);
|
||||
CHECK("\\ud83d\\ude00"sv, 0x1f600);
|
||||
CHECK("\\ud83d\\ude00"sv, 0xd83d, false);
|
||||
}
|
||||
|
||||
TEST_CASE(consume_decimal_integer_correctly_parses)
|
||||
@@ -217,7 +355,14 @@ TEST_CASE(consume_decimal_integer_correctly_parses)
|
||||
VERIFY(!actual.is_error()); \
|
||||
EXPECT_EQ(actual.value(), static_cast<type>(expected)); \
|
||||
EXPECT_EQ(lexer.tell(), test##sv.length()); \
|
||||
\
|
||||
Utf16GenericLexer utf16_lexer(test##sv); \
|
||||
actual = utf16_lexer.consume_decimal_integer<type>(); \
|
||||
VERIFY(!actual.is_error()); \
|
||||
EXPECT_EQ(actual.value(), static_cast<type>(expected)); \
|
||||
EXPECT_EQ(utf16_lexer.tell(), test##sv.length()); \
|
||||
} while (false)
|
||||
|
||||
CHECK_PARSES_INTEGER("0", 0, u8);
|
||||
CHECK_PARSES_INTEGER("-0", -0, u8);
|
||||
CHECK_PARSES_INTEGER("10", 10, u8);
|
||||
@@ -270,7 +415,14 @@ TEST_CASE(consume_decimal_integer_fails_with_correct_error)
|
||||
VERIFY(actual.is_error() && actual.error().is_errno()); \
|
||||
EXPECT_EQ(actual.error().code(), err); \
|
||||
EXPECT_EQ(lexer.tell(), static_cast<size_t>(0)); \
|
||||
\
|
||||
Utf16GenericLexer utf16_lexer(test##sv); \
|
||||
actual = utf16_lexer.consume_decimal_integer<type>(); \
|
||||
VERIFY(actual.is_error() && actual.error().is_errno()); \
|
||||
EXPECT_EQ(actual.error().code(), err); \
|
||||
EXPECT_EQ(utf16_lexer.tell(), static_cast<size_t>(0)); \
|
||||
} while (false)
|
||||
|
||||
CHECK_FAILS_WITH_ERROR("Well hello GenericLexer!", u64, EINVAL);
|
||||
CHECK_FAILS_WITH_ERROR("+", u64, EINVAL);
|
||||
CHECK_FAILS_WITH_ERROR("+WHF", u64, EINVAL);
|
||||
|
||||
Reference in New Issue
Block a user