diff --git a/Libraries/LibRegex/RegexOptimizer.cpp b/Libraries/LibRegex/RegexOptimizer.cpp index 00651711c8c..da2bc374add 100644 --- a/Libraries/LibRegex/RegexOptimizer.cpp +++ b/Libraries/LibRegex/RegexOptimizer.cpp @@ -430,9 +430,18 @@ static bool has_overlap(Vector const& lhs, Vector disjunction_stack; + disjunction_stack.empend(); + + auto in_or = [&] -> bool& { return disjunction_stack.last().in_or; }; + auto matched_in_or = [&] -> bool& { return disjunction_stack.last().matched_in_or; }; + auto inverse_matched_in_or = [&] -> bool& { return disjunction_stack.last().inverse_matched_in_or; }; for (auto const& pair : rhs) { if (reset_temporary_inverse) { @@ -452,7 +461,7 @@ static bool has_overlap(Vector const& lhs, Vector const& lhs, Vector const& lhs, Vector(pair.value)); - if (!in_or && (current_lhs_inversion_state() ^ contains)) + if (!in_or() && (current_lhs_inversion_state() ^ contains)) return true; - if (in_or) { - matched_in_or |= contains; - inverse_matched_in_or |= !contains; + if (in_or()) { + matched_in_or() |= contains; + inverse_matched_in_or() |= !contains; } break; } case CharacterCompareType::CharRange: { auto range = CharRange(pair.value); auto contains = range_contains(range); - if (!in_or && (contains ^ current_lhs_inversion_state())) + if (!in_or() && (contains ^ current_lhs_inversion_state())) return true; - if (in_or) { - matched_in_or |= contains; - inverse_matched_in_or |= !contains; + if (in_or()) { + matched_in_or() |= contains; + inverse_matched_in_or() |= !contains; } break; @@ -525,16 +534,16 @@ static bool has_overlap(Vector const& lhs, Vector(pair.value)); - if (!in_or && (current_lhs_inversion_state() ^ contains)) + if (!in_or() && (current_lhs_inversion_state() ^ contains)) return true; auto inverse_contains = lhs_negated_unicode_properties.contains(static_cast(pair.value)); - if (!in_or && !(current_lhs_inversion_state() ^ inverse_contains)) + if (!in_or() && !(current_lhs_inversion_state() ^ inverse_contains)) return true; - if (in_or) { - matched_in_or |= contains; - inverse_matched_in_or |= inverse_contains; + if (in_or()) { + matched_in_or() |= contains; + inverse_matched_in_or() |= inverse_contains; } } break; @@ -543,14 +552,14 @@ static bool has_overlap(Vector const& lhs, Vector(pair.value)); - if (!in_or && (current_lhs_inversion_state() ^ contains)) + if (!in_or() && (current_lhs_inversion_state() ^ contains)) return true; auto inverse_contains = lhs_negated_unicode_general_categories.contains(static_cast(pair.value)); - if (!in_or && !(current_lhs_inversion_state() ^ inverse_contains)) + if (!in_or() && !(current_lhs_inversion_state() ^ inverse_contains)) return true; - if (in_or) { - matched_in_or |= contains; - inverse_matched_in_or |= inverse_contains; + if (in_or()) { + matched_in_or() |= contains; + inverse_matched_in_or() |= inverse_contains; } } break; @@ -559,14 +568,14 @@ static bool has_overlap(Vector const& lhs, Vector(pair.value)); - if (!in_or && (current_lhs_inversion_state() ^ contains)) + if (!in_or() && (current_lhs_inversion_state() ^ contains)) return true; auto inverse_contains = lhs_negated_unicode_scripts.contains(static_cast(pair.value)); - if (!in_or && !(current_lhs_inversion_state() ^ inverse_contains)) + if (!in_or() && !(current_lhs_inversion_state() ^ inverse_contains)) return true; - if (in_or) { - matched_in_or |= contains; - inverse_matched_in_or |= inverse_contains; + if (in_or()) { + matched_in_or() |= contains; + inverse_matched_in_or() |= inverse_contains; } } break; @@ -575,33 +584,34 @@ static bool has_overlap(Vector const& lhs, Vector(pair.value)); - if (!in_or && (current_lhs_inversion_state() ^ contains)) + if (!in_or() && (current_lhs_inversion_state() ^ contains)) return true; auto inverse_contains = lhs_negated_unicode_script_extensions.contains(static_cast(pair.value)); - if (!in_or && !(current_lhs_inversion_state() ^ inverse_contains)) + if (!in_or() && !(current_lhs_inversion_state() ^ inverse_contains)) return true; - if (in_or) { - matched_in_or |= contains; - inverse_matched_in_or |= inverse_contains; + if (in_or()) { + matched_in_or() |= contains; + inverse_matched_in_or() |= inverse_contains; } } break; case CharacterCompareType::Or: - in_or = true; + disjunction_stack.empend(true); break; - case CharacterCompareType::EndAndOr: + case CharacterCompareType::EndAndOr: { // FIXME: Handle And when we support it below. - VERIFY(in_or); - in_or = false; + VERIFY(in_or()); + auto state = disjunction_stack.take_last(); if (current_lhs_inversion_state()) { - if (!inverse_matched_in_or) + if (!state.inverse_matched_in_or) return true; } else { - if (matched_in_or) + if (state.matched_in_or) return true; } break; + } case CharacterCompareType::And: // FIXME: These are too difficult to handle, so bail out. return true; diff --git a/Tests/LibRegex/TestRegex.cpp b/Tests/LibRegex/TestRegex.cpp index 45f1cc86301..a9e0aa52614 100644 --- a/Tests/LibRegex/TestRegex.cpp +++ b/Tests/LibRegex/TestRegex.cpp @@ -738,6 +738,9 @@ TEST_CASE(ECMA262_match) { "((?a)|(?b))"sv, "aa"sv, false }, // Insensitive charclasses should accept upper/lowercase in pattern (lookup table should still be ordered if insensitive lookup is used), ladybird#5399. { "[aBc]"sv, "b"sv, true, ECMAScriptFlags::Insensitive }, + // Optimizer bug: nested 'or' compare ops caused a crash, ladybird#6647. + { "([[[]]])*0"sv, ""sv, false, ECMAScriptFlags::UnicodeSets }, + { "(([[[]]]{2,})\\s)*"sv, ""sv, true, (ECMAScriptFlags::UnicodeSets | ECMAScriptFlags::Global).value() }, }; for (auto& test : tests) {