mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-12-05 01:10:24 +00:00
AK: Store hash with HashTable entry to avoid expensive equality checks
When T in HashTable<T> has a potentially slow equality check, it can be very profitable to check for a matching hash before full equality. This patch adds may_have_slow_equality_check() to AK::Traits and defaults it to true. For trivial types (pointers, integers, etc) we default it to false. This means we skip the hash check when the equality check would be a single-CPU-word compare anyway. This synergizes really well with things like HashMap<String, V> where collisions previously meant we may have to churn through multiple O(n) equality checks.
This commit is contained in:
committed by
Jelle Raaijmakers
parent
c077ba9caf
commit
59a28febc9
Notes:
github-actions[bot]
2025-09-18 20:40:12 +00:00
Author: https://github.com/awesomekling Commit: https://github.com/LadybirdBrowser/ladybird/commit/59a28febc97 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/6234 Reviewed-by: https://github.com/gmta ✅
@@ -200,6 +200,7 @@ private:
|
||||
template<>
|
||||
struct Traits<FlyString> : public DefaultTraits<FlyString> {
|
||||
static unsigned hash(FlyString const&);
|
||||
static constexpr bool may_have_slow_equality_check() { return false; }
|
||||
};
|
||||
|
||||
template<>
|
||||
|
||||
@@ -26,6 +26,7 @@ private:
|
||||
};
|
||||
|
||||
struct EntryTraits {
|
||||
static constexpr bool may_have_slow_equality_check() { return KeyTraits::may_have_slow_equality_check(); }
|
||||
static unsigned hash(Entry const& entry) { return KeyTraits::hash(entry.key); }
|
||||
static bool equals(Entry const& a, Entry const& b) { return KeyTraits::equals(a.key, b.key); }
|
||||
};
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2018-2020, Andreas Kling <andreas@ladybird.org>
|
||||
* Copyright (c) 2018-2025, Andreas Kling <andreas@ladybird.org>
|
||||
* Copyright (c) 2023, Jelle Raaijmakers <jelle@ladybird.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
@@ -123,8 +123,29 @@ class HashTable {
|
||||
static constexpr size_t grow_at_load_factor_percent = 80;
|
||||
static constexpr size_t grow_capacity_increase_percent = 60;
|
||||
|
||||
struct StoredHash {
|
||||
void set([[maybe_unused]] u32 h)
|
||||
{
|
||||
if constexpr (TraitsForT::may_have_slow_equality_check()) {
|
||||
hash = h;
|
||||
}
|
||||
}
|
||||
bool check(u32 h)
|
||||
{
|
||||
if constexpr (TraitsForT::may_have_slow_equality_check()) {
|
||||
// If equality checks may be slow, we always store the hash and compare it first.
|
||||
return hash == h;
|
||||
} else {
|
||||
// If equality checks are fast, we don't store the hash and always return true.
|
||||
return true;
|
||||
}
|
||||
}
|
||||
u32 hash;
|
||||
};
|
||||
|
||||
struct Bucket {
|
||||
BucketState state;
|
||||
StoredHash hash;
|
||||
alignas(T) u8 storage[sizeof(T)];
|
||||
T* slot() { return reinterpret_cast<T*>(storage); }
|
||||
T const* slot() const { return reinterpret_cast<T const*>(storage); }
|
||||
@@ -134,6 +155,7 @@ class HashTable {
|
||||
OrderedBucket* previous;
|
||||
OrderedBucket* next;
|
||||
BucketState state;
|
||||
StoredHash hash;
|
||||
alignas(T) u8 storage[sizeof(T)];
|
||||
T* slot() { return reinterpret_cast<T*>(storage); }
|
||||
T const* slot() const { return reinterpret_cast<T const*>(storage); }
|
||||
@@ -583,15 +605,15 @@ private:
|
||||
if (is_empty())
|
||||
return nullptr;
|
||||
|
||||
hash %= m_capacity;
|
||||
size_t bucket_index = hash % m_capacity;
|
||||
for (;;) {
|
||||
auto* bucket = &m_buckets[hash];
|
||||
auto* bucket = &m_buckets[bucket_index];
|
||||
if (bucket->state == BucketState::Free)
|
||||
return nullptr;
|
||||
if (predicate(*bucket->slot()))
|
||||
if (bucket->hash.check(hash) && predicate(*bucket->slot()))
|
||||
return bucket;
|
||||
if (++hash == m_capacity) [[unlikely]]
|
||||
hash = 0;
|
||||
if (++bucket_index == m_capacity) [[unlikely]]
|
||||
bucket_index = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -663,7 +685,8 @@ private:
|
||||
}
|
||||
};
|
||||
|
||||
auto bucket_index = TraitsForT::hash(value) % m_capacity;
|
||||
u32 const hash = TraitsForT::hash(value);
|
||||
auto bucket_index = hash % m_capacity;
|
||||
size_t probe_length = 0;
|
||||
for (;;) {
|
||||
auto* bucket = &m_buckets[bucket_index];
|
||||
@@ -672,13 +695,15 @@ private:
|
||||
if (bucket->state == BucketState::Free) {
|
||||
new (bucket->slot()) T(forward<U>(value));
|
||||
bucket->state = bucket_state_for_probe_length(probe_length);
|
||||
bucket->hash.set(hash);
|
||||
update_collection_for_new_bucket(*bucket);
|
||||
++m_size;
|
||||
return HashSetResult::InsertedNewEntry;
|
||||
}
|
||||
|
||||
// The bucket is already used, does it have an identical value?
|
||||
if (TraitsForT::equals(*bucket->slot(), static_cast<T const&>(value))) {
|
||||
if (bucket->hash.check(hash)
|
||||
&& TraitsForT::equals(*bucket->slot(), static_cast<T const&>(value))) {
|
||||
if (existing_entry_behavior == HashSetExistingEntryBehavior::Replace) {
|
||||
(*bucket->slot()) = forward<U>(value);
|
||||
return HashSetResult::ReplacedExistingEntry;
|
||||
@@ -697,6 +722,7 @@ private:
|
||||
// Write new bucket
|
||||
new (bucket->slot()) T(forward<U>(value));
|
||||
bucket->state = bucket_state_for_probe_length(probe_length);
|
||||
bucket->hash.set(hash);
|
||||
probe_length = target_probe_length;
|
||||
if constexpr (IsOrdered)
|
||||
bucket->next = nullptr;
|
||||
|
||||
@@ -169,6 +169,7 @@ struct Traits<NonnullOwnPtr<T>> : public DefaultTraits<NonnullOwnPtr<T>> {
|
||||
using ConstPeekType = T const*;
|
||||
static unsigned hash(NonnullOwnPtr<T> const& p) { return ptr_hash(p.ptr()); }
|
||||
static bool equals(NonnullOwnPtr<T> const& a, NonnullOwnPtr<T> const& b) { return a.ptr() == b.ptr(); }
|
||||
static constexpr bool may_have_slow_equality_check() { return false; }
|
||||
};
|
||||
|
||||
template<typename T, typename U>
|
||||
|
||||
@@ -50,6 +50,7 @@ private:
|
||||
template<typename T>
|
||||
struct Traits<NonnullRawPtr<T>> : public DefaultTraits<NonnullRawPtr<T>> {
|
||||
static unsigned hash(NonnullRawPtr<T> const& handle) { return Traits<T>::hash(handle); }
|
||||
static constexpr bool may_have_slow_equality_check() { return false; }
|
||||
};
|
||||
|
||||
namespace Detail {
|
||||
|
||||
@@ -276,6 +276,7 @@ struct Traits<NonnullRefPtr<T>> : public DefaultTraits<NonnullRefPtr<T>> {
|
||||
using ConstPeekType = T const*;
|
||||
static unsigned hash(NonnullRefPtr<T> const& p) { return ptr_hash(p.ptr()); }
|
||||
static bool equals(NonnullRefPtr<T> const& a, NonnullRefPtr<T> const& b) { return a.ptr() == b.ptr(); }
|
||||
static constexpr bool may_have_slow_equality_check() { return false; }
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -198,6 +198,7 @@ struct Traits<OwnPtr<T>> : public DefaultTraits<OwnPtr<T>> {
|
||||
using ConstPeekType = T const*;
|
||||
static unsigned hash(OwnPtr<T> const& p) { return ptr_hash(p.ptr()); }
|
||||
static bool equals(OwnPtr<T> const& a, OwnPtr<T> const& b) { return a.ptr() == b.ptr(); }
|
||||
static constexpr bool may_have_slow_equality_check() { return false; }
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
|
||||
@@ -298,6 +298,7 @@ struct Traits<RefPtr<T>> : public DefaultTraits<RefPtr<T>> {
|
||||
using ConstPeekType = T const*;
|
||||
static unsigned hash(RefPtr<T> const& p) { return ptr_hash(p.ptr()); }
|
||||
static bool equals(RefPtr<T> const& a, RefPtr<T> const& b) { return a.ptr() == b.ptr(); }
|
||||
static constexpr bool may_have_slow_equality_check() { return false; }
|
||||
};
|
||||
|
||||
template<typename T, typename U>
|
||||
|
||||
12
AK/Traits.h
12
AK/Traits.h
@@ -23,6 +23,9 @@ struct DefaultTraits {
|
||||
static constexpr bool equals(T const& a, T const& b) { return a == b; }
|
||||
template<Concepts::HashCompatible<T> U>
|
||||
static bool equals(T const& self, U const& other) { return self == other; }
|
||||
// NOTE: Override this to say false if your type has a fast equality check.
|
||||
// If equality checks are fast, we won't store or compare hashes in HashTable/HashMap.
|
||||
static constexpr bool may_have_slow_equality_check() { return true; }
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
@@ -38,6 +41,8 @@ template<Integral T>
|
||||
struct Traits<T> : public DefaultTraits<T> {
|
||||
static constexpr bool is_trivial() { return true; }
|
||||
static constexpr bool is_trivially_serializable() { return true; }
|
||||
// NOTE: Trivial types always have fast equality checks.
|
||||
static constexpr bool may_have_slow_equality_check() { return false; }
|
||||
static unsigned hash(T value)
|
||||
{
|
||||
if constexpr (sizeof(T) < 8)
|
||||
@@ -51,6 +56,7 @@ template<FloatingPoint T>
|
||||
struct Traits<T> : public DefaultTraits<T> {
|
||||
static constexpr bool is_trivial() { return true; }
|
||||
static constexpr bool is_trivially_serializable() { return true; }
|
||||
static constexpr bool may_have_slow_equality_check() { return false; }
|
||||
static unsigned hash(T value)
|
||||
{
|
||||
if constexpr (sizeof(T) < 8)
|
||||
@@ -64,12 +70,16 @@ template<typename T>
|
||||
requires(IsPointer<T> && !Detail::IsPointerOfType<char, T>) struct Traits<T> : public DefaultTraits<T> {
|
||||
static unsigned hash(T p) { return ptr_hash(bit_cast<FlatPtr>(p)); }
|
||||
static constexpr bool is_trivial() { return true; }
|
||||
// NOTE: Trivial types always have fast equality checks.
|
||||
static constexpr bool may_have_slow_equality_check() { return false; }
|
||||
};
|
||||
|
||||
template<Enum T>
|
||||
struct Traits<T> : public DefaultTraits<T> {
|
||||
static unsigned hash(T value) { return Traits<UnderlyingType<T>>::hash(to_underlying(value)); }
|
||||
static constexpr bool is_trivial() { return Traits<UnderlyingType<T>>::is_trivial(); }
|
||||
// NOTE: Trivial types always have fast equality checks.
|
||||
static constexpr bool may_have_slow_equality_check() { return !is_trivial(); }
|
||||
static constexpr bool is_trivially_serializable() { return Traits<UnderlyingType<T>>::is_trivially_serializable(); }
|
||||
};
|
||||
|
||||
@@ -78,6 +88,8 @@ requires(Detail::IsPointerOfType<char, T>) struct Traits<T> : public DefaultTrai
|
||||
static unsigned hash(T const value) { return string_hash(value, strlen(value)); }
|
||||
// Two C strings are equal when strcmp() finds no difference, i.e. returns 0.
// Returning strcmp()'s result directly would be inverted: it is non-zero
// (true) for strings that differ and zero (false) for identical strings.
static constexpr bool equals(T const a, T const b) { return strcmp(a, b) == 0; }
|
||||
static constexpr bool is_trivial() { return true; }
|
||||
// NOTE: Trivial types always have fast equality checks.
|
||||
static constexpr bool may_have_slow_equality_check() { return false; }
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -251,6 +251,7 @@ private:
|
||||
template<>
|
||||
struct Traits<Utf16FlyString> : public DefaultTraits<Utf16FlyString> {
|
||||
static unsigned hash(Utf16FlyString const& string) { return string.hash(); }
|
||||
static constexpr bool may_have_slow_equality_check() { return false; }
|
||||
};
|
||||
|
||||
template<>
|
||||
|
||||
@@ -212,6 +212,7 @@ struct Traits<WeakPtr<T>> : public DefaultTraits<WeakPtr<T>> {
|
||||
using ConstPeekType = T const*;
|
||||
static unsigned hash(WeakPtr<T> const& p) { return ptr_hash(p.ptr()); }
|
||||
static bool equals(WeakPtr<T> const& a, WeakPtr<T> const& b) { return a.ptr() == b.ptr(); }
|
||||
static constexpr bool may_have_slow_equality_check() { return false; }
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -235,6 +235,7 @@ struct Traits<GC::Ptr<T>> : public DefaultTraits<GC::Ptr<T>> {
|
||||
{
|
||||
return Traits<T*>::hash(value.ptr());
|
||||
}
|
||||
static constexpr bool may_have_slow_equality_check() { return false; }
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
|
||||
@@ -269,6 +269,7 @@ static u8 ascii_alphabetic_to_index(u8 c)
|
||||
class Node final : public RefCounted<Node> {
|
||||
private:
|
||||
struct NonnullRefPtrNodeTraits {
|
||||
static constexpr bool may_have_slow_equality_check() { return true; }
|
||||
static unsigned hash(NonnullRefPtr<Node> const& node)
|
||||
{
|
||||
u32 hash = 0;
|
||||
|
||||
Reference in New Issue
Block a user