AK: Store hash with HashTable entry to avoid expensive equality checks

When T in HashTable<T> has a potentially slow equality check, it can be
very profitable to check for a matching hash before full equality.

This patch adds may_have_slow_equality_check() to AK::Traits and
defaults it to true. For trivial types (pointers, integers, etc.) we
default it to false. This means we skip the hash check when the equality
check would be a single-CPU-word compare anyway.

This synergizes really well with things like HashMap<String, V> where
collisions previously meant we may have to churn through multiple O(n)
equality checks.
This commit is contained in:
Andreas Kling
2025-09-18 18:28:54 +02:00
committed by Jelle Raaijmakers
parent c077ba9caf
commit 59a28febc9
Notes: github-actions[bot] 2025-09-18 20:40:12 +00:00
13 changed files with 57 additions and 8 deletions

View File

@@ -200,6 +200,7 @@ private:
template<>
struct Traits<FlyString> : public DefaultTraits<FlyString> {
    // Reports FlyString as having a fast equality check, so hash containers
    // using these traits can skip the stored-hash pre-check.
    static constexpr bool may_have_slow_equality_check() { return false; }

    // Hash of the given FlyString; only declared here, the definition lives elsewhere.
    static unsigned hash(FlyString const&);
};
template<>

View File

@@ -26,6 +26,7 @@ private:
};
// Adapts KeyTraits so that a whole Entry is hashed and compared by its key alone.
struct EntryTraits {
    // Two entries are equal exactly when KeyTraits considers their keys equal.
    static bool equals(Entry const& a, Entry const& b)
    {
        auto const& first_key = a.key;
        auto const& second_key = b.key;
        return KeyTraits::equals(first_key, second_key);
    }

    // An entry hashes to whatever KeyTraits produces for its key.
    static unsigned hash(Entry const& entry)
    {
        auto const& key = entry.key;
        return KeyTraits::hash(key);
    }

    // The cost of comparing entries is whatever KeyTraits reports for keys.
    static constexpr bool may_have_slow_equality_check()
    {
        return KeyTraits::may_have_slow_equality_check();
    }
};

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018-2020, Andreas Kling <andreas@ladybird.org>
* Copyright (c) 2018-2025, Andreas Kling <andreas@ladybird.org>
* Copyright (c) 2023, Jelle Raaijmakers <jelle@ladybird.org>
*
* SPDX-License-Identifier: BSD-2-Clause
@@ -123,8 +123,29 @@ class HashTable {
static constexpr size_t grow_at_load_factor_percent = 80;
static constexpr size_t grow_capacity_increase_percent = 60;
// Per-bucket copy of an element's hash. It lets lookups reject a bucket by
// comparing two integers before running a potentially slow equality check.
// NOTE: The `hash` member is always present. When TraitsForT reports a fast
//       equality check it is simply never written or read.
struct StoredHash {
    // Remembers `h` for later check() calls. Does nothing when equality checks are fast.
    void set([[maybe_unused]] u32 h)
    {
        if constexpr (TraitsForT::may_have_slow_equality_check()) {
            hash = h;
        }
    }

    // Returns false only when the stored hash proves the bucket cannot match `h`.
    // A true result still has to be confirmed by the caller's equality check.
    // Marked const because it never modifies the bucket.
    bool check([[maybe_unused]] u32 h) const
    {
        if constexpr (TraitsForT::may_have_slow_equality_check()) {
            // If equality checks may be slow, we always store the hash and compare it first.
            return hash == h;
        } else {
            // If equality checks are fast, we don't store the hash and always return true.
            return true;
        }
    }

    u32 hash;
};
struct Bucket {
BucketState state;
StoredHash hash;
alignas(T) u8 storage[sizeof(T)];
T* slot() { return reinterpret_cast<T*>(storage); }
T const* slot() const { return reinterpret_cast<T const*>(storage); }
@@ -134,6 +155,7 @@ class HashTable {
OrderedBucket* previous;
OrderedBucket* next;
BucketState state;
StoredHash hash;
alignas(T) u8 storage[sizeof(T)];
T* slot() { return reinterpret_cast<T*>(storage); }
T const* slot() const { return reinterpret_cast<T const*>(storage); }
@@ -583,15 +605,15 @@ private:
if (is_empty())
return nullptr;
hash %= m_capacity;
size_t bucket_index = hash % m_capacity;
for (;;) {
auto* bucket = &m_buckets[hash];
auto* bucket = &m_buckets[bucket_index];
if (bucket->state == BucketState::Free)
return nullptr;
if (predicate(*bucket->slot()))
if (bucket->hash.check(hash) && predicate(*bucket->slot()))
return bucket;
if (++hash == m_capacity) [[unlikely]]
hash = 0;
if (++bucket_index == m_capacity) [[unlikely]]
bucket_index = 0;
}
}
@@ -663,7 +685,8 @@ private:
}
};
auto bucket_index = TraitsForT::hash(value) % m_capacity;
u32 const hash = TraitsForT::hash(value);
auto bucket_index = hash % m_capacity;
size_t probe_length = 0;
for (;;) {
auto* bucket = &m_buckets[bucket_index];
@@ -672,13 +695,15 @@ private:
if (bucket->state == BucketState::Free) {
new (bucket->slot()) T(forward<U>(value));
bucket->state = bucket_state_for_probe_length(probe_length);
bucket->hash.set(hash);
update_collection_for_new_bucket(*bucket);
++m_size;
return HashSetResult::InsertedNewEntry;
}
// The bucket is already used, does it have an identical value?
if (TraitsForT::equals(*bucket->slot(), static_cast<T const&>(value))) {
if (bucket->hash.check(hash)
&& TraitsForT::equals(*bucket->slot(), static_cast<T const&>(value))) {
if (existing_entry_behavior == HashSetExistingEntryBehavior::Replace) {
(*bucket->slot()) = forward<U>(value);
return HashSetResult::ReplacedExistingEntry;
@@ -697,6 +722,7 @@ private:
// Write new bucket
new (bucket->slot()) T(forward<U>(value));
bucket->state = bucket_state_for_probe_length(probe_length);
bucket->hash.set(hash);
probe_length = target_probe_length;
if constexpr (IsOrdered)
bucket->next = nullptr;

View File

@@ -169,6 +169,7 @@ struct Traits<NonnullOwnPtr<T>> : public DefaultTraits<NonnullOwnPtr<T>> {
using ConstPeekType = T const*;
static unsigned hash(NonnullOwnPtr<T> const& p) { return ptr_hash(p.ptr()); }
static bool equals(NonnullOwnPtr<T> const& a, NonnullOwnPtr<T> const& b) { return a.ptr() == b.ptr(); }
static constexpr bool may_have_slow_equality_check() { return false; }
};
template<typename T, typename U>

View File

@@ -50,6 +50,7 @@ private:
template<typename T>
struct Traits<NonnullRawPtr<T>> : public DefaultTraits<NonnullRawPtr<T>> {
    // Declared as cheap to compare, so hash containers using these traits
    // can skip the stored-hash pre-check.
    static constexpr bool may_have_slow_equality_check() { return false; }

    // Hashing is forwarded to Traits<T>, which receives the handle itself.
    static unsigned hash(NonnullRawPtr<T> const& handle)
    {
        return Traits<T>::hash(handle);
    }
};
namespace Detail {

View File

@@ -276,6 +276,7 @@ struct Traits<NonnullRefPtr<T>> : public DefaultTraits<NonnullRefPtr<T>> {
using ConstPeekType = T const*;
static unsigned hash(NonnullRefPtr<T> const& p) { return ptr_hash(p.ptr()); }
static bool equals(NonnullRefPtr<T> const& a, NonnullRefPtr<T> const& b) { return a.ptr() == b.ptr(); }
static constexpr bool may_have_slow_equality_check() { return false; }
};
}

View File

@@ -198,6 +198,7 @@ struct Traits<OwnPtr<T>> : public DefaultTraits<OwnPtr<T>> {
using ConstPeekType = T const*;
static unsigned hash(OwnPtr<T> const& p) { return ptr_hash(p.ptr()); }
static bool equals(OwnPtr<T> const& a, OwnPtr<T> const& b) { return a.ptr() == b.ptr(); }
static constexpr bool may_have_slow_equality_check() { return false; }
};
template<typename T>

View File

@@ -298,6 +298,7 @@ struct Traits<RefPtr<T>> : public DefaultTraits<RefPtr<T>> {
using ConstPeekType = T const*;
static unsigned hash(RefPtr<T> const& p) { return ptr_hash(p.ptr()); }
static bool equals(RefPtr<T> const& a, RefPtr<T> const& b) { return a.ptr() == b.ptr(); }
static constexpr bool may_have_slow_equality_check() { return false; }
};
template<typename T, typename U>

View File

@@ -23,6 +23,9 @@ struct DefaultTraits {
static constexpr bool equals(T const& a, T const& b) { return a == b; }
template<Concepts::HashCompatible<T> U>
static bool equals(T const& self, U const& other) { return self == other; }
// NOTE: Override this to say false if your type has a fast equality check.
// If equality checks are fast, HashTable/HashMap won't store hashes or
// compare them before running the equality check.
static constexpr bool may_have_slow_equality_check() { return true; }
};
template<typename T>
@@ -38,6 +41,8 @@ template<Integral T>
struct Traits<T> : public DefaultTraits<T> {
static constexpr bool is_trivial() { return true; }
static constexpr bool is_trivially_serializable() { return true; }
// NOTE: Trivial types always have fast equality checks.
static constexpr bool may_have_slow_equality_check() { return false; }
static unsigned hash(T value)
{
if constexpr (sizeof(T) < 8)
@@ -51,6 +56,7 @@ template<FloatingPoint T>
struct Traits<T> : public DefaultTraits<T> {
static constexpr bool is_trivial() { return true; }
static constexpr bool is_trivially_serializable() { return true; }
static constexpr bool may_have_slow_equality_check() { return false; }
static unsigned hash(T value)
{
if constexpr (sizeof(T) < 8)
@@ -64,12 +70,16 @@ template<typename T>
requires(IsPointer<T> && !Detail::IsPointerOfType<char, T>)
struct Traits<T> : public DefaultTraits<T> {
    static constexpr bool is_trivial() { return true; }

    // NOTE: Trivial types always have fast equality checks.
    static constexpr bool may_have_slow_equality_check() { return false; }

    // Hashes the pointer's bit pattern, reinterpreted as a FlatPtr.
    static unsigned hash(T p)
    {
        return ptr_hash(bit_cast<FlatPtr>(p));
    }
};
template<Enum T>
struct Traits<T> : public DefaultTraits<T> {
    // Triviality and serializability mirror the traits of the enum's underlying type.
    static constexpr bool is_trivial() { return Traits<UnderlyingType<T>>::is_trivial(); }
    static constexpr bool is_trivially_serializable() { return Traits<UnderlyingType<T>>::is_trivially_serializable(); }

    // NOTE: Trivial types always have fast equality checks, so only an enum
    //       that is not trivial is reported as potentially slow to compare.
    static constexpr bool may_have_slow_equality_check()
    {
        if (is_trivial())
            return false;
        return true;
    }

    // Hashes the enum by converting it to its underlying value first.
    static unsigned hash(T value)
    {
        return Traits<UnderlyingType<T>>::hash(to_underlying(value));
    }
};
@@ -78,6 +88,8 @@ requires(Detail::IsPointerOfType<char, T>) struct Traits<T> : public DefaultTrai
static unsigned hash(T const value) { return string_hash(value, strlen(value)); }
// Two C strings are equal when strcmp() reports no difference, i.e. returns 0.
// Converting strcmp()'s result straight to bool would instead report
// *different* strings as equal and identical strings as unequal.
static constexpr bool equals(T const a, T const b) { return strcmp(a, b) == 0; }
static constexpr bool is_trivial() { return true; }
// NOTE(review): equals() for this specialization goes through strcmp(), which compares
//               string contents rather than a single word. Confirm that opting out of
//               the stored-hash pre-check (returning false) is intended here.
static constexpr bool may_have_slow_equality_check() { return false; }
};
}

View File

@@ -251,6 +251,7 @@ private:
template<>
struct Traits<Utf16FlyString> : public DefaultTraits<Utf16FlyString> {
    // Declared as cheap to compare, so hash containers using these traits
    // can skip the stored-hash pre-check.
    static constexpr bool may_have_slow_equality_check() { return false; }

    // Uses the hash that the string object itself reports.
    static unsigned hash(Utf16FlyString const& string)
    {
        return string.hash();
    }
};
template<>

View File

@@ -212,6 +212,7 @@ struct Traits<WeakPtr<T>> : public DefaultTraits<WeakPtr<T>> {
using ConstPeekType = T const*;
static unsigned hash(WeakPtr<T> const& p) { return ptr_hash(p.ptr()); }
static bool equals(WeakPtr<T> const& a, WeakPtr<T> const& b) { return a.ptr() == b.ptr(); }
static constexpr bool may_have_slow_equality_check() { return false; }
};
}

View File

@@ -235,6 +235,7 @@ struct Traits<GC::Ptr<T>> : public DefaultTraits<GC::Ptr<T>> {
{
return Traits<T*>::hash(value.ptr());
}
static constexpr bool may_have_slow_equality_check() { return false; }
};
template<typename T>

View File

@@ -269,6 +269,7 @@ static u8 ascii_alphabetic_to_index(u8 c)
class Node final : public RefCounted<Node> {
private:
struct NonnullRefPtrNodeTraits {
static constexpr bool may_have_slow_equality_check() { return true; }
static unsigned hash(NonnullRefPtr<Node> const& node)
{
u32 hash = 0;