Compare commits

...

6 Commits

Author SHA1 Message Date
Erik Johnston
e982141c0c Invalidate in the correct place 2024-09-10 10:11:52 +01:00
Erik Johnston
6f894411df Share _get_member_counts_txn 2024-09-10 10:07:48 +01:00
Erik Johnston
de61dbf044 Newsfile 2024-09-09 11:34:28 +01:00
Erik Johnston
277f843fb9 Don't return counts/heroes for non-joined rooms 2024-09-09 11:31:16 +01:00
Erik Johnston
7ac622f5a8 Don't get the room summary unless needed
For named rooms we only fetched the room summary for member counts, but
its cheaper to just get only them.
2024-09-09 11:31:16 +01:00
Erik Johnston
20f272e9ec Add index to speed up getting member counts in rooms
The index means we don't have to scan all current state events for the
room, which can take a while for really big rooms.
2024-09-09 10:41:39 +01:00
7 changed files with 121 additions and 76 deletions

1
changelog.d/17683.misc Normal file
View File

@@ -0,0 +1 @@
Speed up sliding sync by reducing amount of data pulled out of the database for large rooms.

View File

@@ -800,32 +800,10 @@ class SlidingSyncHandler:
):
avatar_changed = True
# We only need the room summary for calculating heroes, however if we do
# fetch it then we can use it to calculate `joined_count` and
# `invited_count`.
room_membership_summary: Optional[Mapping[str, MemberSummary]] = None
empty_membership_summary = MemberSummary([], 0)
# We need the room summary for:
# - Always for initial syncs (or the first time we send down the room)
# - When the room has no name, we need `heroes`
# - When the membership has changed so we need to give updated `heroes` and
# `joined_count`/`invited_count`.
#
# Ideally, instead of just looking at `name_changed`, we'd check if the room
# name is not set but this is a good enough approximation that saves us from
# having to pull out the full event. This just means, we're generating the
# summary whenever the room name changes instead of only when it changes to
# `None`.
if initial or name_changed or membership_changed:
# We can't trace the function directly because it's cached and the `@cached`
# decorator doesn't mix with `@trace` yet.
with start_active_span("get_room_summary"):
if room_membership_for_user_at_to_token.membership in (
Membership.LEAVE,
Membership.BAN,
):
# TODO: Figure out how to get the membership summary for left/banned rooms
room_membership_summary = {}
else:
room_membership_summary = await self.store.get_room_summary(room_id)
# TODO: Reverse/rewind back to the `to_token`
# `heroes` are required if the room name is not set.
#
@@ -844,11 +822,45 @@ class SlidingSyncHandler:
# get them on initial syncs (or the first time we send down the room) or if the
# membership has changed which may change the heroes.
if name_event_id is None and (initial or (not initial and membership_changed)):
assert room_membership_summary is not None
# We need the room summary to extract the heroes from
if room_membership_for_user_at_to_token.membership != Membership.JOIN:
# TODO: Figure out how to get the membership summary for left/banned rooms
# For invite/knock rooms we don't include the information.
room_membership_summary = {}
else:
room_membership_summary = await self.store.get_room_summary(room_id)
# TODO: Reverse/rewind back to the `to_token`
hero_user_ids = extract_heroes_from_room_summary(
room_membership_summary, me=user.to_string()
)
# Fetch the membership counts for rooms we're joined to.
#
# Similarly to other metadata, we only need to calculate the member
# counts if this is an initial sync or the memberships have changed.
joined_count: Optional[int] = None
invited_count: Optional[int] = None
if (
initial or membership_changed
) and room_membership_for_user_at_to_token.membership == Membership.JOIN:
# If we have the room summary (because we calculated heroes above)
# then we can simply pull the counts from there.
if room_membership_summary is not None:
empty_membership_summary = MemberSummary([], 0)
joined_count = room_membership_summary.get(
Membership.JOIN, empty_membership_summary
).count
invited_count = room_membership_summary.get(
Membership.INVITE, empty_membership_summary
).count
else:
member_counts = await self.store.get_member_counts(room_id)
joined_count = member_counts.get(Membership.JOIN, 0)
invited_count = member_counts.get(Membership.INVITE, 0)
# Fetch the `required_state` for the room
#
# No `required_state` for invite/knock rooms (just `stripped_state`)
@@ -1115,20 +1127,6 @@ class SlidingSyncHandler:
set_tag(SynapseTags.RESULT_PREFIX + "initial", initial)
joined_count: Optional[int] = None
if initial or membership_changed:
assert room_membership_summary is not None
joined_count = room_membership_summary.get(
Membership.JOIN, empty_membership_summary
).count
invited_count: Optional[int] = None
if initial or membership_changed:
assert room_membership_summary is not None
invited_count = room_membership_summary.get(
Membership.INVITE, empty_membership_summary
).count
return SlidingSyncResult.RoomResult(
name=room_name,
avatar=room_avatar,

View File

@@ -112,6 +112,7 @@ class SQLBaseStore(metaclass=ABCMeta):
self._attempt_to_invalidate_cache(
"get_number_joined_users_in_room", (room_id,)
)
self._attempt_to_invalidate_cache("get_member_counts", (room_id,))
self._attempt_to_invalidate_cache("get_local_users_in_room", (room_id,))
# There's no easy way of invalidating this cache for just the users
@@ -153,6 +154,7 @@ class SQLBaseStore(metaclass=ABCMeta):
self._attempt_to_invalidate_cache("get_current_hosts_in_room", (room_id,))
self._attempt_to_invalidate_cache("get_users_in_room_with_profiles", (room_id,))
self._attempt_to_invalidate_cache("get_number_joined_users_in_room", (room_id,))
self._attempt_to_invalidate_cache("get_member_counts", (room_id,))
self._attempt_to_invalidate_cache("get_local_users_in_room", (room_id,))
self._attempt_to_invalidate_cache("does_pair_of_users_share_a_room", None)
self._attempt_to_invalidate_cache("get_user_in_room_with_profile", None)

View File

@@ -312,18 +312,10 @@ class RoomMemberWorkerStore(EventsWorkerStore, CacheInvalidationWorkerStore):
# We do this all in one transaction to keep the cache small.
# FIXME: get rid of this when we have room_stats
# Note, rejected events will have a null membership field, so
# we we manually filter them out.
sql = """
SELECT count(*), membership FROM current_state_events
WHERE type = 'm.room.member' AND room_id = ?
AND membership IS NOT NULL
GROUP BY membership
"""
counts = self._get_member_counts_txn(txn, room_id)
txn.execute(sql, (room_id,))
res: Dict[str, MemberSummary] = {}
for count, membership in txn:
for membership, count in counts.items():
res.setdefault(membership, MemberSummary([], count))
# Order by membership (joins -> invites -> leave (former insiders) ->
@@ -369,6 +361,31 @@ class RoomMemberWorkerStore(EventsWorkerStore, CacheInvalidationWorkerStore):
"get_room_summary", _get_room_summary_txn
)
@cached()
async def get_member_counts(self, room_id: str) -> Mapping[str, int]:
"""Get a mapping of number of users by membership"""
return await self.db_pool.runInteraction(
"get_member_counts", self._get_member_counts_txn, room_id
)
def _get_member_counts_txn(
self, txn: LoggingTransaction, room_id: str
) -> Dict[str, int]:
"""Get a mapping of number of users by membership"""
# Note, rejected events will have a null membership field, so
# we we manually filter them out.
sql = """
SELECT count(*), membership FROM current_state_events
WHERE type = 'm.room.member' AND room_id = ?
AND membership IS NOT NULL
GROUP BY membership
"""
txn.execute(sql, (room_id,))
return {membership: count for count, membership in txn}
@cached()
async def get_number_joined_users_in_room(self, room_id: str) -> int:
return await self.db_pool.simple_select_one_onecol(

View File

@@ -736,6 +736,7 @@ class MainStateBackgroundUpdateStore(RoomMemberWorkerStore):
CURRENT_STATE_INDEX_UPDATE_NAME = "current_state_members_idx"
EVENT_STATE_GROUP_INDEX_UPDATE_NAME = "event_to_state_groups_sg_index"
DELETE_CURRENT_STATE_UPDATE_NAME = "delete_old_current_state_events"
MEMBERS_CURRENT_STATE_UPDATE_NAME = "current_state_events_members_room_index"
def __init__(
self,
@@ -764,6 +765,13 @@ class MainStateBackgroundUpdateStore(RoomMemberWorkerStore):
self.DELETE_CURRENT_STATE_UPDATE_NAME,
self._background_remove_left_rooms,
)
self.db_pool.updates.register_background_index_update(
self.MEMBERS_CURRENT_STATE_UPDATE_NAME,
index_name="current_state_events_members_room_index",
table="current_state_events",
columns=["room_id", "membership"],
where_clause="type='m.room.member'",
)
async def _background_remove_left_rooms(
self, progress: JsonDict, batch_size: int

View File

@@ -0,0 +1,19 @@
--
-- This file is licensed under the Affero General Public License (AGPL) version 3.
--
-- Copyright (C) 2024 New Vector, Ltd
--
-- This program is free software: you can redistribute it and/or modify
-- it under the terms of the GNU Affero General Public License as
-- published by the Free Software Foundation, either version 3 of the
-- License, or (at your option) any later version.
--
-- See the GNU Affero General Public License for more details:
-- <https://www.gnu.org/licenses/agpl-3.0.html>.
-- Add a background updates to add a new index:
-- `current_state_events(room_id, membership) WHERE type = 'm.room.member'
-- This makes counting membership in rooms (for syncs) much faster
INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
(8701, 'current_state_events_members_room_index', '{}');

View File

@@ -371,14 +371,17 @@ class SlidingSyncRoomsMetaTestCase(SlidingSyncBase):
"mxc://UPDATED_DUMMY_MEDIA_ID",
response_body["rooms"][room_id1],
)
self.assertEqual(
response_body["rooms"][room_id1]["joined_count"],
1,
# We don't give extra room information to invitees
self.assertNotIn(
"joined_count",
response_body["rooms"][room_id1],
)
self.assertEqual(
response_body["rooms"][room_id1]["invited_count"],
1,
self.assertNotIn(
"invited_count",
response_body["rooms"][room_id1],
)
self.assertIsNone(
response_body["rooms"][room_id1].get("is_dm"),
)
@@ -450,15 +453,16 @@ class SlidingSyncRoomsMetaTestCase(SlidingSyncBase):
"mxc://DUMMY_MEDIA_ID",
response_body["rooms"][room_id1],
)
self.assertEqual(
response_body["rooms"][room_id1]["joined_count"],
# FIXME: The actual number should be "1" (user2) but we currently don't
# support this for rooms where the user has left/been banned.
0,
# FIXME: We possibly want to return joined and invited counts for rooms
# you're banned form
self.assertNotIn(
"joined_count",
response_body["rooms"][room_id1],
)
self.assertEqual(
response_body["rooms"][room_id1]["invited_count"],
0,
self.assertNotIn(
"invited_count",
response_body["rooms"][room_id1],
)
self.assertIsNone(
response_body["rooms"][room_id1].get("is_dm"),
@@ -692,19 +696,15 @@ class SlidingSyncRoomsMetaTestCase(SlidingSyncBase):
[],
)
self.assertEqual(
response_body["rooms"][room_id1]["joined_count"],
# FIXME: The actual number should be "1" (user2) but we currently don't
# support this for rooms where the user has left/been banned.
0,
# FIXME: We possibly want to return joined and invited counts for rooms
# you're banned form
self.assertNotIn(
"joined_count",
response_body["rooms"][room_id1],
)
self.assertEqual(
response_body["rooms"][room_id1]["invited_count"],
# We shouldn't see user5 since they were invited after user1 was banned.
#
# FIXME: The actual number should be "1" (user3) but we currently don't
# support this for rooms where the user has left/been banned.
0,
self.assertNotIn(
"invited_count",
response_body["rooms"][room_id1],
)
def test_rooms_meta_heroes_incremental_sync_no_change(self) -> None: