improve hero ordering

fix pep8
remove matthewdebug at erik's req
2025-12-05 01:10:13 +00:00 · 2018-09-11 15:30:32 +01:00 · 2018-09-11 12:17:55 +01:00 · 2018-09-11 12:10:04 +01:00 · 2018-09-11 11:59:24 +01:00 · 2018-09-11 11:09:48 +01:00
5 changed files with 159 additions and 30 deletions
--- a/changelog.d/3827.misc
+++ b/changelog.d/3827.misc
@@ -0,0 +1 @@
+speed up lazy loading by 2-3x
--- a/changelog.d/3840.misc
+++ b/changelog.d/3840.misc
@@ -0,0 +1 @@
+Disable lazy loading for incremental syncs for now
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -24,6 +24,7 @@ from twisted.internet import defer

 from synapse.api.constants import EventTypes, Membership
 from synapse.push.clientformat import format_push_rules_for_user
+from synapse.storage.roommember import MemberSummary
 from synapse.types import RoomStreamToken
 from synapse.util.async_helpers import concurrently_execute
 from synapse.util.caches.expiringcache import ExpiringCache
@@ -525,6 +526,8 @@ class SyncHandler(object):
             A deferred dict describing the room summary
        """

+        # FIXME: we could/should get this from room_stats when matthew/stats lands
+
        # FIXME: this promulgates https://github.com/matrix-org/synapse/issues/3305
        last_events, _ = yield self.store.get_recent_event_ids_for_room(
            room_id, end_token=now_token.room_key, limit=1,
@@ -537,44 +540,54 @@ class SyncHandler(object):
        last_event = last_events[-1]
        state_ids = yield self.store.get_state_ids_for_event(
            last_event.event_id, [
-                (EventTypes.Member, None),
                (EventTypes.Name, ''),
                (EventTypes.CanonicalAlias, ''),
            ]
        )

-        member_ids = {
-            state_key: event_id
-            for (t, state_key), event_id in iteritems(state_ids)
-            if t == EventTypes.Member
-        }
+        # this is heavily cached, thus: fast.
+        details = yield self.store.get_room_summary(room_id)
+
        name_id = state_ids.get((EventTypes.Name, ''))
        canonical_alias_id = state_ids.get((EventTypes.CanonicalAlias, ''))

        summary = {}
-
-        # FIXME: it feels very heavy to load up every single membership event
-        # just to calculate the counts.
-        member_events = yield self.store.get_events(member_ids.values())
-
-        joined_user_ids = []
-        invited_user_ids = []
-
-        for ev in member_events.values():
-            if ev.content.get("membership") == Membership.JOIN:
-                joined_user_ids.append(ev.state_key)
-            elif ev.content.get("membership") == Membership.INVITE:
-                invited_user_ids.append(ev.state_key)
+        empty_ms = MemberSummary([], 0)

        # TODO: only send these when they change.
-        summary["m.joined_member_count"] = len(joined_user_ids)
-        summary["m.invited_member_count"] = len(invited_user_ids)
+        summary["m.joined_member_count"] = (
+            details.get(Membership.JOIN, empty_ms).count
+        )
+        summary["m.invited_member_count"] = (
+            details.get(Membership.INVITE, empty_ms).count
+        )

        if name_id or canonical_alias_id:
            defer.returnValue(summary)

-        # FIXME: order by stream ordering, not alphabetic
+        joined_user_ids = [
+            r[0] for r in details.get(Membership.JOIN, empty_ms).members
+        ]
+        invited_user_ids = [
+            r[0] for r in details.get(Membership.INVITE, empty_ms).members
+        ]
+        gone_user_ids = (
+            [r[0] for r in details.get(Membership.LEAVE, empty_ms).members] +
+            [r[0] for r in details.get(Membership.BAN, empty_ms).members]
+        )

+        # FIXME: only build up a member_ids list for our heroes
+        member_ids = {}
+        for membership in (
+            Membership.JOIN,
+            Membership.INVITE,
+            Membership.LEAVE,
+            Membership.BAN
+        ):
+            for user_id, event_id in details.get(membership, empty_ms).members:
+                member_ids[user_id] = event_id
+
+        # FIXME: order by stream ordering rather than as returned by SQL
        me = sync_config.user.to_string()
        if (joined_user_ids or invited_user_ids):
            summary['m.heroes'] = sorted(
@@ -586,7 +599,11 @@ class SyncHandler(object):
            )[0:5]
        else:
            summary['m.heroes'] = sorted(
-                [user_id for user_id in member_ids.keys() if user_id != me]
+                [
+                    user_id
+                    for user_id in gone_user_ids
+                    if user_id != me
+                ]
            )[0:5]

        if not sync_config.filter_collection.lazy_load_members():
@@ -719,6 +736,26 @@ class SyncHandler(object):
                    lazy_load_members=lazy_load_members,
                )
            elif batch.limited:
+                state_at_timeline_start = yield self.store.get_state_ids_for_event(
+                    batch.events[0].event_id, types=types,
+                    filtered_types=filtered_types,
+                )
+
+                # for now, we disable LL for gappy syncs - see
+                # https://github.com/vector-im/riot-web/issues/7211#issuecomment-419976346
+                # N.B. this slows down incr syncs as we are now processing way
+                # more state in the server than if we were LLing.
+                #
+                # We still have to filter timeline_start to LL entries (above) in order
+                # for _calculate_state's LL logic to work, as we have to include LL
+                # members for timeline senders in case they weren't loaded in the initial
+                # sync.  We do this (counterintuitively) by filtering timeline_start
+                # members to just be ones which were timeline senders, which then ensures
+                # all of the rest get included in the state block (if we need to know
+                # about them).
+                types = None
+                filtered_types = None
+
                state_at_previous_sync = yield self.get_state_at(
                    room_id, stream_position=since_token, types=types,
                    filtered_types=filtered_types,
@@ -729,24 +766,21 @@ class SyncHandler(object):
                    filtered_types=filtered_types,
                )

-                state_at_timeline_start = yield self.store.get_state_ids_for_event(
-                    batch.events[0].event_id, types=types,
-                    filtered_types=filtered_types,
-                )
-
                state_ids = _calculate_state(
                    timeline_contains=timeline_state,
                    timeline_start=state_at_timeline_start,
                    previous=state_at_previous_sync,
                    current=current_state_ids,
+                    # we have to include LL members in case LL initial sync missed them
                    lazy_load_members=lazy_load_members,
                )
            else:
                state_ids = {}
                if lazy_load_members:
                    if types:
-                        # We're returning an incremental sync, with no "gap" since
-                        # the previous sync, so normally there would be no state to return
+                        # We're returning an incremental sync, with no
+                        # "gap" since the previous sync, so normally there would be
+                        # no state to return.
                        # But we're lazy-loading, so the client might need some more
                        # member events to understand the events in this timeline.
                        # So we fish out all the member events corresponding to the
@@ -1616,10 +1650,24 @@ class SyncHandler(object):
        )

        summary = {}
+
+        # we include a summary in room responses when we're lazy loading
+        # members (as the client otherwise doesn't have enough info to form
+        # the name itself).
        if (
            sync_config.filter_collection.lazy_load_members() and
            (
+                # we recalculate the summary:
+                #   if there are membership changes in the timeline, or
+                #   if membership has changed during a gappy sync, or
+                #   if this is an initial sync.
                any(ev.type == EventTypes.Member for ev in batch.events) or
+                (
+                    # XXX: this may include false positives in the form of LL
+                    # members which have snuck into state
+                    batch.limited and
+                    any(t == EventTypes.Member for (t, k) in state)
+                ) or
                since_token is None
            )
        ):
@@ -1649,6 +1697,16 @@ class SyncHandler(object):
                    unread_notifications["highlight_count"] = notifs["highlight_count"]

                sync_result_builder.joined.append(room_sync)
+
+            if batch.limited:
+                user_id = sync_result_builder.sync_config.user.to_string()
+                logger.info(
+                    "Incremental syncing room %s for user %s with %d state events" % (
+                        room_id,
+                        user_id,
+                        len(state),
+                    )
+                )
        elif room_builder.rtype == "archived":
            room_sync = ArchivedSyncResult(
                room_id=room_id,
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -929,6 +929,10 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
                    txn, self.get_users_in_room, (room_id,)
                )

+                self._invalidate_cache_and_stream(
+                    txn, self.get_room_summary, (room_id,)
+                )
+
                self._invalidate_cache_and_stream(
                    txn, self.get_current_state_ids, (room_id,)
                )
--- a/synapse/storage/roommember.py
+++ b/synapse/storage/roommember.py
@@ -51,6 +51,12 @@ ProfileInfo = namedtuple(
    "ProfileInfo", ("avatar_url", "display_name")
 )

+# "members" points to a truncated list of (user_id, event_id) tuples for users of
+# a given membership type, suitable for use in calculating heroes for a room.
+# "count" points to the total number of users of a given membership type.
+MemberSummary = namedtuple(
+    "MemberSummary", ("members", "count")
+)

 _MEMBERSHIP_PROFILE_UPDATE_NAME = "room_membership_profile_update"

@@ -82,6 +88,65 @@ class RoomMemberWorkerStore(EventsWorkerStore):
            return [to_ascii(r[0]) for r in txn]
        return self.runInteraction("get_users_in_room", f)

+    @cached(max_entries=100000)
+    def get_room_summary(self, room_id):
+        """ Get the details of a room roughly suitable for use by the room
+        summary extension to /sync. Useful when lazy loading room members.
+        Args:
+            room_id (str): The room ID to query
+        Returns:
+            Deferred[dict[str, MemberSummary]]:
+                dict of membership states, pointing to a MemberSummary named tuple.
+        """
+
+        def _get_room_summary_txn(txn):
+            # first get counts.
+            # We do this all in one transaction to keep the cache small.
+            # FIXME: get rid of this when we have room_stats
+            sql = """
+                SELECT count(*), m.membership FROM room_memberships as m
+                 INNER JOIN current_state_events as c
+                 ON m.event_id = c.event_id
+                 AND m.room_id = c.room_id
+                 AND m.user_id = c.state_key
+                 WHERE c.type = 'm.room.member' AND c.room_id = ?
+                 GROUP BY m.membership
+            """
+
+            txn.execute(sql, (room_id,))
+            res = {}
+            for count, membership in txn:
+                summary = res.setdefault(to_ascii(membership), MemberSummary([], count))
+
+            # we order by membership and then fairly arbitrarily by event_id so
+            # heroes are consistent
+            sql = """
+                SELECT m.user_id, m.membership, m.event_id
+                FROM room_memberships as m
+                 INNER JOIN current_state_events as c
+                 ON m.event_id = c.event_id
+                 AND m.room_id = c.room_id
+                 AND m.user_id = c.state_key
+                 WHERE c.type = 'm.room.member' AND c.room_id = ?
+                 ORDER BY
+                    CASE m.membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC,
+                    m.event_id ASC
+                 LIMIT ?
+            """
+
+            # 6 is 5 (number of heroes) plus 1, in case one of them is the calling user.
+            txn.execute(sql, (room_id, Membership.JOIN, Membership.INVITE, 6))
+            for user_id, membership, event_id in txn:
+                summary = res[to_ascii(membership)]
+                # we will always have a summary for this membership type at this
+                # point given the summary currently contains the counts.
+                members = summary.members
+                members.append((to_ascii(user_id), to_ascii(event_id)))
+
+            return res
+
+        return self.runInteraction("get_room_summary", _get_room_summary_txn)
+
    @cached()
    def get_invited_rooms_for_user(self, user_id):
        """ Get all the rooms the user is invited to
Author	SHA1	Message	Date
Matthew Hodgson	934535fea5	improve hero ordering	2018-09-11 15:30:32 +01:00
Matthew Hodgson	6167eb822c	fix pep8	2018-09-11 12:17:55 +01:00
Matthew Hodgson	1483fc6660	remove matthewdebug at erik's req	2018-09-11 12:10:04 +01:00
Matthew Hodgson	c81dba4a5d	improve comment	2018-09-11 11:59:24 +01:00
Matthew Hodgson	2201662060	correctly disable LL for incr sync	2018-09-11 11:09:48 +01:00
Matthew Hodgson	a55facc887	fix github-on-phone typo	2018-09-11 00:40:58 +01:00
Matthew Hodgson	92c918d253	We actually have to include state for LL members as the user might not have heard of them before, even though we’re not LLing members in incremental syncs	2018-09-11 00:37:57 +01:00
Matthew Hodgson	18dfb4a03f	changelog	2018-09-11 00:01:29 +01:00
Matthew Hodgson	8e8dfb4803	disable LL for incr syncs, and log incr sync stats See vector-im/riot-web#7211 for details	2018-09-10 23:59:16 +01:00
Matthew Hodgson	2527370fba	sql that actually compiles	2018-09-10 22:41:55 +01:00
Matthew Hodgson	0020712d66	incorporate review	2018-09-10 18:20:35 +01:00
Matthew Hodgson	47a3d7ca24	split out 3792 back whence it came	2018-09-10 17:18:11 +01:00
Matthew Hodgson	1e73d6c00d	improve comment	2018-09-10 16:55:45 +01:00
Matthew Hodgson	53a12bc1a5	isort	2018-09-08 00:19:02 +01:00
Matthew Hodgson	2a4629b35e	changelog	2018-09-08 00:18:09 +01:00
Matthew Hodgson	1b04557e9e	grab sufficient hero material	2018-09-08 00:08:35 +01:00
Matthew Hodgson	c39754aac2	switch to named tuple	2018-09-07 23:49:05 +01:00
Matthew Hodgson	967fdfef10	flake8 and make API more compact & docced	2018-09-07 22:59:38 +01:00
Matthew Hodgson	3bccef4885	fix uninitialised var	2018-09-07 17:23:53 +01:00
Matthew Hodgson	6d836eb529	yet another typo	2018-09-07 17:05:00 +01:00
Matthew Hodgson	2d92d5ab2d	yet another typo	2018-09-07 17:03:06 +01:00
Matthew Hodgson	6020c112d5	thinko	2018-09-07 16:47:30 +01:00
Matthew Hodgson	671ff2cef6	fix thinkos	2018-09-07 16:42:00 +01:00
Matthew Hodgson	c5976d394a	thinko	2018-09-07 16:35:19 +01:00
Matthew Hodgson	c630121b01	fix typos	2018-09-07 16:33:00 +01:00
Matthew Hodgson	0bbcd070d2	typo	2018-09-07 16:25:21 +01:00
Matthew Hodgson	e3d028a5f0	fix state	2018-09-07 16:01:50 +01:00
Matthew Hodgson	119b8b63d3	wip for speeding up room summaries	2018-09-07 15:58:39 +01:00
Matthew Hodgson	040f14b5d4	changelog	2018-09-04 23:20:49 +01:00
Matthew Hodgson	7afd63cdfc	this will never have worked due to the query being split up into separate queries.	2018-09-04 23:14:33 +01:00
				`@@ -0,0 +1 @@`
				`Disable lazy loading for incremental syncs for now`