Compare commits

...

3 Commits

Author SHA1 Message Date
Erik Johnston
dbbaab7de7 Fix invites 2018-03-14 18:43:38 +00:00
Erik Johnston
220a6265b8 Add concept of StatelessEventContext
The master process (usually) doesn't need the state at an event when it
has been created by a worker process, so let's not automatically load
the state in that case.
2018-03-14 16:51:39 +00:00
Erik Johnston
2ad7c767ea Refactor event storage to not require state
This is in preparation for using contexts that may or may not have the
current_state_ids set. This will allow us to avoid unnecessarily pulling
out state for an event on the master process when using workers.
2018-03-14 16:41:20 +00:00
4 changed files with 87 additions and 113 deletions

View File

@@ -13,22 +13,10 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from twisted.internet import defer
from frozendict import frozendict class StatelessEventContext(object):
class EventContext(object):
""" """
Attributes: Attributes:
current_state_ids (dict[(str, str), str]):
The current state map including the current event.
(type, state_key) -> event_id
prev_state_ids (dict[(str, str), str]):
The current state map excluding the current event.
(type, state_key) -> event_id
state_group (int|None): state group id, if the state has been stored state_group (int|None): state group id, if the state has been stored
as a state group. This is usually only None if e.g. the event is as a state group. This is usually only None if e.g. the event is
an outlier. an outlier.
@@ -40,29 +28,20 @@ class EventContext(object):
prev_group (int): Previously persisted state group. ``None`` for an prev_group (int): Previously persisted state group. ``None`` for an
outlier. outlier.
delta_ids (dict[(str, str), str]): Delta from ``prev_group``.
(type, state_key) -> event_id. ``None`` for an outlier.
prev_state_events (?): XXX: is this ever set to anything other than prev_state_events (?): XXX: is this ever set to anything other than
the empty list? the empty list?
""" """
__slots__ = [ __slots__ = [
"current_state_ids",
"prev_state_ids",
"state_group", "state_group",
"rejected", "rejected",
"prev_group", "prev_group",
"delta_ids",
"prev_state_events", "prev_state_events",
"app_service", "app_service",
] ]
def __init__(self): def __init__(self):
# The current state including the current event
self.current_state_ids = None
# The current state excluding the current event
self.prev_state_ids = None
self.state_group = None self.state_group = None
self.rejected = False self.rejected = False
@@ -70,46 +49,27 @@ class EventContext(object):
# A previously persisted state group and a delta between that # A previously persisted state group and a delta between that
# and this state. # and this state.
self.prev_group = None self.prev_group = None
self.delta_ids = None
self.prev_state_events = None self.prev_state_events = None
self.app_service = None self.app_service = None
def serialize(self, event): def serialize(self):
"""Converts self to a type that can be serialized as JSON, and then """Converts self to a type that can be serialized as JSON, and then
deserialized by `deserialize` deserialized by `deserialize`
Args:
event (FrozenEvent): The event that this context relates to
Returns: Returns:
dict dict
""" """
# We don't serialize the full state dicts, instead they get pulled out
# of the DB on the other side. However, the other side can't figure out
# the prev_state_ids, so if we're a state event we include the event
# id that we replaced in the state.
if event.is_state():
prev_state_id = self.prev_state_ids.get((event.type, event.state_key))
else:
prev_state_id = None
return { return {
"prev_state_id": prev_state_id,
"event_type": event.type,
"event_state_key": event.state_key if event.is_state() else None,
"state_group": self.state_group, "state_group": self.state_group,
"rejected": self.rejected, "rejected": self.rejected,
"prev_group": self.prev_group, "prev_group": self.prev_group,
"delta_ids": _encode_state_dict(self.delta_ids),
"prev_state_events": self.prev_state_events, "prev_state_events": self.prev_state_events,
"app_service_id": self.app_service.id if self.app_service else None "app_service_id": self.app_service.id if self.app_service else None
} }
@staticmethod @staticmethod
@defer.inlineCallbacks
def deserialize(store, input): def deserialize(store, input):
"""Converts a dict that was produced by `serialize` back into an """Converts a dict that was produced by `serialize` back into an
EventContext. EventContext.
@@ -121,52 +81,47 @@ class EventContext(object):
Returns: Returns:
EventContext EventContext
""" """
context = EventContext() context = StatelessEventContext()
context.state_group = input["state_group"] context.state_group = input["state_group"]
context.rejected = input["rejected"] context.rejected = input["rejected"]
context.prev_group = input["prev_group"] context.prev_group = input["prev_group"]
context.delta_ids = _decode_state_dict(input["delta_ids"])
context.prev_state_events = input["prev_state_events"] context.prev_state_events = input["prev_state_events"]
# We use the state_group and prev_state_id stuff to pull the
# current_state_ids out of the DB and construct prev_state_ids.
prev_state_id = input["prev_state_id"]
event_type = input["event_type"]
event_state_key = input["event_state_key"]
context.current_state_ids = yield store.get_state_ids_for_group(
context.state_group,
)
if prev_state_id and event_state_key:
context.prev_state_ids = dict(context.current_state_ids)
context.prev_state_ids[(event_type, event_state_key)] = prev_state_id
else:
context.prev_state_ids = context.current_state_ids
app_service_id = input["app_service_id"] app_service_id = input["app_service_id"]
if app_service_id: if app_service_id:
context.app_service = store.get_app_service_by_id(app_service_id) context.app_service = store.get_app_service_by_id(app_service_id)
defer.returnValue(context) return context
def _encode_state_dict(state_dict): class EventContext(StatelessEventContext):
"""Since dicts of (type, state_key) -> event_id cannot be serialized in
JSON we need to convert them to a form that can.
""" """
if state_dict is None: Attributes:
return None current_state_ids (dict[(str, str), str]):
The current state map including the current event.
(type, state_key) -> event_id
return [ prev_state_ids (dict[(str, str), str]):
(etype, state_key, v) The current state map excluding the current event.
for (etype, state_key), v in state_dict.iteritems() (type, state_key) -> event_id
delta_ids (dict[(str, str), str]): Delta from ``prev_group``.
(type, state_key) -> event_id. ``None`` for an outlier.
"""
__slots__ = [
"current_state_ids",
"prev_state_ids",
"delta_ids",
] ]
def __init__(self):
# The current state including the current event
self.current_state_ids = None
# The current state excluding the current event
self.prev_state_ids = None
def _decode_state_dict(input): self.delta_ids = None
"""Decodes a state dict encoded using `_encode_state_dict` above
"""
if input is None:
return None
return frozendict({(etype, state_key,): v for etype, state_key, v in input}) super(EventContext, self).__init__()

View File

@@ -20,6 +20,7 @@ from synapse.api.constants import EventTypes, Membership
from synapse.api.errors import AuthError, Codes, SynapseError from synapse.api.errors import AuthError, Codes, SynapseError
from synapse.crypto.event_signing import add_hashes_and_signatures from synapse.crypto.event_signing import add_hashes_and_signatures
from synapse.events.utils import serialize_event from synapse.events.utils import serialize_event
from synapse.events.snapshot import EventContext
from synapse.events.validator import EventValidator from synapse.events.validator import EventValidator
from synapse.types import ( from synapse.types import (
UserID, RoomAlias, RoomStreamToken, UserID, RoomAlias, RoomStreamToken,
@@ -665,7 +666,7 @@ class EventCreationHandler(object):
Args: Args:
requester (Requester) requester (Requester)
event (FrozenEvent) event (FrozenEvent)
context (EventContext) context (StatelessEventContext)
ratelimit (bool) ratelimit (bool)
extra_users (list(UserID)): Any extra users to notify about event extra_users (list(UserID)): Any extra users to notify about event
""" """
@@ -763,9 +764,18 @@ class EventCreationHandler(object):
e.sender == event.sender e.sender == event.sender
) )
# We get the current state at the event. If we have a full
# EventContext, use it, otherwise we hit the DB.
if isinstance(context, EventContext):
current_state_ids = context.current_state_ids
else:
current_state_ids = yield self.store.get_state_ids_for_group(
context.state_group,
)
state_to_include_ids = [ state_to_include_ids = [
e_id e_id
for k, e_id in context.current_state_ids.iteritems() for k, e_id in current_state_ids.iteritems()
if k[0] in self.hs.config.room_invite_state_types if k[0] in self.hs.config.room_invite_state_types
or k == (EventTypes.Member, event.sender) or k == (EventTypes.Member, event.sender)
] ]

View File

@@ -19,7 +19,7 @@ from synapse.api.errors import (
SynapseError, MatrixCodeMessageException, CodeMessageException, SynapseError, MatrixCodeMessageException, CodeMessageException,
) )
from synapse.events import FrozenEvent from synapse.events import FrozenEvent
from synapse.events.snapshot import EventContext from synapse.events.snapshot import StatelessEventContext
from synapse.http.servlet import RestServlet, parse_json_object_from_request from synapse.http.servlet import RestServlet, parse_json_object_from_request
from synapse.util.async import sleep from synapse.util.async import sleep
from synapse.util.caches.response_cache import ResponseCache from synapse.util.caches.response_cache import ResponseCache
@@ -44,7 +44,7 @@ def send_event_to_master(client, host, port, requester, event, context,
port (int): port on master listening for HTTP replication port (int): port on master listening for HTTP replication
requester (Requester) requester (Requester)
event (FrozenEvent) event (FrozenEvent)
context (EventContext) context (StatelessEventContext)
ratelimit (bool) ratelimit (bool)
extra_users (list(UserID)): Any extra users to notify about event extra_users (list(UserID)): Any extra users to notify about event
""" """
@@ -56,7 +56,7 @@ def send_event_to_master(client, host, port, requester, event, context,
"event": event.get_pdu_json(), "event": event.get_pdu_json(),
"internal_metadata": event.internal_metadata.get_dict(), "internal_metadata": event.internal_metadata.get_dict(),
"rejected_reason": event.rejected_reason, "rejected_reason": event.rejected_reason,
"context": context.serialize(event), "context": context.serialize(),
"requester": requester.serialize(), "requester": requester.serialize(),
"ratelimit": ratelimit, "ratelimit": ratelimit,
"extra_users": [u.to_string() for u in extra_users], "extra_users": [u.to_string() for u in extra_users],
@@ -140,7 +140,9 @@ class ReplicationSendEventRestServlet(RestServlet):
event = FrozenEvent(event_dict, internal_metadata, rejected_reason) event = FrozenEvent(event_dict, internal_metadata, rejected_reason)
requester = Requester.deserialize(self.store, content["requester"]) requester = Requester.deserialize(self.store, content["requester"])
context = yield EventContext.deserialize(self.store, content["context"]) context = yield StatelessEventContext.deserialize(
self.store, content["context"],
)
ratelimit = content["ratelimit"] ratelimit = content["ratelimit"]
extra_users = [UserID.from_string(u) for u in content["extra_users"]] extra_users = [UserID.from_string(u) for u in content["extra_users"]]

View File

@@ -19,6 +19,7 @@ from synapse.storage.events_worker import EventsWorkerStore
from twisted.internet import defer from twisted.internet import defer
from synapse.events import USE_FROZEN_DICTS from synapse.events import USE_FROZEN_DICTS
from synapse.events.snapshot import EventContext
from synapse.util.async import ObservableDeferred from synapse.util.async import ObservableDeferred
from synapse.util.logcontext import ( from synapse.util.logcontext import (
@@ -42,7 +43,6 @@ import ujson as json
# these are only included to make the type annotations work # these are only included to make the type annotations work
from synapse.events import EventBase # noqa: F401 from synapse.events import EventBase # noqa: F401
from synapse.events.snapshot import EventContext # noqa: F401
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -504,61 +504,68 @@ class EventsStore(EventsWorkerStore):
defer.returnValue({}) defer.returnValue({})
# map from state_group to ((type, key) -> event_id) state map # map from state_group to ((type, key) -> event_id) state map
state_groups = {} state_groups_map = {}
missing_event_ids = [] for ev, ctx in events_context:
was_updated = False if ctx.state_group is None:
# I don't think this can happen, but let's double-check
raise Exception(
"Context for new extremity event %s has no state "
"group" % (ev.event_id, ),
)
if ctx.state_group in state_groups_map:
continue
if isinstance(ctx, EventContext) and ctx.current_state_ids:
state_groups_map[ctx.state_group] = ctx.current_state_ids
# We need to map new_latest_event_ids to their state groups. First, let's
# check if the event is one we're persisting and then we can pull the
# state group from its context.
# Otherwise we need to pull the state group from the database.
missing_event_ids = [] # List of events we need to fetch groups for
state_groups_to_resolve = set() # State groups of new_latest_event_ids
for event_id in new_latest_event_ids: for event_id in new_latest_event_ids:
# First search in the list of new events we're adding, # First search in the list of new events we're adding.
# and then use the current state from that
for ev, ctx in events_context: for ev, ctx in events_context:
if event_id == ev.event_id: if event_id == ev.event_id:
if ctx.current_state_ids is None: state_groups_to_resolve.add(ctx.state_group)
raise Exception("Unknown current state")
if ctx.state_group is None:
# I don't think this can happen, but let's double-check
raise Exception(
"Context for new extremity event %s has no state "
"group" % (event_id, ),
)
# If we've already seen the state group don't bother adding
# it to the state sets again
if ctx.state_group not in state_groups:
state_groups[ctx.state_group] = ctx.current_state_ids
if ctx.delta_ids or hasattr(ev, "state_key"):
was_updated = True
break break
else: else:
# If we couldn't find it, then we'll need to pull # If we couldn't find it, then we'll need to pull
# the state from the database # the state from the database
was_updated = True
missing_event_ids.append(event_id) missing_event_ids.append(event_id)
if not was_updated:
return
if missing_event_ids: if missing_event_ids:
# Now pull out the state for any missing events from DB # Now pull out the state for any missing events from DB
event_to_groups = yield self._get_state_group_for_events( event_to_groups = yield self._get_state_group_for_events(
missing_event_ids, missing_event_ids,
) )
state_groups_to_resolve.update(event_to_groups.itervalues())
groups = set(event_to_groups.itervalues()) - set(state_groups.iterkeys()) # Now that we have calculated state_groups_to_resolve we need to get
# their state so we can resolve to a single state set.
missing_state = state_groups_to_resolve - set(state_groups_map)
if missing_state:
group_to_state = yield self._get_state_for_groups(missing_state)
state_groups_map.update(group_to_state)
if groups: if len(state_groups_to_resolve) == 1:
group_to_state = yield self._get_state_for_groups(groups)
state_groups.update(group_to_state)
if len(state_groups) == 1:
# If there is only one state group, then we know what the current # If there is only one state group, then we know what the current
# state is. # state is.
defer.returnValue(state_groups.values()[0]) defer.returnValue(state_groups_map[state_groups_to_resolve.pop()])
# Ok, we need to defer to the state handler to resolve our state sets.
def get_events(ev_ids): def get_events(ev_ids):
return self.get_events( return self.get_events(
ev_ids, get_prev_content=False, check_redacted=False, ev_ids, get_prev_content=False, check_redacted=False,
) )
state_groups = {
sg: state_groups_map[sg] for sg in state_groups_to_resolve
}
events_map = {ev.event_id: ev for ev, _ in events_context} events_map = {ev.event_id: ev for ev, _ in events_context}
logger.debug("calling resolve_state_groups from preserve_events") logger.debug("calling resolve_state_groups from preserve_events")
res = yield self._state_resolution_handler.resolve_state_groups( res = yield self._state_resolution_handler.resolve_state_groups(