Compare commits

...

5 Commits

Author SHA1 Message Date
Matthew Hodgson
03bdbb8c6b typo 2018-12-29 12:48:09 +00:00
Matthew Hodgson
18752982db hook up state deltas to stats 2018-07-18 23:56:57 +01:00
Matthew Hodgson
6dacdd5fbe WIP for updating the stats store 2018-07-18 12:03:07 +01:00
Matthew Hodgson
c82785f5cb flake8 2018-07-18 09:52:28 +01:00
Matthew Hodgson
a34061d332 WIP of tracking per-room and per-user stats 2018-07-18 02:07:36 +01:00
14 changed files with 995 additions and 124 deletions

View File

@@ -68,6 +68,7 @@ class EventTypes(object):
RoomHistoryVisibility = "m.room.history_visibility"
CanonicalAlias = "m.room.canonical_alias"
Encryption = "m.room.encryption"
RoomAvatar = "m.room.avatar"
GuestAccess = "m.room.guest_access"

46
synapse/config/stats.py Normal file
View File

@@ -0,0 +1,46 @@
# -*- coding: utf-8 -*-
# Copyright 2018 New Vector Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ._base import Config
import sys
class StatsConfig(Config):
    """Stats Configuration

    Configuration for the behaviour of synapse's stats engine
    """

    def read_config(self, config):
        """Read the optional `stats` section of the homeserver config.

        Sets `stats_enable`, `stats_bucket_size` and `stats_retention` on
        this object, falling back to the defaults below for any key (or the
        whole section) that is absent.

        Args:
            config (dict): the parsed homeserver configuration.
        """
        self.stats_enable = False
        self.stats_bucket_size = 86400
        # `sys.maxint` only exists on Python 2; `sys.maxsize` is available on
        # both Python 2 and Python 3 and serves the same "effectively
        # unbounded" purpose here.
        self.stats_retention = sys.maxsize

        stats_config = config.get("stats", None)
        if stats_config:
            self.stats_enable = stats_config.get("enable", self.stats_enable)
            self.stats_bucket_size = stats_config.get(
                "bucket_size", self.stats_bucket_size
            )
            self.stats_retention = stats_config.get(
                "retention", self.stats_retention
            )

    def default_config(self, config_dir_path, server_name, **kwargs):
        """Return the commented-out sample `stats` section for generated configs.
        """
        return """
        # Stats configuration
        #
        # stats:
        # enable: false
        # bucket_size: 86400 # 1 day
        # retention: 31536000 # 1 year
        """

View File

@@ -0,0 +1,73 @@
# -*- coding: utf-8 -*-
# Copyright 2017 Vector Creations Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from twisted.internet import defer
from .base import BaseHandler
logger = logging.getLogger(__name__)
class StateDeltasHandler(BaseHandler):
    """Shared base for handlers that need to compare the two events of a
    state delta; provides `_get_key_change`.
    """

    def __init__(self, hs):
        super(StateDeltasHandler, self).__init__(hs)
        self.store = hs.get_datastore()

    @defer.inlineCallbacks
    def _get_key_change(self, prev_event_id, event_id, key_name, public_value):
        """Work out whether a content field started or stopped matching a value.

        Both events are looked up in the store (a missing event is treated
        as not having the field) and `content[key_name]` of each is compared
        against `public_value`. For instance, `key_name="history_visibility"`
        with `public_value="world_readable"` detects a room becoming, or
        ceasing to be, world readable.

        Returns:
            Deferred: resolves to
                True if the old event did not match `public_value` and the
                    new one does,
                False if the old event matched `public_value` and the new
                    one does not,
                None if nothing changed in that respect (both match, neither
                    matches, or neither event could be found).
        """
        old_event = None
        if prev_event_id:
            old_event = yield self.store.get_event(prev_event_id, allow_none=True)

        new_event = None
        if event_id:
            new_event = yield self.store.get_event(event_id, allow_none=True)

        if not (new_event or old_event):
            logger.debug("Neither event exists: %r %r", prev_event_id, event_id)
            defer.returnValue(None)

        prev_value = old_event.content.get(key_name) if old_event else None
        value = new_event.content.get(key_name) if new_event else None

        logger.debug("prev_value: %r -> value: %r", prev_value, value)

        matches_now = value == public_value
        matched_before = prev_value == public_value

        if matches_now and not matched_before:
            change = True
        elif matched_before and not matches_now:
            change = False
        else:
            change = None
        defer.returnValue(change)

427
synapse/handlers/stats.py Normal file
View File

@@ -0,0 +1,427 @@
# -*- coding: utf-8 -*-
# Copyright 2018 New Vector Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from twisted.internet import defer
from synapse.api.constants import EventTypes, Membership, JoinRules
from synapse.util.metrics import Measure
from .state_deltas import StateDeltasHandler
logger = logging.getLogger(__name__)
class StatsHandler(StateDeltasHandler):
"""Handles keeping the *_stats tables updated with a simple time-series of
information about the users, rooms and media on the server, such that admins
have some idea of who is consuming their resources.
Heavily derived from UserDirectoryHandler
"""
INITIAL_ROOM_SLEEP_MS = 50
INITIAL_USER_SLEEP_MS = 10
def __init__(self, hs):
    """Wire the handler up to the homeserver's store, clock and notifier.

    When stats are enabled in the config, registers `notify_new_event` as a
    replication callback and schedules one immediate run.
    """
    super(StatsHandler, self).__init__(hs)

    self.store = hs.get_datastore()
    self.state = hs.get_state_handler()
    self.server_name = hs.hostname
    self.clock = hs.get_clock()
    self.notifier = hs.get_notifier()
    self.is_mine_id = hs.is_mine_id

    self.stats_enable = hs.config.stats_enable
    self.stats_bucket_size = hs.config.stats_bucket_size

    # Our position in the current_state_delta stream; None until it has
    # been loaded from the database.
    self.pos = None

    # Set while a processing run is in flight, so that runs never overlap.
    self._is_processing = False

    if not self.stats_enable:
        return

    self.notifier.add_replication_callback(self.notify_new_event)

    # Start a first run straight away rather than waiting for the next
    # change to arrive before populating stats.
    self.clock.call_later(0, self.notify_new_event)
@defer.inlineCallbacks
def notify_new_event(self):
    """Entry point invoked whenever there may be new state deltas.

    Does nothing if stats are disabled or a run is already in progress;
    otherwise runs `_unsafe_process` with the `_is_processing` guard held.
    """
    if not self.stats_enable or self._is_processing:
        return

    self._is_processing = True
    try:
        yield self._unsafe_process()
    finally:
        # Always release the guard, even if processing blew up.
        self._is_processing = False
@defer.inlineCallbacks
def _unsafe_process(self):
    """Catch up with the current_state_delta stream.

    Must only be called with the `_is_processing` guard held (see
    `notify_new_event`). Loads the stored stream position, running the
    initial fill first if there is none, then handles batches of deltas
    until the store reports no more.
    """
    if self.pos is None:
        # Not loaded yet: ask the database where we got to.
        self.pos = yield self.store.get_stats_stream_pos()

        if self.pos is None:
            # Nothing stored either, so the stats tables have never been
            # populated: do the initial fill, which records a position.
            yield self._do_initial_spam()
            self.pos = yield self.store.get_stats_stream_pos()

    # Keep pulling batches until we have caught up.
    while True:
        with Measure(self.clock, "stats_delta"):
            batch = yield self.store.get_current_state_deltas(self.pos)
            if not batch:
                return

            logger.info("Handling %d state deltas", len(batch))
            yield self._handle_deltas(batch)

            # Persist progress after every batch.
            self.pos = batch[-1]["stream_id"]
            yield self.store.update_stats_stream_pos(self.pos)
@defer.inlineCallbacks
def _do_initial_spam(self):
    """Fill the stats tables from the current contents of the database.

    Used the first time synapse runs with stats support. The delta stream
    position is read *before* the scan and only stored once the scan has
    finished (presumably so that deltas arriving mid-scan are replayed
    afterwards -- confirm).
    """
    new_pos = yield self.store.get_max_stream_id_in_current_state_deltas()

    # Rooms first, one at a time, pausing briefly between each.
    room_ids = yield self.store.get_all_rooms()
    logger.info("Doing initial update of room_stats. %d rooms", len(room_ids))

    for room_number, room_id in enumerate(room_ids, 1):
        logger.info("Handling room %d/%d", room_number, len(room_ids))
        yield self._handle_initial_room(room_id)
        yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.)

    logger.info("Processed all rooms.")

    # Then every local user.
    user_ids = yield self.store.get_all_local_users()
    logger.info("Doing initial update user_stats. %d users", len(user_ids))

    for user_number, user_id in enumerate(user_ids, 1):
        logger.info("Handling user %d/%d", user_number, len(user_ids))
        yield self._handle_local_user(user_id)
        yield self.clock.sleep(self.INITIAL_USER_SLEEP_MS / 1000.)

    logger.info("Processed all users")

    yield self.store.update_stats_stream_pos(new_pos)
@defer.inlineCallbacks
def _handle_initial_room(self, room_id):
    """Called when we initially fill out stats one room at a time

    Snapshots the room's descriptive state (join rules, history visibility,
    encryption algorithm, name, topic, avatar, canonical alias) into the
    room state cache and writes the room's counters into the current stats
    bucket.

    Args:
        room_id (str): the room to snapshot.

    Returns:
        Deferred: fires once both writes have been issued and completed.
    """
    current_state_ids = yield self.store.get_current_state_ids(room_id)

    @defer.inlineCallbacks
    def get_content_field(event_type, content_key):
        # Any of these state events may be absent from a room, in which
        # case there is no event id to look up: report None instead of
        # passing None to get_event.
        event_id = current_state_ids.get((event_type, ""))
        if event_id is None:
            defer.returnValue(None)
        event = yield self.store.get_event(event_id, allow_none=True)
        defer.returnValue(event.content.get(content_key) if event else None)

    # (room_state field, state event type, key within the event content).
    # Each value is read from its *own* event; topic, avatar and canonical
    # alias were previously all read from the name event by mistake.
    state_fields = (
        ("join_rules", EventTypes.JoinRules, "join_rule"),
        ("history_visibility", EventTypes.RoomHistoryVisibility, "history_visibility"),
        # NOTE(review): the EventTypes hunk in this change defines
        # `Encryption`, not `RoomEncryption` -- confirm against constants.py.
        ("encryption", EventTypes.Encryption, "algorithm"),
        ("name", EventTypes.Name, "name"),
        ("topic", EventTypes.Topic, "topic"),
        ("avatar", EventTypes.RoomAvatar, "url"),
        ("canonical_alias", EventTypes.CanonicalAlias, "alias"),
    )

    room_state = {}
    for field, event_type, content_key in state_fields:
        room_state[field] = yield get_content_field(event_type, content_key)

    yield self.store.update_room_state(room_id, room_state)

    # Round the current time down to the start of its bucket.
    bucket_ms = self.stats_bucket_size * 1000
    now = int(self.clock.time_msec() / bucket_ms) * bucket_ms

    current_state_events = len(current_state_ids)
    joined_members = yield self.store.get_user_count_in_room(
        room_id, Membership.JOIN
    )
    invited_members = yield self.store.get_user_count_in_room(
        room_id, Membership.INVITE
    )
    left_members = yield self.store.get_user_count_in_room(
        room_id, Membership.LEAVE
    )
    banned_members = yield self.store.get_user_count_in_room(
        room_id, Membership.BAN
    )
    state_events = yield self.store.get_state_event_counts(room_id)
    (local_events, remote_events) = yield self.store.get_event_counts(
        room_id, self.server_name
    )

    # NOTE(review): neither `delete_room_stats` nor `update_room_stats` is
    # defined in the StatsStore shown in this change; the latter looks like
    # it should be `update_stats("room", room_id, now, {...})`. Confirm
    # where these live before relying on this path.
    yield self.store.delete_room_stats(room_id, now)

    # Wait for the write so that failures surface here instead of being
    # dropped with an un-awaited Deferred.
    yield self.store.update_room_stats(
        room_id,
        now,
        {
            "bucket_size": self.stats_bucket_size,
            "current_state_events": current_state_events,
            "joined_members": joined_members,
            "invited_members": invited_members,
            "left_members": left_members,
            "banned_members": banned_members,
            "state_events": state_events,
            "local_events": local_events,
            "remote_events": remote_events,
        }
    )
@defer.inlineCallbacks
def _handle_deltas(self, deltas):
    """Called with the state deltas to process

    Membership changes adjust the room's member counters (and, for local
    users, their public/private room counters). Join rule and history
    visibility changes update the room state cache and may flip the room
    between public and private. Other descriptive state is simply copied
    into the room state cache.

    Args:
        deltas (list[dict]): rows of the current state delta stream, each
            with `type`, `state_key`, `room_id`, `event_id` and
            `prev_event_id`.
    """
    # XXX: shouldn't this be the timestamp where the delta was emitted rather
    # than received?
    # Round the current time down to the start of its bucket.
    bucket_ms = self.stats_bucket_size * 1000
    now = int(self.clock.time_msec() / bucket_ms) * bucket_ms

    # state event type -> (room_state field, key within the event content)
    # NOTE(review): the EventTypes hunk in this change defines `Encryption`,
    # not `RoomEncryption` -- confirm against constants.py.
    copied_state = {
        EventTypes.Encryption: ("encryption", "algorithm"),
        EventTypes.Name: ("name", "name"),
        EventTypes.Topic: ("topic", "topic"),
        EventTypes.RoomAvatar: ("avatar", "url"),
        EventTypes.CanonicalAlias: ("canonical_alias", "alias"),
    }

    for delta in deltas:
        typ = delta["type"]
        state_key = delta["state_key"]
        room_id = delta["room_id"]
        event_id = delta["event_id"]
        prev_event_id = delta["prev_event_id"]

        logger.debug("Handling: %r %r, %s", typ, state_key, event_id)

        # Skip deltas we cannot read, but keep going: returning here would
        # silently drop every remaining delta in the batch while the caller
        # still advances the stream position past them.
        if event_id is None:
            continue

        event = yield self.store.get_event(event_id, allow_none=True)
        if event is None:
            continue

        if typ == EventTypes.Member:
            yield self._handle_membership_delta(
                now, room_id, event, prev_event_id
            )

        elif typ == EventTypes.JoinRules:
            yield self.store.update_room_state(room_id, {
                "join_rules": event.content.get("join_rule")
            })
            # NOTE(review): `_is_public_room` treats a room as public if
            # *either* key is public, so flipping one key while the other
            # is still public arguably should not move the counters.
            is_public = yield self._get_key_change(
                prev_event_id, event_id,
                "join_rule", JoinRules.PUBLIC
            )
            if is_public is not None:
                yield self.update_public_room_stats(
                    now, self.stats_bucket_size,
                    room_id, is_public
                )

        elif typ == EventTypes.RoomHistoryVisibility:
            yield self.store.update_room_state(room_id, {
                "history_visibility": event.content.get("history_visibility")
            })
            is_public = yield self._get_key_change(
                prev_event_id, event_id,
                "history_visibility", "world_readable"
            )
            if is_public is not None:
                yield self.update_public_room_stats(
                    now, self.stats_bucket_size,
                    room_id, is_public
                )

        elif typ in copied_state:
            field, content_key = copied_state[typ]
            yield self.store.update_room_state(room_id, {
                field: event.content.get(content_key)
            })


@defer.inlineCallbacks
def _handle_membership_delta(self, now, room_id, event, prev_event_id):
    """Apply one m.room.member change to the room and user counters.

    Args:
        now (int): start of the current stats bucket, in ms.
        room_id (str): room the membership event belongs to.
        event: the new membership event.
        prev_event_id (str|None): the membership event it replaces, if any.
    """
    # we could use _get_key_change here but it's a bit inefficient
    # given we're not testing for a specific result; might as well
    # just grab the prev_membership and membership strings and
    # compare them.
    prev_membership = None
    if prev_event_id is not None:
        prev_event = yield self.store.get_event(prev_event_id, allow_none=True)
        if prev_event:
            prev_membership = prev_event.content.get("membership")

    membership = event.content.get("membership")

    if prev_membership == membership:
        # e.g. a profile change: no counter moves.
        return

    counters = (
        (Membership.JOIN, "joined_members"),
        (Membership.INVITE, "invited_members"),
        (Membership.LEAVE, "left_members"),
        (Membership.BAN, "banned_members"),
    )

    # Take the member out of the counter for their old state (if it is one
    # we track) and add them to the counter for the new one.
    for state, field in counters:
        if prev_membership == state:
            yield self.store.update_stats_delta(
                now, self.stats_bucket_size,
                "room", room_id, field, -1
            )
            break

    for state, field in counters:
        if membership == state:
            yield self.store.update_stats_delta(
                now, self.stats_bucket_size,
                "room", room_id, field, +1
            )
            break

    user_id = event.state_key
    if not self.is_mine_id(user_id):
        return

    # update user_stats as it's one of our users
    public = yield self._is_public_room(room_id)
    field = "public_rooms" if public else "private_rooms"

    if prev_membership == Membership.JOIN:
        yield self.store.update_stats_delta(
            now, self.stats_bucket_size,
            "user", user_id, field, -1
        )
    elif membership == Membership.JOIN:
        yield self.store.update_stats_delta(
            now, self.stats_bucket_size,
            "user", user_id, field, +1
        )
@defer.inlineCallbacks
def update_public_room_stats(self, ts, bucket_size, room_id, is_public):
    """Move a room between the public and private counters of its local users.

    Args:
        ts (int): start of the stats bucket to update, in ms.
        bucket_size (int): bucket size, passed through to the store.
        room_id (str): the room whose visibility changed.
        is_public (bool): True if the room became public, False if it
            stopped being public.
    """
    # For now, blindly iterate over all local users in the room so that
    # we can handle the whole problem of copying buckets over as needed
    user_ids = yield self.store.get_users_in_room(room_id)

    for user_id in user_ids:
        # The handler stores the homeserver's check as `is_mine_id`;
        # `is_mine` does not exist on this object.
        if not self.is_mine_id(user_id):
            continue

        # Wait for each write so that failures propagate to the caller and
        # the two counters are updated in a defined order.
        yield self.store.update_stats_delta(
            ts, bucket_size,
            "user", user_id,
            "public_rooms", +1 if is_public else -1
        )
        yield self.store.update_stats_delta(
            ts, bucket_size,
            "user", user_id,
            "private_rooms", -1 if is_public else +1
        )
@defer.inlineCallbacks
def _is_public_room(self, room_id):
    """Check whether a room currently counts as public.

    A room is public if its join rule is public or its history visibility
    is world readable.

    Args:
        room_id (str)

    Returns:
        Deferred[bool]
    """
    events = yield self.store.get_current_state(
        room_id, (
            (EventTypes.JoinRules, ""),
            (EventTypes.RoomHistoryVisibility, "")
        )
    )

    join_rules = events.get((EventTypes.JoinRules, ""))
    history_visibility = events.get((EventTypes.RoomHistoryVisibility, ""))

    # Either event may be missing from the room; a missing event simply
    # does not make the room public.
    publicly_joinable = (
        join_rules is not None and
        join_rules.content.get("join_rule") == JoinRules.PUBLIC
    )
    world_readable = (
        history_visibility is not None and
        history_visibility.content.get("history_visibility") == "world_readable"
    )

    # Previously both branches returned True, so every room was reported
    # as public.
    defer.returnValue(publicly_joinable or world_readable)
def _handle_local_user(self, user_id):
    """Placeholder for populating stats for a local user during the initial fill.

    Currently only logs. It is not wrapped in `inlineCallbacks` because the
    body contains no `yield`: the decorator raises TypeError when the
    wrapped function does not produce a generator, which would abort the
    initial fill at the first user.

    Args:
        user_id (str)

    Returns:
        Deferred: already fired, so callers can `yield` the result.
    """
    logger.debug("Adding new local user to stats, %r", user_id)
    return defer.succeed(None)

View File

@@ -24,10 +24,12 @@ from synapse.storage.roommember import ProfileInfo
from synapse.types import get_localpart_from_id
from synapse.util.metrics import Measure
from .state_deltas import StateDeltasHandler
logger = logging.getLogger(__name__)
class UserDirectoryHandler(object):
class UserDirectoryHandler(StateDeltasHandler):
"""Handles querying of and keeping updated the user_directory.
N.B.: ASSUMES IT IS THE ONLY THING THAT MODIFIES THE USER DIRECTORY
@@ -49,6 +51,8 @@ class UserDirectoryHandler(object):
INITIAL_USER_SLEEP_MS = 10
def __init__(self, hs):
super(UserDirectoryHandler, self).__init__(hs)
self.store = hs.get_datastore()
self.state = hs.get_state_handler()
self.server_name = hs.hostname
@@ -531,7 +535,7 @@ class UserDirectoryHandler(object):
@defer.inlineCallbacks
def _handle_remove_user(self, room_id, user_id):
"""Called when we might need to remove user to directory
"""Called when we might need to remove user from directory
Args:
room_id (str): room_id that user left or stopped being public that
@@ -643,47 +647,3 @@ class UserDirectoryHandler(object):
yield self.store.update_profile_in_user_dir(
user_id, new_name, new_avatar, room_id,
)
@defer.inlineCallbacks
def _get_key_change(self, prev_event_id, event_id, key_name, public_value):
"""Given two events check if the `key_name` field in content changed
from not matching `public_value` to doing so.
For example, check if `history_visibility` (`key_name`) changed from
`shared` to `world_readable` (`public_value`).
Returns:
None if the field in the events either both match `public_value`
or if neither do, i.e. there has been no change.
True if it didnt match `public_value` but now does
False if it did match `public_value` but now doesn't
"""
prev_event = None
event = None
if prev_event_id:
prev_event = yield self.store.get_event(prev_event_id, allow_none=True)
if event_id:
event = yield self.store.get_event(event_id, allow_none=True)
if not event and not prev_event:
logger.debug("Neither event exists: %r %r", prev_event_id, event_id)
defer.returnValue(None)
prev_value = None
value = None
if prev_event:
prev_value = prev_event.content.get(key_name)
if event:
value = event.content.get(key_name)
logger.debug("prev_value: %r -> value: %r", prev_value, value)
if value == public_value and prev_value != public_value:
defer.returnValue(True)
elif value != public_value and prev_value == public_value:
defer.returnValue(False)
else:
defer.returnValue(None)

View File

@@ -502,7 +502,7 @@ class SQLBaseStore(object):
Args:
table (str): The table to upsert into
keyvalues (dict): The unique key tables and their new values
keyvalues (dict): The unique key columns and their new values
values (dict): The nonunique columns and their new values
insertion_values (dict): additional key/values to use only when
inserting

View File

@@ -1807,6 +1807,43 @@ class EventsStore(EventsWorkerStore):
)
return self.runInteraction("get_all_new_events", get_all_new_events_txn)
def get_state_event_counts(self, room_id):
    """Count the rows in `state_events` for the given room.

    Args:
        room_id (str)

    Returns:
        The result of `runInteraction`, yielding the count (0 if the query
        produced no row).
    """
    def _count_state_events_txn(txn):
        txn.execute(
            "SELECT COUNT(*)"
            " FROM state_events"
            " WHERE room_id=?",
            (room_id,),
        )
        row = txn.fetchone()
        if not row:
            return 0
        return row[0]

    return self.runInteraction(
        "get_state_event_counts", _count_state_events_txn
    )
def get_event_counts(self, room_id, local_server):
    """Gets the number of events in the room, split into local versus remote

    An event counts as local when its sender ends in `:<local_server>`.

    Args:
        room_id (str)
        local_server (str): this homeserver's name.

    Returns:
        The result of `runInteraction`, yielding a
        `(local_count, remote_count)` tuple.
    """
    def _get_event_counts_txn(txn):
        # The LIKE pattern is passed as a bind parameter: embedding
        # `'%%:%s'` in the SQL text never substituted the server name and
        # left the query with one placeholder for two arguments.
        # NOTE(review): `%` and `_` in local_server would act as LIKE
        # wildcards; not expected in a server name.
        sql = (
            "SELECT sender LIKE ? AS is_local, COUNT(*)"
            " FROM events"
            " WHERE room_id=?"
            " GROUP BY is_local"
        )
        txn.execute(sql, ("%:" + local_server, room_id,))

        # Accumulate rather than assign so that several falsy groups
        # (e.g. 0 and NULL) all land in the remote count.
        counts = {"local": 0, "remote": 0}
        for is_local, count in txn.fetchall():
            counts["local" if is_local else "remote"] += count
        return (counts["local"], counts["remote"])

    return self.runInteraction("get_event_counts", _get_event_counts_txn)
def purge_history(
self, room_id, token, delete_local_events,
):

View File

@@ -276,7 +276,7 @@ class GroupServerStore(SQLBaseStore):
"category_id": category_id,
"room_id": room_id,
},
values=to_update,
updatevalues=to_update,
)
else:
if is_public is None:
@@ -562,7 +562,7 @@ class GroupServerStore(SQLBaseStore):
"role_id": role_id,
"user_id": user_id,
},
values=to_update,
updatevalues=to_update,
)
else:
if is_public is None:

View File

@@ -82,6 +82,24 @@ class RoomMemberWorkerStore(EventsWorkerStore):
return [to_ascii(r[0]) for r in txn]
return self.runInteraction("get_users_in_room", f)
@cached()
def get_user_count_in_room(self, room_id, membership):
    """Count the users whose current membership in a room has a given value.

    Only memberships that are part of the room's current state are counted.

    NOTE(review): the result is cached, and nothing in this change shows
    the cache being invalidated when membership changes -- confirm callers
    can tolerate a stale count.

    Args:
        room_id (str)
        membership (str): e.g. a `Membership` constant.

    Returns:
        The result of `runInteraction`, yielding the count.
    """
    def _get_user_count_in_room_txn(txn):
        sql = (
            "SELECT count(*) FROM room_memberships as m"
            " INNER JOIN current_state_events as c"
            " ON m.event_id = c.event_id "
            " AND m.room_id = c.room_id "
            " AND m.user_id = c.state_key"
            " WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ?"
        )
        txn.execute(sql, (room_id, membership,))
        row = txn.fetchone()
        return row[0]

    # Use this query's own name as the description; it was previously
    # copy-pasted as "get_users_in_room".
    return self.runInteraction(
        "get_user_count_in_room", _get_user_count_in_room_txn
    )
@cached()
def get_invited_rooms_for_user(self, user_id):
""" Get all the rooms the user is invited to

View File

@@ -0,0 +1,79 @@
/* Copyright 2018 New Vector Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-- Position reached in the current state delta stream by the stats handler.
-- Seeded below with a single row whose stream_id is NULL.
CREATE TABLE stats_stream_pos (
Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, -- Makes sure this table only has one row.
stream_id BIGINT,
CHECK (Lock='X')
);
INSERT INTO stats_stream_pos (stream_id) VALUES (null);
-- Per-user stats, one row per (user, time bucket).
CREATE TABLE user_stats (
    user_id TEXT NOT NULL,
    ts BIGINT NOT NULL,
    bucket_size INT NOT NULL,
    -- number sent per timeslice; starts from zero in each new bucket, and
    -- rows are created without supplying it, hence the default
    sent_events INT NOT NULL DEFAULT 0,
    local_events INT NOT NULL,
    public_rooms INT NOT NULL,
    private_rooms INT NOT NULL,
    sent_file_count INT NOT NULL,
    sent_file_size INT NOT NULL -- no trailing comma: it is a syntax error before ')'
);
CREATE UNIQUE INDEX user_stats_user_ts ON user_stats(user_id, ts);
-- Per-room stats, one row per (room, time bucket).
CREATE TABLE room_stats (
    room_id TEXT NOT NULL,
    ts BIGINT NOT NULL,
    bucket_size INT NOT NULL,
    current_state_events INT NOT NULL,
    joined_members INT NOT NULL,
    invited_members INT NOT NULL,
    left_members INT NOT NULL,
    banned_members INT NOT NULL,
    state_events INT NOT NULL,
    local_events INT NOT NULL,
    remote_events INT NOT NULL,
    -- number sent per timeslice; rows are created without supplying it,
    -- hence the default. No trailing comma: it is a syntax error before ')'.
    sent_events INT NOT NULL DEFAULT 0
);
CREATE UNIQUE INDEX room_stats_room_ts ON room_stats(room_id, ts);
-- cache of current room state; useful for the publicRooms list
--
-- Everything except room_id is nullable: a room need not have each of these
-- state events, and the stats handler both writes NULL for missing ones and
-- upserts a single column at a time.
CREATE TABLE room_state (
    room_id TEXT NOT NULL,
    join_rules TEXT,
    history_visibility TEXT,
    -- the m.room.encryption algorithm; named and typed to match the
    -- "encryption" field the stats handler writes
    encryption TEXT,
    name TEXT,
    topic TEXT,
    avatar TEXT,
    canonical_alias TEXT
    -- get aliases straight from the right table
);
CREATE UNIQUE INDEX room_state_room ON room_state(room_id);
-- Server-wide media stats, one row per time bucket.
CREATE TABLE media_stats (
    ts BIGINT NOT NULL,
    bucket_size INT NOT NULL,
    local_media_count INT NOT NULL,
    local_media_size INT NOT NULL,
    remote_media_count INT NOT NULL,
    remote_media_size INT NOT NULL -- no trailing comma: it is a syntax error before ')'
);
CREATE UNIQUE INDEX media_stats_ts ON media_stats(ts);

View File

@@ -89,6 +89,59 @@ class StateGroupWorkerStore(SQLBaseStore):
_get_current_state_ids_txn,
)
@defer.inlineCallbacks
def get_current_state(self, room_id, types):
    """Get the current state event of a given type for a room based on the
    current_state_events table. This may not be as up-to-date as the result
    of doing a fresh state resolution as per state_handler.get_current_state

    Args:
        room_id (str)
        types (list): List of (type, state_key) tuples which are used to
            filter the state fetched. `state_key` may be None, which matches
            any `state_key`. If empty or None, all current state is fetched.

    Returns:
        deferred: dict of (type, state_key) -> event. The event is None if
        it could not be loaded.
    """
    def _get_current_state_txn(txn):
        # The per-type clauses below carry their own leading "AND", so the
        # base query must not end in "and" (that produced "and AND ...",
        # or a dangling "and" when no types were given).
        sql = (
            "SELECT type, state_key, event_id FROM current_state_events"
            " WHERE room_id = ? %s"
        )

        # Turns out that postgres doesn't like doing a list of OR's and
        # is about 1000x slower, so we just issue a query for each specific
        # type separately.
        if types:
            clause_to_args = [
                (
                    "AND type = ? AND state_key = ?",
                    (etype, state_key)
                ) if state_key is not None else (
                    "AND type = ?",
                    (etype,)
                )
                for etype, state_key in types
            ]
        else:
            # If types is None we fetch all the state, and so just use an
            # empty where clause with no extra args.
            clause_to_args = [("", [])]

        # Previously `results` was written to here without ever being
        # created.
        event_ids = {}
        for where_clause, where_args in clause_to_args:
            args = [room_id]
            args.extend(where_args)
            txn.execute(sql % (where_clause,), args)
            for typ, state_key, event_id in txn:
                # intern the two strings individually
                # NOTE(review): assumes intern_string takes a string, as
                # its name suggests; it was being handed the whole tuple.
                key = (intern_string(typ), intern_string(state_key))
                event_ids[key] = event_id

        return event_ids

    # Wait for the query: iterating the un-yielded Deferred cannot work.
    event_ids = yield self.runInteraction(
        "get_current_state",
        _get_current_state_txn,
    )

    results = {}
    for key, event_id in event_ids.items():
        # NOTE(review): this class *is* the store, so `get_event` is looked
        # up on self rather than on a `self.store` attribute; confirm the
        # events store is mixed into the same object.
        results[key] = yield self.get_event(event_id, allow_none=True)

    defer.returnValue(results)
@cached(max_entries=10000, iterable=True)
def get_state_group_delta(self, state_group):
"""Given a state group try to return a previous group and a delta between

View File

@@ -0,0 +1,98 @@
# -*- coding: utf-8 -*-
# Copyright 2018 Vector Creations Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from twisted.internet import defer
from ._base import SQLBaseStore
logger = logging.getLogger(__name__)
class StateDeltasStore(SQLBaseStore):
    """Queries shared by the stores that follow current_state_delta_stream."""

    @defer.inlineCallbacks
    def get_all_rooms(self):
        """Fetch every room_id found in current_state_events, ordered by the
        room's number of current state events, smallest first.
        """
        rows = yield self._execute(
            "get_all_rooms",
            None,
            """
            SELECT room_id FROM current_state_events
            GROUP BY room_id
            ORDER BY count(*) ASC
            """,
        )
        defer.returnValue([row[0] for row in rows])

    @defer.inlineCallbacks
    def get_all_local_users(self):
        """Fetch the name of every row in the users table.
        """
        rows = yield self._execute(
            "get_all_local_users",
            None,
            """
            SELECT name FROM users
            """,
        )
        defer.returnValue([row[0] for row in rows])

    def get_current_state_deltas(self, prev_stream_id):
        """Fetch the next batch of state deltas after `prev_stream_id`.

        Returns an empty list straight away if the stream change cache says
        nothing has changed since `prev_stream_id`; otherwise returns the
        result of `runInteraction`, yielding a list of row dicts ordered by
        stream_id.
        """
        prev_stream_id = int(prev_stream_id)

        stream_cache = self._curr_state_delta_stream_cache
        if not stream_cache.has_any_entity_changed(prev_stream_id):
            return []

        def get_current_state_deltas_txn(txn):
            # Step one: find the highest stream id we are willing to read
            # up to. At most 100 distinct stream ids are considered, and we
            # stop at the first one that takes the row total over 100.
            txn.execute(
                """
                SELECT stream_id, count(*)
                FROM current_state_delta_stream
                WHERE stream_id > ?
                GROUP BY stream_id
                ORDER BY stream_id ASC
                LIMIT 100
                """,
                (prev_stream_id,),
            )

            running_total = 0
            max_stream_id = prev_stream_id
            for max_stream_id, count in txn:
                running_total += count
                if running_total > 100:
                    # Enough rows for one batch.
                    break

            # Step two: read the deltas themselves, up to that stream id.
            txn.execute(
                """
                SELECT stream_id, room_id, type, state_key, event_id, prev_event_id
                FROM current_state_delta_stream
                WHERE ? < stream_id AND stream_id <= ?
                ORDER BY stream_id ASC
                """,
                (prev_stream_id, max_stream_id,),
            )
            return self.cursor_to_dict(txn)

        return self.runInteraction(
            "get_current_state_deltas", get_current_state_deltas_txn
        )

    def get_max_stream_id_in_current_state_deltas(self):
        """Fetch the largest stream_id in current_state_delta_stream, or -1
        if the table is empty.
        """
        lookup = dict(
            table="current_state_delta_stream",
            keyvalues={},
            retcol="COALESCE(MAX(stream_id), -1)",
            desc="get_max_stream_id_in_current_state_deltas",
        )
        return self._simple_select_one_onecol(**lookup)

152
synapse/storage/stats.py Normal file
View File

@@ -0,0 +1,152 @@
# -*- coding: utf-8 -*-
# Copyright 2018 Vector Creations Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging

# StateDeltasStore is imported from `.state_deltas` elsewhere in this change
# (see the user directory store); `.StateDeltasStore` is not a module.
from .state_deltas import StateDeltasStore

logger = logging.getLogger(__name__)
# these fields track relative numbers (e.g. number of events sent in this timeslice)
#
# NB: the trailing commas matter. `("sent_events")` is just a string, so
# iterating it yields characters and `in` does a substring match.
RELATIVE_STATS_FIELDS = {
    "room": (
        "sent_events",
    ),
    "user": (
        "sent_events",
    ),
}

# these fields track absolutes rather than relative numbers (e.g. total
# number of rooms on the server)
ABSOLUTE_STATS_FIELDS = {
    "room": (
        "current_state_events",
        "joined_members",
        "invited_members",
        "left_members",
        "banned_members",
        "state_events",
        "local_events",
        "remote_events",
    ),
    "user": (
        "local_events",
        "public_rooms",
        "private_rooms",
        "sent_file_count",
        "sent_file_size",
    ),
}
class StatsStore(StateDeltasStore):
    """Storage for the stats stream position, the room state cache and the
    per-room / per-user stats buckets.
    """

    def get_stats_stream_pos(self):
        """Fetch the stored position in the current state delta stream."""
        return self._simple_select_one_onecol(
            table="stats_stream_pos",
            keyvalues={},
            retcol="stream_id",
            desc="stats_stream_pos",
        )

    def update_stats_stream_pos(self, stream_id):
        """Store a new position in the current state delta stream."""
        return self._simple_update_one(
            table="stats_stream_pos",
            keyvalues={},
            updatevalues={"stream_id": stream_id},
            desc="update_stats_stream_pos",
        )

    def update_room_state(self, room_id, fields):
        """Upsert the given columns of a room's row in room_state.

        Args:
            room_id (str)
            fields (dict): column name -> new value.
        """
        return self._simple_upsert(
            table="room_state",
            keyvalues={
                "room_id": room_id,
            },
            values=fields,
            desc="update_room_state",
        )

    def update_stats(self, stats_type, stats_id, ts, fields):
        """Upsert the given columns of one stats bucket.

        Args:
            stats_type (str): "room" or "user"; selects the `<type>_stats`
                table and its `<type>_id` column.
            stats_id (str): the room or user id.
            ts (int): the bucket's timestamp.
            fields (dict): column name -> new value.
        """
        return self._simple_upsert(
            table=("%s_stats" % stats_type),
            keyvalues={
                ("%s_id" % stats_type): stats_id,
                "ts": ts,
            },
            # `_simple_upsert` takes the non-key columns as `values`
            # (`updatevalues` is the `_simple_update*` spelling).
            values=fields,
            desc="update_stats",
        )

    def update_stats_delta(self, ts, bucket_size, stats_type, stats_id, field, value):
        """Add `value` to one counter in the bucket at `ts`.

        If the most recent bucket for this room/user is not the one at
        `ts`, a new bucket is created first, carrying over the absolute
        counters from the most recent one. If there is no bucket at all
        yet, nothing is done.

        Args:
            ts (int): timestamp of the bucket to update.
            bucket_size (int): stored on the bucket if one is created.
            stats_type (str): "room" or "user".
            stats_id (str): the room or user id.
            field (str): the counter column to adjust. Interpolated into
                the SQL, so it must be a trusted column name.
            value (int): amount to add (may be negative).

        Raises:
            KeyError: if `stats_type` is not a known stats type.
        """
        # Looked up eagerly so that an unknown type fails before any SQL is
        # built from it.
        absolute_fields = ABSOLUTE_STATS_FIELDS[stats_type]
        table = "%s_stats" % stats_type
        id_col = "%s_id" % stats_type

        def _update_stats_delta(txn):
            sql = (
                "SELECT * FROM %s"
                " WHERE %s=? and ts=("
                " SELECT MAX(ts) FROM %s"
                " WHERE %s=?"
                ")"
            ) % (table, id_col, table, id_col)
            txn.execute(sql, (stats_id, stats_id))
            rows = self.cursor_to_dict(txn)
            if not rows:
                # silently skip as we don't have anything to apply a delta to yet.
                # this tries to minimise any race between the initial sync and
                # subsequent deltas arriving.
                return

            latest = rows[0]
            if ts != latest["ts"]:
                # we have to copy our absolute counters over to the new entry.
                # NOTE(review): relative counters are not set here, so they
                # rely on the column having a default (or allowing NULL).
                values = {key: latest[key] for key in absolute_fields}
                values[id_col] = stats_id
                values["ts"] = ts
                values["bucket_size"] = bucket_size
                self._simple_insert_txn(
                    txn,
                    table=table,
                    values=values
                )

            # `value` is a delta for absolute and relative counters alike,
            # so it is always added. (The old code had a branch that would
            # have overwritten the counter with the delta; its condition
            # compared the stats type against field names and so never
            # fired.) COALESCE covers a counter that is still NULL.
            sql = (
                "UPDATE %s "
                " SET %s=COALESCE(%s, 0)+?"
                " WHERE %s=? AND ts=?"
            ) % (table, field, field, id_col)
            txn.execute(sql, (value, stats_id, ts))

        return self.runInteraction(
            "update_stats_delta", _update_stats_delta
        )

View File

@@ -25,12 +25,12 @@ from synapse.storage.engines import PostgresEngine, Sqlite3Engine
from synapse.types import get_domain_from_id, get_localpart_from_id
from synapse.util.caches.descriptors import cached, cachedInlineCallbacks
from ._base import SQLBaseStore
from .state_deltas import StateDeltasStore
logger = logging.getLogger(__name__)
class UserDirectoryStore(SQLBaseStore):
class UserDirectoryStore(StateDeltasStore):
@cachedInlineCallbacks(cache_context=True)
def is_room_world_readable_or_publicly_joinable(self, room_id, cache_context):
"""Check if the room is either world_readable or publically joinable
@@ -307,28 +307,6 @@ class UserDirectoryStore(SQLBaseStore):
defer.returnValue(user_ids)
@defer.inlineCallbacks
def get_all_rooms(self):
"""Get all room_ids we've ever known about, in ascending order of "size"
"""
sql = """
SELECT room_id FROM current_state_events
GROUP BY room_id
ORDER BY count(*) ASC
"""
rows = yield self._execute("get_all_rooms", None, sql)
defer.returnValue([room_id for room_id, in rows])
@defer.inlineCallbacks
def get_all_local_users(self):
"""Get all local users
"""
sql = """
SELECT name FROM users
"""
rows = yield self._execute("get_all_local_users", None, sql)
defer.returnValue([name for name, in rows])
def add_users_who_share_room(self, room_id, share_private, user_id_tuples):
"""Insert entries into the users_who_share_rooms table. The first
user should be a local user.
@@ -572,57 +550,6 @@ class UserDirectoryStore(SQLBaseStore):
desc="update_user_directory_stream_pos",
)
def get_current_state_deltas(self, prev_stream_id):
prev_stream_id = int(prev_stream_id)
if not self._curr_state_delta_stream_cache.has_any_entity_changed(prev_stream_id):
return []
def get_current_state_deltas_txn(txn):
# First we calculate the max stream id that will give us less than
# N results.
# We arbitarily limit to 100 stream_id entries to ensure we don't
# select toooo many.
sql = """
SELECT stream_id, count(*)
FROM current_state_delta_stream
WHERE stream_id > ?
GROUP BY stream_id
ORDER BY stream_id ASC
LIMIT 100
"""
txn.execute(sql, (prev_stream_id,))
total = 0
max_stream_id = prev_stream_id
for max_stream_id, count in txn:
total += count
if total > 100:
# We arbitarily limit to 100 entries to ensure we don't
# select toooo many.
break
# Now actually get the deltas
sql = """
SELECT stream_id, room_id, type, state_key, event_id, prev_event_id
FROM current_state_delta_stream
WHERE ? < stream_id AND stream_id <= ?
ORDER BY stream_id ASC
"""
txn.execute(sql, (prev_stream_id, max_stream_id,))
return self.cursor_to_dict(txn)
return self.runInteraction(
"get_current_state_deltas", get_current_state_deltas_txn
)
def get_max_stream_id_in_current_state_deltas(self):
return self._simple_select_one_onecol(
table="current_state_delta_stream",
keyvalues={},
retcol="COALESCE(MAX(stream_id), -1)",
desc="get_max_stream_id_in_current_state_deltas",
)
@defer.inlineCallbacks
def search_user_dir(self, user_id, search_term, limit):
"""Searches for users in directory