Compare commits

...

13 Commits

Author SHA1 Message Date
Mathieu Velten
5d0352c207 Remove validate 2023-09-08 13:39:12 +02:00
Mathieu Velten
2da21f6204 Merge remote-tracking branch 'origin/develop' into mv/add-mxid-validation-log 2023-09-08 13:37:43 +02:00
Mathieu Velten
01c582ff36 Change to is_valid 2023-09-08 13:37:00 +02:00
Mathieu Velten
42a392f4e2 Merge branch 'develop' into mv/add-mxid-validation-log 2023-09-07 12:37:16 +02:00
Mathieu Velten
830a29482a Merge branch 'develop' into mv/add-mxid-validation-log 2023-08-07 14:10:58 +02:00
Mathieu Velten
44f7df09bb Unused import 2023-08-04 15:35:15 +02:00
Mathieu Velten
25597e97f3 Change changelog 2023-08-04 15:32:24 +02:00
Mathieu Velten
7ad75e6d20 Add check to EDUs and move PDUs check in the event storage controller 2023-08-04 15:30:18 +02:00
Mathieu Velten
7c224e149b fix comments 2023-08-04 15:09:43 +02:00
Mathieu Velten
1fe7be0be1 Merge branch 'develop' into mv/add-mxid-validation-log 2023-08-04 12:46:08 +02:00
Mathieu Velten
dc9957dbba Remove : from the historical localpart authorized chars 2023-08-04 12:21:27 +02:00
Mathieu Velten
09a7adf85d Add changelog 2023-08-03 17:20:50 +02:00
Mathieu Velten
aa9e47e144 Add logging of invalid mxids when persisting events 2023-08-03 17:18:48 +02:00
8 changed files with 73 additions and 6 deletions

1
changelog.d/16060.misc Normal file
View File

@@ -0,0 +1 @@
Add logging of sender invalid mxids when persisting events and receiving EDUs.

View File

@@ -1106,6 +1106,10 @@ class DeviceListUpdater(DeviceListWorkerUpdater):
) )
prev_ids = [str(p) for p in prev_ids] # They may come as ints prev_ids = [str(p) for p in prev_ids] # They may come as ints
# The result of `is_valid` is not used yet because for now we only want to
# log invalid mxids in the wild.
UserID.is_valid(user_id, allow_historical_mxids=True)
if get_domain_from_id(user_id) != origin: if get_domain_from_id(user_id) != origin:
# TODO: Raise? # TODO: Raise?
logger.warning( logger.warning(

View File

@@ -109,6 +109,10 @@ class DeviceMessageHandler:
origin, origin,
sender_user_id, sender_user_id,
) )
# The result of `is_valid` is not used yet because for now we only want to
# log invalid mxids in the wild.
UserID.is_valid(sender_user_id, allow_historical_mxids=True)
message_type = content["type"] message_type = content["type"]
message_id = content["message_id"] message_id = content["message_id"]
for user_id, by_device in content["messages"].items(): for user_id, by_device in content["messages"].items():

View File

@@ -1593,6 +1593,10 @@ class SigningKeyEduUpdater:
logger.warning("Got signing key update edu for %r from %r", user_id, origin) logger.warning("Got signing key update edu for %r from %r", user_id, origin)
return return
# The result of `is_valid` is not used yet because for now we only want to
# log invalid mxids in the wild.
UserID.is_valid(user_id, allow_historical_mxids=True)
room_ids = await self.store.get_rooms_for_user(user_id) room_ids = await self.store.get_rooms_for_user(user_id)
if not room_ids: if not room_ids:
# We don't share any rooms with this user. Ignore update, as we # We don't share any rooms with this user. Ignore update, as we

View File

@@ -117,6 +117,10 @@ class ReceiptsHandler:
max_batch_id: Optional[int] = None max_batch_id: Optional[int] = None
for receipt in receipts: for receipt in receipts:
# The result of `is_valid` is not used yet because for now we only want to
# log invalid mxids in the wild.
UserID.is_valid(receipt.user_id, allow_historical_mxids=True)
res = await self.store.insert_receipt( res = await self.store.insert_receipt(
receipt.room_id, receipt.room_id,
receipt.receipt_type, receipt.receipt_type,

View File

@@ -370,6 +370,10 @@ class TypingWriterHandler(FollowerTypingHandler):
room_id = content["room_id"] room_id = content["room_id"]
user_id = content["user_id"] user_id = content["user_id"]
# The result of `is_valid` is not used yet because for now we only want to
# log invalid mxids in the wild.
UserID.is_valid(user_id, allow_historical_mxids=True)
# If we're not in the room just ditch the event entirely. This is # If we're not in the room just ditch the event entirely. This is
# probably an old server that has come back and thinks we're still in # probably an old server that has come back and thinks we're still in
# the room (or we've been rejoined to the room by a state reset). # the room (or we've been rejoined to the room by a state reset).

View File

@@ -63,6 +63,7 @@ from synapse.types import (
PersistedEventPosition, PersistedEventPosition,
RoomStreamToken, RoomStreamToken,
StateMap, StateMap,
UserID,
get_domain_from_id, get_domain_from_id,
) )
from synapse.types.state import StateFilter from synapse.types.state import StateFilter
@@ -397,6 +398,10 @@ class EventsPersistenceStorageController:
event_ids: List[str] = [] event_ids: List[str] = []
partitioned: Dict[str, List[Tuple[EventBase, EventContext]]] = {} partitioned: Dict[str, List[Tuple[EventBase, EventContext]]] = {}
for event, ctx in events_and_contexts: for event, ctx in events_and_contexts:
# The result of `is_valid` is not used yet because for now we only want to
# log invalid mxids in the wild.
UserID.is_valid(event.user_id, allow_historical_mxids=True)
partitioned.setdefault(event.room_id, []).append((event, ctx)) partitioned.setdefault(event.room_id, []).append((event, ctx))
event_ids.append(event.event_id) event_ids.append(event.event_id)

View File

@@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import abc import abc
import logging
import re import re
import string import string
from enum import Enum from enum import Enum
@@ -64,6 +65,9 @@ if TYPE_CHECKING:
from synapse.storage.databases.main import DataStore, PurgeEventsStore from synapse.storage.databases.main import DataStore, PurgeEventsStore
from synapse.storage.databases.main.appservice import ApplicationServiceWorkerStore from synapse.storage.databases.main.appservice import ApplicationServiceWorkerStore
logger = logging.getLogger(__name__)
# Define a state map type from type/state_key to T (usually an event ID or # Define a state map type from type/state_key to T (usually an event ID or
# event) # event)
T = TypeVar("T") T = TypeVar("T")
@@ -306,7 +310,7 @@ class DomainSpecificString(metaclass=abc.ABCMeta):
return "%s%s:%s" % (self.SIGIL, self.localpart, self.domain) return "%s%s:%s" % (self.SIGIL, self.localpart, self.domain)
@classmethod @classmethod
def is_valid(cls: Type[DS], s: str) -> bool: def is_valid(cls: Type[DS], s: str, **kwargs: Any) -> bool:
"""Parses the input string and attempts to ensure it is valid.""" """Parses the input string and attempts to ensure it is valid."""
# TODO: this does not reject an empty localpart or an overly-long string. # TODO: this does not reject an empty localpart or an overly-long string.
# See https://spec.matrix.org/v1.2/appendices/#identifier-grammar # See https://spec.matrix.org/v1.2/appendices/#identifier-grammar
@@ -329,6 +333,35 @@ class UserID(DomainSpecificString):
SIGIL = "@" SIGIL = "@"
@classmethod
def is_valid(cls: Type[DS], s: str, **kwargs: Any) -> bool:
""""""
"""Parses the user id str and attempts to ensure it is valid per the spec.
Args:
allow_historical_mxids: True to allow historical mxids, which can
include all printable ASCII chars minus `:`
Returns:
False if the user ID is invalid per the spec
"""
allow_historical_mxids = kwargs.get("allow_historical_mxids", False)
is_valid = DomainSpecificString.is_valid(s)
if len(s.encode("utf-8")) > 255:
logger.warn(
f"User ID {s} has more than 255 bytes and is invalid per the spec"
)
is_valid = False
obj = UserID.from_string(s)
if contains_invalid_mxid_characters(obj.localpart, allow_historical_mxids):
logger.warn(
f"localpart of User ID {s} contains invalid characters per the spec"
)
is_valid = False
return is_valid
@attr.s(slots=True, frozen=True, repr=False) @attr.s(slots=True, frozen=True, repr=False)
class RoomAlias(DomainSpecificString): class RoomAlias(DomainSpecificString):
@@ -355,22 +388,30 @@ MXID_LOCALPART_ALLOWED_CHARACTERS = set(
"_-./=+" + string.ascii_lowercase + string.digits "_-./=+" + string.ascii_lowercase + string.digits
) )
MXID_HISTORICAL_LOCALPART_ALLOWED_CHARACTERS = set(string.printable.replace(":", ""))
# Guest user IDs are purely numeric. # Guest user IDs are purely numeric.
GUEST_USER_ID_PATTERN = re.compile(r"^\d+$") GUEST_USER_ID_PATTERN = re.compile(r"^\d+$")
def contains_invalid_mxid_characters(localpart: str) -> bool: def contains_invalid_mxid_characters(
localpart: str, allow_historical_mxids: Optional[bool] = False
) -> bool:
"""Check for characters not allowed in an mxid or groupid localpart """Check for characters not allowed in an mxid or groupid localpart
Args: Args:
localpart: the localpart to be checked localpart: the localpart to be checked
use_extended_character_set: True to use the extended allowed characters allow_historical_mxids: True to allow historical mxids, which can
from MSC4009. include all printable ASCII chars minus `:`
Returns: Returns:
True if there are any naughty characters True if there are any naughty characters
""" """
return any(c not in MXID_LOCALPART_ALLOWED_CHARACTERS for c in localpart)
if allow_historical_mxids:
allowed_characters = MXID_HISTORICAL_LOCALPART_ALLOWED_CHARACTERS
else:
allowed_characters = MXID_LOCALPART_ALLOWED_CHARACTERS
return any(c not in allowed_characters for c in localpart)
UPPER_CASE_PATTERN = re.compile(b"[A-Z_]") UPPER_CASE_PATTERN = re.compile(b"[A-Z_]")