Add plain-text handling for rich-text topics as per MSC3765 (#18195)

This implements
https://github.com/matrix-org/matrix-spec-proposals/pull/3765 which is
already merged and, therefore, can use stable identifiers.

For `/publicRooms` and `/hierarchy`, the topic is read from the
eponymous field of the `room_stats_state` table. Rather than
introduce further columns in this table, I changed the insertion /
update logic to write the plain-text topic from the rich topic into the
existing field. This will not take effect for existing rooms unless
their topic is changed. However, existing rooms shouldn't have rich
topics to begin with.

Similarly, for server-side search, I changed the insertion logic of the
`event_search` table to prefer the value from the rich topic. Again,
existing events shouldn't have rich topics and, therefore, don't need to
be migrated in the table.

Spec doc: https://spec.matrix.org/v1.15/client-server-api/#mroomtopic

Part of supporting Matrix v1.15:
https://spec.matrix.org/v1.15/client-server-api/#mroomtopic

Signed-off-by: Johannes Marbach <n0-0ne+github@mailbox.org>
Co-authored-by: Eric Eastwood <erice@element.io>
This commit is contained in:
Johannes Marbach
2025-07-09 21:13:54 +02:00
committed by GitHub
parent e1b429d88e
commit b9b8775db7
13 changed files with 356 additions and 53 deletions

View File

@@ -0,0 +1 @@
Add plain-text handling for rich-text topics as per [MSC3765](https://github.com/matrix-org/matrix-spec-proposals/pull/3765).

View File

@@ -262,6 +262,11 @@ class EventContentFields:
TOMBSTONE_SUCCESSOR_ROOM: Final = "replacement_room"
# Used in m.room.topic events.
TOPIC: Final = "topic"
M_TOPIC: Final = "m.topic"
M_TEXT: Final = "m.text"
class EventUnsignedContentFields:
"""Fields found inside the 'unsigned' data on events"""
@@ -270,6 +275,13 @@ class EventUnsignedContentFields:
MEMBERSHIP: Final = "membership"
class MTextFields:
    """Keys that may appear inside an `m.text` content block."""

    # Key under which the text of a representation is stored.
    BODY: Final = "body"
    # Key under which the mimetype of a representation is stored.
    MIMETYPE: Final = "mimetype"
class RoomTypes:
"""Understood values of the room_type field of m.room.create events."""

View File

@@ -27,8 +27,6 @@ from typing import Any, Dict, List, Optional, Union
import attr
from synapse._pydantic_compat import (
BaseModel,
Extra,
StrictBool,
StrictInt,
StrictStr,
@@ -47,6 +45,7 @@ from synapse.config.server import (
parse_listener_def,
)
from synapse.types import JsonDict
from synapse.util.pydantic_models import ParseModel
_DEPRECATED_WORKER_DUTY_OPTION_USED = """
The '%s' configuration option is deprecated and will be removed in a future
@@ -90,30 +89,7 @@ def _instance_to_list_converter(obj: Union[str, List[str]]) -> List[str]:
return obj
class ConfigModel(BaseModel):
"""A custom version of Pydantic's BaseModel which
- ignores unknown fields and
- does not allow fields to be overwritten after construction,
but otherwise uses Pydantic's default behaviour.
For now, ignore unknown fields. In the future, we could change this so that unknown
config values cause a ValidationError, provided the error messages are meaningful to
server operators.
Subclassing in this way is recommended by
https://pydantic-docs.helpmanual.io/usage/model_config/#change-behaviour-globally
"""
class Config:
# By default, ignore fields that we don't recognise.
extra = Extra.ignore
# By default, don't allow fields to be reassigned after parsing.
allow_mutation = False
class InstanceTcpLocationConfig(ConfigModel):
class InstanceTcpLocationConfig(ParseModel):
"""The host and port to talk to an instance via HTTP replication."""
host: StrictStr
@@ -129,7 +105,7 @@ class InstanceTcpLocationConfig(ConfigModel):
return f"{self.host}:{self.port}"
class InstanceUnixLocationConfig(ConfigModel):
class InstanceUnixLocationConfig(ParseModel):
"""The socket file to talk to an instance via HTTP replication."""
path: StrictStr

View File

@@ -51,6 +51,7 @@ from synapse.api.constants import (
HistoryVisibility,
JoinRules,
Membership,
MTextFields,
RoomCreationPreset,
RoomEncryptionAlgorithms,
RoomTypes,
@@ -1303,7 +1304,13 @@ class RoomCreationHandler:
topic = room_config["topic"]
topic_event, topic_context = await create_event(
EventTypes.Topic,
{"topic": topic},
{
EventContentFields.TOPIC: topic,
EventContentFields.M_TOPIC: {
# The mimetype property defaults to `text/plain` if omitted.
EventContentFields.M_TEXT: [{MTextFields.BODY: topic}]
},
},
True,
)
events_to_send.append((topic_event, topic_context))

View File

@@ -36,6 +36,7 @@ from synapse.metrics import event_processing_positions
from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.storage.databases.main.state_deltas import StateDelta
from synapse.types import JsonDict
from synapse.util.events import get_plain_text_topic_from_event_content
if TYPE_CHECKING:
from synapse.server import HomeServer
@@ -299,7 +300,9 @@ class StatsHandler:
elif delta.event_type == EventTypes.Name:
room_state["name"] = event_content.get("name")
elif delta.event_type == EventTypes.Topic:
room_state["topic"] = event_content.get("topic")
room_state["topic"] = get_plain_text_topic_from_event_content(
event_content
)
elif delta.event_type == EventTypes.RoomAvatar:
room_state["avatar"] = event_content.get("url")
elif delta.event_type == EventTypes.CanonicalAlias:

View File

@@ -78,6 +78,7 @@ from synapse.types import (
from synapse.types.handlers import SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES
from synapse.types.state import StateFilter
from synapse.util import json_encoder
from synapse.util.events import get_plain_text_topic_from_event_content
from synapse.util.iterutils import batch_iter, sorted_topologically
from synapse.util.stringutils import non_null_str_or_none
@@ -3102,7 +3103,10 @@ class PersistEventsStore:
def _store_room_topic_txn(self, txn: LoggingTransaction, event: EventBase) -> None:
if isinstance(event.content.get("topic"), str):
self.store_event_search_txn(
txn, event, "content.topic", event.content["topic"]
txn,
event,
"content.topic",
get_plain_text_topic_from_event_content(event.content) or "",
)
def _store_room_name_txn(self, txn: LoggingTransaction, event: EventBase) -> None:

View File

@@ -49,6 +49,7 @@ from synapse.storage.database import (
from synapse.storage.databases.main.events_worker import EventRedactBehaviour
from synapse.storage.engines import PostgresEngine, Sqlite3Engine
from synapse.types import JsonDict
from synapse.util.events import get_plain_text_topic_from_event_content
if TYPE_CHECKING:
from synapse.server import HomeServer
@@ -212,7 +213,9 @@ class SearchBackgroundUpdateStore(SearchWorkerStore):
value = content["body"]
elif etype == "m.room.topic":
key = "content.topic"
value = content["topic"]
# `value` must be a plain string, as in every other branch. Wrapping the
# expression in parentheses with a trailing comma would make it a 1-tuple.
value = get_plain_text_topic_from_event_content(content) or ""
elif etype == "m.room.name":
key = "content.name"
value = content["name"]

View File

@@ -48,6 +48,7 @@ from synapse.storage.databases.main.events_worker import InvalidEventError
from synapse.storage.databases.main.state_deltas import StateDeltasStore
from synapse.types import JsonDict
from synapse.util.caches.descriptors import cached
from synapse.util.events import get_plain_text_topic_from_event_content
if TYPE_CHECKING:
from synapse.server import HomeServer
@@ -611,7 +612,9 @@ class StatsStore(StateDeltasStore):
elif event.type == EventTypes.Name:
room_state["name"] = event.content.get("name")
elif event.type == EventTypes.Topic:
room_state["topic"] = event.content.get("topic")
room_state["topic"] = get_plain_text_topic_from_event_content(
event.content
)
elif event.type == EventTypes.RoomAvatar:
room_state["avatar"] = event.content.get("url")
elif event.type == EventTypes.CanonicalAlias:

View File

@@ -18,26 +18,8 @@
# [This file includes modifications made by New Vector Limited]
#
#
from synapse._pydantic_compat import BaseModel, Extra
from synapse.util.pydantic_models import ParseModel
class RequestBodyModel(BaseModel):
"""A custom version of Pydantic's BaseModel which
- ignores unknown fields and
- does not allow fields to be overwritten after construction,
but otherwise uses Pydantic's default behaviour.
Ignoring unknown fields is a useful default. It means that clients can provide
unstable field not known to the server without the request being refused outright.
Subclassing in this way is recommended by
https://pydantic-docs.helpmanual.io/usage/model_config/#change-behaviour-globally
"""
class Config:
# By default, ignore fields that we don't recognise.
extra = Extra.ignore
# By default, don't allow fields to be reassigned after parsing.
allow_mutation = False
class RequestBodyModel(ParseModel):
    """Base class for models describing request bodies.

    Adds nothing on top of `ParseModel`; its configuration is inherited unchanged.
    """

View File

@@ -13,6 +13,11 @@
#
#
from typing import Any, List, Optional
from synapse._pydantic_compat import Field, StrictStr, ValidationError, validator
from synapse.types import JsonDict
from synapse.util.pydantic_models import ParseModel
from synapse.util.stringutils import random_string
@@ -27,3 +32,100 @@ def generate_fake_event_id() -> str:
A string intended to look like an event ID, but with no actual meaning.
"""
return "$" + random_string(43)
class MTextRepresentation(ParseModel):
    """
    A single textual representation, consisting of a body and an optional mimetype.

    See `TextualRepresentation` in the Matrix specification.
    """

    # The text of this representation.
    body: StrictStr
    # The mimetype of `body`. May be omitted; the default is spelled out explicitly
    # rather than relying on pydantic's implicit `None` for `Optional` fields.
    mimetype: Optional[StrictStr] = None
class MTopic(ParseModel):
    """
    `m.room.topic` -> `content` -> `m.topic`

    Textual representation of the room topic in different mimetypes. Added in Matrix v1.15.

    See `TopicContentBlock` in the Matrix specification.
    """

    # The default is given explicitly instead of relying on pydantic's implicit
    # `None` for `Optional` fields.
    m_text: Optional[List[MTextRepresentation]] = Field(None, alias="m.text")
    """
    An ordered array of textual representations in different mimetypes.
    """

    # Because "Receivers SHOULD use the first representation in the array that they
    # understand.", we ignore invalid representations in the `m.text` field and use
    # what we can.
    @validator("m_text", pre=True)
    def ignore_invalid_representations(
        cls, m_text: Any
    ) -> Optional[List[MTextRepresentation]]:
        """
        Keep only the entries of `m.text` that parse as `MTextRepresentation`.

        Args:
            m_text: The raw value supplied for `m.text`.

        Returns:
            The successfully parsed representations, in their original order.

        Raises:
            ValueError: If `m_text` is not a list.
        """
        if not isinstance(m_text, list):
            raise ValueError("m.text must be a list")
        representations: List[MTextRepresentation] = []
        for element in m_text:
            try:
                representations.append(MTextRepresentation.parse_obj(element))
            except ValidationError:
                # Drop this entry only; later entries may still be usable.
                continue
        return representations
class TopicContent(ParseModel):
    """
    Represents the `content` field of an `m.room.topic` event
    """

    topic: StrictStr
    """
    The topic in plain text.
    """

    # The default is given explicitly instead of relying on pydantic's implicit
    # `None` for `Optional` fields.
    m_topic: Optional[MTopic] = Field(None, alias="m.topic")
    """
    Textual representation of the room topic in different mimetypes.
    """

    # We ignore invalid `m.topic` fields as we can always fall back to the plain-text
    # `topic` field.
    @validator("m_topic", pre=True)
    def ignore_invalid_m_topic(cls, m_topic: Any) -> Optional[MTopic]:
        """
        Parse the raw `m.topic` value, discarding it if it is invalid.

        Args:
            m_topic: The raw value supplied for `m.topic`.

        Returns:
            The parsed `MTopic`, or `None` if the value fails validation.
        """
        try:
            return MTopic.parse_obj(m_topic)
        except ValidationError:
            return None
def get_plain_text_topic_from_event_content(content: JsonDict) -> Optional[str]:
    """
    Given the `content` of an `m.room.topic` event, returns the plain-text topic
    representation. Prefers pulling plain-text from the newer `m.topic` field if
    available with a fallback to `topic`.

    Args:
        content: The `content` field of an `m.room.topic` event.

    Returns:
        The plain-text topic, or `None` if `content` cannot be parsed as topic
        content (for example when the `topic` field is missing or not a string).
    """

    try:
        topic_content = TopicContent.parse_obj(content)
    except ValidationError:
        return None

    # Find the first `text/plain` topic ("Receivers SHOULD use the first
    # representation in the array that they understand.")
    if topic_content.m_topic and topic_content.m_topic.m_text:
        for representation in topic_content.m_topic.m_text:
            # The mimetype property defaults to `text/plain` if omitted.
            if not representation.mimetype or representation.mimetype == "text/plain":
                return representation.body

    # Fallback to the plain-old `topic` field if there isn't any `text/plain` topic
    # representation available.
    return topic_content.topic

View File

@@ -0,0 +1,39 @@
#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright (C) 2024 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
#
from synapse._pydantic_compat import BaseModel, Extra
class ParseModel(BaseModel):
    """A custom version of Pydantic's BaseModel which

     - ignores unknown fields and
     - does not allow fields to be overwritten after construction,

    but otherwise uses Pydantic's default behaviour.

    This is a shared base for models that parse data of different origins (for
    example config sections, request bodies and event content), so unknown fields
    are ignored rather than rejected. In the future, we could change this so that
    unknown fields cause a ValidationError where that is appropriate, provided the
    error messages are meaningful to whoever supplied the data.

    Subclassing in this way is recommended by
    https://pydantic-docs.helpmanual.io/usage/model_config/#change-behaviour-globally
    """

    class Config:
        # By default, ignore fields that we don't recognise.
        extra = Extra.ignore
        # By default, don't allow fields to be reassigned after parsing.
        allow_mutation = False

View File

@@ -757,6 +757,59 @@ class RoomsCreateTestCase(RoomBase):
assert channel.resource_usage is not None
self.assertEqual(37, channel.resource_usage.db_txn_count)
def test_post_room_topic(self) -> None:
    """A `topic` passed to `/createRoom` ends up in both the legacy and rich fields."""
    # Create a room, supplying only the `topic` key.
    create_channel = self.make_request(
        "POST", "/createRoom", b'{"topic":"shenanigans"}'
    )
    self.assertEqual(HTTPStatus.OK, create_channel.code)
    self.assertTrue("room_id" in create_channel.json_body)
    room_id = create_channel.json_body["room_id"]

    # Read back the topic state event; its content should be derived from the
    # `topic` key.
    state_channel = self.make_request("GET", f"/rooms/{room_id}/state/m.room.topic")
    self.assertEqual(HTTPStatus.OK, state_channel.code)
    expected_content = {
        "topic": "shenanigans",
        "m.topic": {"m.text": [{"body": "shenanigans"}]},
    }
    self.assertEqual(expected_content, state_channel.json_body)
def test_post_room_topic_initial_state(self) -> None:
    """A topic event supplied via `initial_state` is stored with its content as given."""
    # Create a room whose topic is supplied only as an `initial_state` event.
    request_body = b'{"initial_state":[{"type": "m.room.topic", "content": {"topic": "foobar"}}]}'
    create_channel = self.make_request("POST", "/createRoom", request_body)
    self.assertEqual(HTTPStatus.OK, create_channel.code)
    self.assertTrue("room_id" in create_channel.json_body)
    room_id = create_channel.json_body["room_id"]

    # Read back the topic state event; it should match the initial state content.
    state_channel = self.make_request("GET", f"/rooms/{room_id}/state/m.room.topic")
    self.assertEqual(HTTPStatus.OK, state_channel.code)
    expected_content = {"topic": "foobar"}
    self.assertEqual(expected_content, state_channel.json_body)
def test_post_room_topic_overriding_initial_state(self) -> None:
    """When both are given, the `topic` key wins over the `initial_state` topic event."""
    # Create a room with a topic in `initial_state` and a different `topic` key.
    request_body = b'{"initial_state":[{"type": "m.room.topic", "content": {"topic": "foobar"}}], "topic":"shenanigans"}'
    create_channel = self.make_request("POST", "/createRoom", request_body)
    self.assertEqual(HTTPStatus.OK, create_channel.code)
    self.assertTrue("room_id" in create_channel.json_body)
    room_id = create_channel.json_body["room_id"]

    # Read back the topic state event; its content should come from the `topic` key.
    state_channel = self.make_request("GET", f"/rooms/{room_id}/state/m.room.topic")
    self.assertEqual(HTTPStatus.OK, state_channel.code)
    expected_content = {
        "topic": "shenanigans",
        "m.topic": {"m.text": [{"body": "shenanigans"}]},
    }
    self.assertEqual(expected_content, state_channel.json_body)
def test_post_room_visibility_key(self) -> None:
# POST with visibility config key, expect new room id
channel = self.make_request("POST", "/createRoom", b'{"visibility":"private"}')

118
tests/util/test_events.py Normal file
View File

@@ -0,0 +1,118 @@
#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright (C) 2025 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
# Originally licensed under the Apache License, Version 2.0:
# <http://www.apache.org/licenses/LICENSE-2.0>.
#
# [This file includes modifications made by New Vector Limited]
#
#
from synapse.util.events import get_plain_text_topic_from_event_content
from tests import unittest
class EventsTestCase(unittest.TestCase):
    """Tests for `get_plain_text_topic_from_event_content`."""

    def test_get_plain_text_topic_no_topic(self) -> None:
        # Neither a legacy nor a rich topic is present, so nothing can be extracted.
        content: dict = {}
        self.assertIsNone(get_plain_text_topic_from_event_content(content))

    def test_get_plain_text_topic_no_rich_topic(self) -> None:
        # With only the legacy field present, the legacy topic is returned.
        content = {"topic": "shenanigans"}
        self.assertEqual(
            "shenanigans", get_plain_text_topic_from_event_content(content)
        )

    def test_get_plain_text_topic_rich_topic_without_representations(self) -> None:
        # The rich topic has an empty representation list, so the legacy topic is used.
        content = {"topic": "shenanigans", "m.topic": {"m.text": []}}
        self.assertEqual(
            "shenanigans", get_plain_text_topic_from_event_content(content)
        )

    def test_get_plain_text_topic_rich_topic_without_plain_text_representation(
        self,
    ) -> None:
        # The rich topic only carries HTML, so the legacy topic is used.
        html_only = [{"mimetype": "text/html", "body": "<strong>foobar</strong>"}]
        content = {"topic": "shenanigans", "m.topic": {"m.text": html_only}}
        self.assertEqual(
            "shenanigans", get_plain_text_topic_from_event_content(content)
        )

    def test_get_plain_text_topic_rich_topic_with_plain_text_representation(
        self,
    ) -> None:
        # An explicit `text/plain` representation takes precedence over the legacy topic.
        plain_only = [{"mimetype": "text/plain", "body": "foobar"}]
        content = {"topic": "shenanigans", "m.topic": {"m.text": plain_only}}
        self.assertEqual("foobar", get_plain_text_topic_from_event_content(content))

    def test_get_plain_text_topic_rich_topic_with_implicit_plain_text_representation(
        self,
    ) -> None:
        # A representation without a mimetype counts as plain text and is preferred.
        implicit_plain = [{"body": "foobar"}]
        content = {"topic": "shenanigans", "m.topic": {"m.text": implicit_plain}}
        self.assertEqual("foobar", get_plain_text_topic_from_event_content(content))

    def test_get_plain_text_topic_rich_topic_with_invalid_plain_text_representation(
        self,
    ) -> None:
        # The only representation has a non-string body, so the legacy topic is used.
        invalid_only = [{"body": 1337}]
        content = {"topic": "shenanigans", "m.topic": {"m.text": invalid_only}}
        self.assertEqual(
            "shenanigans", get_plain_text_topic_from_event_content(content)
        )

    def test_get_plain_text_topic_rich_topic_with_invalid_and_second_valid_plain_text_representation(
        self,
    ) -> None:
        # The invalid first representation is skipped and the valid second one is used.
        invalid_then_valid = [{"body": 1337}, {"body": "foobar"}]
        content = {"topic": "shenanigans", "m.topic": {"m.text": invalid_then_valid}}
        self.assertEqual("foobar", get_plain_text_topic_from_event_content(content))

    def test_get_plain_text_topic_rich_topic_with_plain_text_and_other_representation(
        self,
    ) -> None:
        # HTML comes first, followed by plain text; the plain-text one is picked.
        html_then_plain = [
            {"mimetype": "text/html", "body": "<strong>foobar</strong>"},
            {"mimetype": "text/plain", "body": "foobar"},
        ]
        content = {"topic": "shenanigans", "m.topic": {"m.text": html_then_plain}}
        self.assertEqual("foobar", get_plain_text_topic_from_event_content(content))