Compare commits

...

37 Commits

Author SHA1 Message Date
Devon Hudson
070356d1c6 Merge branch 'develop' into devon/too-large-http-response 2025-12-05 22:30:55 +00:00
Devon Hudson
93f58e6eed Add validation for content_length too large 2025-12-05 15:28:45 -07:00
Devon Hudson
e07fd8ba08 Better error messages 2025-12-05 13:57:34 -07:00
Devon Hudson
bea38d47fc Update tests/test_server.py
Co-authored-by: Eric Eastwood <erice@element.io>
2025-12-05 20:53:54 +00:00
Devon Hudson
e7fe25f11c Update synapse/http/site.py
Co-authored-by: Eric Eastwood <erice@element.io>
2025-12-05 20:53:31 +00:00
Devon Hudson
b5b464364e Refactor extracted function to be class member 2025-12-04 17:01:51 -07:00
Devon Hudson
b0954372bd Add changelog entry 2025-12-04 16:40:09 -07:00
Devon Hudson
485283cefa Update log 2025-12-04 15:32:35 -07:00
Devon Hudson
2a2cd4b2cd Move log 2025-12-04 15:25:03 -07:00
Devon Hudson
b2c219e693 Add return type 2025-12-04 12:43:50 -07:00
Devon Hudson
7b0b9b2e3b Refactor error response writing to be reusable 2025-12-04 12:29:32 -07:00
Devon Hudson
5ab4736f0e Fix test content-length header addition logic 2025-12-04 09:33:58 -07:00
Devon Hudson
c2b8d2ca1b Fix mypy errors 2025-12-04 09:08:20 -07:00
Devon Hudson
72c3ab3a6d Add comment about why we reject multiple content-length headers 2025-12-04 08:59:34 -07:00
Devon Hudson
7648b3b882 Fix linter errors 2025-12-04 08:55:55 -07:00
Devon Hudson
d1b04297b3 Add test for too small Content-Length header 2025-12-04 08:51:47 -07:00
Devon Hudson
c647a0acd7 Fix exception handling 2025-12-04 08:51:26 -07:00
Devon Hudson
e4f2194763 Refactor content-length extraction 2025-12-03 17:54:53 -07:00
Devon Hudson
9e2b2a15ab Add FIXME comment 2025-12-03 17:24:13 -07:00
Devon Hudson
8c594c9daa Add more comments 2025-12-03 17:18:30 -07:00
Devon Hudson
536bf2d49a Address review comments 2025-12-03 17:17:26 -07:00
Devon Hudson
3f9af8d12a Remove invalid comment 2025-12-03 17:15:01 -07:00
Devon Hudson
36973e019d Merge branch 'develop' into devon/too-large-http-response 2025-12-03 17:12:52 -07:00
Devon Hudson
e8d8a3b79a Temp changes for incorrect Content-Length 2025-12-03 17:03:20 -07:00
Devon Hudson
7e86d3de4b Log rejection at info level 2025-12-03 17:03:13 -07:00
Devon Hudson
296ed42ed8 Update synapse/http/site.py
Co-authored-by: Eric Eastwood <erice@element.io>
2025-12-02 00:11:01 +00:00
Devon Hudson
426b676f3f Remove unused variable 2025-11-26 17:01:29 -07:00
Devon Hudson
21b114c9a6 Don't add duplicate Content-Length header in tests 2025-11-26 16:55:04 -07:00
Devon Hudson
5f4a097d1a Add ref to upstream twisted changes needed 2025-11-26 16:22:00 -07:00
Devon Hudson
9c4536bc3f Rejig max_request_size usage to use common constants everywhere 2025-11-26 14:09:24 -07:00
Devon Hudson
85d84f5236 Upade comment wording 2025-11-26 13:59:18 -07:00
Devon Hudson
5cf7b962eb Address review comments 2025-11-26 13:56:20 -07:00
Devon Hudson
26a2c8f0fa Log redacted uri in content-type check 2025-11-21 12:12:06 -07:00
Devon Hudson
c3be0742b3 Log redacted uri 2025-11-21 12:11:24 -07:00
Devon Hudson
27b59189a5 Merge branch 'develop' into devon/too-large-http-response 2025-11-21 17:15:12 +00:00
Devon Hudson
1e8eaa1d47 Add changelog entry 2025-11-21 10:15:04 -07:00
Devon Hudson
40d60c8976 Add HTTP 413 response when incoming request is too large 2025-11-21 10:07:47 -07:00
10 changed files with 346 additions and 51 deletions

1
changelog.d/19212.misc Normal file
View File

@@ -0,0 +1 @@
Respond with useful error codes with `Content-Length` header/s are invalid.

View File

@@ -29,6 +29,19 @@ from typing import Final
# the max size of a (canonical-json-encoded) event
MAX_PDU_SIZE = 65536
# The maximum allowed size of an HTTP request.
# Other than media uploads, the biggest request we expect to see is a fully-loaded
# /federation/v1/send request.
#
# The main thing in such a request is up to 50 PDUs, and up to 100 EDUs. PDUs are
# limited to 65536 bytes (possibly slightly more if the sender didn't use canonical
# json encoding); there is no specced limit to EDUs (see
# https://github.com/matrix-org/matrix-doc/issues/3121).
#
# in short, we somewhat arbitrarily limit requests to 200 * 64K (about 12.5M)
#
MAX_REQUEST_SIZE = 200 * MAX_PDU_SIZE
# Max/min size of ints in canonical JSON
CANONICALJSON_MAX_INT = (2**53) - 1
CANONICALJSON_MIN_INT = -CANONICALJSON_MAX_INT

View File

@@ -59,7 +59,7 @@ from twisted.python.threadpool import ThreadPool
from twisted.web.resource import Resource
import synapse.util.caches
from synapse.api.constants import MAX_PDU_SIZE
from synapse.api.constants import MAX_REQUEST_SIZE
from synapse.app import check_bind_error
from synapse.config import ConfigError
from synapse.config._base import format_config_error
@@ -895,17 +895,8 @@ def sdnotify(state: bytes) -> None:
def max_request_body_size(config: HomeServerConfig) -> int:
"""Get a suitable maximum size for incoming HTTP requests"""
# Other than media uploads, the biggest request we expect to see is a fully-loaded
# /federation/v1/send request.
#
# The main thing in such a request is up to 50 PDUs, and up to 100 EDUs. PDUs are
# limited to 65536 bytes (possibly slightly more if the sender didn't use canonical
# json encoding); there is no specced limit to EDUs (see
# https://github.com/matrix-org/matrix-doc/issues/3121).
#
# in short, we somewhat arbitrarily limit requests to 200 * 64K (about 12.5M)
#
max_request_size = 200 * MAX_PDU_SIZE
# Baseline default for any request that isn't configured in the homeserver config
max_request_size = MAX_REQUEST_SIZE
# if we have a media repo enabled, we may need to allow larger uploads than that
if config.media.can_load_media_repo:

View File

@@ -19,6 +19,7 @@
#
#
import contextlib
import json
import logging
import time
from http import HTTPStatus
@@ -36,6 +37,7 @@ from twisted.web.http import HTTPChannel
from twisted.web.resource import IResource, Resource
from twisted.web.server import Request
from synapse.api.errors import Codes
from synapse.config.server import ListenerConfig
from synapse.http import get_request_user_agent, redact_uri
from synapse.http.proxy import ProxySite
@@ -46,7 +48,7 @@ from synapse.logging.context import (
PreserveLoggingContext,
)
from synapse.metrics import SERVER_NAME_LABEL
from synapse.types import ISynapseReactor, Requester
from synapse.types import ISynapseReactor, JsonDict, Requester
if TYPE_CHECKING:
import opentracing
@@ -59,6 +61,16 @@ logger = logging.getLogger(__name__)
_next_request_seq = 0
class ContentLengthError(Exception):
"""Raised when content-length validation fails."""
def __init__(self, status: HTTPStatus, errcode: str, message: str):
self.status = status
self.errcode = errcode
self.message = message
super().__init__(message)
class SynapseRequest(Request):
"""Class which encapsulates an HTTP request to synapse.
@@ -144,9 +156,125 @@ class SynapseRequest(Request):
self.synapse_site.site_tag,
)
def _respond_with_error(self, error_code: HTTPStatus, error_json: JsonDict) -> None:
"""Send an error response and close the connection."""
self.code = error_code.value
self.code_message = bytes(error_code.phrase, "ascii")
error_response_bytes = json.dumps(error_json).encode()
self.responseHeaders.setRawHeaders(b"Content-Type", [b"application/json"])
self.responseHeaders.setRawHeaders(
b"Content-Length", [f"{len(error_response_bytes)}"]
)
self.write(error_response_bytes)
self.loseConnection()
def _get_content_length_from_headers(self) -> int | None:
"""Attempts to obtain the `Content-Length` value from the request's headers.
Returns:
Content length as `int` if present. Otherwise `None`.
Raises:
ContentLengthError: if multiple `Content-Length` headers are present or the
value is not an `int`.
"""
content_length_headers = self.requestHeaders.getRawHeaders(b"Content-Length")
if content_length_headers is None:
return None
# If there are multiple `Content-Length` headers return an error.
# We don't want to even try to pick the right one if there are multiple
# as we could run into problems similar to request smuggling vulnerabilities
# which rely on the mismatch of how different systems interpret information.
if len(content_length_headers) != 1:
raise ContentLengthError(
HTTPStatus.BAD_REQUEST,
Codes.UNKNOWN,
"Multiple Content-Length headers received",
)
try:
return int(content_length_headers[0])
except (ValueError, TypeError):
raise ContentLengthError(
HTTPStatus.BAD_REQUEST,
Codes.UNKNOWN,
"Content-Length header value is not a valid integer",
)
def _validate_content_length(self) -> None:
"""Validate Content-Length header and actual content size.
Raises:
ContentLengthError: If validation fails.
"""
# we should have a `content` by now.
assert self.content, "_validate_content_length() called before gotLength()"
content_length = self._get_content_length_from_headers()
if content_length is None:
return
actual_content_length = self.content.tell()
if content_length > self._max_request_body_size:
logger.info(
"Rejecting request from %s because Content-Length %d exceeds maximum size %d: %s %s",
self.client,
content_length,
self._max_request_body_size,
self.get_method(),
self.get_redacted_uri(),
)
raise ContentLengthError(
HTTPStatus.REQUEST_ENTITY_TOO_LARGE,
Codes.TOO_LARGE,
f"Request content is too large (>{self._max_request_body_size})",
)
if content_length != actual_content_length:
comparison = (
"smaller" if content_length < actual_content_length else "larger"
)
logger.info(
"Rejecting request from %s because Content-Length %d is %s than the request content size %d: %s %s",
self.client,
content_length,
comparison,
actual_content_length,
self.get_method(),
self.get_redacted_uri(),
)
raise ContentLengthError(
HTTPStatus.BAD_REQUEST,
Codes.UNKNOWN,
f"Rejecting request as the Content-Length header value {content_length} "
f"is {comparison} than the actual request content size {actual_content_length}",
)
# Twisted machinery: this method is called by the Channel once the full request has
# been received, to dispatch the request to a resource.
def requestReceived(self, command: bytes, path: bytes, version: bytes) -> None:
# In the case of a Content-Length header being present, and it's value being too
# large, throw a proper error to make debugging issues due to overly large requests much
# easier. Currently we handle such cases in `handleContentChunk` and abort the
# connection without providing a proper HTTP response.
#
# Attempting to write an HTTP response from within `handleContentChunk` does not
# work, so the code here has been added to at least provide a response in the
# case of the Content-Length header being present.
self.method, self.uri = command, path
self.clientproto = version
try:
self._validate_content_length()
except ContentLengthError as e:
self._respond_with_error(
e.status, {"errcode": e.errcode, "error": e.message}
)
return
# We're patching Twisted to bail/abort early when we see someone trying to upload
# `multipart/form-data` so we can avoid Twisted parsing the entire request body into
# in-memory (specific problem of this specific `Content-Type`). This protects us
@@ -154,26 +282,27 @@ class SynapseRequest(Request):
# the server with a `MemoryError`, or carefully block just enough resources to cause
# all other requests to fail.
#
# FIXME: This can be removed once we Twisted releases a fix and we update to a
# FIXME: This can be removed once Twisted releases a fix and we update to a
# version that is patched
def requestReceived(self, command: bytes, path: bytes, version: bytes) -> None:
# See: https://github.com/element-hq/synapse/security/advisories/GHSA-rfq8-j7rh-8hf2
if command == b"POST":
ctype = self.requestHeaders.getRawHeaders(b"content-type")
if ctype and b"multipart/form-data" in ctype[0]:
self.method, self.uri = command, path
self.clientproto = version
logger.warning(
"Aborting connection from %s because `content-type: multipart/form-data` is unsupported: %s %s",
self.client,
self.get_method(),
self.get_redacted_uri(),
)
self.code = HTTPStatus.UNSUPPORTED_MEDIA_TYPE.value
self.code_message = bytes(
HTTPStatus.UNSUPPORTED_MEDIA_TYPE.phrase, "ascii"
)
self.responseHeaders.setRawHeaders(b"content-length", [b"0"])
logger.warning(
"Aborting connection from %s because `content-type: multipart/form-data` is unsupported: %s %s",
self.client,
command,
path,
)
# FIXME: Return a better error response here similar to the
# `error_response_json` returned in other code paths here.
self.responseHeaders.setRawHeaders(b"Content-Length", [b"0"])
self.write(b"")
self.loseConnection()
return

View File

@@ -22,6 +22,7 @@
from twisted.internet.address import IPv6Address
from twisted.internet.testing import MemoryReactor, StringTransport
from synapse.app._base import max_request_body_size
from synapse.app.homeserver import SynapseHomeServer
from synapse.server import HomeServer
from synapse.util.clock import Clock
@@ -143,3 +144,104 @@ class SynapseRequestTestCase(HomeserverTestCase):
# we should get a 415
self.assertRegex(transport.value().decode(), r"^HTTP/1\.1 415 ")
def test_content_length_too_large(self) -> None:
"""HTTP requests with Content-Length exceeding max size should be rejected with 413"""
self.hs.start_listening()
# find the HTTP server which is configured to listen on port 0
(port, factory, _backlog, interface) = self.reactor.tcpServers[0]
self.assertEqual(interface, "::")
self.assertEqual(port, 0)
# complete the connection and wire it up to a fake transport
client_address = IPv6Address("TCP", "::1", 2345)
protocol = factory.buildProtocol(client_address)
transport = StringTransport()
protocol.makeConnection(transport)
# Send a request with Content-Length header that exceeds the limit.
# Default max is 50MB (from media max_upload_size), so send something larger.
oversized_length = 1 + max_request_body_size(self.hs.config)
protocol.dataReceived(
b"POST / HTTP/1.1\r\n"
b"Connection: close\r\n"
b"Content-Length: " + str(oversized_length).encode() + b"\r\n"
b"\r\n"
b"" + b"x" * oversized_length + b"\r\n"
b"\r\n"
)
# Advance the reactor to process the request
while not transport.disconnecting:
self.reactor.advance(1)
# We should get a 413 Content Too Large
response = transport.value().decode()
self.assertRegex(response, r"^HTTP/1\.1 413 ")
self.assertSubstring("M_TOO_LARGE", response)
def test_too_many_content_length_headers(self) -> None:
"""HTTP requests with multiple Content-Length headers should be rejected with 400"""
self.hs.start_listening()
# find the HTTP server which is configured to listen on port 0
(port, factory, _backlog, interface) = self.reactor.tcpServers[0]
self.assertEqual(interface, "::")
self.assertEqual(port, 0)
# complete the connection and wire it up to a fake transport
client_address = IPv6Address("TCP", "::1", 2345)
protocol = factory.buildProtocol(client_address)
transport = StringTransport()
protocol.makeConnection(transport)
protocol.dataReceived(
b"POST / HTTP/1.1\r\n"
b"Connection: close\r\n"
b"Content-Length: " + str(5).encode() + b"\r\n"
b"Content-Length: " + str(5).encode() + b"\r\n"
b"\r\n"
b"" + b"xxxxx" + b"\r\n"
b"\r\n"
)
# Advance the reactor to process the request
while not transport.disconnecting:
self.reactor.advance(1)
# We should get a 400
response = transport.value().decode()
self.assertRegex(response, r"^HTTP/1\.1 400 ")
def test_invalid_content_length_headers(self) -> None:
"""HTTP requests with invalid Content-Length header should be rejected with 400"""
self.hs.start_listening()
# find the HTTP server which is configured to listen on port 0
(port, factory, _backlog, interface) = self.reactor.tcpServers[0]
self.assertEqual(interface, "::")
self.assertEqual(port, 0)
# complete the connection and wire it up to a fake transport
client_address = IPv6Address("TCP", "::1", 2345)
protocol = factory.buildProtocol(client_address)
transport = StringTransport()
protocol.makeConnection(transport)
protocol.dataReceived(
b"POST / HTTP/1.1\r\n"
b"Connection: close\r\n"
b"Content-Length: eight\r\n"
b"\r\n"
b"" + b"xxxxx" + b"\r\n"
b"\r\n"
)
# Advance the reactor to process the request
while not transport.disconnecting:
self.reactor.advance(1)
# We should get a 400
response = transport.value().decode()
self.assertRegex(response, r"^HTTP/1\.1 400 ")

View File

@@ -1728,9 +1728,6 @@ class UsernamePickerTestCase(HomeserverTestCase):
content_is_form=True,
custom_headers=[
("Cookie", "username_mapping_session=" + session_id),
# old versions of twisted don't do form-parsing without a valid
# content-length header.
("Content-Length", str(len(content))),
],
)
self.assertEqual(chan.code, 302, chan.result)
@@ -1818,9 +1815,6 @@ class UsernamePickerTestCase(HomeserverTestCase):
content_is_form=True,
custom_headers=[
("Cookie", "username_mapping_session=" + session_id),
# old versions of twisted don't do form-parsing without a valid
# content-length header.
("Content-Length", str(len(content))),
],
)
self.assertEqual(chan.code, 302, chan.result)

View File

@@ -2590,7 +2590,6 @@ class AuthenticatedMediaTestCase(unittest.HomeserverTestCase):
self.tok,
shorthand=False,
content_type=b"image/png",
custom_headers=[("Content-Length", str(67))],
)
self.assertEqual(channel.code, 200)
res = channel.json_body.get("content_uri")
@@ -2750,7 +2749,6 @@ class AuthenticatedMediaTestCase(unittest.HomeserverTestCase):
self.tok,
shorthand=False,
content_type=b"image/png",
custom_headers=[("Content-Length", str(67))],
)
self.assertEqual(channel.code, 200)
res = channel.json_body.get("content_uri")
@@ -2909,7 +2907,6 @@ class MediaUploadLimits(unittest.HomeserverTestCase):
access_token=self.tok,
shorthand=False,
content_type=b"text/plain",
custom_headers=[("Content-Length", str(size))],
)
def test_upload_under_limit(self) -> None:
@@ -3074,7 +3071,6 @@ class MediaUploadLimitsModuleOverrides(unittest.HomeserverTestCase):
access_token=tok,
shorthand=False,
content_type=b"text/plain",
custom_headers=[("Content-Length", str(size))],
)
def test_upload_under_limit(self) -> None:

View File

@@ -612,7 +612,6 @@ class RestHelper:
filename: The filename of the media to be uploaded
expect_code: The return code to expect from attempting to upload the media
"""
image_length = len(image_data)
path = "/_matrix/media/r0/upload?filename=%s" % (filename,)
channel = make_request(
self.reactor,
@@ -621,7 +620,6 @@ class RestHelper:
path,
content=image_data,
access_token=tok,
custom_headers=[("Content-Length", str(image_length))],
)
assert channel.code == expect_code, "Expected: %d, got: %d, resp: %r" % (

View File

@@ -81,6 +81,7 @@ from twisted.web.http_headers import Headers
from twisted.web.resource import IResource
from twisted.web.server import Request, Site
from synapse.api.constants import MAX_REQUEST_SIZE
from synapse.config.database import DatabaseConnectionConfig
from synapse.config.homeserver import HomeServerConfig
from synapse.events.auto_accept_invites import InviteAutoAccepter
@@ -241,7 +242,6 @@ class FakeChannel:
def loseConnection(self) -> None:
self.unregisterProducer()
self.transport.loseConnection()
# Type ignore: mypy doesn't like the fact that producer isn't an IProducer.
def registerProducer(self, producer: IProducer, streaming: bool) -> None:
@@ -428,13 +428,24 @@ def make_request(
channel = FakeChannel(site, reactor, ip=client_ip)
req = request(channel, site, our_server_name="test_server")
req = request(
channel,
site,
our_server_name="test_server",
max_request_body_size=MAX_REQUEST_SIZE,
)
channel.request = req
req.content = BytesIO(content)
# Twisted expects to be at the end of the content when parsing the request.
req.content.seek(0, SEEK_END)
# If `Content-Length` was passed in as a custom header, don't automatically add it
# here.
if custom_headers is None or not any(
(k if isinstance(k, bytes) else k.encode("ascii")) == b"Content-Length"
for k, _ in custom_headers
):
# Old version of Twisted (<20.3.0) have issues with parsing x-www-form-urlencoded
# bodies if the Content-Length header is missing
req.requestHeaders.addRawHeader(

View File

@@ -212,6 +212,66 @@ class JsonResourceTests(unittest.TestCase):
self.assertEqual(channel.code, 200)
self.assertNotIn("body", channel.result)
def test_content_larger_than_content_length(self) -> None:
"""
HTTP requests with content size exceeding Content-Length should be rejected with 400.
"""
def _callback(
request: SynapseRequest, **kwargs: object
) -> tuple[int, JsonDict]:
return 200, {}
res = JsonResource(self.homeserver)
res.register_paths(
"POST", [re.compile("^/_matrix/foo$")], _callback, "test_servlet"
)
channel = make_request(
self.reactor,
FakeSite(res, self.reactor),
b"POST",
b"/_matrix/foo",
{},
# Set the `Content-Length` value to be smaller than the actual content size
custom_headers=[("Content-Length", "1")],
# The request should disconnect early so don't await the result
await_result=False,
)
self.reactor.advance(0.1)
self.assertEqual(channel.code, 400)
def test_content_length_larger_than_content(self) -> None:
"""
HTTP requests with content size smaller than Content-Length should be rejected with 400.
"""
def _callback(
request: SynapseRequest, **kwargs: object
) -> tuple[int, JsonDict]:
return 200, {}
res = JsonResource(self.homeserver)
res.register_paths(
"POST", [re.compile("^/_matrix/foo$")], _callback, "test_servlet"
)
channel = make_request(
self.reactor,
FakeSite(res, self.reactor),
b"POST",
b"/_matrix/foo",
{},
# Set the `Content-Length` value to be larger than the actual content size
custom_headers=[("Content-Length", "10")],
# The request should disconnect early so don't await the result
await_result=False,
)
self.reactor.advance(0.1)
self.assertEqual(channel.code, 400)
class OptionsResourceTests(unittest.TestCase):
def setUp(self) -> None: