Compare commits

...

6 Commits

Author SHA1 Message Date
Eric Eastwood
99b99c2f6f start_background_tasks in tests 2025-09-05 13:46:07 -05:00
Eric Eastwood
2f235e31a5 start_background_tasks where we start other background tasks
See https://github.com/element-hq/synapse/pull/18886#discussion_r2325789462
2025-09-05 13:40:43 -05:00
Eric Eastwood
ee290306ce Use correct changelog number 2025-09-05 11:43:36 -05:00
Eric Eastwood
313da3e21c Add changelog 2025-09-05 11:32:42 -05:00
Eric Eastwood
e39a2196bd Add context 2025-09-05 11:32:18 -05:00
Eric Eastwood
38b67ca04c Fix CPU time going backwards when daemonize
When we `daemonize`, we fork the process and cputime metrics get confused
about the per-thread resource usage appearing to go backwards because we're
comparing the resource usage (`rusage`) from the original process to the
forked process.

We now kick off the background tasks (`run_as_background_process`) after we
have forked the process so the `rusage` we record when we `start` is in the
same thread when we `stop`.

Bad log examples from before:
```
synapse.logging.context - ERROR - _schedule_next_expiry-0 - utime went backwards! 0.050467 < 0.886526
synapse.logging.context - ERROR - _schedule_db_events-0 - stime went backwards! 0.009941 < 0.155106
synapse.logging.context - ERROR - wake_destinations_needing_catchup-0 - stime went backwards! 0.010175 < 0.130923
synapse.logging.context - ERROR - resume_sync_partial_state_room-0 - utime went backwards! 0.052898 < 0.886526
```

Testing strategy:

 1. Run with `daemonize: true` in your `homeserver.yaml`
 1. `poetry run synapse_homeserver --config-path homeserver.yaml`
 1. Shutdown the server
 1. Look for any bad log entries in your homeserver logs:
    - `Expected logging context sentinel but found main`
    - `Expected logging context main was lost`
    - `utime went backwards!`/`stime went backwards!`
2025-09-05 10:31:40 -05:00
4 changed files with 32 additions and 9 deletions

1
changelog.d/18886.misc Normal file
View File

@@ -0,0 +1 @@
Start background tasks after we fork the process (daemonize).

View File

@@ -609,10 +609,28 @@ async def start(hs: "HomeServer") -> None:
setup_sentry(hs)
setup_sdnotify(hs)
# If background tasks are running on the main process or this is the worker in
# charge of them, start collecting the phone home stats and shared usage metrics.
# Register background tasks required by this server. This must be done
# somewhat manually due to the background tasks not being registered
# unless handlers are instantiated.
#
# While we could "start" these before the reactor runs, nothing will happen until
# the reactor is running, so we may as well do it here in `start`.
#
# Additionally, this means we also start them after we daemonize and fork the
# process which means we can avoid any potential problems with cputime metrics
# getting confused about the per-thread resource usage appearing to go backwards
# because we're comparing the resource usage (`rusage`) from the original process to
# the forked process.
if hs.config.worker.run_background_tasks:
hs.start_background_tasks()
# TODO: This should be moved to same pattern we use for other background tasks:
# Add to `REQUIRED_ON_BACKGROUND_TASK_STARTUP` and rely on
# `start_background_tasks` to start it.
await hs.get_common_usage_metrics_manager().setup()
# TODO: This feels like another pattern that should refactored as one of the
# `REQUIRED_ON_BACKGROUND_TASK_STARTUP`
start_phone_stats_home(hs)
# We now freeze all allocated objects in the hopes that (almost)

View File

@@ -366,12 +366,6 @@ class HomeServer(metaclass=abc.ABCMeta):
self.datastores = Databases(self.DATASTORE_CLASS, self)
logger.info("Finished setting up.")
# Register background tasks required by this server. This must be done
# somewhat manually due to the background tasks not being registered
# unless handlers are instantiated.
if self.config.worker.run_background_tasks:
self.setup_background_tasks()
def __del__(self) -> None:
"""
Called when an the homeserver is garbage collected.
@@ -410,7 +404,7 @@ class HomeServer(metaclass=abc.ABCMeta):
appropriate listeners.
"""
def setup_background_tasks(self) -> None:
def start_background_tasks(self) -> None:
"""
Some handlers have side effects on instantiation (like registering
background updates). This function causes them to be fetched, and

View File

@@ -1160,6 +1160,16 @@ def setup_test_homeserver(
with patch("synapse.storage.database.make_pool", side_effect=make_fake_db_pool):
hs.setup()
# Register background tasks required by this server. This must be done
# somewhat manually due to the background tasks not being registered
# unless handlers are instantiated.
#
# Since, we don't have to worry about `daemonize` (forking the process) in tests, we
# can just start the background tasks straight away after `hs.setup`. (compare this
# with where we call `hs.start_background_tasks()` outside of the test environment).
if hs.config.worker.run_background_tasks:
hs.start_background_tasks()
# Since we've changed the databases to run DB transactions on the same
# thread, we need to stop the event fetcher hogging that one thread.
hs.get_datastores().main.USE_DEDICATED_DB_THREADS_FOR_EVENT_FETCHING = False