Machine-readable config description (#17892)

This commit is contained in:
V02460
2025-06-03 11:29:38 +02:00
committed by GitHub
parent 2436512a25
commit fae72f181b
12 changed files with 8356 additions and 2479 deletions

View File

@@ -78,6 +78,18 @@ jobs:
mdbook build
cp book/welcome_and_overview.html book/index.html
- name: Prepare and publish schema files
run: |
sudo apt-get update && sudo apt-get install -y yq
mkdir -p book/schema
# Remove developer notice before publishing.
rm schema/v*/Do\ not\ edit\ files\ in\ this\ folder
# Copy schema files that are independent from current Synapse version.
cp -r -t book/schema schema/v*/
# Convert config schema from YAML source file to JSON.
yq < schema/synapse-config.schema.yaml \
> book/schema/synapse-config.schema.json
# Deploy to the target directory.
- name: Deploy to gh pages
uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0

54
.github/workflows/schema.yaml vendored Normal file
View File

@@ -0,0 +1,54 @@
name: Schema

on:
  pull_request:
    paths:
      - schema/**
      - docs/usage/configuration/config_documentation.md
      # The generated documentation also depends on the generator itself.
      - scripts-dev/gen_config_documentation.py

jobs:
  validate-schema:
    name: Ensure Synapse config schema is valid
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
      - uses: actions/setup-python@8d9ed9ac5c53483de85588cdf95a591a75ab9f55 # v5.5.0
        with:
          python-version: "3.x"
      - name: Install check-jsonschema
        run: pip install check-jsonschema==0.33.0

      - name: Validate meta schema
        run: check-jsonschema --check-metaschema schema/v*/meta.schema.json
      - name: Validate schema
        run: |-
          # Please bump on introduction of a new meta schema.
          LATEST_META_SCHEMA_VERSION=v1
          check-jsonschema \
            --schemafile="schema/$LATEST_META_SCHEMA_VERSION/meta.schema.json" \
            schema/synapse-config.schema.yaml
      - name: Validate default config
        # Populates the empty instance with default values and checks against the schema.
        run: |-
          echo "{}" | check-jsonschema \
            --fill-defaults --schemafile=schema/synapse-config.schema.yaml -

  check-doc-generation:
    name: Ensure generated documentation is up-to-date
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
      - uses: actions/setup-python@8d9ed9ac5c53483de85588cdf95a591a75ab9f55 # v5.5.0
        with:
          python-version: "3.x"
      - name: Install PyYAML
        run: pip install PyYAML==6.0.2

      - name: Regenerate config documentation
        run: |
          scripts-dev/gen_config_documentation.py \
            schema/synapse-config.schema.yaml \
            > docs/usage/configuration/config_documentation.md
      - name: Error in case of any differences
        # Errors if there are now any modified files (untracked files are ignored).
        # Plain `git diff` exits 0 even when it prints changes; `--exit-code`
        # is required for the step to fail on a stale generated file.
        run: git diff --exit-code

1
changelog.d/17892.doc Normal file
View File

@@ -0,0 +1 @@
Generate config documentation from JSON Schema file.

View File

@@ -63,6 +63,18 @@ mdbook serve
The URL at which the docs can be viewed will be logged.
## Synapse configuration documentation
The [Configuration
Manual](https://element-hq.github.io/synapse/latest/usage/configuration/config_documentation.html)
page is generated from a YAML file,
[schema/synapse-config.schema.yaml](../schema/synapse-config.schema.yaml). To
add new options or modify existing ones, first edit that file, then run
[scripts-dev/gen_config_documentation.py](../scripts-dev/gen_config_documentation.py)
to generate an updated Configuration Manual markdown file.
Build the book as described above to preview it in a web browser.
## Configuration and theming
The look and behaviour of the website is configured by the [book.toml](../book.toml) file

View File

@@ -255,7 +255,7 @@ line to `/etc/default/matrix-synapse`:
LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libjemalloc.so.2
*Note*: You may need to set `PYTHONMALLOC=malloc` to ensure that `jemalloc` can accurately calculate memory usage. By default, Python uses its internal small-object allocator, which may interfere with jemalloc's ability to track memory consumption correctly. This could prevent the [cache_autotuning](../configuration/config_documentation.md#caches-and-associated-values) feature from functioning as expected, as the Python allocator may not reach the memory threshold set by `max_cache_memory_usage`, thus not triggering the cache eviction process.
*Note*: You may need to set `PYTHONMALLOC=malloc` to ensure that `jemalloc` can accurately calculate memory usage. By default, Python uses its internal small-object allocator, which may interfere with jemalloc's ability to track memory consumption correctly. This could prevent the [cache_autotuning](../configuration/config_documentation.md#caches) feature from functioning as expected, as the Python allocator may not reach the memory threshold set by `max_cache_memory_usage`, thus not triggering the cache eviction process.
This made a significant difference on Python 2.7 - it's unclear how
much of an improvement it provides on Python 3.x.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,2 @@
If you want to update the meta schema, copy this folder and increase its version
number instead.

View File

@@ -0,0 +1,29 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://element-hq.github.io/synapse/latest/schema/v1/meta.schema.json",
"$vocabulary": {
"https://json-schema.org/draft/2020-12/vocab/core": true,
"https://json-schema.org/draft/2020-12/vocab/applicator": true,
"https://json-schema.org/draft/2020-12/vocab/unevaluated": true,
"https://json-schema.org/draft/2020-12/vocab/validation": true,
"https://json-schema.org/draft/2020-12/vocab/meta-data": true,
"https://json-schema.org/draft/2020-12/vocab/format-annotation": true,
"https://json-schema.org/draft/2020-12/vocab/content": true,
"https://element-hq.github.io/synapse/latest/schema/v1/vocab/documentation": false
},
"$ref": "https://json-schema.org/draft/2020-12/schema",
"properties": {
"io.element.type_name": {
"type": "string",
"description": "Human-readable type of a schema that is displayed instead of the standard JSON Schema types like `object` or `integer`. In case the JSON Schema type contains `null`, this information should be presented alongside the human-readable type name.",
"examples": ["duration", "byte size"]
},
"io.element.post_description": {
"type": "string",
"description": "Additional description of a schema, better suited to be placed less prominently in the generated documentation, e.g., at the end of a section after listings of items and properties.",
"examples": [
"### Advanced uses\n\nThe spent coffee grounds can be added to compost for improving soil and growing plants."
]
}
}
}

View File

@@ -0,0 +1,11 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta http-equiv="refresh" content="0; URL=../meta.schema.json">
<meta charset="UTF-8">
<title>Redirecting to ../meta.schema.json…</title>
</head>
<body>
<p>Redirecting to <a href="../meta.schema.json">../meta.schema.json</a></p>
</body>
</html>

View File

@@ -0,0 +1,503 @@
#!/usr/bin/env python3
"""Generate Synapse documentation from JSON Schema file."""
import json
import re
import sys
from typing import Any, Optional
import yaml
# Static Markdown preamble of the generated document. main() prints it
# verbatim ahead of the per-option sections.
HEADER = """<!-- Document auto-generated by scripts-dev/gen_config_documentation.py -->
# Configuring Synapse
This is intended as a guide to the Synapse configuration. The behavior of a Synapse instance can be modified
through the many configuration settings documented here — each config option is explained,
including what the default is, how to change the default and what sort of behaviour the setting governs.
Also included is an example configuration for each setting. If you don't want to spend a lot of time
thinking about options, the config as generated sets sensible defaults for all values. Do note however that the
database defaults to SQLite, which is not recommended for production usage. You can read more on this subject
[here](../../setup/installation.md#using-postgresql).
## Config Conventions
Configuration options that take a time period can be set using a number
followed by a letter. Letters have the following meanings:
* `s` = second
* `m` = minute
* `h` = hour
* `d` = day
* `w` = week
* `y` = year
For example, setting `redaction_retention_period: 5m` would remove redacted
messages from the database after 5 minutes, rather than 5 months.
In addition, configuration options referring to size use the following suffixes:
* `K` = KiB, or 1024 bytes
* `M` = MiB, or 1,048,576 bytes
* `G` = GiB, or 1,073,741,824 bytes
* `T` = TiB, or 1,099,511,627,776 bytes
For example, setting `max_avatar_size: 10M` means that Synapse will not accept files larger than 10,485,760 bytes
for a user avatar.
## Config Validation
The configuration file can be validated with the following command:
```bash
python -m synapse.config read <config key to print> -c <path to config>
```
To validate the entire file, omit `read <config key to print>`:
```bash
python -m synapse.config -c <path to config>
```
To see how to set other options, check the help reference:
```bash
python -m synapse.config --help
```
### YAML
The configuration file is a [YAML](https://yaml.org/) file, which means that certain syntax rules
apply if you want your config file to be read properly. A few helpful things to know:
* `#` before any option in the config will comment out that setting and either a default (if available) will
be applied or Synapse will ignore the setting. Thus, in example #1 below, the setting will be read and
applied, but in example #2 the setting will not be read and a default will be applied.
Example #1:
```yaml
pid_file: DATADIR/homeserver.pid
```
Example #2:
```yaml
#pid_file: DATADIR/homeserver.pid
```
* Indentation matters! The indentation before a setting
will determine whether a given setting is read as part of another
setting, or considered on its own. Thus, in example #1, the `enabled` setting
is read as a sub-option of the `presence` setting, and will be properly applied.
However, the lack of indentation before the `enabled` setting in example #2 means
that when reading the config, Synapse will consider both `presence` and `enabled` as
different settings. In this case, `presence` has no value, and thus a default applied, and `enabled`
is an option that Synapse doesn't recognize and thus ignores.
Example #1:
```yaml
presence:
enabled: false
```
Example #2:
```yaml
presence:
enabled: false
```
In this manual, all top-level settings (ones with no indentation) are identified
at the beginning of their section (i.e. "### `example_setting`") and
the sub-options, if any, are identified and listed in the body of the section.
In addition, each setting has an example of its usage, with the proper indentation
shown.
"""
# Chapter headings of the generated manual, keyed by config option name.
# section() emits a `## <title>` heading followed by the description
# immediately before the `###` heading of the option with that name.
SECTION_HEADERS = {
    "modules": {
        "title": "Modules",
        "description": (
            "Server admins can expand Synapse's functionality with external "
            "modules.\n\n"
            "See [here](../../modules/index.md) for more documentation on how "
            "to configure or create custom modules for Synapse."
        ),
    },
    "server_name": {
        "title": "Server",
        "description": "Define your homeserver name and other base options.",
    },
    "admin_contact": {
        "title": "Homeserver blocking",
        "description": "Useful options for Synapse admins.",
    },
    "tls_certificate_path": {
        "title": "TLS",
        "description": "Options related to TLS.",
    },
    "federation_domain_whitelist": {
        "title": "Federation",
        "description": "Options related to federation.",
    },
    "event_cache_size": {
        "title": "Caching",
        "description": "Options related to caching.",
    },
    "database": {
        "title": "Database",
        "description": "Config options related to database settings.",
    },
    "log_config": {
        "title": "Logging",
        "description": ("Config options related to logging."),
    },
    "rc_message": {
        "title": "Ratelimiting",
        "description": (
            "Options related to ratelimiting in Synapse.\n\n"
            "Each ratelimiting configuration is made of two parameters:\n"
            "- `per_second`: number of requests a client can send per second.\n"
            "- `burst_count`: number of requests a client can send before "
            "being throttled."
        ),
    },
    "enable_authenticated_media": {
        "title": "Media Store",
        "description": "Config options related to Synapse's media store.",
    },
    "recaptcha_public_key": {
        "title": "Captcha",
        "description": (
            "See [here](../../CAPTCHA_SETUP.md) for full details on setting up captcha."
        ),
    },
    "turn_uris": {
        "title": "TURN",
        "description": ("Options related to adding a TURN server to Synapse."),
    },
    "enable_registration": {
        "title": "Registration",
        "description": (
            "Registration can be rate-limited using the parameters in the "
            "[Ratelimiting](#ratelimiting) section of this manual."
        ),
    },
    "session_lifetime": {
        "title": "User session management",
        "description": ("Config options related to user session management."),
    },
    "enable_metrics": {
        "title": "Metrics",
        "description": ("Config options related to metrics."),
    },
    "room_prejoin_state": {
        "title": "API Configuration",
        "description": ("Config settings related to the client/server API."),
    },
    "signing_key_path": {
        "title": "Signing Keys",
        "description": ("Config options relating to signing keys."),
    },
    "saml2_config": {
        "title": "Single sign-on integration",
        "description": (
            "The following settings can be used to make Synapse use a single sign-on provider for authentication, instead of its internal password database.\n\n"
            "You will probably also want to set the following options to `false` to disable the regular login/registration flows:\n"
            "* [`enable_registration`](#enable_registration)\n"
            "* [`password_config.enabled`](#password_config)"
        ),
    },
    "push": {
        "title": "Push",
        "description": ("Configuration settings related to push notifications."),
    },
    "encryption_enabled_by_default_for_room_type": {
        "title": "Rooms",
        "description": ("Config options relating to rooms."),
    },
    "opentracing": {
        "title": "Opentracing",
        "description": ("Configuration options related to Opentracing support."),
    },
    "worker_replication_secret": {
        "title": "Coordinating workers",
        "description": (
            "Configuration options related to workers which belong in the main config file (usually called `homeserver.yaml`). A Synapse deployment can scale horizontally by running multiple Synapse processes called _workers_. Incoming requests are distributed between workers to handle higher loads. Some workers are privileged and can accept requests from other workers.\n\n"
            "As a result, the worker configuration is divided into two parts.\n\n"
            "1. The first part (in this section of the manual) defines which shardable tasks are delegated to privileged workers. This allows unprivileged workers to make requests to a privileged worker to act on their behalf.\n"
            "2. [The second part](#individual-worker-configuration) controls the behaviour of individual workers in isolation.\n\n"
            "For guidance on setting up workers, see the [worker documentation](../../workers.md)."
        ),
    },
    "worker_app": {
        "title": "Individual worker configuration",
        "description": (
            # Fixed typo: previously read "should be not be provided".
            "These options configure an individual worker, in its worker configuration file. They should not be provided when configuring the main process.\n\n"
            "Note also the configuration above for [coordinating a cluster of workers](#coordinating-workers).\n\n"
            "For guidance on setting up workers, see the [worker documentation](../../workers.md)."
        ),
    },
    "background_updates": {
        "title": "Background Updates",
        "description": ("Configuration settings related to background updates."),
    },
    "auto_accept_invites": {
        "title": "Auto Accept Invites",
        "description": (
            "Configuration settings related to automatically accepting invites."
        ),
    },
}
# Whitespace that indent() inserts in front of each non-empty line.
INDENT = " "
# Set to True by error(); main() checks it to exit with a failure status.
has_error = False
def error(text: str) -> None:
    """Record that a problem occurred and report it on stderr.

    Flips the module-level ``has_error`` flag to True and prints the message
    prefixed with ``ERROR: ``.
    """
    global has_error
    has_error = True
    message = f"ERROR: {text}"
    print(message, file=sys.stderr)
def indent(text: str, first_line: bool = True) -> str:
    """Prefix every non-empty line of ``text`` with ``INDENT``.

    Empty lines are left untouched. When ``first_line`` is False, the first
    line is not indented; only the lines following a newline are.
    """
    # A newline directly followed by another character starts a non-empty line.
    shifted = re.sub(r"\n(?=[^\n])", "\n" + INDENT, text)
    if not first_line:
        return shifted
    # Indent the very first line only if it exists and is not empty.
    if shifted[:1] in ("", "\n"):
        return shifted
    return INDENT + shifted
def em(s: Optional[str]) -> str:
    """Wrap the text in Markdown emphasis markers; empty or None gives ""."""
    if not s:
        return ""
    return f"*{s}*"
def a(s: Optional[str], suffix: str = " ") -> str:
    """Return ``s`` followed by ``suffix`` (a space by default).

    An empty or None ``s`` yields the empty string, without the suffix.
    """
    if not s:
        return ""
    return s + suffix
def p(s: Optional[str], prefix: str = " ") -> str:
    """Return ``s`` preceded by ``prefix`` (a space by default).

    An empty or None ``s`` yields the empty string, without the prefix.
    """
    if not s:
        return ""
    return prefix + s
def resolve_local_refs(schema: dict) -> dict:
    """Returns the given schema with local $ref properties replaced by their keywords.

    Crude approximation that will override keywords: when a schema and the
    definition it references share a keyword, the definition's value wins and
    a warning is printed to stderr. ``properties`` of both are merged, again
    with the definition taking precedence.

    The input is not modified; new dicts and lists are built for the result.
    A ``$ref`` nested inside a definition is not followed when that definition
    is inlined. A schema without ``$defs`` is accepted as long as it contains
    no ``$ref``.

    Raises:
        KeyError: if a ``$ref`` names a definition missing from ``$defs``.
    """
    defs = schema.get("$defs", {})

    def replace_ref(node: Any) -> Any:
        if isinstance(node, list):
            return [replace_ref(v) for v in node]
        if not isinstance(node, dict):
            return node

        def_name = ""
        the_def: dict = {}
        if "$ref" in node:
            # Found a "$ref" key.
            def_name = node["$ref"].removeprefix("#/$defs/")
            the_def = defs[def_name]

        # Drop "$ref" from the copy instead of deleting it from the input:
        # definitions are looked up in the caller's dicts, so mutating them
        # would make the result depend on traversal order.
        new_dict = {k: replace_ref(v) for k, v in node.items() if k != "$ref"}

        if common_keys := (new_dict.keys() & the_def.keys()) - {"properties"}:
            print(
                f"WARN: '{def_name}' overrides keys '{common_keys}'",
                file=sys.stderr,
            )

        new_dict_props = new_dict.get("properties", {})
        the_def_props = the_def.get("properties", {})
        if common_props := new_dict_props.keys() & the_def_props.keys():
            print(
                f"WARN: '{def_name}' overrides properties '{common_props}'",
                file=sys.stderr,
            )

        merged = {**new_dict, **the_def}
        if merged_props := {**new_dict_props, **the_def_props}:
            merged["properties"] = merged_props
        return merged

    return replace_ref(schema)
def sep(values: dict) -> str:
    """Separator to use between parts of the description.

    A description that already spans several paragraphs gets further text as
    a new paragraph; otherwise the text continues the same paragraph.
    """
    description = values.get("description", "")
    if "\n\n" in description:
        return "\n\n"
    return " "
def type_str(values: dict) -> str:
    """Parenthesised type of the current value, or "" if none is declared."""
    # A custom type name takes precedence over the JSON Schema type, for
    # documentation clarity.
    custom_name = values.get("io.element.type_name")
    if custom_name:
        return f"({custom_name})"

    schema_type = values.get("type")
    if not schema_type:
        return ""

    names = schema_type if isinstance(schema_type, list) else [schema_type]
    return "(" + "|".join(names) + ")"
def items(values: dict) -> str:
    """A block listing the properties of array items.

    Returns "" when the schema has no ``items`` or the item schema declares
    no ``properties``.
    """
    item_schema = values.get("items")
    if not item_schema:
        return ""
    item_props = item_schema.get("properties")
    if not item_props:
        return ""
    bullets = [sub_section(name, sub) for name, sub in item_props.items()]
    return "\nOptions for each entry include:\n\n" + "\n".join(bullets)
def properties(values: dict) -> str:
    """A block listing the properties of an object.

    Returns "" when the schema declares no ``properties``.
    """
    props = values.get("properties")
    if not props:
        return ""
    bullets = [sub_section(name, sub) for name, sub in props.items()]
    return "\nThis setting has the following sub-options:\n\n" + "\n".join(bullets)
def sub_section(prop: str, values: dict) -> str:
    """Formats a bullet point about the given sub-property.

    The bullet holds the property name, its type, its description (with the
    default appended, if any) and, indented below, nested listings of array
    item options and object sub-options. A missing description is reported
    through error() and replaced by a placeholder.
    """

    def default_note() -> str:
        if "default" not in values:
            return ""
        return f"Defaults to `{json.dumps(values['default'])}`."

    def describe() -> str:
        text = values.get("description")
        if not text:
            error(f"missing description for {prop}")
            return "MISSING DESCRIPTION\n"
        return f"{text}{p(default_note(), sep(values))}\n"

    bullet_head = f"* `{prop}`{p(type_str(values))}: "
    # Continuation lines of the description are indented below the bullet.
    body = indent(describe(), first_line=False)
    nested_items = indent(items(values))
    nested_properties = indent(properties(values))
    return bullet_head + body + nested_items + nested_properties
def section(prop: str, values: dict) -> str:
    """Formats a section about the given property.

    The section consists of, in order: a horizontal rule, an optional chapter
    header from SECTION_HEADERS, the option title, its description, listings
    of array item options and object sub-options, the default configuration
    (for complex defaults), example configurations and an optional trailing
    description.
    """

    def is_simple_default() -> bool:
        """Whether the default is simple enough for a one-liner."""
        default = values.get("default")
        if not default:
            return True
        return not isinstance(default, (dict, list))

    def default_str() -> str:
        if "default" not in values:
            t = values.get("type", [])
            if "object" == t or "object" in t:
                # Skip objects as they probably have child defaults.
                return ""
            return "There is no default for this option."
        if not is_simple_default():
            # Complex defaults are shown as a code block instead.
            return ""
        return f"Defaults to `{json.dumps(values['default'])}`."

    def header() -> str:
        try:
            entry = SECTION_HEADERS[prop]
            return f"## {entry['title']}\n\n{entry['description']}\n\n---\n"
        except KeyError:
            return ""

    def title() -> str:
        return f"### `{prop}`\n"

    def describe() -> str:
        text = values.get("description")
        if not text:
            error(f"missing description for {prop}")
            return "MISSING DESCRIPTION\n"
        type_prefix = a(em(type_str(values)))
        default_suffix = p(default_str(), sep(values))
        return f"\n{type_prefix}{text}{default_suffix}\n"

    def example_str(example: Any) -> str:
        dumped = yaml.dump({prop: example}, sort_keys=False)
        return f"```yaml\n{dumped}```\n"

    def default_example() -> str:
        if is_simple_default():
            return ""
        return f"\nDefault configuration:\n{example_str(values['default'])}"

    def example_block() -> str:
        example_list = values.get("examples")
        if not example_list:
            return ""
        rendered = "\n".join(example_str(e) for e in example_list)
        if len(example_list) >= 2:
            label = "Example configurations"
        else:
            label = "Example configuration"
        return f"\n{label}:\n{rendered}"

    def post_description() -> str:
        # Optional text placed after the list of fields, e.g. a subsection
        # that consists only of text.
        trailing = values.get("io.element.post_description")
        if not trailing:
            return ""
        return f"\n{trailing}\n\n"

    parts = [
        "---\n",
        header(),
        title(),
        describe(),
        items(values),
        properties(values),
        default_example(),
        example_block(),
        post_description(),
    ]
    return "".join(parts)
def main() -> None:
    """Read the schema file named on the command line and print the manual.

    The generated Markdown is written to stdout. Exits with status 1 on a
    usage error and with status 2 if any error was reported while generating
    (the document is still printed in that case).
    """
    from typing import NoReturn

    def usage(err_msg: str) -> NoReturn:
        """Print the error and usage help to stderr, then exit with status 1."""
        script_name = (sys.argv[:1] or ["__main__.py"])[0]
        print(err_msg, file=sys.stderr)
        print(f"Usage: {script_name} <JSON Schema file>", file=sys.stderr)
        print(f"\n{__doc__}", file=sys.stderr)
        # sys.exit rather than the builtin exit(): the latter is injected by
        # the `site` module and is not guaranteed to exist.
        sys.exit(1)

    def read_schema_file_arg() -> Any:
        """Load the schema file given as the only argument (parsed as YAML)."""
        if len(sys.argv) > 2:
            usage("Too many arguments.")
        if not (filepath := (sys.argv[1:] or [""])[0]):
            usage("No schema file provided.")
        # Explicit encoding so the result does not depend on the locale.
        with open(filepath, encoding="utf-8") as f:
            return yaml.safe_load(f)

    schema = read_schema_file_arg()
    schema = resolve_local_refs(schema)

    sections = (section(k, v) for k, v in schema["properties"].items())
    print(HEADER + "".join(sections), end="")

    if has_error:
        print("There were errors.", file=sys.stderr)
        sys.exit(2)


if __name__ == "__main__":
    main()

View File

@@ -254,6 +254,12 @@ def _prepare() -> None:
# Update the version specified in pyproject.toml.
subprocess.check_output(["poetry", "version", new_version])
# Update config schema $id.
schema_file = "schema/synapse-config.schema.yaml"
major_minor_version = ".".join(new_version.split(".")[:2])
url = f"https://element-hq.github.io/synapse/schema/synapse/v{major_minor_version}/synapse-config.schema.json"
subprocess.check_output(["sed", "-i", f"0,/^\\$id: .*/s||$id: {url}|", schema_file])
# Generate changelogs.
generate_and_write_changelog(synapse_repo, current_version, new_version)