Compare commits

...

3 Commits

Author SHA1 Message Date
Erik Johnston
a48a2fb7a0 Fix regression where we did not report failure 2025-11-26 11:43:41 +00:00
Erik Johnston
5800e601d9 Newsfile 2025-11-26 11:43:41 +00:00
Erik Johnston
6fdb2b61a5 Use sqlglot to properly check SQL delta files 2025-11-26 11:43:35 +00:00
5 changed files with 128 additions and 82 deletions

View File

@@ -110,7 +110,7 @@ jobs:
- uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
with:
python-version: "3.x"
- run: "pip install 'click==8.1.1' 'GitPython>=3.1.20'"
- run: "pip install 'click==8.1.1' 'GitPython>=3.1.20' 'sqlglot>=28.0.0'"
- run: scripts-dev/check_schema_delta.py --force-colors
check-lockfile:

1
changelog.d/19224.misc Normal file
View File

@@ -0,0 +1 @@
Improve robustness of the SQL schema linting in CI.

62
poetry.lock generated
View File

@@ -31,7 +31,7 @@ description = "The ultimate Python library in building OAuth and OpenID Connect
optional = true
python-versions = ">=3.9"
groups = ["main"]
markers = "extra == \"all\" or extra == \"jwt\" or extra == \"oidc\""
markers = "extra == \"oidc\" or extra == \"jwt\" or extra == \"all\""
files = [
{file = "authlib-1.6.5-py2.py3-none-any.whl", hash = "sha256:3e0e0507807f842b02175507bdee8957a1d5707fd4afb17c32fb43fee90b6e3a"},
{file = "authlib-1.6.5.tar.gz", hash = "sha256:6aaf9c79b7cc96c900f0b284061691c5d4e61221640a948fe690b556a6d6d10b"},
@@ -446,7 +446,7 @@ description = "XML bomb protection for Python stdlib modules"
optional = true
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
groups = ["main"]
markers = "extra == \"all\" or extra == \"saml2\""
markers = "extra == \"saml2\" or extra == \"all\""
files = [
{file = "defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61"},
{file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"},
@@ -471,7 +471,7 @@ description = "XPath 1.0/2.0/3.0/3.1 parsers and selectors for ElementTree and l
optional = true
python-versions = ">=3.7"
groups = ["main"]
markers = "extra == \"all\" or extra == \"saml2\""
markers = "extra == \"saml2\" or extra == \"all\""
files = [
{file = "elementpath-4.1.5-py3-none-any.whl", hash = "sha256:2ac1a2fb31eb22bbbf817f8cf6752f844513216263f0e3892c8e79782fe4bb55"},
{file = "elementpath-4.1.5.tar.gz", hash = "sha256:c2d6dc524b29ef751ecfc416b0627668119d8812441c555d7471da41d4bacb8d"},
@@ -521,7 +521,7 @@ description = "Python wrapper for hiredis"
optional = true
python-versions = ">=3.8"
groups = ["main"]
markers = "extra == \"all\" or extra == \"redis\""
markers = "extra == \"redis\" or extra == \"all\""
files = [
{file = "hiredis-3.3.0-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:9937d9b69321b393fbace69f55423480f098120bc55a3316e1ca3508c4dbbd6f"},
{file = "hiredis-3.3.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:50351b77f89ba6a22aff430b993653847f36b71d444509036baa0f2d79d1ebf4"},
@@ -844,7 +844,7 @@ description = "Jaeger Python OpenTracing Tracer implementation"
optional = true
python-versions = ">=3.7"
groups = ["main"]
markers = "extra == \"all\" or extra == \"opentracing\""
markers = "extra == \"opentracing\" or extra == \"all\""
files = [
{file = "jaeger-client-4.8.0.tar.gz", hash = "sha256:3157836edab8e2c209bd2d6ae61113db36f7ee399e66b1dcbb715d87ab49bfe0"},
]
@@ -982,7 +982,7 @@ description = "A strictly RFC 4510 conforming LDAP V3 pure Python client library
optional = true
python-versions = "*"
groups = ["main"]
markers = "extra == \"all\" or extra == \"matrix-synapse-ldap3\""
markers = "extra == \"matrix-synapse-ldap3\" or extra == \"all\""
files = [
{file = "ldap3-2.9.1-py2.py3-none-any.whl", hash = "sha256:5869596fc4948797020d3f03b7939da938778a0f9e2009f7a072ccf92b8e8d70"},
{file = "ldap3-2.9.1.tar.gz", hash = "sha256:f3e7fc4718e3f09dda568b57100095e0ce58633bcabbed8667ce3f8fbaa4229f"},
@@ -998,7 +998,7 @@ description = "Powerful and Pythonic XML processing library combining libxml2/li
optional = true
python-versions = ">=3.8"
groups = ["main"]
markers = "extra == \"all\" or extra == \"url-preview\""
markers = "extra == \"url-preview\" or extra == \"all\""
files = [
{file = "lxml-6.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e77dd455b9a16bbd2a5036a63ddbd479c19572af81b624e79ef422f929eef388"},
{file = "lxml-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5d444858b9f07cefff6455b983aea9a67f7462ba1f6cbe4a21e8bf6791bf2153"},
@@ -1284,7 +1284,7 @@ description = "An LDAP3 auth provider for Synapse"
optional = true
python-versions = ">=3.7"
groups = ["main"]
markers = "extra == \"all\" or extra == \"matrix-synapse-ldap3\""
markers = "extra == \"matrix-synapse-ldap3\" or extra == \"all\""
files = [
{file = "matrix-synapse-ldap3-0.3.0.tar.gz", hash = "sha256:8bb6517173164d4b9cc44f49de411d8cebdb2e705d5dd1ea1f38733c4a009e1d"},
{file = "matrix_synapse_ldap3-0.3.0-py3-none-any.whl", hash = "sha256:8b4d701f8702551e98cc1d8c20dbed532de5613584c08d0df22de376ba99159d"},
@@ -1526,7 +1526,7 @@ description = "OpenTracing API for Python. See documentation at http://opentraci
optional = true
python-versions = "*"
groups = ["main"]
markers = "extra == \"all\" or extra == \"opentracing\""
markers = "extra == \"opentracing\" or extra == \"all\""
files = [
{file = "opentracing-2.4.0.tar.gz", hash = "sha256:a173117e6ef580d55874734d1fa7ecb6f3655160b8b8974a2a1e98e5ec9c840d"},
]
@@ -1716,7 +1716,7 @@ description = "psycopg2 - Python-PostgreSQL Database Adapter"
optional = true
python-versions = ">=3.9"
groups = ["main"]
markers = "extra == \"all\" or extra == \"postgres\""
markers = "extra == \"postgres\" or extra == \"all\""
files = [
{file = "psycopg2-2.9.11-cp310-cp310-win_amd64.whl", hash = "sha256:103e857f46bb76908768ead4e2d0ba1d1a130e7b8ed77d3ae91e8b33481813e8"},
{file = "psycopg2-2.9.11-cp311-cp311-win_amd64.whl", hash = "sha256:210daed32e18f35e3140a1ebe059ac29209dd96468f2f7559aa59f75ee82a5cb"},
@@ -1734,7 +1734,7 @@ description = ".. image:: https://travis-ci.org/chtd/psycopg2cffi.svg?branch=mas
optional = true
python-versions = "*"
groups = ["main"]
markers = "platform_python_implementation == \"PyPy\" and (extra == \"all\" or extra == \"postgres\")"
markers = "platform_python_implementation == \"PyPy\" and (extra == \"postgres\" or extra == \"all\")"
files = [
{file = "psycopg2cffi-2.9.0.tar.gz", hash = "sha256:7e272edcd837de3a1d12b62185eb85c45a19feda9e62fa1b120c54f9e8d35c52"},
]
@@ -1750,7 +1750,7 @@ description = "A Simple library to enable psycopg2 compatability"
optional = true
python-versions = "*"
groups = ["main"]
markers = "platform_python_implementation == \"PyPy\" and (extra == \"all\" or extra == \"postgres\")"
markers = "platform_python_implementation == \"PyPy\" and (extra == \"postgres\" or extra == \"all\")"
files = [
{file = "psycopg2cffi-compat-1.1.tar.gz", hash = "sha256:d25e921748475522b33d13420aad5c2831c743227dc1f1f2585e0fdb5c914e05"},
]
@@ -2031,7 +2031,7 @@ description = "A development tool to measure, monitor and analyze the memory beh
optional = true
python-versions = ">=3.6"
groups = ["main"]
markers = "extra == \"all\" or extra == \"cache-memory\""
markers = "extra == \"cache-memory\" or extra == \"all\""
files = [
{file = "Pympler-1.0.1-py3-none-any.whl", hash = "sha256:d260dda9ae781e1eab6ea15bacb84015849833ba5555f141d2d9b7b7473b307d"},
{file = "Pympler-1.0.1.tar.gz", hash = "sha256:993f1a3599ca3f4fcd7160c7545ad06310c9e12f70174ae7ae8d4e25f6c5d3fa"},
@@ -2091,7 +2091,7 @@ description = "Python implementation of SAML Version 2 Standard"
optional = true
python-versions = ">=3.9,<4.0"
groups = ["main"]
markers = "extra == \"all\" or extra == \"saml2\""
markers = "extra == \"saml2\" or extra == \"all\""
files = [
{file = "pysaml2-7.5.0-py3-none-any.whl", hash = "sha256:bc6627cc344476a83c757f440a73fda1369f13b6fda1b4e16bca63ffbabb5318"},
{file = "pysaml2-7.5.0.tar.gz", hash = "sha256:f36871d4e5ee857c6b85532e942550d2cf90ea4ee943d75eb681044bbc4f54f7"},
@@ -2116,7 +2116,7 @@ description = "Extensions to the standard Python datetime module"
optional = true
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
groups = ["main"]
markers = "extra == \"all\" or extra == \"saml2\""
markers = "extra == \"saml2\" or extra == \"all\""
files = [
{file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"},
{file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"},
@@ -2144,7 +2144,7 @@ description = "World timezone definitions, modern and historical"
optional = true
python-versions = "*"
groups = ["main"]
markers = "extra == \"all\" or extra == \"saml2\""
markers = "extra == \"saml2\" or extra == \"all\""
files = [
{file = "pytz-2022.7.1-py2.py3-none-any.whl", hash = "sha256:78f4f37d8198e0627c5f1143240bb0206b8691d8d7ac6d78fee88b78733f8c4a"},
{file = "pytz-2022.7.1.tar.gz", hash = "sha256:01a0681c4b9684a28304615eba55d1ab31ae00bf68ec157ec3708a8182dbbcd0"},
@@ -2548,7 +2548,7 @@ description = "Python client for Sentry (https://sentry.io)"
optional = true
python-versions = ">=3.6"
groups = ["main"]
markers = "extra == \"all\" or extra == \"sentry\""
markers = "extra == \"sentry\" or extra == \"all\""
files = [
{file = "sentry_sdk-2.46.0-py2.py3-none-any.whl", hash = "sha256:4eeeb60198074dff8d066ea153fa6f241fef1668c10900ea53a4200abc8da9b1"},
{file = "sentry_sdk-2.46.0.tar.gz", hash = "sha256:91821a23460725734b7741523021601593f35731808afc0bb2ba46c27b8acd91"},
@@ -2723,6 +2723,22 @@ files = [
{file = "sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88"},
]
[[package]]
name = "sqlglot"
version = "28.0.0"
description = "An easily customizable SQL parser and transpiler"
optional = false
python-versions = ">=3.9"
groups = ["dev"]
files = [
{file = "sqlglot-28.0.0-py3-none-any.whl", hash = "sha256:ac1778e7fa4812f4f7e5881b260632fc167b00ca4c1226868891fb15467122e4"},
{file = "sqlglot-28.0.0.tar.gz", hash = "sha256:cc9a651ef4182e61dac58aa955e5fb21845a5865c6a4d7d7b5a7857450285ad4"},
]
[package.extras]
dev = ["duckdb (>=0.6)", "maturin (>=1.4,<2.0)", "mypy", "pandas", "pandas-stubs", "pdoc", "pre-commit", "pyperf", "python-dateutil", "pytz", "ruff (==0.7.2)", "types-python-dateutil", "types-pytz", "typing_extensions"]
rs = ["sqlglotrs (==0.7.3)"]
[[package]]
name = "systemd-python"
version = "235"
@@ -2742,7 +2758,7 @@ description = "Tornado IOLoop Backed Concurrent Futures"
optional = true
python-versions = "*"
groups = ["main"]
markers = "extra == \"all\" or extra == \"opentracing\""
markers = "extra == \"opentracing\" or extra == \"all\""
files = [
{file = "threadloop-1.0.2-py2-none-any.whl", hash = "sha256:5c90dbefab6ffbdba26afb4829d2a9df8275d13ac7dc58dccb0e279992679599"},
{file = "threadloop-1.0.2.tar.gz", hash = "sha256:8b180aac31013de13c2ad5c834819771992d350267bddb854613ae77ef571944"},
@@ -2758,7 +2774,7 @@ description = "Python bindings for the Apache Thrift RPC system"
optional = true
python-versions = "*"
groups = ["main"]
markers = "extra == \"all\" or extra == \"opentracing\""
markers = "extra == \"opentracing\" or extra == \"all\""
files = [
{file = "thrift-0.16.0.tar.gz", hash = "sha256:2b5b6488fcded21f9d312aa23c9ff6a0195d0f6ae26ddbd5ad9e3e25dfc14408"},
]
@@ -2831,7 +2847,7 @@ description = "Tornado is a Python web framework and asynchronous networking lib
optional = true
python-versions = ">=3.9"
groups = ["main"]
markers = "extra == \"all\" or extra == \"opentracing\""
markers = "extra == \"opentracing\" or extra == \"all\""
files = [
{file = "tornado-6.5-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:f81067dad2e4443b015368b24e802d0083fecada4f0a4572fdb72fc06e54a9a6"},
{file = "tornado-6.5-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:9ac1cbe1db860b3cbb251e795c701c41d343f06a96049d6274e7c77559117e41"},
@@ -2965,7 +2981,7 @@ description = "non-blocking redis client for python"
optional = true
python-versions = "*"
groups = ["main"]
markers = "extra == \"all\" or extra == \"redis\""
markers = "extra == \"redis\" or extra == \"all\""
files = [
{file = "txredisapi-1.4.11-py3-none-any.whl", hash = "sha256:ac64d7a9342b58edca13ef267d4fa7637c1aa63f8595e066801c1e8b56b22d0b"},
{file = "txredisapi-1.4.11.tar.gz", hash = "sha256:3eb1af99aefdefb59eb877b1dd08861efad60915e30ad5bf3d5bf6c5cedcdbc6"},
@@ -3211,7 +3227,7 @@ description = "An XML Schema validator and decoder"
optional = true
python-versions = ">=3.7"
groups = ["main"]
markers = "extra == \"all\" or extra == \"saml2\""
markers = "extra == \"saml2\" or extra == \"all\""
files = [
{file = "xmlschema-2.4.0-py3-none-any.whl", hash = "sha256:dc87be0caaa61f42649899189aab2fd8e0d567f2cf548433ba7b79278d231a4a"},
{file = "xmlschema-2.4.0.tar.gz", hash = "sha256:d74cd0c10866ac609e1ef94a5a69b018ad16e39077bc6393408b40c6babee793"},
@@ -3346,4 +3362,4 @@ url-preview = ["lxml"]
[metadata]
lock-version = "2.1"
python-versions = ">=3.10.0,<4.0.0"
content-hash = "4f8d98723236eaf3d13f440dce95ec6cc3c4dc49ba3a0e45bf9cfbb51aca899c"
content-hash = "98b9062f48205a3bcc99b43ae665083d360a15d4a208927fa978df9c36fd5315"

View File

@@ -370,6 +370,9 @@ towncrier = ">=18.6.0rc1"
# Used for checking the Poetry lockfile
tomli = ">=1.2.3"
# Used for checking the schema delta files
sqlglot = ">=28.0.0"
[build-system]
# The upper bounds here are defensive, intended to prevent situations like

View File

@@ -9,15 +9,11 @@ from typing import Any
import click
import git
import sqlglot
import sqlglot.expressions
SCHEMA_FILE_REGEX = re.compile(r"^synapse/storage/schema/(.*)/delta/(.*)/(.*)$")
INDEX_CREATION_REGEX = re.compile(
r"CREATE .*INDEX .*ON ([a-z_0-9]+)", flags=re.IGNORECASE
)
INDEX_DELETION_REGEX = re.compile(r"DROP .*INDEX ([a-z_0-9]+)", flags=re.IGNORECASE)
TABLE_CREATION_REGEX = re.compile(
r"CREATE .*TABLE.* ([a-z_0-9]+)\s*\(", flags=re.IGNORECASE
)
# The base branch we want to check against. We use the main development branch
# on the assumption that is what we are developing against.
@@ -141,6 +137,9 @@ def main(force_colors: bool) -> None:
color=force_colors,
)
# Mark this run as not successful, but continue so that we report *all*
# errors.
return_code = 1
else:
click.secho(
f"All deltas are in the correct folder: {current_schema_version}!",
@@ -153,60 +152,87 @@ def main(force_colors: bool) -> None:
# and delta files are also numbered in order.
changed_delta_files.sort()
# Now check that we're not trying to create or drop indices. If we want to
# do that they should be in background updates. The exception is when we
# create indices on tables we've just created.
created_tables = set()
for delta_file in changed_delta_files:
with open(delta_file) as fd:
delta_lines = fd.readlines()
for line in delta_lines:
# Strip SQL comments
line = line.split("--", maxsplit=1)[0]
# Check and track any tables we create
match = TABLE_CREATION_REGEX.search(line)
if match:
table_name = match.group(1)
created_tables.add(table_name)
# Check for dropping indices, these are always banned
match = INDEX_DELETION_REGEX.search(line)
if match:
clause = match.group()
click.secho(
f"Found delta with index deletion: '{clause}' in {delta_file}",
fg="red",
bold=True,
color=force_colors,
)
click.secho(
" ↪ These should be in background updates.",
)
return_code = 1
# Check for index creation, which is only allowed for tables we've
# created.
match = INDEX_CREATION_REGEX.search(line)
if match:
clause = match.group()
table_name = match.group(1)
if table_name not in created_tables:
click.secho(
f"Found delta with index creation for existing table: '{clause}' in {delta_file}",
fg="red",
bold=True,
color=force_colors,
)
click.secho(
" ↪ These should be in background updates (or the table should be created in the same delta).",
)
return_code = 1
success = check_schema_delta(changed_delta_files, force_colors)
if not success:
return_code = 1
click.get_current_context().exit(return_code)
def check_schema_delta(delta_files: list[str], force_colors: bool) -> bool:
"""Check that the given schema delta files do not create or drop indices
inappropriately.
Index creation is only allowed on tables created in the same set of deltas.
Index deletion is never allowed and should be done in background updates.
"""
# The tables created in this delta
created_tables = set[str]()
# The indices created/dropped in this delta, a tuple of (table_name, sql)
created_indices = list[tuple[str, str]]()
# The indices dropped in this delta, just the sql
dropped_indices = list[str]()
for delta_file in delta_files:
with open(delta_file) as fd:
delta_contents = fd.read()
# Assume the SQL dialect from the file extension, defaulting to Postgres.
sql_lang = "postgres"
if delta_file.endswith(".sqlite"):
sql_lang = "sqlite"
statements = sqlglot.parse(delta_contents, read=sql_lang)
for statement in statements:
if isinstance(statement, sqlglot.expressions.Create):
if statement.kind == "TABLE":
assert isinstance(statement.this, sqlglot.expressions.Schema)
assert isinstance(statement.this.this, sqlglot.expressions.Table)
table_name = statement.this.this.name
created_tables.add(table_name)
elif statement.kind == "INDEX":
assert isinstance(statement.this, sqlglot.expressions.Index)
table_name = statement.this.args["table"].name
created_indices.append((table_name, statement.sql()))
elif isinstance(statement, sqlglot.expressions.Drop):
if statement.kind == "INDEX":
dropped_indices.append(statement.sql())
success = True
for table_name, clause in created_indices:
if table_name not in created_tables:
click.secho(
f"Found delta with index creation for existing table: '{clause}'",
fg="red",
bold=True,
color=force_colors,
)
click.secho(
" ↪ These should be in background updates (or the table should be created in the same delta).",
)
success = False
for clause in dropped_indices:
click.secho(
f"Found delta with index deletion: '{clause}'",
fg="red",
bold=True,
color=force_colors,
)
click.secho(
" ↪ These should be in background updates.",
)
success = False
return success
if __name__ == "__main__":
main()