Skip to content
Merged
Show file tree
Hide file tree
Changes from 35 commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
e6a3c46
Add background job to clear unreferenced state groups
devonh Feb 12, 2025
f9670ff
Add changelog entry
devonh Feb 12, 2025
9afe80b
Merge branch 'develop' into devon/unreferenced-bg
devonh Feb 12, 2025
20efdd2
Add test for unreferenced state group cleanup
devonh Feb 12, 2025
9c50123
Remove comments
devonh Feb 12, 2025
28679b6
Fix linter errors
devonh Feb 12, 2025
e606f42
Merge branch 'develop' into devon/unreferenced-bg
devonh Feb 12, 2025
977d83b
Order state_groups
devonh Feb 12, 2025
a487bcb
Update synapse/storage/controllers/purge_events.py
devonh Feb 13, 2025
69d72c2
Update synapse/storage/controllers/purge_events.py
devonh Feb 13, 2025
d8bfac4
Update synapse/storage/controllers/purge_events.py
devonh Feb 13, 2025
0955b7b
Update synapse/storage/controllers/purge_events.py
devonh Feb 13, 2025
ce87ba6
Change mark as pending deletion to do nothing on conflict
devonh Feb 13, 2025
3900791
Fix linter errors
devonh Feb 13, 2025
fe1df20
Move state group deletion job to background updates
devonh Feb 13, 2025
cc9e33b
Fix linter error
devonh Feb 13, 2025
801ca86
Pull over all the db calls since that's what it wants...
devonh Feb 13, 2025
ccb2158
Try OVERRIDING SYSTEM VALUE
devonh Feb 14, 2025
ca7ed76
Move OVERRIDING SYSTEM VALUE
devonh Feb 14, 2025
f45dcb1
Update synapse/storage/databases/state/bg_updates.py
devonh Feb 18, 2025
5e05af2
Update synapse/storage/databases/state/bg_updates.py
devonh Feb 18, 2025
7d1ce8d
Review comments & cleanup
devonh Feb 18, 2025
3c50f71
No string interpolation for sql
devonh Feb 18, 2025
7f611e0
Move background task to current schema version
devonh Feb 18, 2025
21dc067
Comment ignoring table port
devonh Feb 18, 2025
09a817f
Deduplicate find_unreferenced_groups
devonh Feb 18, 2025
042af6e
Don't reuse variables
devonh Feb 18, 2025
4cae2e5
Switch to not use single transaction
devonh Feb 18, 2025
ae367b2
Try casting
devonh Feb 18, 2025
977a8d8
Readd duplication
devonh Feb 19, 2025
d8f920b
Put it back in place
devonh Feb 19, 2025
6582fed
Put it back in place
devonh Feb 19, 2025
ecb8ed5
Fix linter error
devonh Feb 19, 2025
8eae7dd
Use multiple db pools
devonh Feb 19, 2025
8ef4a23
Remove duplication again
devonh Feb 19, 2025
dfa55a9
Lift logic to purge events controller
devonh Feb 19, 2025
02c2c87
Add IGNORED_BACKGROUND_UPDATES to port_db
devonh Feb 19, 2025
6851eaa
Fix error
devonh Feb 19, 2025
d1ca8c7
Update comment on ignoring state_groups_pending_deletion
devonh Feb 19, 2025
0f7c874
Try different sql
devonh Feb 19, 2025
35f15e7
Fixes
devonh Feb 19, 2025
89ec2a3
Update synapse/_scripts/synapse_port_db.py
devonh Feb 24, 2025
f5e59f2
Fix port_db syntax
devonh Feb 24, 2025
92d459d
Remove unnecessary code
devonh Feb 24, 2025
5f5f090
Only clear state groups up to max from first iteration
devonh Feb 24, 2025
8a479ee
Update synapse/storage/controllers/purge_events.py
devonh Feb 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/18154.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add background job to clear unreferenced state groups.
2 changes: 1 addition & 1 deletion docs/development/database_schema.md
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ by a unique name, the current status (stored in JSON), and some dependency infor
* Whether the update requires a previous update to be complete.
* A rough ordering for which to complete updates.

A new background updates needs to be added to the `background_updates` table:
A new background update needs to be added to the `background_updates` table:

```sql
INSERT INTO background_updates (ordering, update_name, depends_on, progress_json) VALUES
Expand Down
5 changes: 5 additions & 0 deletions synapse/_scripts/synapse_port_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,11 @@


IGNORED_TABLES = {
# Porting the auto-generated sequence in this table is non-trivial,
# and anything not ported will automatically be added back by the
# `delete_unreferenced_state_groups_bg_update` background task,
# so it is safe to skip porting this table.
"state_groups_pending_deletion",
# We don't port these tables, as they're a faff and we can regenerate
# them anyway.
"user_directory",
Expand Down
73 changes: 8 additions & 65 deletions synapse/storage/controllers/purge_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@
#
#

import itertools
import logging
from typing import TYPE_CHECKING, Collection, Mapping, Set
from typing import TYPE_CHECKING, Mapping

from synapse.logging.context import nested_logging_context
from synapse.metrics.background_process_metrics import wrap_as_background_process
from synapse.storage.databases import Databases
from synapse.storage.databases.state.bg_updates import find_unreferenced_groups

if TYPE_CHECKING:
from synapse.server import HomeServer
Expand Down Expand Up @@ -72,76 +72,19 @@ async def purge_history(
)

logger.info("[purge] finding state groups that can be deleted")
sg_to_delete = await self._find_unreferenced_groups(state_groups)
# sg_to_delete = await self._find_unreferenced_groups(state_groups)
sg_to_delete = await find_unreferenced_groups(
self.stores.main.db_pool,
self.stores.state.db_pool,
state_groups,
)

# Mark these state groups as pending deletion, they will actually
# get deleted automatically later.
await self.stores.state_deletion.mark_state_groups_as_pending_deletion(
sg_to_delete
)

async def _find_unreferenced_groups(
self, state_groups: Collection[int]
) -> Set[int]:
"""Used when purging history to figure out which state groups can be
deleted.

Starting from the given state groups, this searches in batches: each
batch is checked for state groups that are still referenced, and the
search then expands from the unreferenced ones to their previous and
next state groups until there is nothing new left to look at.

Args:
state_groups: Set of state groups referenced by events
that are going to be deleted.

Returns:
The set of state groups that can be deleted, i.e. every state group
visited by the search that was not found to be referenced.
"""
# Set of state groups that we have found to be referenced by events
referenced_groups = set()

# Set of state groups we've already seen
state_groups_seen = set(state_groups)

# Set of state groups to handle next.
next_to_search = set(state_groups)
while next_to_search:
# We bound size of groups we're looking up at once, to stop the
# SQL query getting too big
if len(next_to_search) < 100:
# Take everything that is left and start a fresh (empty) queue.
current_search = next_to_search
next_to_search = set()
else:
# Take an arbitrary batch of 100 and leave the rest queued.
current_search = set(itertools.islice(next_to_search, 100))
next_to_search -= current_search

referenced = await self.stores.main.get_referenced_state_groups(
current_search
)
referenced_groups |= referenced

# We don't continue iterating up the state group graphs for state
# groups that are referenced.
current_search -= referenced

edges = await self.stores.state.get_previous_state_groups(current_search)

prevs = set(edges.values())
# We don't bother re-handling groups we've already seen
prevs -= state_groups_seen
next_to_search |= prevs
state_groups_seen |= prevs

# We also check whether anything referencing these state groups is
# itself unreferenced. This helps ensure that we delete unreferenced
# state groups; if we don't, then we will de-delta them when we
# delete the other state groups, leading to increased DB usage.
next_edges = await self.stores.state.get_next_state_groups(current_search)
nexts = set(next_edges.keys())
nexts -= state_groups_seen
next_to_search |= nexts
state_groups_seen |= nexts

# Everything we visited, minus whatever turned out to be referenced.
to_delete = state_groups_seen - referenced_groups

return to_delete

@wrap_as_background_process("_delete_state_groups_loop")
async def _delete_state_groups_loop(self) -> None:
"""Background task that deletes any state groups that may be pending
Expand Down
Loading
Loading