From 952ebb5ee8b443210fca311fe4da25343ecfbb0f Mon Sep 17 00:00:00 2001 From: FrenchgGithubUser Date: Tue, 25 Nov 2025 16:46:11 +0100 Subject: [PATCH 1/2] fix: provide remote servers a way to find out about an event created during the remote join handshake --- changelog.d/19390.bugfix | 1 + synapse/federation/federation_server.py | 25 ++++++++++++++++++ synapse/handlers/message.py | 34 ++++++++++++++++++++++--- 3 files changed, 57 insertions(+), 3 deletions(-) create mode 100644 changelog.d/19390.bugfix diff --git a/changelog.d/19390.bugfix b/changelog.d/19390.bugfix new file mode 100644 index 00000000000..706f779174f --- /dev/null +++ b/changelog.d/19390.bugfix @@ -0,0 +1 @@ +Provide remote servers a way to find out about an event created during the remote join handshake. Contributed by @FrenchGithubUser and @jason-famedly @ Famedly. diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 1912d545f55..9052f6019c0 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -758,6 +758,10 @@ async def on_send_join_request( event, context = await self._on_send_membership_event( origin, content, Membership.JOIN, room_id ) + # Collect this now, the internal metadata of event (which should have it) doesn't + stream_ordering_of_join = ( + await self.store.get_current_room_stream_token_for_room_id(room_id) + ) prev_state_ids = await context.get_prev_state_ids() @@ -798,6 +802,27 @@ async def on_send_join_request( "members_omitted": caller_supports_partial_state, } + # Check the forward extremities for the room here. If there is more than one, it + # is likely that another event was created in the room during the + # make_join/send_join handshake. Without being able to determine how long until + # the next event will be created that references this 'missing event', + # proactively send a dummy extensible event that ties these forward extremities + # together.
The remote server should search out this missing event on its own. + # + # By not sending the 'missing event' directly, the stream ordering for it will + # be consistent between servers(in that it technically was created before the + # join itself). + + forward_extremities = await self.store._get_forward_extremeties_for_room( + room_id, stream_ordering_of_join.get_max_stream_pos() + ) + + if len(forward_extremities) > 1: + # I do not feel it is necessary to set this onto the FederationServer class + # itself. Its likelihood of being used is extremely low. Make it on-demand + _creation_handler = self.hs.get_event_creation_handler() + await _creation_handler._send_dummy_events_to_patch_room(room_id) + if servers_in_room is not None: resp["servers_in_room"] = list(servers_in_room) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 99ce120736d..76aa3c7c320 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -2220,7 +2220,36 @@ async def _send_dummy_events_to_fill_extremities(self) -> None: now = self.clock.time_msec() self._rooms_to_exclude_from_dummy_event_insertion[room_id] = now - async def _send_dummy_event_for_room(self, room_id: str) -> bool: + async def _send_dummy_events_to_patch_room(self, room_id: str) -> None: + """ + Send a dummy event into this room to patch in a missed forward extremity. + This should only be triggered during a remote join if there was a forward + extremity that occurred during the make_join/send_join handshake.
+ """ + async with self._worker_lock_handler.acquire_read_write_lock( + NEW_EVENT_DURING_PURGE_LOCK_NAME, room_id, write=False + ): + dummy_event_sent = await self._send_dummy_event_for_room( + room_id, proactively_send=True + ) + + if not dummy_event_sent: + # Did not find a valid user in the room, so remove from future attempts + # Exclusion is time limited, so the room will be rechecked in the future + # dependent on _DUMMY_EVENT_ROOM_EXCLUSION_EXPIRY + logger.info( + "Failed to send dummy event into room %s. Will exclude it from " + "future attempts until cache expires", + room_id, + ) + # This mapping is room_id -> time of last attempt (in ms) + self._rooms_to_exclude_from_dummy_event_insertion[room_id] = ( + self.clock.time_msec() + ) + + async def _send_dummy_event_for_room( + self, room_id: str, proactively_send: bool = False + ) -> bool: """Attempt to send a dummy event for the given room. Args: @@ -2252,8 +2281,7 @@ async def _send_dummy_event_for_room(self, room_id: str) -> bool: }, ) context = await unpersisted_context.persist(event) - - event.internal_metadata.proactively_send = False + event.internal_metadata.proactively_send = proactively_send # Since this is a dummy-event it is OK if it is sent by a # shadow-banned user. From 22cc6ac0b4799e97c5ddaeff52e4d8a25b3b879e Mon Sep 17 00:00:00 2001 From: FrenchGithubUser Date: Mon, 2 Mar 2026 15:54:42 +0100 Subject: [PATCH 2/2] address some comments --- synapse/federation/federation_server.py | 21 +++++++++++---------- synapse/handlers/message.py | 11 +++++++---- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 9052f6019c0..f58fd31b028 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -804,24 +804,25 @@ async def on_send_join_request( # Check the forward extremities for the room here.
If there is more than one, it + # is likely that another event was created in the room during the - # make_join/send_join handshake. Without being able to determine how long until - # the next event will be created that references this 'missing event', - # proactively send a dummy extensible event that ties these forward extremities - # together. The remote server should search out this missing event on its own. + # make_join/send_join handshake. The joining server is thus likely to miss this event + # until a second event is created which references it - which could be some time. + # In that case, we proactively send a dummy extensible event that ties these + # forward extremities together. The remote server will then attempt to backfill + # the missing event on its own. # - # By not sending the 'missing event' directly, the stream ordering for it will - # be consistent between servers(in that it technically was created before the - # join itself). + # By not sending the 'missing event' directly, but instead having the joining + # homeserver backfill it, the stream ordering for the missing event will be + # "before" the join (which is what we expect). forward_extremities = await self.store._get_forward_extremeties_for_room( room_id, stream_ordering_of_join.get_max_stream_pos() ) if len(forward_extremities) > 1: - # I do not feel it is necessary to set this onto the FederationServer class - # itself. Its likelihood of being used is extremely low. Make it on-demand + # The likelihood of this being used is extremely low, thus only build the handler + # when necessary.
_creation_handler = self.hs.get_event_creation_handler() - await _creation_handler._send_dummy_events_to_patch_room(room_id) + await _creation_handler._send_dummy_event_after_room_join(room_id) if servers_in_room is not None: resp["servers_in_room"] = list(servers_in_room) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 76aa3c7c320..a4235e6ebca 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -2220,11 +2220,14 @@ async def _send_dummy_events_to_fill_extremities(self) -> None: now = self.clock.time_msec() self._rooms_to_exclude_from_dummy_event_insertion[room_id] = now - async def _send_dummy_events_to_patch_room(self, room_id: str) -> None: + async def _send_dummy_event_after_room_join(self, room_id: str) -> None: """ - Send a dummy event into this room to patch in a missed forward extremity. - This should only be triggered during a remote join if there was a forward - extremity that occurred during the make_join/send_join handshake. + Creates and sends a dummy event into the given room, referencing the + current forward extremities (via `prev_events`). + This should only be triggered when handling a remote join while there were + events sent during the make_join/send_join handshake. The joining + homeserver would otherwise not immediately know to backfill this event, + and would "miss it". """ async with self._worker_lock_handler.acquire_read_write_lock( NEW_EVENT_DURING_PURGE_LOCK_NAME, room_id, write=False