From 6b70d44470f39a146500d5699787e8dabdde0e10 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 8 Mar 2023 17:30:03 +0000 Subject: [PATCH 1/8] Test --- tests/federation/test_federation_catch_up.py | 77 +++++++++++++++++++- tests/test_utils/event_injection.py | 31 ++++++++ 2 files changed, 107 insertions(+), 1 deletion(-) diff --git a/tests/federation/test_federation_catch_up.py b/tests/federation/test_federation_catch_up.py index 6381583c24b4..1dad81e25fdb 100644 --- a/tests/federation/test_federation_catch_up.py +++ b/tests/federation/test_federation_catch_up.py @@ -1,4 +1,5 @@ -from typing import Callable, List, Optional, Tuple +from typing import Callable, Collection, List, Optional, Tuple +from unittest import mock from unittest.mock import Mock from twisted.test.proto_helpers import MemoryReactor @@ -500,3 +501,77 @@ def test_not_latest_event(self) -> None: self.assertEqual(len(sent_pdus), 1) self.assertEqual(sent_pdus[0].event_id, event_2.event_id) self.assertFalse(per_dest_queue._catching_up) + + def test_catch_up_is_not_blocked_by_partial_state_room(self) -> None: + """Detects (part of?) https://github.com/matrix-org/synapse/issues/15220.""" + # ARRANGE: + # - a local user (u1) + # - a room with which contains u1 and two remote users, @u2:host2 and @u3:other + # - events in that room such that + # - history visibility is restricted + # - u1 sent message events + # - afterwards, u3 sent a remote event + # - catchup to begin for host2 + per_dest_queue, sent_pdus = self.make_fake_destination_queue() + + self.register_user("u1", "you the one") + u1_token = self.login("u1", "you the one") + room = self.helper.create_room_as("u1", tok=u1_token) + self.helper.send_state( + room_id=room, + event_type="m.room.history_visibility", + body={"history_visibility": "joined"}, + tok=u1_token, + ) + self.get_success( + event_injection.inject_member_event(self.hs, room, "@u2:host2", "join") + ) + self.get_success( + event_injection.inject_member_event(self.hs, room, "@u3:other", "join") + ) + + # create some events + event_id_1 = self.helper.send(room, "hello", tok=u1_token)["event_id"] + event_id_2 = self.helper.send(room, "world", tok=u1_token)["event_id"] + # pretend that u3 changes their displayname + event_id_3 = self.get_success( + event_injection.inject_member_event(self.hs, room, "@u3:other", "join") + ).event_id + + # destination_rooms should already be populated, but let us pretend that we already + # sent (successfully) up to and including event id 1 + event_1 = self.get_success(self.hs.get_datastores().main.get_event(event_id_1)) + assert event_1.internal_metadata.stream_ordering is not None + self.get_success( + self.hs.get_datastores().main.set_destination_last_successful_stream_ordering( + "host2", event_1.internal_metadata.stream_ordering + ) + ) + + # Mock event 3 as having partial state + self.get_success( + event_injection.mark_event_as_partial_state(self.hs, event_id_3, room) + ) + + # Fail the test if we block on full state for event 3. + async def mock_await_full_state(event_ids: Collection[str]) -> None: + if event_id_3 in event_ids: + raise AssertionError("Tried to await full state for event_id_3") + + # ACT + with mock.patch.object( + self.hs.get_storage_controllers().state._partial_state_events_tracker, + "await_full_state", + mock_await_full_state, + ): + self.get_success(per_dest_queue._catch_up_transmission_loop()) + + # ASSERT + # We should have: + # - not sent event 3: it's not ours, and the room is partial stated + # - fallen back to sending event 2: it's the most recent event in the room + # we tried to send to host2 + # - completed catch-up + self.assertEqual(len(sent_pdus), 1) + self.assertEqual(sent_pdus[0].event_id, event_id_2) + self.assertFalse(per_dest_queue._catching_up) diff --git a/tests/test_utils/event_injection.py b/tests/test_utils/event_injection.py index a6330ed840f0..9679904c3321 100644 --- a/tests/test_utils/event_injection.py +++ b/tests/test_utils/event_injection.py @@ -102,3 +102,34 @@ async def create_event( context = await unpersisted_context.persist(event) return event, context + + +async def mark_event_as_partial_state( + hs: synapse.server.HomeServer, + event_id: str, + room_id: str, +) -> None: + """ + (Falsely) mark an event as having partial state. + + Naughty, but occasionally useful when checking that partial state doesn't + block something from happening. + + If the event already has partial state, this insert will fail (event_id is unique + in this table). + """ + store = hs.get_datastores().main + await store.db_pool.simple_upsert( + table="partial_state_rooms", + keyvalues={"room_id": room_id}, + values={}, + insertion_values={"room_id": room_id}, + ) + + await store.db_pool.simple_insert( + table="partial_state_events", + values={ + "room_id": room_id, + "event_id": event_id, + }, + ) From bebd7d29fcde2f287967fd0cf14bb7f0bb661973 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 8 Mar 2023 18:02:44 +0000 Subject: [PATCH 2/8] Tweak docstring and type hint --- synapse/visibility.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/synapse/visibility.py b/synapse/visibility.py index e442de31739e..28f44fc31a51 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -14,7 +14,7 @@ # limitations under the License. import logging from enum import Enum, auto -from typing import Collection, Dict, FrozenSet, List, Optional, Tuple +from typing import Collection, Dict, FrozenSet, List, Mapping, Optional, Sequence, Tuple import attr from typing_extensions import Final @@ -565,19 +565,31 @@ async def filter_events_for_server( storage: StorageControllers, target_server_name: str, local_server_name: str, - events: List[EventBase], + events: Sequence[EventBase], redact: bool = True, check_history_visibility_only: bool = False, ) -> List[EventBase]: - """Filter a list of events based on whether given server is allowed to + """Filter a list of events based on whether the target server is allowed to see them. + For a fully stated room, the target server is allowed to see an event E if: + - the state at E has world readable or shared history vis, OR + - the state at E says that the target server is in the room. + + For a partially stated room, the target server is allowed to see E if: + - E was created by this homeserver, AND: + - the partial state at E has world readable or shared history vis, OR + - the partial state at E says that the target server is in the room. + + TODO: state before or state after? + Args: storage - server_name + target_server_name + local_server_name events - redact: Whether to return a redacted version of the event, or - to filter them out entirely. + redact: Controls what to do with events which have been filtered out. + If True, include their redacted forms; if False, omit them entirely. check_history_visibility_only: Whether to only check the history visibility, rather than things like if the sender has been erased. This is used e.g. during pagination to decide whether to @@ -587,7 +599,7 @@ async def filter_events_for_server( The filtered events. """ - def is_sender_erased(event: EventBase, erased_senders: Dict[str, bool]) -> bool: + def is_sender_erased(event: EventBase, erased_senders: Mapping[str, bool]) -> bool: if erased_senders and erased_senders[event.sender]: logger.info("Sender of %s has been erased, redacting", event.event_id) return True From c813f89de6a279349a1d5f4d65095b2130970f8f Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 8 Mar 2023 18:12:45 +0000 Subject: [PATCH 3/8] Flip logic and provide better name --- synapse/handlers/federation.py | 4 ++-- synapse/visibility.py | 9 ++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 5f2057269dac..00448560f68f 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -392,7 +392,7 @@ async def _maybe_backfill_inner( get_prev_content=False, ) - # We set `check_history_visibility_only` as we might otherwise get false + # We unset `filter_out_erased_senders` as we might otherwise get false # positives from users having been erased. filtered_extremities = await filter_events_for_server( self._storage_controllers, @@ -400,7 +400,7 @@ async def _maybe_backfill_inner( self.server_name, events_to_check, redact=False, - check_history_visibility_only=True, + filter_out_erased_senders=False, ) if filtered_extremities: extremities_to_request.append(bp.event_id) diff --git a/synapse/visibility.py b/synapse/visibility.py index 28f44fc31a51..cb9a2d1e0ab4 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -567,7 +567,7 @@ async def filter_events_for_server( local_server_name: str, events: Sequence[EventBase], redact: bool = True, - check_history_visibility_only: bool = False, + filter_out_erased_senders: bool = True, ) -> List[EventBase]: """Filter a list of events based on whether the target server is allowed to see them. @@ -590,8 +590,7 @@ async def filter_events_for_server( events redact: Controls what to do with events which have been filtered out. If True, include their redacted forms; if False, omit them entirely. - check_history_visibility_only: Whether to only check the - history visibility, rather than things like if the sender has been + filter_out_erased_senders: If true, also filter out events whose sender has been erased. This is used e.g. during pagination to decide whether to backfill or not. @@ -628,7 +627,7 @@ def check_event_is_visible( # server has no users in the room: redact return False - if not check_history_visibility_only: + if filter_out_erased_senders: erased_senders = await storage.main.are_users_erased(e.sender for e in events) else: # We don't want to check whether users are erased, which is equivalent @@ -644,7 +643,7 @@ def check_event_is_visible( # this check but would base the filtering on an outdated view of the membership events. partial_state_invisible_events = set() - if not check_history_visibility_only: + if filter_out_erased_senders: for e in events: sender_domain = get_domain_from_id(e.sender) if ( From 0aa020145267829d1ccf9dff6200ebaef4c07c7a Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 8 Mar 2023 18:14:17 +0000 Subject: [PATCH 4/8] Unconditionally omit remote events in partial state rooms I can't see why erased senders makes a difference here? --- synapse/visibility.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/synapse/visibility.py b/synapse/visibility.py index cb9a2d1e0ab4..92955fc2ce6c 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -643,14 +643,13 @@ def check_event_is_visible( # this check but would base the filtering on an outdated view of the membership events. partial_state_invisible_events = set() - if filter_out_erased_senders: - for e in events: - sender_domain = get_domain_from_id(e.sender) - if ( - sender_domain != local_server_name - and await storage.main.is_partial_state_room(e.room_id) - ): - partial_state_invisible_events.add(e) + for e in events: + sender_domain = get_domain_from_id(e.sender) + if ( + sender_domain != local_server_name + and await storage.main.is_partial_state_room(e.room_id) + ): + partial_state_invisible_events.add(e) # Let's check to see if all the events have a history visibility # of "shared" or "world_readable". If that's the case then we don't From 7f977832d63388c2db0ff2315f622052af6c9946 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 8 Mar 2023 18:44:00 +0000 Subject: [PATCH 5/8] Separate decision from action --- synapse/visibility.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/synapse/visibility.py b/synapse/visibility.py index 92955fc2ce6c..d9bd67163283 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -668,8 +668,7 @@ def check_event_is_visible( target_server_name, ) - to_return = [] - for e in events: + def include_event_in_output(e: EventBase) -> bool: erased = is_sender_erased(e, erased_senders) visible = check_event_is_visible( event_to_history_vis[e.event_id], event_to_memberships.get(e.event_id, {}) @@ -678,7 +677,11 @@ def check_event_is_visible( if e in partial_state_invisible_events: visible = False - if visible and not erased: + return visible and not erased + + to_return = [] + for e in events: + if include_event_in_output(e): to_return.append(e) elif redact: to_return.append(prune_event(e)) From 1cb55e90be34d227551987b6d4b3cd191bf3427e Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 8 Mar 2023 18:46:29 +0000 Subject: [PATCH 6/8] Track a set of strings, not EventBases --- synapse/visibility.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/synapse/visibility.py b/synapse/visibility.py index d9bd67163283..852678b773ad 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -14,7 +14,17 @@ # limitations under the License. import logging from enum import Enum, auto -from typing import Collection, Dict, FrozenSet, List, Mapping, Optional, Sequence, Tuple +from typing import ( + Collection, + Dict, + FrozenSet, + List, + Mapping, + Optional, + Sequence, + Set, + Tuple, +) import attr from typing_extensions import Final @@ -642,14 +652,14 @@ def check_event_is_visible( # otherwise a room could be fully joined after we retrieve those, which would then bypass # this check but would base the filtering on an outdated view of the membership events. - partial_state_invisible_events = set() + partial_state_invisible_event_ids: Set[str] = set() for e in events: sender_domain = get_domain_from_id(e.sender) if ( sender_domain != local_server_name and await storage.main.is_partial_state_room(e.room_id) ): - partial_state_invisible_events.add(e) + partial_state_invisible_event_ids.add(e.event_id) # Let's check to see if all the events have a history visibility # of "shared" or "world_readable". If that's the case then we don't @@ -674,7 +684,7 @@ def include_event_in_output(e: EventBase) -> bool: event_to_history_vis[e.event_id], event_to_memberships.get(e.event_id, {}) ) - if e in partial_state_invisible_events: + if e.event_id in partial_state_invisible_event_ids: visible = False return visible and not erased From f1392475ae99893fa0a8d9de798740ac9e0cb315 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 8 Mar 2023 18:50:37 +0000 Subject: [PATCH 7/8] Avoid state lookups for events we'll ignore --- synapse/visibility.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/synapse/visibility.py b/synapse/visibility.py index 852678b773ad..4802019a61fc 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -653,6 +653,7 @@ def check_event_is_visible( # this check but would base the filtering on an outdated view of the membership events. partial_state_invisible_event_ids: Set[str] = set() + maybe_visible_events: List[EventBase] = [] for e in events: sender_domain = get_domain_from_id(e.sender) if ( @@ -660,18 +661,20 @@ def check_event_is_visible( and await storage.main.is_partial_state_room(e.room_id) ): partial_state_invisible_event_ids.add(e.event_id) + else: + maybe_visible_events.append(e) # Let's check to see if all the events have a history visibility # of "shared" or "world_readable". If that's the case then we don't # need to check membership (as we know the server is in the room). - event_to_history_vis = await _event_to_history_vis(storage, events) + event_to_history_vis = await _event_to_history_vis(storage, maybe_visible_events) # for any with restricted vis, we also need the memberships event_to_memberships = await _event_to_memberships( storage, [ e - for e in events + for e in maybe_visible_events if event_to_history_vis[e.event_id] not in (HistoryVisibility.SHARED, HistoryVisibility.WORLD_READABLE) ], @@ -679,14 +682,14 @@ def check_event_is_visible( ) def include_event_in_output(e: EventBase) -> bool: + if e.event_id in partial_state_invisible_event_ids: + return False + erased = is_sender_erased(e, erased_senders) visible = check_event_is_visible( event_to_history_vis[e.event_id], event_to_memberships.get(e.event_id, {}) ) - if e.event_id in partial_state_invisible_event_ids: - visible = False - return visible and not erased to_return = [] From 31f2b152d29a37e9b3c5912436a6693daf7580b8 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 8 Mar 2023 19:25:41 +0000 Subject: [PATCH 8/8] Changelog --- changelog.d/15228.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/15228.bugfix diff --git a/changelog.d/15228.bugfix b/changelog.d/15228.bugfix new file mode 100644 index 000000000000..a7855d91bf35 --- /dev/null +++ b/changelog.d/15228.bugfix @@ -0,0 +1 @@ +Temp changelog entry (TODO).