-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Fix message duplication if something goes wrong after persisting the event #8476
Changes from 3 commits
29d4b88
9b68b63
76d1d94
0869a4f
3613071
1a49e23
d22241e
801dda2
136b97e
33909dd
cd89f44
7a1c417
8f5931d
22eb206
3daf421
9503e17
46d4919
807d44c
58a70d2
6419d09
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| Fix message duplication if something goes wrong after persisting the event. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,7 +12,6 @@ | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| import itertools | ||
| import logging | ||
| import threading | ||
|
|
@@ -130,6 +129,15 @@ def __init__(self, database: DatabasePool, db_conn, hs): | |
| db_conn, "events", "stream_ordering", step=-1 | ||
| ) | ||
|
|
||
| if not hs.config.worker.worker_app: | ||
|
Member
This can probably be moved onto the background worker?
Member
Author
Ugh, yes, but that work isn't in 1.21 :(
Member
Oh, right. 🤦 I'll update it when this gets merged forward.
Member
So if we're retargeting this to develop, it seems like we can make this change! |
||
| # We periodically clean out old transaction ID mappings | ||
| self._clock.looping_call( | ||
| run_as_background_process, | ||
| 5 * 60 * 1000, | ||
| "_cleanup_old_transaction_ids", | ||
| self._cleanup_old_transaction_ids, | ||
| ) | ||
|
|
||
| self._get_event_cache = Cache( | ||
| "*getEvent*", | ||
| keylen=3, | ||
|
|
@@ -1287,3 +1295,55 @@ def get_next_event_to_expire_txn(txn): | |
| return await self.db_pool.runInteraction( | ||
| desc="get_next_event_to_expire", func=get_next_event_to_expire_txn | ||
| ) | ||
|
|
||
| async def get_event_id_from_transaction_id( | ||
| self, user_id: str, token_id: str, txn_id: str | ||
erikjohnston marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| ) -> Optional[str]: | ||
| """Look up if we have already persisted an event for the transaction ID, | ||
| returning the event ID if so. | ||
| """ | ||
| return await self.db_pool.simple_select_one_onecol( | ||
| table="event_txn_id", | ||
| keyvalues={"user_id": user_id, "token_id": token_id, "txn_id": txn_id}, | ||
| retcol="event_id", | ||
| allow_none=True, | ||
| desc="get_event_id_from_transaction_id", | ||
| ) | ||
|
|
||
| async def get_already_persisted_events( | ||
| self, events: Iterable[EventBase] | ||
| ) -> Dict[str, str]: | ||
| """Look up if we have already persisted an event for the transaction ID, | ||
| returning a mapping from event ID in the given list to the event ID of | ||
| an existing event. | ||
| """ | ||
|
|
||
| mapping = {} | ||
|
|
||
| for event in events: | ||
| token_id = getattr(event.internal_metadata, "token_id", None) | ||
| txn_id = getattr(event.internal_metadata, "txn_id", None) | ||
| if token_id and txn_id: | ||
|
Comment on lines
+1341
to
+1353
Member
I slightly feel that it shouldn't be the storage layer's responsibility to do this digging, but YMMV.
Member
Author
Yeeeah, though this only gets called from [inline code reference lost in extraction].
Member
OK. |
||
| existing = await self.get_event_id_from_transaction_id( | ||
| event.sender, token_id, txn_id | ||
| ) | ||
|
Member
Author
I don't know if we want to try and batch these up a bit? The standard [inline code reference and remainder of the sentence lost in extraction].
Member
It looks like something similar could be written that takes an iterable of tuples (or dicts)?
Member
Author
Yup, I'm just not sure how much I want to try and do that for something that is going into an RC.
Member
Park it for now, optimise later? |
||
| if existing: | ||
| mapping[event.event_id] = existing | ||
|
|
||
| return mapping | ||
|
|
||
| async def _cleanup_old_transaction_ids(self): | ||
| """Cleans out transaction id mappings older than 24hrs. | ||
| """ | ||
|
|
||
| def _cleanup_old_transaction_ids_txn(txn): | ||
| sql = """ | ||
| DELETE FROM event_txn_id | ||
| WHERE inserted_ts < ? | ||
| """ | ||
| one_day_ago = self._clock.time_msec() - 24 * 60 * 60 * 1000 | ||
|
Member
So one day is essentially the furthest you could fall behind on federation and still not get duplicates?
Member
Author
We only get transaction IDs from local clients, which I believe shouldn't retry once a significant amount of time has passed. |
||
| txn.execute(sql, (one_day_ago,)) | ||
|
|
||
| return await self.db_pool.runInteraction( | ||
| "_cleanup_old_transaction_ids", _cleanup_old_transaction_ids_txn, | ||
| ) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,29 @@ | ||
| /* Copyright 2020 The Matrix.org Foundation C.I.C | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
|
|
||
| -- A map of recent events persisted with transaction IDs. Used to deduplicate | ||
| -- send event requests with the same transaction ID. | ||
|
Comment on lines
+17
to
+18
Member
It might be nice to mention what these are expected to be deduplicated across (user, device, transaction). I think most of the other tables we have use the [inline code reference and remainder of the sentence lost in extraction] |
||
| CREATE TABLE event_txn_id ( | ||
erikjohnston marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| event_id TEXT NOT NULL, | ||
erikjohnston marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| user_id TEXT NOT NULL, | ||
| token_id BIGINT NOT NULL, | ||
richvdh marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| txn_id TEXT NOT NULL, | ||
| inserted_ts BIGINT NOT NULL | ||
| ); | ||
|
|
||
| CREATE UNIQUE INDEX event_txn_id_event_id ON event_txn_id(event_id); | ||
| CREATE UNIQUE INDEX event_txn_id_txn_id ON event_txn_id(user_id, token_id, txn_id); | ||
| CREATE INDEX event_txn_id_ts ON event_txn_id(inserted_ts); | ||
Uh oh!
There was an error while loading. Please reload this page.