@@ -230,6 +230,7 @@ cdef class BacktestEngine:
230230 self._iteration: uint64_t = 0
231231 self._last_ns : uint64_t = 0
232232 self._end_ns : uint64_t = 0
233+ self._sorted: bint = True
233234
234235 # Timing
235236 self._run_started: pd.Timestamp | None = None
@@ -767,6 +768,30 @@ cdef class BacktestEngine:
767768 Caution if adding data without `sort` being True , as this could lead to running backtests
768769 on a stream which does not have monotonically increasing timestamps.
769770
771+ Notes
772+ -----
773+ For optimal performance when loading large datasets, consider using `sort=False` for all
774+ calls to `add_data()`, then calling `sort_data()` once after all data has been added:
775+
776+ .. code-block:: python
777+
778+ # Add multiple data streams without sorting
779+ engine.add_data(instrument1_bars , sort = False )
780+ engine.add_data(instrument2_bars , sort = False )
781+ engine.add_data(instrument3_bars , sort = False )
782+
783+ # Sort once at the end
784+ engine.sort_data()
785+
786+ This approach avoids repeatedly sorting the entire data stream on each call ,
787+ significantly reducing load time for large datasets.
788+
789+ **Contract invariants:**
790+
791+ - When `sort=True`: Data is immediately available for backtesting via `run()`.
792+ - When `sort=False`: You **must** call `sort_data()` or add data with `sort=True` before `run()`.
793+ - The provided `data` list is always copied internally to prevent external mutations from affecting the engine state.
794+
770795 """
771796 Condition.not_empty(data , "data")
772797 Condition.list_type(data , Data , "data")
@@ -822,8 +847,10 @@ cdef class BacktestEngine:
822847
823848 if sort:
824849 self ._data = sorted (self ._data, key = lambda x : x.ts_init)
825-
826- self ._data_iterator.add_data(" backtest_data" , self ._data)
850+ self ._data_iterator.add_data(" backtest_data" , self ._data, append_data = True , presorted = True )
851+ self ._sorted = True
852+ else :
853+ self ._sorted = False
827854
828855 for data_point in data:
829856 data_type = type (data_point)
@@ -1049,6 +1076,8 @@ cdef class BacktestEngine:
10491076 """
10501077 Condition.not_none(data , "data")
10511078 self._data = pickle.loads(data)
1079+ self._data_iterator.add_data("backtest_data", self._data , append_data = True , presorted = True )
1080+ self._sorted = True
10521081
10531082 self._log.info(
10541083 f"Loaded {len(self._data ):_} data "
@@ -1191,7 +1220,10 @@ cdef class BacktestEngine:
11911220 # Reset timing
11921221 self ._iteration = 0
11931222 self ._data_iterator = BacktestDataIterator()
1194- self ._data_iterator.add_data(" backtest_data" , self ._data)
1223+
1224+ if self ._sorted:
1225+ self ._data_iterator.add_data(" backtest_data" , self ._data, append_data = True , presorted = True )
1226+
11951227 self ._run_started = None
11961228 self ._run_finished = None
11971229 self ._backtest_start = None
@@ -1204,7 +1236,9 @@ cdef class BacktestEngine:
12041236 Sort the engine's internal data stream.
12051237
12061238 """
1207- self._data.sort()
1239+ self._data = sorted (self ._data, key = lambda x : x.ts_init)
1240+ self._data_iterator.add_data("backtest_data", self._data , append_data = True , presorted = True )
1241+ self._sorted = True
12081242
12091243 def clear_data(self ) -> None:
12101244 """
@@ -1218,6 +1252,7 @@ cdef class BacktestEngine:
12181252 self._data.clear()
12191253 self._data_len = 0
12201254 self._data_iterator = BacktestDataIterator()
1255+ self._sorted = True
12211256
12221257 def clear_actors(self ) -> None:
12231258 """
@@ -1294,6 +1329,16 @@ cdef class BacktestEngine:
12941329 If no data has been added to the engine.
12951330 ValueError
12961331 If the `start` is >= the `end` datetime.
1332+ RuntimeError
1333+ If data has been added with `sort=False` and has not since been sorted (via `sort_data()` or `add_data(..., sort=True)`).
1334+
1335+ Notes
1336+ -----
1337+ **Contract invariants:**
1338+
1339+ - All data added via `add_data()` must be sorted and synced to the internal iterator before calling `run()`.
1340+ - If any data was added with `sort=False`, you must call `sort_data()` or add data with `sort=True` before this method.
1341+ - The engine validates this requirement and will raise `RuntimeError` if unsorted data is detected.
12971342
12981343 """
12991344 self._run(start, end, run_config_id, streaming)
@@ -1393,6 +1438,13 @@ cdef class BacktestEngine:
13931438 run_config_id: str | None = None,
13941439 bint streaming = False,
13951440 ):
1441+ # Validate data has been sorted and synced to iterator
1442+ if self._data and not self._sorted:
1443+ raise RuntimeError(
1444+ "Data has been added but not sorted, "
1445+ "call `engine.sort_data()` or use `engine.add_data(..., sort=True)` before running"
1446+ )
1447+
13961448 # Validate data
13971449 cdef:
13981450 SimulatedExchange exchange
@@ -1962,7 +2014,13 @@ cdef class BacktestDataIterator:
19622014 self._single_data_index = 0
19632015 self._is_single_data = False
19642016
1965- def add_data(self, data_name, list data, bint append_data=True):
2017+ def add_data(
2018+ self,
2019+ str data_name,
2020+ list data,
2021+ bint append_data = True,
2022+ bint presorted = False,
2023+ ) -> None:
19662024 """
19672025 Add (or replace) a named data list for static data loading (sorted by `ts_init` internally unless `presorted` is ``True``).
19682026
@@ -1979,6 +2037,9 @@ cdef class BacktestDataIterator:
19792037 Controls stream priority for timestamp ties:
19802038 ``True `` – lower priority (appended).
19812039 ``False `` – higher priority (prepended).
2040+ presorted : bool, default ``False``
2041+ If the data is guaranteed to be pre-sorted by `ts_init`.
2042+ When ``True``, skips internal sorting for better performance.
19822043
19832044 Raises
19842045 ------
@@ -1991,13 +2052,14 @@ cdef class BacktestDataIterator:
19912052 if not data:
19922053 return
19932054
1994- def data_generator():
1995- yield data
1996- # Generator ends after yielding once
1997-
1998- self.init_data(data_name, data_generator(), append_data)
2055+ self._add_data(data_name, data, append_data, presorted)
19992056
2000- def init_data(self, str data_name, data_generator, bint append_data=True):
2057+ def init_data(
2058+ self,
2059+ str data_name,
2060+ data_generator,
2061+ bint append_data = True,
2062+ ) -> None:
20012063 """
20022064 Add (or replace) a named data generator for streaming large datasets.
20032065
@@ -2042,7 +2104,13 @@ cdef class BacktestDataIterator:
20422104 # Generator is already exhausted, nothing to add
20432105 pass
20442106
2045- cdef void _add_data(self, str data_name, list data_list, bint append_data=True):
2107+ cdef void _add_data(
2108+ self,
2109+ str data_name,
2110+ list data_list,
2111+ bint append_data = True,
2112+ bint presorted = False,
2113+ ):
20462114 if len(data_list) == 0:
20472115 return
20482116
@@ -2062,7 +2130,12 @@ cdef class BacktestDataIterator:
20622130 if self._is_single_data:
20632131 self._deactivate_single_data()
20642132
2065- self._data[data_priority] = sorted(data_list, key=lambda data: data.ts_init)
2133+ # Copy and optionally sort to avoid aliasing caller's list
2134+ if presorted:
2135+ self._data[data_priority] = list(data_list)
2136+ else:
2137+ self._data[data_priority] = sorted(data_list, key=lambda data: data.ts_init)
2138+
20662139 self._data_name[data_priority] = data_name
20672140 self._data_priority[data_name] = data_priority
20682141 self._data_len[data_priority] = len(data_list)
0 commit comments