SpikeInterface · alejoe91 · Nov 14, 2025 · Nov 14, 2025 · Nov 14, 2025 · Nov 14, 2025
diff --git a/doc/api.rst b/doc/api.rst
@@ -73,7 +73,7 @@ Low-level
 .. automodule:: spikeinterface.core
     :noindex:
 
-    .. autoclass:: ChunkRecordingExecutor
+    .. autoclass:: ChunkExecutor
 
 
 Back-compatibility with ``WaveformExtractor`` (version > 0.100.0)

diff --git a/src/spikeinterface/core/__init__.py b/src/spikeinterface/core/__init__.py
@@ -95,7 +95,7 @@
     get_best_job_kwargs,
     ensure_n_jobs,
     ensure_chunk_size,
-    ChunkRecordingExecutor,
+    ChunkExecutor,
     split_job_kwargs,
     fix_job_kwargs,
 )

diff --git a/src/spikeinterface/core/baserecording.py b/src/spikeinterface/core/baserecording.py
@@ -184,6 +184,20 @@ def add_recording_segment(self, recording_segment):
         self._recording_segments.append(recording_segment)
         recording_segment.set_parent_extractor(self)
 
+    def get_sample_size(self):
+        """
+        Returns the size in bytes of one sample, i.e. one value per channel for all channels.
+
+        Returns
+        -------
+        int
+            Number of channels multiplied by the dtype item size, in bytes
+        """
+        num_channels = self.get_num_channels()
+        dtype_size_bytes = self.get_dtype().itemsize
+        sample_size = num_channels * dtype_size_bytes
+        return sample_size
+
     def get_num_samples(self, segment_index: int | None = None) -> int:
         """
         Returns the number of samples for a segment.

diff --git a/src/spikeinterface/core/job_tools.py b/src/spikeinterface/core/job_tools.py
@@ -217,16 +217,16 @@ def divide_segment_into_chunks(num_frames, chunk_size):
     return chunks
 
 
-def divide_recording_into_chunks(recording, chunk_size):
-    recording_slices = []
+def divide_extractor_into_chunks(recording, chunk_size):
+    slices = []  # flat list of (segment_index, frame_start, frame_stop) tuples, in segment order
     for segment_index in range(recording.get_num_segments()):
         num_frames = recording.get_num_samples(segment_index)
         chunks = divide_segment_into_chunks(num_frames, chunk_size)
-        recording_slices.extend([(segment_index, frame_start, frame_stop) for frame_start, frame_stop in chunks])
-    return recording_slices
+        slices.extend([(segment_index, frame_start, frame_stop) for frame_start, frame_stop in chunks])
+    return slices
 
 
-def ensure_n_jobs(recording, n_jobs=1):
+def ensure_n_jobs(extractor, n_jobs=1):
     if n_jobs == -1:
         n_jobs = os.cpu_count()
     elif n_jobs == 0:
@@ -244,10 +244,10 @@ def ensure_n_jobs(recording, n_jobs=1):
         print(f"Python {sys.version} does not support parallel processing")
         n_jobs = 1
 
-    if not recording.check_if_memory_serializable():
+    if not extractor.check_if_memory_serializable():
         if n_jobs != 1:
             raise RuntimeError(
-                "Recording is not serializable to memory and can't be processed in parallel. "
+                "Extractor is not serializable to memory and can't be processed in parallel. "
                 "You can use the `rec = recording.save(folder=...)` function or set 'n_jobs' to 1."
             )
 
@@ -271,10 +271,10 @@ def chunk_duration_to_chunk_size(chunk_duration, recording):
 
 
 def ensure_chunk_size(
-    recording, total_memory=None, chunk_size=None, chunk_memory=None, chunk_duration=None, n_jobs=1, **other_kwargs
+    extractor, total_memory=None, chunk_size=None, chunk_memory=None, chunk_duration=None, n_jobs=1, **other_kwargs
 ):
     """
-    "chunk_size" is the traces.shape[0] for each worker.
+    "chunk_size" is the number of samples for each worker.
 
     Flexible chunk_size setter with 3 ways:
         * "chunk_size" : is the length in sample for each chunk independently of channel count and dtype.
@@ -305,34 +305,30 @@ def ensure_chunk_size(
         assert total_memory is None
         # set by memory per worker size
         chunk_memory = convert_string_to_bytes(chunk_memory)
-        n_bytes = np.dtype(recording.get_dtype()).itemsize
-        num_channels = recording.get_num_channels()
-        chunk_size = int(chunk_memory / (num_channels * n_bytes))
+        chunk_size = int(chunk_memory / extractor.get_sample_size())
     elif total_memory is not None:
         # clip by total memory size
-        n_jobs = ensure_n_jobs(recording, n_jobs=n_jobs)
+        n_jobs = ensure_n_jobs(extractor, n_jobs=n_jobs)
         total_memory = convert_string_to_bytes(total_memory)
-        n_bytes = np.dtype(recording.get_dtype()).itemsize
-        num_channels = recording.get_num_channels()
-        chunk_size = int(total_memory / (num_channels * n_bytes * n_jobs))
+        chunk_size = int(total_memory / (extractor.get_sample_size() * n_jobs))
     elif chunk_duration is not None:
-        chunk_size = chunk_duration_to_chunk_size(chunk_duration, recording)
+        chunk_size = chunk_duration_to_chunk_size(chunk_duration, extractor)
     else:
         # Edge case to define single chunk per segment for n_jobs=1.
         # All chunking parameters equal None mean single chunk per segment
         if n_jobs == 1:
-            num_segments = recording.get_num_segments()
-            samples_in_larger_segment = max([recording.get_num_samples(segment) for segment in range(num_segments)])
+            num_segments = extractor.get_num_segments()
+            samples_in_larger_segment = max([extractor.get_num_samples(segment) for segment in range(num_segments)])
             chunk_size = samples_in_larger_segment
         else:
             raise ValueError("For n_jobs >1 you must specify total_memory or chunk_size or chunk_memory")
 
     return chunk_size
 
 
-class ChunkRecordingExecutor:
+class ChunkExecutor:
     """
-    Core class for parallel processing to run a "function" over chunks on a recording.
+    Core class for parallel processing to run a "function" over chunks on a chunkable extractor.
 
     It supports running a function:
         * in loop with chunk processing (low RAM usage)
@@ -344,8 +340,9 @@ class ChunkRecordingExecutor:
 
     Parameters
     ----------
-    recording : RecordingExtractor
-        The recording to be processed
+    extractor : BaseExtractor
+        The extractor to be processed.
+        It needs to implement the `get_sample_size()`, `get_num_samples()` and `get_num_segments()` methods.
     func : function
         Function that runs on each chunk
     init_func : function
@@ -393,7 +390,7 @@ class ChunkRecordingExecutor:
 
     def __init__(
         self,
-        recording,
+        extractor: "BaseExtractor",
         func,
         init_func,
         init_args,
@@ -412,14 +409,15 @@ def __init__(
         max_threads_per_worker=1,
         need_worker_index=False,
     ):
-        self.recording = recording
+        self.extractor = extractor
         self.func = func
         self.init_func = init_func
         self.init_args = init_args
 
         if pool_engine == "process":
             if mp_context is None:
-                mp_context = recording.get_preferred_mp_context()
+                if hasattr(extractor, "get_preferred_mp_context"):
+                    mp_context = extractor.get_preferred_mp_context()
             if mp_context is not None and platform.system() == "Windows":
                 assert mp_context != "fork", "'fork' mp_context not supported on Windows!"
             elif mp_context == "fork" and platform.system() == "Darwin":
@@ -433,9 +431,8 @@ def __init__(
         self.handle_returns = handle_returns
         self.gather_func = gather_func
 
-        self.n_jobs = ensure_n_jobs(recording, n_jobs=n_jobs)
-        self.chunk_size = ensure_chunk_size(
-            recording,
+        self.n_jobs = ensure_n_jobs(self.extractor, n_jobs=n_jobs)
+        self.chunk_size = self.ensure_chunk_size(
             total_memory=total_memory,
             chunk_size=chunk_size,
             chunk_memory=chunk_memory,
@@ -450,9 +447,9 @@ def __init__(
         self.need_worker_index = need_worker_index
 
         if verbose:
-            chunk_memory = self.chunk_size * recording.get_num_channels() * np.dtype(recording.get_dtype()).itemsize
+            chunk_memory = self.get_chunk_memory()
             total_memory = chunk_memory * self.n_jobs
-            chunk_duration = self.chunk_size / recording.get_sampling_frequency()
+            chunk_duration = self.chunk_size / extractor.sampling_frequency
             chunk_memory_str = convert_bytes_to_str(chunk_memory)
             total_memory_str = convert_bytes_to_str(total_memory)
             chunk_duration_str = convert_seconds_to_str(chunk_duration)
@@ -467,13 +464,24 @@ def __init__(
                 f"chunk_duration={chunk_duration_str}",
             )
 
-    def run(self, recording_slices=None):
+    def get_chunk_memory(self):
+        return self.chunk_size * self.extractor.get_sample_size()  # samples per chunk * bytes per sample
+
+    def ensure_chunk_size(
+        self, total_memory=None, chunk_size=None, chunk_memory=None, chunk_duration=None, n_jobs=1, **other_kwargs
+    ):
+        return ensure_chunk_size(  # module-level function of the same name, applied to self.extractor
+            self.extractor, total_memory, chunk_size, chunk_memory, chunk_duration, n_jobs, **other_kwargs
+        )
+
+    def run(self, slices=None):
         """
         Runs the defined jobs.
         """
 
-        if recording_slices is None:
-            recording_slices = divide_recording_into_chunks(self.recording, self.chunk_size)
+        if slices is None:
+            # TODO: rename `divide_extractor_into_chunks`? (target of this TODO is not stated; confirm)
+            slices = divide_extractor_into_chunks(self.extractor, self.chunk_size)
 
         if self.handle_returns:
             returns = []
@@ -482,23 +490,21 @@ def run(self, recording_slices=None):
 
         if self.n_jobs == 1:
             if self.progress_bar:
-                recording_slices = tqdm(
-                    recording_slices, desc=f"{self.job_name} (no parallelization)", total=len(recording_slices)
-                )
+                slices = tqdm(slices, desc=f"{self.job_name} (no parallelization)", total=len(slices))
 
             worker_dict = self.init_func(*self.init_args)
             if self.need_worker_index:
                 worker_dict["worker_index"] = 0
 
-            for segment_index, frame_start, frame_stop in recording_slices:
+            for segment_index, frame_start, frame_stop in slices:
                 res = self.func(segment_index, frame_start, frame_stop, worker_dict)
                 if self.handle_returns:
                     returns.append(res)
                 if self.gather_func is not None:
                     self.gather_func(res)
 
         else:
-            n_jobs = min(self.n_jobs, len(recording_slices))
+            n_jobs = min(self.n_jobs, len(slices))
 
             if self.pool_engine == "process":
 
@@ -526,11 +532,11 @@ def run(self, recording_slices=None):
                         array_pid,
                     ),
                 ) as executor:
-                    results = executor.map(process_function_wrapper, recording_slices)
+                    results = executor.map(process_function_wrapper, slices)
 
                     if self.progress_bar:
                         results = tqdm(
-                            results, desc=f"{self.job_name} (workers: {n_jobs} processes)", total=len(recording_slices)
+                            results, desc=f"{self.job_name} (workers: {n_jobs} processes)", total=len(slices)
                         )
 
                     for res in results:
@@ -549,7 +555,7 @@ def run(self, recording_slices=None):
                 if self.progress_bar:
                     # here the tqdm threading do not work (maybe collision) so we need to create a pbar
                     # before thread spawning
-                    pbar = tqdm(desc=f"{self.job_name} (workers: {n_jobs} threads)", total=len(recording_slices))
+                    pbar = tqdm(desc=f"{self.job_name} (workers: {n_jobs} threads)", total=len(slices))
 
                 if self.need_worker_index:
                     lock = threading.Lock()
@@ -570,8 +576,8 @@ def run(self, recording_slices=None):
                     ),
                 ) as executor:
 
-                    recording_slices2 = [(thread_local_data,) + tuple(args) for args in recording_slices]
-                    results = executor.map(thread_function_wrapper, recording_slices2)
+                    slices2 = [(thread_local_data,) + tuple(args) for args in slices]
+                    results = executor.map(thread_function_wrapper, slices2)
 
                     for res in results:
                         if self.progress_bar:

diff --git a/src/spikeinterface/core/node_pipeline.py b/src/spikeinterface/core/node_pipeline.py
@@ -11,7 +11,7 @@
 import numpy as np
 
 from spikeinterface.core import BaseRecording, get_chunk_with_margin
-from spikeinterface.core.job_tools import ChunkRecordingExecutor, fix_job_kwargs, _shared_job_kwargs_doc
+from spikeinterface.core.job_tools import ChunkExecutor, fix_job_kwargs, _shared_job_kwargs_doc
 from spikeinterface.core import get_channel_distances
 
 
@@ -533,7 +533,7 @@ def run_node_pipeline(
     names=None,
     verbose=False,
     skip_after_n_peaks=None,
-    recording_slices=None,
+    slices=None,
 ):
     """
     Machinery to compute in parallel operations on peaks and traces.
@@ -585,7 +585,7 @@ def run_node_pipeline(
     skip_after_n_peaks : None | int
         Skip the computation after n_peaks.
-        This is not an exact because internally this skip is done per worker in average.
+        This is not exact because internally this skip is done per worker on average.
-    recording_slices : None | list[tuple]
+    slices : None | list[tuple]
-        Optionaly give a list of slices to run the pipeline only on some chunks of the recording.
+        Optionally give a list of slices to run the pipeline only on some chunks of the recording.
         It must be a list of (segment_index, frame_start, frame_stop).
         If None (default), the function iterates over the entire duration of the recording.
@@ -616,7 +616,7 @@ def run_node_pipeline(
 
     init_args = (recording, nodes, skip_after_n_peaks_per_worker)
 
-    processor = ChunkRecordingExecutor(
+    processor = ChunkExecutor(
         recording,
         _compute_peak_pipeline_chunk,
         _init_peak_pipeline,
@@ -627,7 +627,7 @@ def run_node_pipeline(
         **job_kwargs,
     )
 
-    processor.run(recording_slices=recording_slices)
+    processor.run(slices=slices)
 
     outs = gather_func.finalize_buffers(squeeze_output=squeeze_output)
     return outs