44
55import enum
66import math
7- import threading
7+ import sys
88import warnings
99from abc import ABC , abstractmethod
10+ from contextlib import AbstractContextManager , nullcontext
1011from copy import deepcopy
1112from functools import cached_property , partial , reduce
1213from itertools import chain
1314from textwrap import indent
15+ from threading import Lock
1416from time import monotonic_ns
1517from typing import TYPE_CHECKING , Any , Literal , Protocol , Self , assert_never , cast
1618
3133)
3234from ._typing import FieldMap , Name
3335
36+ if sys .version_info >= (3 , 13 ): # 3.13+: threading.Lock itself serves as the lock type
37+ LockType = Lock
38+ else : # older interpreters: presumably threading.Lock is not usable as a class there (confirm against the threading docs)
39+ from _thread import LockType # LockType must be a real class: it is used in the `lock` annotation and in the `case LockType():` pattern of deposit
40+
3441if TYPE_CHECKING :
3542 from ._typing import HCIArray , RealArray
3643
@@ -461,7 +468,8 @@ def __init__(
461468 self .metadata = deepcopy (metadata ) if metadata is not None else {}
462469
463470 self ._hci : HCIArray | None = None
464- self ._hci_lock = threading .Lock ()
471+ self ._hci_lock = Lock ()
472+ self ._deposit_lock = Lock ()
465473
466474 super ().__init__ ()
467475
@@ -666,6 +674,7 @@ def deposit(
666674 return_ghost_padded_array : bool = False ,
667675 weight_field : Name | None = None ,
668676 weight_field_boundaries : dict [Name , tuple [Name , Name ]] | None = None ,
677+ lock : Literal ["per-instance" ] | None | LockType = "per-instance" ,
669678 ) -> np .ndarray :
670679 r"""
671680 Perform particle deposition and return the result as a grid field.
@@ -710,6 +719,19 @@ def deposit(
710719 combinations with boundaries.
711720
712721 Boundary recipes are applied to the weight field (if any) first.
722+
723+ lock (keyword only): 'per-instance' (default), None, or threading.Lock
724+ Fine tune performance for multi-threaded applications: define a
725+ locking strategy around the deposition hotloop.
726+ - 'per-instance': allow multiple Dataset instances to run deposition
727+ concurrently, but forbid concurrent accesses to any specific
728+ instance
729+ - None: no locking is applied. Under some restricted conditions
730+ (e.g. depositing a couple of fields concurrently in a sorted dataset),
731+ this may improve walltime performance, but it is also expected to
732+ degrade it in the more general case as it encourages cache misses
733+ - an arbitrary threading.Lock instance may be supplied to implement
734+ a custom strategy
713735 """
714736 if callable (method ):
715737 from inspect import signature
@@ -760,6 +782,20 @@ def deposit(
760782 self ._sanitize_boundaries (boundaries )
761783 self ._sanitize_boundaries (weight_field_boundaries )
762784
785+ lock_ctx : AbstractContextManager
786+ match lock :
787+ case "per-instance" :
788+ lock_ctx = self ._deposit_lock
789+ case None :
790+ lock_ctx = nullcontext ()
791+ case LockType ():
792+ lock_ctx = lock
793+ case _:
794+ raise ValueError (
795+ f"Received { lock = !r} . Expected either 'per-instance', "
796+ "None, or an instance of threading.Lock"
797+ )
798+
763799 field = self .particles .fields [particle_field_key ]
764800 padded_ret_array = np .zeros (self .grid ._padded_shape , dtype = field .dtype )
765801 if weight_field is not None :
@@ -773,24 +809,26 @@ def deposit(
773809 self ._hci = self ._setup_host_cell_index (verbose )
774810
775811 tstart = monotonic_ns ()
776- if weight_field is not None :
812+ with lock_ctx :
813+ if weight_field is not None :
814+ func (
815+ * self ._get_padded_cell_edges (),
816+ * self ._get_3D_particle_coordinates (),
817+ wfield ,
818+ np .array ((), dtype = field .dtype ),
819+ self ._hci ,
820+ wfield_dep ,
821+ )
822+
777823 func (
778824 * self ._get_padded_cell_edges (),
779825 * self ._get_3D_particle_coordinates (),
826+ field ,
780827 wfield ,
781- np .array ((), dtype = field .dtype ),
782828 self ._hci ,
783- wfield_dep ,
829+ padded_ret_array ,
784830 )
785831
786- func (
787- * self ._get_padded_cell_edges (),
788- * self ._get_3D_particle_coordinates (),
789- field ,
790- wfield ,
791- self ._hci ,
792- padded_ret_array ,
793- )
794832 tstop = monotonic_ns ()
795833 if verbose :
796834 print (
0 commit comments