diff --git a/sycl/doc/design/opencl-extensions/cl_intel_16bit_atomics.asciidoc b/sycl/doc/design/opencl-extensions/cl_intel_16bit_atomics.asciidoc new file mode 100644 index 0000000000000..5f7cab5b46959 --- /dev/null +++ b/sycl/doc/design/opencl-extensions/cl_intel_16bit_atomics.asciidoc @@ -0,0 +1,306 @@ +:data-uri: +:sectanchors: +:icons: font +:source-highlighter: coderay + +ifdef::backend-html5[] +:CL_DEVICE_BF16_ATOMIC_CAPABILITIES_INTEL: pass:q[`CL_DEVICE_BF16_ATOMIC_CAPABILITIES_INTEL`] +:CL_DEVICE_INT16_ATOMIC_CAPABILITIES_INTEL: pass:q[`CL_DEVICE_INT16_ATOMIC_CAPABILITIES_INTEL`] +:cl_device_atomic_operations_intel_TYPE: pass:q[`cl_device_atomic_operations_intel`] +:CL_DEVICE_GLOBAL_ATOMIC_LOAD_STORE_INTEL: pass:q[`CL_DEVICE_GLOBAL_ATOMIC_LOAD_STORE_INTEL`] +:CL_DEVICE_GLOBAL_ATOMIC_ADD_INTEL: pass:q[`CL_DEVICE_GLOBAL_ATOMIC_ADD_INTEL`] +:CL_DEVICE_GLOBAL_ATOMIC_MIN_MAX_INTEL: pass:q[`CL_DEVICE_GLOBAL_ATOMIC_MIN_MAX_INTEL`] +:CL_DEVICE_GLOBAL_ATOMIC_CMPXCHG_INTEL: pass:q[`CL_DEVICE_GLOBAL_ATOMIC_CMPXCHG_INTEL`] +:CL_DEVICE_GLOBAL_ATOMIC_AND_OR_XOR_INTEL: pass:q[`CL_DEVICE_GLOBAL_ATOMIC_AND_OR_XOR_INTEL`] +:CL_DEVICE_LOCAL_ATOMIC_LOAD_STORE_INTEL: pass:q[`CL_DEVICE_LOCAL_ATOMIC_LOAD_STORE_INTEL`] +:CL_DEVICE_LOCAL_ATOMIC_ADD_INTEL: pass:q[`CL_DEVICE_LOCAL_ATOMIC_ADD_INTEL`] +:CL_DEVICE_LOCAL_ATOMIC_MIN_MAX_INTEL: pass:q[`CL_DEVICE_LOCAL_ATOMIC_MIN_MAX_INTEL`] +:CL_DEVICE_LOCAL_ATOMIC_CMPXCHG_INTEL: pass:q[`CL_DEVICE_LOCAL_ATOMIC_CMPXCHG_INTEL`] +:CL_DEVICE_LOCAL_ATOMIC_AND_OR_XOR_INTEL: pass:q[`CL_DEVICE_LOCAL_ATOMIC_AND_OR_XOR_INTEL`] +:cl_device_fp_atomic_capabilities_ext_TYPE: pass:q[`cl_device_fp_atomic_capabilities_ext`] +:CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT: pass:q[`CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT`] +:CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT: pass:q[`CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT`] +:CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT: pass:q[`CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT`] +:CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT: 
pass:q[`CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT`] +:CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT: pass:q[`CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT`] +:CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT: pass:q[`CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT`] +endif::[] +ifndef::backend-html5[] +:CL_DEVICE_BF16_ATOMIC_CAPABILITIES_INTEL: pass:q[`CL_DEVICE_​BF16_​ATOMIC_​CAPABILITIES_​INTEL`] +:CL_DEVICE_INT16_ATOMIC_CAPABILITIES_INTEL: pass:q[`CL_DEVICE_​INT16_​ATOMIC_​CAPABILITIES_​INTEL`] +:cl_device_atomic_operations_intel_TYPE: pass:q[`cl_device_​atomic_​operations_​intel`] +:CL_DEVICE_GLOBAL_ATOMIC_LOAD_STORE_INTEL: pass:q[`CL_DEVICE_​GLOBAL_​ATOMIC_​LOAD_​STORE_​INTEL`] +:CL_DEVICE_GLOBAL_ATOMIC_ADD_INTEL: pass:q[`CL_DEVICE_​GLOBAL_​ATOMIC_​ADD_​INTEL`] +:CL_DEVICE_GLOBAL_ATOMIC_MIN_MAX_INTEL: pass:q[`CL_DEVICE_​GLOBAL_​ATOMIC_​MIN_​MAX_​INTEL`] +:CL_DEVICE_GLOBAL_ATOMIC_CMPXCHG_INTEL: pass:q[`CL_DEVICE_​GLOBAL_​ATOMIC_​CMPXCHG_​INTEL`] +:CL_DEVICE_GLOBAL_ATOMIC_AND_OR_XOR_INTEL: pass:q[`CL_DEVICE_​GLOBAL_​ATOMIC_​AND_​OR_​XOR_​INTEL`] +:CL_DEVICE_LOCAL_ATOMIC_LOAD_STORE_INTEL: pass:q[`CL_DEVICE_​LOCAL_​ATOMIC_​LOAD_​STORE_​INTEL`] +:CL_DEVICE_LOCAL_ATOMIC_ADD_INTEL: pass:q[`CL_DEVICE_​LOCAL_​ATOMIC_​ADD_​INTEL`] +:CL_DEVICE_LOCAL_ATOMIC_MIN_MAX_INTEL: pass:q[`CL_DEVICE_​LOCAL_​ATOMIC_​MIN_​MAX_​INTEL`] +:CL_DEVICE_LOCAL_ATOMIC_CMPXCHG_INTEL: pass:q[`CL_DEVICE_​LOCAL_​ATOMIC_​CMPXCHG_​INTEL`] +:CL_DEVICE_LOCAL_ATOMIC_AND_OR_XOR_INTEL: pass:q[`CL_DEVICE_​LOCAL_​ATOMIC_​AND_​OR_​XOR_​INTEL`] +:cl_device_fp_atomic_capabilities_ext_TYPE: pass:q[`cl_device_​fp_​atomic_​capabilities_​ext`] +:CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT: pass:q[`CL_DEVICE_​GLOBAL_​FP_​ATOMIC_​LOAD_​STORE_​EXT`] +:CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT: pass:q[`CL_​DEVICE_​GLOBAL_​FP_​ATOMIC_​ADD_​EXT`] +:CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT: pass:q[`CL_DEVICE_​GLOBAL_​FP_​ATOMIC_​MIN_​MAX_​EXT`] +:CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT: pass:q[`CL_DEVICE_​LOCAL_​FP_​ATOMIC_​LOAD_​STORE_​EXT`] 
+:CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT: pass:q[`CL_DEVICE_​LOCAL_​FP_​ATOMIC_​ADD_​EXT`] +:CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT: pass:q[`CL_DEVICE_​LOCAL_​FP_​ATOMIC_​MIN_​MAX_​EXT`] +endif::[] + += cl_intel_16bit_atomics + +== Name Strings + +`cl_intel_16bit_atomics` + +== Contact + +Ben Ashbaugh, Intel (ben 'dot' ashbaugh 'at' intel 'dot' com) + +== Contributors + +// spell-checker: disable +Ben Ashbaugh, Intel + +Filip Hazubski, Intel + +Kacper Kasper, Intel + +Kacper Nowak, Intel + +// spell-checker: enable + +== Notice + +Copyright (c) 2025 Intel Corporation. All rights reserved. + +== Status + +* Working Draft + +This is a preview extension specification, intended to provide early access to a feature for review and community feedback. When the feature matures, this specification may be released as a formal extension. + +Because the interfaces defined by this specification are not final and are subject to change they are not intended to be used by shipping software products. If you are interested in using this feature in your software product, please let us know! + +== Version + +Built On: 2025-10-13 + +Revision: 0.9.0 + +== Dependencies + +This extension is written against the OpenCL API Specification, OpenCL C Specification, and OpenCL SPIR-V Environment Specification Versions 3.0.19. + +This extension reuses the `cl_device_fp_atomic_capabilities_ext` bitfield type and related bits to describe the supported floating-point atomic capabilities for the 16-bit `bfloat16` type supported by this extension, therefore this extension depends on the `cl_ext_float_atomics` extension. + +This extension depends on the SPIR-V `SPV_INTEL_16bit_atomics` extension to provide the SPIR-V capabilities used by this extension. + +== Overview + +This extension enables programmers to perform atomic operations on 16-bit numbers in memory. 
+Specifically, this extension adds queries to determine the atomic operations supported by an OpenCL device on 16-bit integer values (`short` or `ushort`) and 16-bit `bfloat16` floating-point values (typically abbreviated as `bf16`). + +For these types, an OpenCL device may support basic atomic operations (load, store, and exchange), compare-and-exchange (cmpxchg), addition and subtraction, min and max, and bitwise operations (and, or, and xor). +These operations may be supported in global memory, local memory, or both. + +The initial version of this extension only supports atomic operations on these types through SPIR-V. +Subsequent versions of this extension may extend the OpenCL C programming language to support the additional atomic operations, also. + +[NOTE] +==== +Support for atomic operations on 16-bit half-precision floating-point values (`half` or `fp16`) is already provided by the `cl_ext_float_atomics` extension. +==== + +== New API Functions + +None. + +== New API Enums + +Accepted values for the _param_name_ parameter to *clGetDeviceInfo* to query the 16-bit atomic capabilities of an OpenCL device: + +[source] +---- +#define CL_DEVICE_INT16_ATOMIC_CAPABILITIES_INTEL 0x4258 +#define CL_DEVICE_BF16_ATOMIC_CAPABILITIES_INTEL 0x4259 +---- + +Bitfield type describing atomic operations that are supported by an OpenCL device. 
+Subsequent versions of this extension may add additional supported atomic operations: + +[source] +---- +typedef cl_bitfield cl_device_atomic_operations_intel; + +#define CL_DEVICE_GLOBAL_ATOMIC_LOAD_STORE_INTEL (1 << 0) +#define CL_DEVICE_GLOBAL_ATOMIC_ADD_INTEL (1 << 1) +#define CL_DEVICE_GLOBAL_ATOMIC_MIN_MAX_INTEL (1 << 2) +#define CL_DEVICE_GLOBAL_ATOMIC_CMPXCHG_INTEL (1 << 3) +#define CL_DEVICE_GLOBAL_ATOMIC_AND_OR_XOR_INTEL (1 << 4) + +/* bits 5 - 15 are currently unused */ + +#define CL_DEVICE_LOCAL_ATOMIC_LOAD_STORE_INTEL (1 << 16) +#define CL_DEVICE_LOCAL_ATOMIC_ADD_INTEL (1 << 17) +#define CL_DEVICE_LOCAL_ATOMIC_MIN_MAX_INTEL (1 << 18) +#define CL_DEVICE_LOCAL_ATOMIC_CMPXCHG_INTEL (1 << 19) +#define CL_DEVICE_LOCAL_ATOMIC_AND_OR_XOR_INTEL (1 << 20) + +/* bits 21 and beyond are currently unused */ +---- + +[NOTE] +==== +These bits intentionally match the bits defined by the `cl_ext_float_atomics` extension, with additional bits for compare-and-exchange and bitwise and, or, and xor operations. +==== + +== New OpenCL C Feature Names + +None. + +== New OpenCL C Types + +None. + +== New OpenCL C Functions + +None. + +== Modifications to the OpenCL API Specification + +Add to Table 5 - OpenCL Device Queries in Section 4.2 - Querying Devices: :: ++ +-- +[caption="Table 5. "] +.List of supported param_names by clGetDeviceInfo +[width="100%",cols="4,3,5",options="header"] +|==== +| Device Info | Return Type | Description +| {CL_DEVICE_INT16_ATOMIC_CAPABILITIES_INTEL} + | {cl_device_atomic_operations_intel_TYPE} + | Returns the 16-bit integer atomic operations supported by the device. + This is a bit-field that may include any combination of the following values: + + {CL_DEVICE_GLOBAL_ATOMIC_LOAD_STORE_INTEL} - Can perform load, store, and exchange atomic operations in global memory. + + {CL_DEVICE_GLOBAL_ATOMIC_ADD_INTEL} - Can perform addition and subtraction atomic operations in global memory. 
+ + {CL_DEVICE_GLOBAL_ATOMIC_MIN_MAX_INTEL} - Can perform min and max atomic operations in global memory. + + {CL_DEVICE_GLOBAL_ATOMIC_CMPXCHG_INTEL} - Can perform bitwise compare-and-exchange atomic operations in global memory. + + {CL_DEVICE_GLOBAL_ATOMIC_AND_OR_XOR_INTEL} - Can perform bitwise and, or, and xor operations in global memory. + + + {CL_DEVICE_LOCAL_ATOMIC_LOAD_STORE_INTEL} - Can perform load, store, and exchange atomic operations in local memory. + + {CL_DEVICE_LOCAL_ATOMIC_ADD_INTEL} - Can perform addition and subtraction atomic operations in local memory. + + {CL_DEVICE_LOCAL_ATOMIC_MIN_MAX_INTEL} - Can perform min and max atomic operations in local memory. + + {CL_DEVICE_LOCAL_ATOMIC_CMPXCHG_INTEL} - Can perform bitwise compare-and-exchange atomic operations in local memory. + + {CL_DEVICE_LOCAL_ATOMIC_AND_OR_XOR_INTEL} - Can perform bitwise and, or, and xor operations in local memory. + + + There is no mandated minimum capability. +| {CL_DEVICE_BF16_ATOMIC_CAPABILITIES_INTEL} + | {cl_device_fp_atomic_capabilities_ext_TYPE} + | Returns the 16-bit `bfloat16` floating-point atomic operations supported by the device. + This is a bit-field that may include a combination of the following values: + + {CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT} - Can perform floating-point load, store, and exchange atomic operations in global memory. + + {CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT} - Can perform floating-point addition and subtraction atomic operations in global memory. + + {CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT} - Can perform floating-point min and max atomic operations in global memory. + + + {CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT} - Can perform floating-point load, store, and exchange atomic operations in local memory. + + {CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT} - Can perform floating-point addition and subtraction atomic operations in local memory. 
+ + {CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT} - Can perform floating-point min and max atomic operations in local memory. + + + There is no mandated minimum capability. +|==== +-- + +== Modifications to the OpenCL SPIR-V Environment Specification + +(Add a new section 5.2.X - `cl_intel_16bit_atomics`) :: ++ +-- +If the OpenCL environment supports the extension `cl_intel_16bit_atomics` and the {CL_DEVICE_INT16_ATOMIC_CAPABILITIES_INTEL} bitfield includes {CL_DEVICE_GLOBAL_ATOMIC_LOAD_STORE_INTEL}, {CL_DEVICE_GLOBAL_ATOMIC_CMPXCHG_INTEL}, {CL_DEVICE_LOCAL_ATOMIC_LOAD_STORE_INTEL}, or {CL_DEVICE_LOCAL_ATOMIC_CMPXCHG_INTEL}, then the environment must accept modules that declare use of the extension `SPV_INTEL_16bit_atomics` and that declare the SPIR-V capability *AtomicInt16CompareExchangeINTEL*. +For the instructions enabled by this capability: + + * When the _Pointer_ operand is a pointer to the *CrossWorkGroup* _Storage Class_, behavior is undefined unless {CL_DEVICE_INT16_ATOMIC_CAPABILITIES_INTEL} includes {CL_DEVICE_GLOBAL_ATOMIC_LOAD_STORE_INTEL} or {CL_DEVICE_GLOBAL_ATOMIC_CMPXCHG_INTEL}. + * When the _Pointer_ operand is a pointer to the *Workgroup* _Storage Class_, behavior is undefined unless {CL_DEVICE_INT16_ATOMIC_CAPABILITIES_INTEL} includes {CL_DEVICE_LOCAL_ATOMIC_LOAD_STORE_INTEL} or {CL_DEVICE_LOCAL_ATOMIC_CMPXCHG_INTEL}. + * When the _Pointer_ operand is a pointer to the *Generic* _Storage Class_, behavior is undefined unless {CL_DEVICE_INT16_ATOMIC_CAPABILITIES_INTEL} includes {CL_DEVICE_GLOBAL_ATOMIC_LOAD_STORE_INTEL} and {CL_DEVICE_LOCAL_ATOMIC_LOAD_STORE_INTEL}, or {CL_DEVICE_GLOBAL_ATOMIC_CMPXCHG_INTEL} and {CL_DEVICE_LOCAL_ATOMIC_CMPXCHG_INTEL}. 
+ +If the OpenCL environment supports the extension `cl_intel_16bit_atomics` and the {CL_DEVICE_INT16_ATOMIC_CAPABILITIES_INTEL} bitfield includes {CL_DEVICE_GLOBAL_ATOMIC_ADD_INTEL}, {CL_DEVICE_GLOBAL_ATOMIC_MIN_MAX_INTEL}, {CL_DEVICE_GLOBAL_ATOMIC_AND_OR_XOR_INTEL}, {CL_DEVICE_LOCAL_ATOMIC_ADD_INTEL}, {CL_DEVICE_LOCAL_ATOMIC_MIN_MAX_INTEL}, or {CL_DEVICE_LOCAL_ATOMIC_AND_OR_XOR_INTEL}, then the environment must accept modules that declare use of the extension `SPV_INTEL_16bit_atomics` and that declare the SPIR-V capability *Int16AtomicsINTEL*. +For the instructions enabled by this capability: + + * When the _Pointer_ operand is a pointer to the *CrossWorkGroup* _Storage Class_, behavior is undefined unless {CL_DEVICE_INT16_ATOMIC_CAPABILITIES_INTEL} includes {CL_DEVICE_GLOBAL_ATOMIC_ADD_INTEL}, {CL_DEVICE_GLOBAL_ATOMIC_MIN_MAX_INTEL}, or {CL_DEVICE_GLOBAL_ATOMIC_AND_OR_XOR_INTEL}. + * When the _Pointer_ operand is a pointer to the *Workgroup* _Storage Class_, behavior is undefined unless {CL_DEVICE_INT16_ATOMIC_CAPABILITIES_INTEL} includes {CL_DEVICE_LOCAL_ATOMIC_ADD_INTEL}, {CL_DEVICE_LOCAL_ATOMIC_MIN_MAX_INTEL}, or {CL_DEVICE_LOCAL_ATOMIC_AND_OR_XOR_INTEL}. + * When the _Pointer_ operand is a pointer to the *Generic* _Storage Class_, behavior is undefined unless {CL_DEVICE_INT16_ATOMIC_CAPABILITIES_INTEL} includes {CL_DEVICE_GLOBAL_ATOMIC_ADD_INTEL} and {CL_DEVICE_LOCAL_ATOMIC_ADD_INTEL}, or {CL_DEVICE_GLOBAL_ATOMIC_MIN_MAX_INTEL} and {CL_DEVICE_LOCAL_ATOMIC_MIN_MAX_INTEL}, or {CL_DEVICE_GLOBAL_ATOMIC_AND_OR_XOR_INTEL} and {CL_DEVICE_LOCAL_ATOMIC_AND_OR_XOR_INTEL}. 
+ +If the OpenCL environment supports the extension `cl_intel_16bit_atomics` and the {CL_DEVICE_BF16_ATOMIC_CAPABILITIES_INTEL} bitfield includes {CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT} or {CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT}, then the environment must accept modules that declare use of the extension `SPV_INTEL_16bit_atomics` and that declare the SPIR-V capability *AtomicBFloat16LoadStoreINTEL*. +For the instructions enabled by this capability: + + * When the _Pointer_ operand is a pointer to the *CrossWorkGroup* _Storage Class_, behavior is undefined unless {CL_DEVICE_BF16_ATOMIC_CAPABILITIES_INTEL} includes {CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT}. + * When the _Pointer_ operand is a pointer to the *Workgroup* _Storage Class_, behavior is undefined unless {CL_DEVICE_BF16_ATOMIC_CAPABILITIES_INTEL} includes {CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT}. + * When the _Pointer_ operand is a pointer to the *Generic* _Storage Class_, behavior is undefined unless {CL_DEVICE_BF16_ATOMIC_CAPABILITIES_INTEL} includes {CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT} and {CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT}. + +If the OpenCL environment supports the extension `cl_intel_16bit_atomics` and the {CL_DEVICE_BF16_ATOMIC_CAPABILITIES_INTEL} bitfield includes {CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT} or {CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT}, then the environment must accept modules that declare use of the extension `SPV_INTEL_16bit_atomics` and that declare the SPIR-V capability *AtomicBFloat16AddINTEL*. +For the instructions enabled by this capability: + + * The instructions may be affected by compiler options affecting floating-point behavior, such as `-cl-no-signed-zeros`, `-cl-denorms-are-zero`, and `-cl-finite-math-only`. + * When the _Pointer_ operand is a pointer to the *CrossWorkGroup* _Storage Class_, behavior is undefined unless {CL_DEVICE_BF16_ATOMIC_CAPABILITIES_INTEL} includes {CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT}. 
+ * When the _Pointer_ operand is a pointer to the *Workgroup* _Storage Class_, behavior is undefined unless {CL_DEVICE_BF16_ATOMIC_CAPABILITIES_INTEL} includes {CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT}. + * When the _Pointer_ operand is a pointer to the *Generic* _Storage Class_, behavior is undefined unless {CL_DEVICE_BF16_ATOMIC_CAPABILITIES_INTEL} includes {CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT} and {CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT}. + +If the OpenCL environment supports the extension `cl_intel_16bit_atomics` and the {CL_DEVICE_BF16_ATOMIC_CAPABILITIES_INTEL} bitfield includes {CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT} or {CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT}, then the environment must accept modules that declare use of the extension `SPV_INTEL_16bit_atomics` and that declare the SPIR-V capability *AtomicBFloat16MinMaxINTEL*. +For the instructions enabled by this capability: + + * The instructions may be affected by compiler options affecting floating-point behavior, such as `-cl-no-signed-zeros`, `-cl-denorms-are-zero`, and `-cl-finite-math-only`. + * When the _Pointer_ operand is a pointer to the *CrossWorkGroup* _Storage Class_, behavior is undefined unless {CL_DEVICE_BF16_ATOMIC_CAPABILITIES_INTEL} includes {CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT}. + * When the _Pointer_ operand is a pointer to the *Workgroup* _Storage Class_, behavior is undefined unless {CL_DEVICE_BF16_ATOMIC_CAPABILITIES_INTEL} includes {CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT}. + * When the _Pointer_ operand is a pointer to the *Generic* _Storage Class_, behavior is undefined unless {CL_DEVICE_BF16_ATOMIC_CAPABILITIES_INTEL} includes {CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT} and {CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT}. +-- + +== Issues + +. What should this extension be called? ++ +-- +`RESOLVED`: The name of the extension will be `cl_intel_16bit_atomics`, matching the name of the related SPIR-V extension. 
+ +The extension name `cl_intel_int16_atomics` was considered, but `int16` is a vector type in OpenCL C, and this extension applies to 16-bit `bfloat16` floating-point values in addition to 16-bit integer values. + +There is no need to differentiate between global and local atomics in the extension name, as is done by the `cl_khr_global_int32_base_atomics` or `cl_khr_local_int32_extended_atomics` extensions. + +Note that applications should already be set up to handle extension names that begin with a digit due to the `cl_khr_3d_image_writes` extension. +-- + +. Do we need to support 16-bit atomics through OpenCL C? ++ +-- +`RESOLVED`: Not in the initial version. +Support for 16-bit atomics in OpenCL C may be added in a subsequent extension version. +-- + +. Do we need to document any special floating-point behavior for floating-point atomic add? ++ +-- +`RESOLVED`: Floating-point atomic add may be affected by compiler options affecting floating-point behavior, such as `-cl-no-signed-zeros`, `-cl-denorms-are-zero`, and `-cl-finite-math-only`, otherwise there is no special behavior. +-- + +. Do we need to document any special floating-point behavior for floating-point atomic min and max? ++ +-- +`RESOLVED`: This spec inherits all of the special-case NaN behavior from the SPIR-V atomic min and max spec. +Additionally, floating-point atomic min and max may be affected by compiler options affecting floating-point behavior, such as `-cl-no-signed-zeros`, `-cl-denorms-are-zero`, and `-cl-finite-math-only`. +Otherwise, there is no special behavior. +-- + +== Revision History + +[cols="5,15,15,65"] +[grid="rows"] +[options="header"] +|======================================== +|Version|Date|Author|Changes +|0.9.0|2025-10-13|Ben Ashbaugh|*Initial draft.* +|======================================== + +//************************************************************************ +//Other formatting suggestions: +// +//* Use *bold* text for host APIs, or [source] syntax highlighting. 
+//* Use `mono` text for device APIs, or [source] syntax highlighting. +//* Use `mono` text for extension names, types, or enum values. +//* Use _italics_ for parameters. +//************************************************************************ diff --git a/sycl/doc/design/spirv-extensions/SPV_INTEL_16bit_atomics.asciidoc b/sycl/doc/design/spirv-extensions/SPV_INTEL_16bit_atomics.asciidoc new file mode 100644 index 0000000000000..0292ded0c64cb --- /dev/null +++ b/sycl/doc/design/spirv-extensions/SPV_INTEL_16bit_atomics.asciidoc @@ -0,0 +1,201 @@ +:extension_name: SPV_INTEL_16bit_atomics +:capability_name_int16_cmpxchg: AtomicInt16CompareExchangeINTEL +:capability_token_int16_cmpxchg: 6260 +:capability_name_int16_atomics: Int16AtomicsINTEL +:capability_token_int16_atomics: 6261 +:capability_name_bf16_loadstore: AtomicBFloat16LoadStoreINTEL +:capability_token_bf16_loadstore: 6262 +:capability_name_bf16_add: AtomicBFloat16AddINTEL +:capability_token_bf16_add: 6255 +:capability_name_bf16_minmax: AtomicBFloat16MinMaxINTEL +:capability_token_bf16_minmax: 6256 + += {extension_name} + +== Name Strings + +{extension_name} + +== Contact + +To report problems with this extension, please open a new issue at: + +https://github.com/intel/llvm + +// TODO: When the extension is published, change this link to the Khronos registry: +//https://github.com/KhronosGroup/SPIRV-Registry + +== Contributors + +* Ben Ashbaugh, Intel + +== Status + +* Working Draft + +This is a preview extension specification, intended to provide early access to a feature for review and community feedback. When the feature matures, this specification may be released as a formal extension. + +Because the interfaces defined by this specification are not final and are subject to change they are not intended to be used by shipping software products. If you are interested in using this feature in your software product, please let us know! 
+ +== Version + +[width="40%",cols="25,25"] +|======================================== +| Last Modified Date | 2025-10-01 +| Revision | 1 +|======================================== + +== Dependencies + +This extension is written against the SPIR-V Specification, Version 1.6 Revision 6. + +This extension requires SPIR-V 1.0. + +This extension extends the *SPV_EXT_shader_atomic_float_add* and +*SPV_EXT_shader_atomic_float_min_max* extensions, which provide the +floating-point atomic operations used by this extension. + +This extension depends on the *SPV_KHR_bfloat16* extension, which adds the +`bfloat16` floating-point type used by this extension. + +== Overview + +This extension extends the atomic support in SPIR-V to support atomic operations +on 16-bit integers in memory, and to support floating-point atomic addition, +minimum, and maximum on 16-bit `bfloat16` floating-point numbers in memory. + +== Extension Name + +To use this extension within a SPIR-V module, the following *OpExtension* must +be present in the module: + +[subs="attributes"] +---- +OpExtension "{extension_name}" +---- + +== Modifications to the SPIR-V Specification, Version 1.6 + +Modify Section 3.2.30, "Capability", adding to the Capability table: + +-- +[cols="1,15,15",options="header"] +|==== +2+^| Capability ^| Implicitly Declares +| {capability_token_int16_cmpxchg} | *{capability_name_int16_cmpxchg}* + +Uses the *OpAtomicLoad*, *OpAtomicStore*, *OpAtomicExchange*, *OpAtomicCompareExchange* or *OpAtomicCompareExchangeWeak* instructions with 16-bit integer values. +| *Int16* + +| {capability_token_int16_atomics} | *{capability_name_int16_atomics}* + +Uses atomic instructions on 16-bit integer types. +| *{capability_name_int16_cmpxchg}* + +| {capability_token_bf16_loadstore} | *{capability_name_bf16_loadstore}* + +Uses the *OpAtomicLoad*, *OpAtomicStore*, or *OpAtomicExchange* instructions with 16-bit `bfloat16` floating point values. 
+| *BFloat16TypeKHR* + +| {capability_token_bf16_add} | *{capability_name_bf16_add}* + +Uses the *OpAtomicFAddEXT* instruction with 16-bit `bfloat16` floating point values. +| *BFloat16TypeKHR* + +| {capability_token_bf16_minmax} | *{capability_name_bf16_minmax}* + +Uses the *OpAtomicFMinEXT* or *OpAtomicFMaxEXT* instructions with 16-bit `bfloat16` floating point values. +| *BFloat16TypeKHR* +|==== +-- + +Add the *{capability_name_bf16_add}* capability to the *OpAtomicFAddEXT* instruction added by *SPV_EXT_shader_atomic_float_add*: + +[width="100%",cols="1,1,6*4"] +|====== +7+|[[OpAtomicFAddEXT]]*OpAtomicFAddEXT* + + + +(The description of this instruction is unchanged from *SPV_EXT_shader_atomic_float_add*.) + +1+|Capability: + +*AtomicFloat32AddEXT* *AtomicFloat64AddEXT* *{capability_name_bf16_add}* +| 7 | 6035 | _<id>_ _Result Type_ | _Result <id>_ | _<id>_ _Pointer_ | _Scope <id>_ _Memory_ | _Memory Semantics <id>_ _Semantics_ | _<id>_ _Value_ +|====== + +Add the *{capability_name_bf16_minmax}* capability to the *OpAtomicFMinEXT* and *OpAtomicFMaxEXT* instructions added by *SPV_EXT_shader_atomic_float_min_max*: + +[width="100%",cols="1,1,6*4"] +|====== +7+|[[OpAtomicFMinEXT]]*OpAtomicFMinEXT* + + + +(The description of this instruction is unchanged from *SPV_EXT_shader_atomic_float_min_max*.) + +1+|Capability: + +*AtomicFloat16MinMaxEXT* *AtomicFloat32MinMaxEXT* *AtomicFloat64MinMaxEXT* *{capability_name_bf16_minmax}* +| 7 | 5614 | _<id>_ _Result Type_ | _Result <id>_ | _<id>_ _Pointer_ | _Scope <id>_ _Memory_ | _Memory Semantics <id>_ _Semantics_ | _<id>_ _Value_ +|====== + +[width="100%",cols="1,1,6*4"] +|====== +7+|[[OpAtomicFMaxEXT]]*OpAtomicFMaxEXT* + + + +(The description of this instruction is unchanged from *SPV_EXT_shader_atomic_float_min_max*.) 
+1+|Capability: + +*AtomicFloat16MinMaxEXT* *AtomicFloat32MinMaxEXT* *AtomicFloat64MinMaxEXT* *{capability_name_bf16_minmax}* +| 7 | 5615 | _<id>_ _Result Type_ | _Result <id>_ | _<id>_ _Pointer_ | _Scope <id>_ _Memory_ | _Memory Semantics <id>_ _Semantics_ | _<id>_ _Value_ +|====== + +== Validation Rules + +* For all atomic instructions, the _Result Type_ may be a 16-bit integer type, +and the type of _Value_ may be a 16-bit integer type. +** If the _Result Type_ for *OpAtomicLoad*, *OpAtomicExchange*, +*OpAtomicCompareExchange*, or *OpAtomicCompareExchangeWeak* is a 16-bit integer +type, then the *{capability_name_int16_cmpxchg}* capability must be declared. +** If the type of _Value_ for *OpAtomicStore* is a 16-bit integer type, then the +*{capability_name_int16_cmpxchg}* capability must be declared. +** For all other atomic instructions, if the _Result Type_ is a 16-bit integer +type or the type of _Value_ is a 16-bit integer type, then the +*{capability_name_int16_atomics}* capability must be declared. + +* For the instructions *OpAtomicLoad*, *OpAtomicStore*, *OpAtomicExchange*, +*OpAtomicFAddEXT*, *OpAtomicFMinEXT*, or *OpAtomicFMaxEXT*, the _Result Type_ +may be a 16-bit floating-point type with the *BFloat16KHR* encoding, and the +type of _Value_ may be a 16-bit floating-point type with the *BFloat16KHR* +encoding. +** If the _Result Type_ for *OpAtomicLoad* or *OpAtomicExchange* is a 16-bit +floating-point type with the *BFloat16KHR* encoding, then the +*{capability_name_bf16_loadstore}* capability must be declared. +** If the type of _Value_ for *OpAtomicStore* is a 16-bit floating-point type +with the *BFloat16KHR* encoding, then the *{capability_name_bf16_loadstore}* +capability must be declared. +** If the _Result Type_ for *OpAtomicFAddEXT* is a 16-bit floating-point type +with the *BFloat16KHR* encoding, then the *{capability_name_bf16_add}* capability +must be declared. 
+** If the _Result Type_ for *OpAtomicFMinEXT* or *OpAtomicFMaxEXT* is a 16-bit +floating-point type with the *BFloat16KHR* encoding, then the +*{capability_name_bf16_minmax}* capability must be declared. + +== Issues + +. Do we need to support the case where `bfloat16` values are represented as +16-bit integers where the bit pattern represents a `bfloat16` value? ++ +-- +*RESOLVED*: No, we will only support the case with a real `bfloat16` type, +specifically a 16-bit floating-point type with the *BFloat16KHR* encoding. This +means that this extension has a dependency on *SPV_KHR_bfloat16*. +-- + +. Do we need a separate capability for an `int16` atomic load and store? ++ +-- +*RESOLVED*: No, this is not necessary. The *{capability_name_int16_cmpxchg}* +capability will also enable support for 16-bit integer atomic loads, stores, and +exchanges. +-- + +== Revision History + +[cols="5,15,15,70"] +[grid="rows"] +[options="header"] +|======================================== +|Rev|Date|Author|Changes +|1 |2025-10-01 |Ben Ashbaugh|Initial revision for public preview +|========================================