@@ -2503,26 +2503,77 @@ device kernel, the attribute is not ignored and it is propagated to the kernel.
25032503 [[intel::num_simd_work_items(N)]] void operator()() const {}
25042504 };
25052505
2506- If the`` intel::reqd_work_group_size`` or ``cl::reqd_work_group_size``
2507- attribute is specified on a declaration along with a
2508- intel::num_simd_work_items attribute, the work group size attribute
2509- argument (the first argument) must be evenly divisible by the argument specified
2510- in the ``intel::num_simd_work_items`` attribute.
2506+ If the ``reqd_work_group_size`` attribute is specified on a declaration along
2507+ with ``num_simd_work_items``, the required work group size specified
2508+ by the ``num_simd_work_items`` attribute must evenly divide the index that
2509+ increments fastest in the ``reqd_work_group_size`` attribute.
2510+
2511+ The arguments to ``reqd_work_group_size`` are ordered based on which index
2512+ increments the fastest. In OpenCL, the first argument is the index that
2513+ increments the fastest, and in SYCL, the last argument is the index that
2514+ increments the fastest.
2515+
2516+ In OpenCL, all three arguments are required.
2517+
2518+ In SYCL, the attribute accepts either one, two, or three arguments; in each
2519+ form, the last (or only) argument is the index that increments fastest.
2520+ The number of arguments passed to the attribute must match the dimensionality
2521+ of the kernel the attribute is applied to.
25112522
25122523.. code-block:: c++
25132524
2525+ // Note, '64' is evenly divisible by '4'; in SYCL, the last
2526+ // argument to the attribute is the one which increments fastest.
25142527 struct func {
25152528 [[intel::num_simd_work_items(4)]]
2516- [[intel::reqd_work_group_size(64, 64 , 64)]]
2529+ [[intel::reqd_work_group_size(7, 4, 64)]]
25172530 void operator()() const {}
25182531 };
25192532
2533+ // Note, '8' is evenly divisible by '8'; in SYCL, the last
2534+ // argument to the attribute is the one which increments fastest.
25202535 struct bar {
2521- [[intel::reqd_work_group_size(64, 64, 64)]]
2536+ [[intel::reqd_work_group_size(1, 1, 8)]]
2537+ [[intel::num_simd_work_items(8)]]
2538+ void operator()() const {}
2539+ };
2540+
2541+ // Note, '10' is evenly divisible by '5'; in SYCL, the last
2542+ // argument to the attribute is the one which increments fastest.
2543+ [[cl::reqd_work_group_size(7, 5, 10)]]
2544+ [[intel::num_simd_work_items(5)]] void fun2() {}
2545+
2546+ // Note, '8' is evenly divisible by '4'; in SYCL, the last
2547+ // argument to the attribute is the one which increments fastest.
2548+ [[intel::num_simd_work_items(4)]]
2549+ [[cl::reqd_work_group_size(5, 4, 8)]] void fun3() {}
2550+
2551+ // Note, '8' is evenly divisible by '8'; in SYCL, the last
2552+ // argument to the attribute is the one which increments fastest.
2553+ struct func1 {
2554+ [[intel::num_simd_work_items(8)]]
2555+ [[cl::reqd_work_group_size(1, 1, 8)]]
2556+ void operator()() const {}
2557+ };
2558+
2559+ // Note, '8' is evenly divisible by '4'; in SYCL, the last
2560+ // argument to the attribute is the one which increments fastest.
2561+ struct bar1 {
2562+ [[cl::reqd_work_group_size(7, 4, 8)]]
25222563 [[intel::num_simd_work_items(4)]]
25232564 void operator()() const {}
25242565 };
25252566
2567+ // Note, '4' is evenly divisible by '2'; in SYCL, the last
2568+ // argument to the attribute is the one which increments fastest.
2569+ [[intel::num_simd_work_items(2)]]
2570+ __attribute__((reqd_work_group_size(3, 2, 4))) void test();
2571+
2572+ // Note, '8' is evenly divisible by '2'; in SYCL, the last
2573+ // argument to the attribute is the one which increments fastest.
2574+ __attribute__((reqd_work_group_size(3, 2, 8)))
2575+ [[intel::num_simd_work_items(2)]] void test();
2576+
25262577 }];
25272578}
25282579
@@ -2636,6 +2687,77 @@ In OpenCL C, this attribute is available in GNU spelling
26362687
26372688 __kernel __attribute__((reqd_work_group_size(8, 16, 32))) void test() {}
26382689
2690+ The arguments to ``reqd_work_group_size`` are ordered based on which index
2691+ increments the fastest. In OpenCL, the first argument is the index that
2692+ increments the fastest, and in SYCL, the last argument is the index that
2693+ increments the fastest.
2694+
2695+ In OpenCL, all three arguments are required.
2696+
2697+ In SYCL, the attribute accepts either one, two, or three arguments; in each
2698+ form, the last (or only) argument is the index that increments fastest. The
2699+ number of arguments passed to the attribute must match the dimensionality of
2700+ the kernel the attribute is applied to.
2701+
2702+ If the ``reqd_work_group_size`` attribute is specified on a declaration along
2703+ with ``num_simd_work_items``, the required work group size specified by
2704+ ``num_simd_work_items`` must evenly divide the index that increments fastest
2705+ in the ``reqd_work_group_size`` attribute.
2706+
2707+ .. code-block:: c++
2708+
2709+ // Note, '64' is evenly divisible by '4'; in SYCL, the last
2710+ // argument to the attribute is the one which increments fastest.
2711+ struct func {
2712+ [[intel::num_simd_work_items(4)]]
2713+ [[intel::reqd_work_group_size(7, 4, 64)]]
2714+ void operator()() const {}
2715+ };
2716+
2717+ // Note, '8' is evenly divisible by '8'; in SYCL, the last
2718+ // argument to the attribute is the one which increments fastest.
2719+ struct bar {
2720+ [[intel::reqd_work_group_size(1, 1, 8)]]
2721+ [[intel::num_simd_work_items(8)]]
2722+ void operator()() const {}
2723+ };
2724+
2725+ // Note, '10' is evenly divisible by '5'; in SYCL, the last
2726+ // argument to the attribute is the one which increments fastest.
2727+ [[cl::reqd_work_group_size(7, 5, 10)]]
2728+ [[intel::num_simd_work_items(5)]] void fun2() {}
2729+
2730+ // Note, '8' is evenly divisible by '4'; in SYCL, the last
2731+ // argument to the attribute is the one which increments fastest.
2732+ [[intel::num_simd_work_items(4)]]
2733+ [[cl::reqd_work_group_size(5, 4, 8)]] void fun3() {}
2734+
2735+ // Note, '8' is evenly divisible by '8'; in SYCL, the last
2736+ // argument to the attribute is the one which increments fastest.
2737+ struct func1 {
2738+ [[intel::num_simd_work_items(8)]]
2739+ [[cl::reqd_work_group_size(1, 1, 8)]]
2740+ void operator()() const {}
2741+ };
2742+
2743+ // Note, '8' is evenly divisible by '4'; in SYCL, the last
2744+ // argument to the attribute is the one which increments fastest.
2745+ struct bar1 {
2746+ [[cl::reqd_work_group_size(7, 4, 8)]]
2747+ [[intel::num_simd_work_items(4)]]
2748+ void operator()() const {}
2749+ };
2750+
2751+ // Note, '4' is evenly divisible by '2'; in SYCL, the last
2752+ // argument to the attribute is the one which increments fastest.
2753+ [[intel::num_simd_work_items(2)]]
2754+ __attribute__((reqd_work_group_size(3, 2, 4))) void test();
2755+
2756+ // Note, '8' is evenly divisible by '2'; in SYCL, the last
2757+ // argument to the attribute is the one which increments fastest.
2758+ __attribute__((reqd_work_group_size(3, 2, 8)))
2759+ [[intel::num_simd_work_items(2)]] void test();
2760+
26392761 }];
26402762}
26412763
0 commit comments