Merged

Commits (99)
6014cef
[SYCL] Move bfloat support from experimental to supported.
rdeodhar Aug 3, 2022
bdd88e5
Corrections to tests.
rdeodhar Aug 3, 2022
73ed541
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Aug 24, 2022
0fe1884
Moved another file out of experimental space.
rdeodhar Aug 24, 2022
feb9d5f
Responses to review comments.
rdeodhar Aug 25, 2022
129f53f
Removed unneeded sycl::half conversion and updated doc.
rdeodhar Aug 26, 2022
2115f09
Added conversion from sycl::half to bfloat16.
rdeodhar Aug 29, 2022
3c2eb80
Cleanup of documentation.
rdeodhar Aug 31, 2022
74aa175
Hooked up bfloat16 aspect within OpenCL plugin.
rdeodhar Sep 2, 2022
bd05711
Support for bfloat16 aspect, and native or fallback support.
rdeodhar Sep 8, 2022
f8e894c
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Sep 8, 2022
2ad68f6
Formatting changes.
rdeodhar Sep 8, 2022
4b78c03
Formatting changes.
rdeodhar Sep 8, 2022
0fce16d
Update to documentation.
rdeodhar Sep 8, 2022
4bcb383
Deprecate bfloat16 aspect.
rdeodhar Sep 8, 2022
35308f8
Fixes for ESIMD.
rdeodhar Sep 9, 2022
fa045e2
Reinstated to_float and from_float, used by NVidia, updated doc.
rdeodhar Sep 9, 2022
3322d6a
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Sep 12, 2022
b12fd94
Update to doc.
rdeodhar Sep 12, 2022
87b0f09
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Sep 14, 2022
f217eb4
Corrections to headers.
rdeodhar Sep 14, 2022
a908b11
Formatting change.
rdeodhar Sep 14, 2022
aab4c78
bfloat16 class supports all sm_xx devices.
Sep 15, 2022
a2568ba
Merge pull request #1 from JackAKirk/bfloat16-cuda-allarch
rdeodhar Sep 15, 2022
4d7a22b
Changes to keep bfloat math functions experimental for now.
rdeodhar Sep 16, 2022
38e5ad4
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Sep 16, 2022
b9accad
Cleanup of bfloat16_math extension.
rdeodhar Sep 16, 2022
ca7880a
Document updates and minor changes.
rdeodhar Sep 19, 2022
dc3b2b5
Fixes for long lines in doc, a different way to check for NaN.
rdeodhar Sep 19, 2022
c955d36
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Sep 20, 2022
1aa6ad3
Broke long lines into multiple lines.
rdeodhar Sep 20, 2022
ff04ce1
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Sep 21, 2022
802f502
Changed library order on Windows.
rdeodhar Sep 21, 2022
8d7f46a
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Sep 22, 2022
190f2a3
Fix for AOT compilation and correction to new headers.
rdeodhar Sep 22, 2022
84c50f3
Noted AOT limitation in doc.
rdeodhar Sep 23, 2022
df058ba
Adjustment for AOT compilation.
rdeodhar Sep 24, 2022
fed4d1d
Fixes for AOT builds.
rdeodhar Sep 26, 2022
28259d0
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Sep 26, 2022
c11115b
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Sep 26, 2022
6b05a2a
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Sep 27, 2022
a82d73a
Fixes for AOT multiple devices.
rdeodhar Sep 27, 2022
3fc8885
Updated documentation.
rdeodhar Sep 27, 2022
1ec6838
Added back missing Status section in documentation.
rdeodhar Sep 27, 2022
105094b
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Sep 27, 2022
432e775
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Sep 29, 2022
c135643
Added tests, corrected aspect check.
rdeodhar Oct 1, 2022
4eca414
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Oct 1, 2022
8876ac8
Added missing newlines.
rdeodhar Oct 3, 2022
f0f2727
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Oct 3, 2022
17673bf
Corrections to tests and macros, added host code emulation.
rdeodhar Oct 4, 2022
1094b8c
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Oct 4, 2022
8d40228
Small corrections.
rdeodhar Oct 4, 2022
c5a85cf
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Oct 4, 2022
cf8f6e0
Fixes for AOT.
rdeodhar Oct 4, 2022
5e50646
Formatting change.
rdeodhar Oct 4, 2022
45d3e70
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Oct 5, 2022
a7be718
Renamed the bfloat aspects.
rdeodhar Oct 5, 2022
cac1c18
Fixes for generic JIT compilation.
rdeodhar Oct 6, 2022
208c09a
Changes for AOT sycl-targets switch.
rdeodhar Oct 6, 2022
46f406d
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Oct 6, 2022
6830857
Corrected aspects queries.
rdeodhar Oct 6, 2022
46e5278
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Oct 6, 2022
10fc9a3
Change in the way fallback/native libs are selected.
rdeodhar Oct 8, 2022
6195545
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Oct 8, 2022
437e34a
Changed type of string.
rdeodhar Oct 10, 2022
09dc4c5
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Oct 12, 2022
386353e
Replaced bfloat16 aspect with bfloat16_math_functions aspect.
rdeodhar Oct 12, 2022
0f93586
Improved devices check in clang driver.
rdeodhar Oct 13, 2022
48f3cac
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Oct 13, 2022
d33cb10
Enhanced test for improved bfloat16 target detection.
rdeodhar Oct 13, 2022
28992c2
Updated bfloat16 driver test for windows.
rdeodhar Oct 13, 2022
ec28c8b
Use STL for parsing devices.
rdeodhar Oct 13, 2022
b958fc7
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Oct 24, 2022
ec70b20
Allow spir64 target to be JIT even when combined with AOT targets.
rdeodhar Oct 24, 2022
1b86012
Updated documentation.
rdeodhar Oct 24, 2022
3e1e681
Modifications for mixed JIT and AOT compilations, added tests.
rdeodhar Oct 25, 2022
8c633d3
Corrections to comments.
rdeodhar Oct 25, 2022
1a59e03
Update to documentation.
rdeodhar Oct 25, 2022
b2fd6cc
Updated doc.
rdeodhar Oct 25, 2022
fab2e54
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Oct 26, 2022
35b8910
Adjustments to tests.
rdeodhar Oct 27, 2022
a05c872
Test cleanup.
rdeodhar Oct 27, 2022
ac5f603
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Oct 27, 2022
6d45ed1
Adjustments to more tests.
rdeodhar Oct 27, 2022
077d0fe
Change to tests to ensure AOT components are available.
rdeodhar Oct 28, 2022
2ff6a9d
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Nov 7, 2022
d7c80ee
Adjustment to test for new bfloat16 header.
rdeodhar Nov 7, 2022
20d13df
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Nov 8, 2022
cd1d0a2
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Nov 15, 2022
4bf60b9
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Nov 18, 2022
45c32f7
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Nov 21, 2022
5de1bf7
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Nov 22, 2022
6ec2bb9
Changes for indirect accesses.
rdeodhar Nov 22, 2022
49e9cd1
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Nov 22, 2022
2065060
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Nov 23, 2022
e24e57b
Fixed conflicts.
rdeodhar Nov 23, 2022
41098ab
Merge branch 'sycl' of https://github.com/intel/llvm into bfloat16
rdeodhar Nov 25, 2022
37b05f0
Correction to library list.
rdeodhar Nov 25, 2022
@@ -135,13 +135,11 @@ public:
bfloat16(const float &a);
bfloat16 &operator=(const float &a);

// Convert from bfloat16 to float
// Convert bfloat16 to floating-point types
operator float() const;
operator sycl::half() const;
Contributor:
I like this conversion to sycl::half. However, we should also add the opposite conversion from sycl::half to bfloat16:

bfloat16(const sycl::half &a);
bfloat16 &operator=(const sycl::half &a);

Do we also need conversion to / from double?

Contributor Author:
This PR is intended to move the current bfloat16 support out of experimental space. Any changes to the level of bfloat16 support can be done in future PRs.

Contributor Author:
On Intel platforms the bfloat16 to/from float conversion is done using the `__spirv_ConvertBF16ToFINTEL` operator. I suspect a double version of that does not exist.
Float-to-double conversion can then be done in the usual C++ way, efficiently in hardware. A direct software conversion from bfloat16 to double would involve more bit twiddling than the float conversion, where only trailing zero fraction bits need to be inserted.
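(For illustration, a minimal host-side sketch of the bit trick described above, with hypothetical helper names; on Intel devices the actual conversion goes through `__spirv_ConvertBF16ToFINTEL`:)

#include <cstdint>
#include <cstring>

// bfloat16 is the top 16 bits of an IEEE-754 float32, so widening to
// float only appends 16 zero fraction bits.
inline float bf16_bits_to_float(uint16_t raw) {
  uint32_t bits = static_cast<uint32_t>(raw) << 16;
  float f;
  std::memcpy(&f, &bits, sizeof(f));
  return f;
}

// Truncating narrowing; real implementations typically round to nearest even.
inline uint16_t float_to_bf16_bits(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));
  return static_cast<uint16_t>(bits >> 16);
}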

Contributor Author:
The sycl::half class includes conversions to/from float. Those kick in when bfloat16 is used with sycl::half, so conversions between bfloat16 and sycl::half are not needed.
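(A hedged sketch of what that means in practice — mixed bfloat16/half arithmetic can go through float on both sides, so no direct conversion is required:)

bfloat16 b{2.0f};
sycl::half h{0.5f};
// Both operands convert to float; the arithmetic happens in float.
float r = static_cast<float>(b) * static_cast<float>(h);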

Contributor:
Are you saying that we should remove this conversion from bfloat16 to sycl::half?

Contributor Author:
Yes, it's not needed.

Contributor Author:
This item was revisited and it turns out that sycl::half <-> bfloat16 conversions are needed. They have been added.
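(Illustrative use of the added conversions — a hedged sketch based on the declarations in this diff:)

sycl::half h{1.5f};
bfloat16 b{h};       // bfloat16(const sycl::half &)
b = h;               // bfloat16 &operator=(const sycl::half &)
sycl::half h2 = b;   // operator sycl::half() const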

MrSidims (Contributor), Sep 20, 2022:
Sorry for joining the discussion late. Maybe it's a nitpick, but should we state that the half <-> bfloat16 conversion follows the IEEE 754 float <-> half conversion? In other words, what happens if a bfloat16 value overflows the half range? Also, are the last 3 fraction bits added stochastically, or are they guaranteed to be zero (or is that an implementation detail)?
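(For context, a hedged sketch of the overflow case being asked about, assuming the conversion goes through float with IEEE-754 round-to-nearest semantics:)

// bfloat16 can hold values up to ~3.39e38, far above half's max finite
// value of 65504, so an IEEE-754 narrowing would round these to infinity.
bfloat16 big{1.0e20f};
sycl::half h = big;  // would become +inf under a float -> half IEEE path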


// Get bfloat16 as uint16.
operator storage_t() const;

// Convert to bool type
// Convert bfloat16 to bool type
explicit operator bool();

friend bfloat16 operator-(bfloat16 &bf) { /* ... */ }
@@ -195,11 +193,11 @@ Table 1. Member functions of `bfloat16` class.
| `operator float() const;`
| Return `bfloat16` value converted to `float`.

| `operator storage_t() const;`
| Return `uint16_t` value, whose bits represent `bfloat16` value.
| `operator sycl::half() const;`
| Return `bfloat16` value converted to `sycl::half`.

| `explicit operator bool() { /* ... */ }`
| Convert `bfloat16` to `bool` type. Return `false` if the value equals to
| Convert `bfloat16` to `bool` type. Return `false` if the `value` equals to
zero, return `true` otherwise.

| `friend bfloat16 operator-(bfloat16 &bf) { /* ... */ }`
@@ -408,4 +406,5 @@ Compute absolute value of a `bfloat16`.
|3|2021-08-18|Alexey Sotkin |Remove `uint16_t` constructor
|4|2022-03-07|Aidan Belton and Jack Kirk |Switch from Intel vendor specific to oneapi
|5|2022-04-05|Jack Kirk | Added section for bfloat16 math builtins
|6|2022-08-03|Alexey Sotkin |Add `operator sycl::half()`
Contributor:
I think it should be your name here. 😄

Contributor Author:
It was carried over from that author, but I agree that more changes have been made since, so I'm changing the name.

|========================================
@@ -15,7 +15,6 @@ __SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
namespace ext {
namespace oneapi {
namespace experimental {

class bfloat16 {
using storage_t = uint16_t;
@@ -165,7 +164,6 @@ class bfloat16 {
// for floating-point types.
};

} // namespace experimental
} // namespace oneapi
} // namespace ext

2 changes: 1 addition & 1 deletion sycl/include/sycl/ext/oneapi/experimental/builtins.hpp
@@ -15,7 +15,7 @@
#include <sycl/detail/type_traits.hpp>

#include <CL/__spirv/spirv_ops.hpp>
#include <sycl/ext/oneapi/experimental/bfloat16.hpp>
#include <sycl/ext/oneapi/bfloat16.hpp>

// TODO Decide whether to mark functions with this attribute.
#define __NOEXC /*noexcept*/
80 changes: 37 additions & 43 deletions sycl/include/sycl/ext/oneapi/matrix/matrix-jit.hpp
@@ -10,7 +10,7 @@

#include <CL/__spirv/spirv_ops.hpp>
#include <sycl/detail/defines_elementary.hpp>
#include <sycl/ext/oneapi/experimental/bfloat16.hpp>
#include <sycl/ext/oneapi/bfloat16.hpp>
#include <sycl/feature_test.hpp>

__SYCL_INLINE_NAMESPACE(cl) {
@@ -458,18 +458,16 @@ class wi_element<uint16_t, NumRows, NumCols, Layout, Group> {
};

template <size_t NumRows, size_t NumCols, matrix_layout Layout, typename Group>
class wi_element<sycl::ext::oneapi::experimental::bfloat16, NumRows, NumCols,
Layout, Group> {
joint_matrix<sycl::ext::oneapi::experimental::bfloat16, NumRows, NumCols,
Layout, Group> &M;
class wi_element<sycl::ext::oneapi::bfloat16, NumRows, NumCols, Layout, Group> {
joint_matrix<sycl::ext::oneapi::bfloat16, NumRows, NumCols, Layout, Group> &M;
std::size_t idx;

public:
wi_element(joint_matrix<sycl::ext::oneapi::experimental::bfloat16, NumRows,
NumCols, Layout, Group> &Mat,
wi_element(joint_matrix<sycl::ext::oneapi::bfloat16, NumRows, NumCols, Layout,
Group> &Mat,
std::size_t i)
: M(Mat), idx(i) {}
operator sycl::ext::oneapi::experimental::bfloat16() {
operator sycl::ext::oneapi::bfloat16() {
#ifdef __SYCL_DEVICE_ONLY__
return __spirv_VectorExtractDynamic(M.spvm, idx);
#else
@@ -488,7 +486,7 @@ class wi_element<sycl::ext::oneapi::experimental::bfloat16, NumRows, NumCols,
#endif // __SYCL_DEVICE_ONLY__
}

wi_element &operator=(const sycl::ext::oneapi::experimental::bfloat16 &rhs) {
wi_element &operator=(const sycl::ext::oneapi::bfloat16 &rhs) {
#ifdef __SYCL_DEVICE_ONLY__
M.spvm = __spirv_VectorInsertDynamic(M.spvm, rhs, idx);
return *this;
@@ -499,9 +497,8 @@ class wi_element<sycl::ext::oneapi::experimental::bfloat16, NumRows, NumCols,
#endif // __SYCL_DEVICE_ONLY__
}

wi_element &
operator=(const wi_element<sycl::ext::oneapi::experimental::bfloat16, NumRows,
NumCols, Layout, Group> &rhs) {
wi_element &operator=(const wi_element<sycl::ext::oneapi::bfloat16, NumRows,
NumCols, Layout, Group> &rhs) {
#ifdef __SYCL_DEVICE_ONLY__
M.spvm = __spirv_VectorInsertDynamic(
M.spvm, __spirv_VectorExtractDynamic(rhs.M.spvm, rhs.idx), idx);
@@ -515,16 +512,14 @@ class wi_element<sycl::ext::oneapi::experimental::bfloat16, NumRows, NumCols,

#if __SYCL_DEVICE_ONLY__
#define OP(opassign, op) \
wi_element &operator opassign( \
const sycl::ext::oneapi::experimental::bfloat16 &rhs) { \
wi_element &operator opassign(const sycl::ext::oneapi::bfloat16 &rhs) { \
M.spvm = __spirv_VectorInsertDynamic( \
M.spvm, __spirv_VectorExtractDynamic(M.spvm, idx) op rhs, idx); \
return *this; \
}
#else // __SYCL_DEVICE_ONLY__
#define OP(opassign, op) \
wi_element &operator opassign( \
const sycl::ext::oneapi::experimental::bfloat16 &rhs) { \
wi_element &operator opassign(const sycl::ext::oneapi::bfloat16 &rhs) { \
(void)rhs; \
throw runtime_error("joint matrix is not supported on host device.", \
PI_ERROR_INVALID_DEVICE); \
@@ -539,34 +534,34 @@ class wi_element<sycl::ext::oneapi::experimental::bfloat16, NumRows, NumCols,
#if __SYCL_DEVICE_ONLY__
#define OP(type, op) \
friend type operator op( \
const wi_element<sycl::ext::oneapi::experimental::bfloat16, NumRows, \
NumCols, Layout, Group> &lhs, \
const sycl::ext::oneapi::experimental::bfloat16 &rhs) { \
const wi_element<sycl::ext::oneapi::bfloat16, NumRows, NumCols, Layout, \
Group> &lhs, \
const sycl::ext::oneapi::bfloat16 &rhs) { \
return __spirv_VectorExtractDynamic(lhs.M.spvm, lhs.idx) op rhs; \
} \
friend type operator op( \
const sycl::ext::oneapi::experimental::bfloat16 &lhs, \
const wi_element<sycl::ext::oneapi::experimental::bfloat16, NumRows, \
NumCols, Layout, Group> &rhs) { \
const sycl::ext::oneapi::bfloat16 &lhs, \
const wi_element<sycl::ext::oneapi::bfloat16, NumRows, NumCols, Layout, \
Group> &rhs) { \
return __spirv_VectorExtractDynamic(rhs.M.spvm, rhs.idx) op lhs; \
}
OP(sycl::ext::oneapi::experimental::bfloat16, +)
OP(sycl::ext::oneapi::experimental::bfloat16, -)
OP(sycl::ext::oneapi::experimental::bfloat16, *)
OP(sycl::ext::oneapi::experimental::bfloat16, /)
OP(sycl::ext::oneapi::bfloat16, +)
OP(sycl::ext::oneapi::bfloat16, -)
OP(sycl::ext::oneapi::bfloat16, *)
OP(sycl::ext::oneapi::bfloat16, /)
#undef OP
#define OP(type, op) \
friend type operator op( \
const wi_element<sycl::ext::oneapi::experimental::bfloat16, NumRows, \
NumCols, Layout, Group> &lhs, \
const sycl::ext::oneapi::experimental::bfloat16 &rhs) { \
const wi_element<sycl::ext::oneapi::bfloat16, NumRows, NumCols, Layout, \
Group> &lhs, \
const sycl::ext::oneapi::bfloat16 &rhs) { \
return type{static_cast<float>(__spirv_VectorExtractDynamic( \
lhs.M.spvm, lhs.idx)) op static_cast<float>(rhs)}; \
} \
friend type operator op( \
const sycl::ext::oneapi::experimental::bfloat16 &lhs, \
const wi_element<sycl::ext::oneapi::experimental::bfloat16, NumRows, \
NumCols, Layout, Group> &rhs) { \
const sycl::ext::oneapi::bfloat16 &lhs, \
const wi_element<sycl::ext::oneapi::bfloat16, NumRows, NumCols, Layout, \
Group> &rhs) { \
return type{static_cast<float>(__spirv_VectorExtractDynamic( \
rhs.M.spvm, rhs.idx)) op static_cast<float>(lhs)}; \
}
@@ -579,24 +574,23 @@ class wi_element<sycl::ext::oneapi::experimental::bfloat16, NumRows, NumCols,
#undef OP
#else // __SYCL_DEVICE_ONLY__
#define OP(type, op) \
friend type operator op( \
const wi_element<sycl::ext::oneapi::experimental::bfloat16, NumRows, \
NumCols, Layout, Group> &, \
const sycl::ext::oneapi::experimental::bfloat16 &) { \
friend type operator op(const wi_element<sycl::ext::oneapi::bfloat16, \
NumRows, NumCols, Layout, Group> &, \
const sycl::ext::oneapi::bfloat16 &) { \
throw runtime_error("joint matrix is not supported on host device.", \
PI_ERROR_INVALID_DEVICE); \
} \
friend type operator op( \
const sycl::ext::oneapi::experimental::bfloat16 &, \
const wi_element<sycl::ext::oneapi::experimental::bfloat16, NumRows, \
NumCols, Layout, Group> &) { \
const sycl::ext::oneapi::bfloat16 &, \
const wi_element<sycl::ext::oneapi::bfloat16, NumRows, NumCols, Layout, \
Group> &) { \
throw runtime_error("joint matrix is not supported on host device.", \
PI_ERROR_INVALID_DEVICE); \
}
OP(sycl::ext::oneapi::experimental::bfloat16, +)
OP(sycl::ext::oneapi::experimental::bfloat16, -)
OP(sycl::ext::oneapi::experimental::bfloat16, *)
OP(sycl::ext::oneapi::experimental::bfloat16, /)
OP(sycl::ext::oneapi::bfloat16, +)
OP(sycl::ext::oneapi::bfloat16, -)
OP(sycl::ext::oneapi::bfloat16, *)
OP(sycl::ext::oneapi::bfloat16, /)
OP(bool, ==)
OP(bool, !=)
OP(bool, <)
17 changes: 8 additions & 9 deletions sycl/include/sycl/ext/oneapi/matrix/matrix-tensorcore.hpp
@@ -7,7 +7,7 @@
// ===--------------------------------------------------------------------=== //

#pragma once
#include <sycl/ext/oneapi/experimental/bfloat16.hpp>
#include <sycl/ext/oneapi/bfloat16.hpp>

__SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
@@ -219,8 +219,7 @@ struct joint_matrix_load_impl<
S, Use, NumRows, NumCols, Layout, sycl::sub_group> &res,
multi_ptr<T, Space> src, size_t stride) {
if constexpr (std::is_same<T, uint16_t>::value ||
std::is_same<
T, sycl::ext::oneapi::experimental::bfloat16>::value) {
std::is_same<T, sycl::ext::oneapi::bfloat16>::value) {
auto tileptr = reinterpret_cast<int32_t const *>(src.get());
auto destptr = reinterpret_cast<int32_t *>(&res.wi_marray);
if constexpr (NumRows == 16 && NumCols == 16) {
@@ -585,8 +584,8 @@ struct joint_matrix_mad_impl<
get_layout_pair_id<LayoutA, LayoutB>(), 0);
}
} else if constexpr (std::is_same<T1, uint16_t>::value ||
std::is_same<T1, sycl::ext::oneapi::experimental::
bfloat16>::value) {
std::is_same<T1,
sycl::ext::oneapi::bfloat16>::value) {
__mma_bf16_m16n16k16_mma_f32(
reinterpret_cast<float *>(&D.wi_marray),
reinterpret_cast<int32_t const *>(&A.wi_marray),
@@ -622,8 +621,8 @@ struct joint_matrix_mad_impl<
get_layout_pair_id<LayoutA, LayoutB>(), 0);
}
} else if constexpr (std::is_same<T1, uint16_t>::value ||
std::is_same<T1, sycl::ext::oneapi::experimental::
bfloat16>::value) {
std::is_same<T1,
sycl::ext::oneapi::bfloat16>::value) {
__mma_bf16_m8n32k16_mma_f32(
reinterpret_cast<float *>(&D.wi_marray),
reinterpret_cast<int32_t const *>(&A.wi_marray),
@@ -645,8 +644,8 @@ struct joint_matrix_mad_impl<
get_layout_pair_id<LayoutA, LayoutB>(), 0);
}
} else if constexpr (std::is_same<T1, uint16_t>::value ||
std::is_same<T1, sycl::ext::oneapi::experimental::
bfloat16>::value) {
std::is_same<T1,
sycl::ext::oneapi::bfloat16>::value) {
__mma_bf16_m32n8k16_mma_f32(
reinterpret_cast<float *>(&D.wi_marray),
reinterpret_cast<int32_t const *>(&A.wi_marray),
@@ -6,7 +6,7 @@

using namespace sycl;
using namespace sycl::ext::oneapi::experimental::matrix;
using sycl::ext::oneapi::experimental::bfloat16;
using sycl::ext::oneapi::bfloat16;

constexpr int stride = 16;

4 changes: 2 additions & 2 deletions sycl/test/extensions/bfloat16.cpp
@@ -2,10 +2,10 @@

// UNSUPPORTED: cuda || hip_amd

#include <sycl/ext/oneapi/experimental/bfloat16.hpp>
#include <sycl/ext/oneapi/bfloat16.hpp>
#include <sycl/sycl.hpp>

using sycl::ext::oneapi::experimental::bfloat16;
using sycl::ext::oneapi::bfloat16;

SYCL_EXTERNAL uint16_t some_bf16_intrinsic(uint16_t x, uint16_t y);
SYCL_EXTERNAL void foo(long x, sycl::half y);
2 changes: 1 addition & 1 deletion sycl/test/matrix/matrix-bfloat16-test.cpp
@@ -4,7 +4,7 @@
#include <iostream>

using namespace sycl::ext::oneapi::experimental::matrix;
using bfloat16 = sycl::ext::oneapi::experimental::bfloat16;
using bfloat16 = sycl::ext::oneapi::bfloat16;

static constexpr auto TILE_SZ = 16;
static constexpr auto TM = TILE_SZ - 1;