From fc98c22d13c5a5220822c5fb2b795d6f307b10f3 Mon Sep 17 00:00:00 2001 From: Dounia Date: Tue, 29 Aug 2023 08:10:49 -0700 Subject: [PATCH 01/13] [SYCL][matrix] Update the query interface with the latest joint matrix approved syntax --- ...cl_ext_oneapi_device_architecture.asciidoc | 1 + .../experimental/device_architecture.hpp | 1 + .../ext/oneapi/matrix/static-query-use.hpp | 647 +++++++++--------- .../Matrix/joint_matrix_query_default.cpp | 5 +- sycl/test/matrix/query-use.cpp | 212 +++--- sycl/test/matrix/runtime-query.cpp | 50 ++ 6 files changed, 463 insertions(+), 453 deletions(-) create mode 100644 sycl/test/matrix/runtime-query.cpp diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc index 6c7bc2cce84b..cd8f878f56f7 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc @@ -99,6 +99,7 @@ namespace sycl::ext::oneapi::experimental { enum class architecture : /* unspecified */ { x86_64, + intel_cpu_spr, intel_gpu_bdw, intel_gpu_skl, intel_gpu_kbl, diff --git a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp index 5eb0a301ce4b..624ae83897e4 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp @@ -14,6 +14,7 @@ namespace ext::oneapi::experimental { enum class architecture { x86_64, + intel_cpu_spr, intel_gpu_bdw, intel_gpu_skl, intel_gpu_kbl, diff --git a/sycl/include/sycl/ext/oneapi/matrix/static-query-use.hpp b/sycl/include/sycl/ext/oneapi/matrix/static-query-use.hpp index f30ff53bb8a5..a3db6eca3ec3 100644 --- a/sycl/include/sycl/ext/oneapi/matrix/static-query-use.hpp +++ b/sycl/include/sycl/ext/oneapi/matrix/static-query-use.hpp @@ -35,40 +35,42 @@ namespace ext { namespace oneapi { namespace experimental::matrix { -enum class tpu { - xmx8, - xmx16, - amx, -}; enum class matrix_type { - bf8, bf16, fp16, tf32, fp32, fp64, - sint2, - sint4, sint8, sint16, sint32, sint64, - uint2, - uint4, uint8, uint16, uint32, uint64 }; -enum class scope_t { sub_group, work_group }; +struct combination { + size_t max_msize; + size_t max_nsize; + size_t max_ksize; + size_t msize; + size_t nsize; + size_t ksize; + matrix_type atype; + matrix_type btype; + matrix_type ctype; + matrix_type dtype; +}; -template -struct tpu_params; +template +struct matrix_params; template -constexpr bool is_combination_valid_amx(int sM, int sN, int sK) { +constexpr bool is_combination_valid_amx(size_t sM, size_t sN, size_t sK) { // is_same_v is a C++17 feature if ((std::is_same_v && std::is_same_v && std::is_same_v && sM <= 16 && sN <= 16 && sK <= 64) || @@ -104,46 +106,14 @@ constexpr bool are_types_valid_amx() { return false; } -// General query: -// types are not given, no default sizes and no implicit matrix construction -template -struct tpu_params { - static constexpr std::size_t M = -1; // depends on the type - static constexpr std::size_t N = -1; - static constexpr std::size_t K = -1; - - uint32_t numtiles = 8; - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); - struct combination { - uint32_t max_msize; - uint32_t max_nsize; - uint32_t max_ksize; - matrix_type atype; - matrix_type btype; - matrix_type accumulatortype; - 
uint32_t msize; - uint32_t nsize; - uint32_t ksize; - }; - using mt = matrix_type; - static constexpr combination combinations[] = { - {16, 16, 64, mt::sint8, mt::sint8, mt::sint32}, - {16, 16, 64, mt::sint8, mt::uint8, mt::sint32}, - {16, 16, 64, mt::uint8, mt::sint8, mt::sint32}, - {16, 16, 64, mt::uint8, mt::uint8, mt::sint32}, - {16, 16, 32, mt::bf16, mt::bf16, mt::fp32}}; - static constexpr int num_combinations = - sizeof(combinations) / sizeof(combination); -}; - -// Sizes-only query +// Default values query // Specialization for when only types are given, need to query only sizes -template -struct tpu_params && - !std::is_same_v && - !std::is_same_v)>::type> { +template +struct matrix_params< + architecture::intel_cpu_spr, Ta, Tb, Tc, Td, 0, 0, 0, + typename std::enable_if<(!std::is_same_v && + !std::is_same_v && + !std::is_same_v)>::type> { static_assert((are_types_valid_amx()), "Invalid types for AMX, supported types are int8_t, uint8_t, " "and bf16 (Note that unsigned short should be used in the" @@ -159,34 +129,17 @@ struct tpu_params using joint_matrix_b = joint_matrix; template - using joint_matrix_accumulator = - joint_matrix; - - uint32_t numtiles = 8; - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); - struct combination { - uint32_t max_msize; - uint32_t max_nsize; - uint32_t max_ksize; - matrix_type atype; - matrix_type btype; - matrix_type accumulatortype; - uint32_t msize; - uint32_t nsize; - uint32_t ksize; - }; - static constexpr combination combinations[] = { - {16, 16, (sizeof(Ta) == 1) ? 64 : 32}}; - static constexpr int num_combinations = - sizeof(combinations) / sizeof(combination); + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; }; -// Valid or not: +// Validation query // Specialization when both types and sizes are given -template -struct tpu_params< - tpu::amx, Ta, Tb, Tc, sM, sN, sK, +template +struct matrix_params< + architecture::intel_cpu_spr, Ta, Tb, Tc, Td, sM, sN, sK, typename std::enable_if<( !std::is_same_v && !std::is_same_v && !std::is_same_v && sM != 0 && sN != 0 && sK != 0)>::type> { @@ -195,27 +148,24 @@ struct tpu_params< (sM == 0 && sN == 0 && sK == 0) || (is_combination_valid_amx(sM, sN, sK)), "Invalid parameters for AMX, query valid types and maximum sizes " - "using: tpu_params myparams; and then check out " + "using: matrix_params myparams; and then " + "check out " "myparams.combinations array"); // if combination is valid, construct the matrices - static constexpr std::size_t M = (sM != 0) ? sM : 16; - static constexpr std::size_t N = (sN != 0) ? sN : 16; - static constexpr std::size_t K = - (sK != 0) ? sK : ((sizeof(Ta) == 1) ? 64 : 32); + static constexpr std::size_t M = sM; + static constexpr std::size_t N = sN; + static constexpr std::size_t K = sK; template using joint_matrix_a = joint_matrix; template using joint_matrix_b = joint_matrix; template - using joint_matrix_accumulator = - joint_matrix; - - uint32_t numtiles = 8; - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; }; // Intel XMX with SIMD8 capability @@ -224,25 +174,25 @@ struct tpu_params< // capabilities of the Intel XMX hardware. 
template -constexpr bool is_combination_valid_xmx8(int sM, int sN, int sK) { +constexpr bool is_combination_valid_xmx8(size_t sM, size_t sN, size_t sK) { if ((std::is_same_v && std::is_same_v && - std::is_same_v && (sM == 1 || sM == 2 || sM == 4 || sM == 8) && - sN == 8 && sK == 32) || + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 8 && + sK == 32) || (std::is_same_v && std::is_same_v && - std::is_same_v && (sM == 1 || sM == 2 || sM == 4 || sM == 8) && - sN == 8 && sK == 32) || + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 8 && + sK == 32) || (std::is_same_v && std::is_same_v && - std::is_same_v && (sM == 1 || sM == 2 || sM == 4 || sM == 8) && - sN == 8 && sK == 32) || + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 8 && + sK == 32) || (std::is_same_v && std::is_same_v && - std::is_same_v && (sM == 1 || sM == 2 || sM == 4 || sM == 8) && - sN == 8 && sK == 32) || + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 8 && + sK == 32) || (std::is_same_v && std::is_same_v && - std::is_same_v && - (sM == 1 || sM == 2 || sM == 4 || sM == 8) && sN == 8 && sK == 16) || + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 8 && + sK == 16) || (std::is_same_v && std::is_same_v && std::is_same_v && - (sM == 1 || sM == 2 || sM == 4 || sM == 8) && sN == 8 && sK == 16)) + (sM >= 1 && sM <= 8) && sN == 8 && sK == 16)) return true; else return false; @@ -267,72 +217,140 @@ constexpr bool are_types_valid_xmx8() { return false; } -// General Query -// specialization for when types are not given --> no default values -template -struct tpu_params { - static constexpr std::size_t M = -1; // depends on the type - static constexpr std::size_t N = -1; - static constexpr std::size_t K = -1; - - uint32_t numtiles = -1; // does not apply for XMX8 - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); - - struct combination { - uint32_t max_msize; - uint32_t max_nsize; - uint32_t max_ksize; - matrix_type atype; - matrix_type btype; - matrix_type accumulatortype; - uint32_t msize; - uint32_t nsize; - uint32_t ksize; - }; - using mt = matrix_type; - static constexpr combination combinations[] = { - {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 1, 8, 32}, - {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 2, 8, 32}, - {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 4, 8, 32}, - {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 8, 8, 32}, - {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 1, 8, 32}, - {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 2, 8, 32}, - {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 4, 8, 32}, - {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 8, 8, 32}, - {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 1, 8, 32}, - {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 2, 8, 32}, - {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 4, 8, 32}, - {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 8, 8, 32}, - {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 1, 8, 32}, - {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 2, 8, 32}, - {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 4, 8, 32}, - {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 8, 8, 32}, - {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 1, 8, 16}, - {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 2, 8, 16}, - {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 4, 8, 16}, - {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 8, 8, 16}, - {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 1, 8, 16}, - {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 2, 8, 16}, - {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 4, 8, 16}, - {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 8, 8, 16}, - }; - static constexpr int 
num_combinations = - sizeof(combinations) / sizeof(combination); +// Default-values query: +// Specialization for when only types are given, need to query only sizes + +template +struct matrix_params< + architecture::intel_gpu_dg1, Ta, Tb, Tc, Td, 0, 0, 0, + typename std::enable_if<(!std::is_same_v && + !std::is_same_v && + !std::is_same_v)>::type> { + static_assert((are_types_valid_xmx8()), + "Invalid types for architecture::intel_gpu_dg1, supported " + "types are int8_t, uint8_t, half, and bf16"); + + // construct the matrices using the default sizes + + static constexpr std::size_t M = 8; + static constexpr std::size_t N = 8; + static constexpr std::size_t K = ((sizeof(Ta) == 1) ? 32 : 16); + + template + using joint_matrix_a = joint_matrix; + template + using joint_matrix_b = joint_matrix; + template + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; +}; + +// Validation query: +// Specialization when both types and sizes are given +template +struct matrix_params< + architecture::intel_gpu_dg1, Ta, Tb, Tc, Td, sM, sN, sK, + typename std::enable_if<( + !std::is_same_v && !std::is_same_v && + !std::is_same_v && sM != 0 && sN != 0 && sK != 0)>::type> { + // Validate that parameters are supported + static_assert( + (sM == 0 && sN == 0 && sK == 0) || + (is_combination_valid_xmx8(sM, sN, sK)), + "Invalid parameters for XMX8, query valid combinations " + "using: " + "q.get_device().get_info()"); + + // if combination is valid, construct the matrices + static constexpr std::size_t M = sM; + static constexpr std::size_t N = sN; + static constexpr std::size_t K = sK; + + template + using joint_matrix_a = joint_matrix; + template + using joint_matrix_b = joint_matrix; + template + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; }; -// Sizes-only query: +// Default-values query: // Specialization for when only types are given, need to query only sizes -template -struct tpu_params && - !std::is_same_v && - !std::is_same_v)>::type> { +template +struct matrix_params< + architecture::intel_gpu_dg2_g10, Ta, Tb, Tc, Td, 0, 0, 0, + typename std::enable_if<(!std::is_same_v && + !std::is_same_v && + !std::is_same_v)>::type> { + static_assert((are_types_valid_xmx8()), + "Invalid types for architecture::intel_gpu_dg1, supported " + "types are int8_t, uint8_t, half, and bf16"); + + // construct the matrices using the default sizes + + static constexpr std::size_t M = 8; + static constexpr std::size_t N = 8; + static constexpr std::size_t K = ((sizeof(Ta) == 1) ? 
32 : 16); + + template + using joint_matrix_a = joint_matrix; + template + using joint_matrix_b = joint_matrix; + template + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; +}; + +// Validation query: +// Specialization when both types and sizes are given +template +struct matrix_params< + architecture::intel_gpu_dg2_g10, Ta, Tb, Tc, Td, sM, sN, sK, + typename std::enable_if<( + !std::is_same_v && !std::is_same_v && + !std::is_same_v && sM != 0 && sN != 0 && sK != 0)>::type> { + // Validate that parameters are supported + static_assert( + (sM == 0 && sN == 0 && sK == 0) || + (is_combination_valid_xmx8(sM, sN, sK)), + "Invalid parameters for XMX8, query valid combinations " + "using: " + "q.get_device().get_info()"); + + // if combination is valid, construct the matrices + static constexpr std::size_t M = sM; + static constexpr std::size_t N = sN; + static constexpr std::size_t K = sK; + + template + using joint_matrix_a = joint_matrix; + template + using joint_matrix_b = joint_matrix; + template + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; +}; + +// Default-values query: +// Specialization for when only types are given, need to query only sizes + +template +struct matrix_params< + architecture::intel_gpu_dg2_g11, Ta, Tb, Tc, Td, 0, 0, 0, + typename std::enable_if<(!std::is_same_v && + !std::is_same_v && + !std::is_same_v)>::type> { static_assert((are_types_valid_xmx8()), - "Invalid types for XMX8, supported types are int8_t, uint8_t, " - "half, and bf16 (Note that unsigned short should be used in the" - "DPC++ code to implement bf16)"); + "Invalid types for architecture::intel_gpu_dg1, supported " + "types are int8_t, uint8_t, half, and bf16"); // construct the matrices using the default sizes @@ -345,68 +363,102 @@ struct tpu_params using joint_matrix_b = joint_matrix; template - using joint_matrix_accumulator = - joint_matrix; - - uint32_t numtiles = -1; // does not apply for XMX8 - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); - struct combination { - uint32_t max_msize; - uint32_t max_nsize; - uint32_t max_ksize; - matrix_type atype; - matrix_type btype; - matrix_type accumulatortype; - uint32_t msize; - uint32_t nsize; - uint32_t ksize; - }; - using mt = matrix_type; - static constexpr combination combinations[] = { - // The types used in the initialization below are fake and not used. In - // this case, users already chose the types, they are only looking for - // the - // sizes - {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 1, 8, (sizeof(Ta) == 1) ? 32 : 16}, - {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 2, 8, (sizeof(Ta) == 1) ? 32 : 16}, - {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 4, 8, (sizeof(Ta) == 1) ? 32 : 16}, - {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 8, 8, (sizeof(Ta) == 1) ? 
32 : 16}, - }; - static constexpr int num_combinations = - sizeof(combinations) / sizeof(combination); + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; }; -// Valid or not: +// Validation query: // Specialization when both types and sizes are given -template -struct tpu_params< - tpu::xmx8, Ta, Tb, Tc, sM, sN, sK, - typename std::enable_if<((!std::is_same_v && sM != 0))>::type> { +template +struct matrix_params< + architecture::intel_gpu_dg2_g11, Ta, Tb, Tc, Td, sM, sN, sK, + typename std::enable_if<( + !std::is_same_v && !std::is_same_v && + !std::is_same_v && sM != 0 && sN != 0 && sK != 0)>::type> { // Validate that parameters are supported - static_assert((sM == 0 && sN == 0 && sK == 0) || - (is_combination_valid_xmx8(sM, sN, sK)), - "Invalid parameters for XMX8, query valid combinations " - "using: tpu_params myparams; and then check out " - "myparams.combinations array"); + static_assert( + (sM == 0 && sN == 0 && sK == 0) || + (is_combination_valid_xmx8(sM, sN, sK)), + "Invalid parameters for XMX8, query valid combinations " + "using: " + "q.get_device().get_info()"); // if combination is valid, construct the matrices - static constexpr std::size_t M = (sM != 0) ? sM : 8; - static constexpr std::size_t N = (sN != 0) ? sN : 8; - static constexpr std::size_t K = - (sK != 0) ? sK : ((sizeof(Ta) == 1) ? 32 : 16); + static constexpr std::size_t M = sM; + static constexpr std::size_t N = sN; + static constexpr std::size_t K = sK; + + template + using joint_matrix_a = joint_matrix; + template + using joint_matrix_b = joint_matrix; + template + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; +}; + +// Default-values query: +// Specialization for when only types are given, need to query only sizes + +template +struct matrix_params< + architecture::intel_gpu_dg2_g12, Ta, Tb, Tc, Td, 0, 0, 0, + typename std::enable_if<(!std::is_same_v && + !std::is_same_v && + !std::is_same_v)>::type> { + static_assert((are_types_valid_xmx8()), + "Invalid types for architecture::intel_gpu_dg1, supported " + "types are int8_t, uint8_t, half, and bf16"); + + // construct the matrices using the default sizes + + static constexpr std::size_t M = 8; + static constexpr std::size_t N = 8; + static constexpr std::size_t K = ((sizeof(Ta) == 1) ? 
32 : 16); template using joint_matrix_a = joint_matrix; template using joint_matrix_b = joint_matrix; template - using joint_matrix_accumulator = - joint_matrix; + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; +}; - uint32_t numtiles = -1; // does not apply for XMX8 - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); +// Validation query: +// Specialization when both types and sizes are given +template +struct matrix_params< + architecture::intel_gpu_dg2_g12, Ta, Tb, Tc, Td, sM, sN, sK, + typename std::enable_if<( + !std::is_same_v && !std::is_same_v && + !std::is_same_v && sM != 0 && sN != 0 && sK != 0)>::type> { + // Validate that parameters are supported + static_assert( + (sM == 0 && sN == 0 && sK == 0) || + (is_combination_valid_xmx8(sM, sN, sK)), + "Invalid parameters for XMX8, query valid combinations " + "using: " + "q.get_device().get_info()"); + + // if combination is valid, construct the matrices + static constexpr std::size_t M = sM; + static constexpr std::size_t N = sN; + static constexpr std::size_t K = sK; + + template + using joint_matrix_a = joint_matrix; + template + using joint_matrix_b = joint_matrix; + template + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; }; // Intel XMX with SIMD16 capability @@ -415,25 +467,25 @@ struct tpu_params< // capabilities of the Intel XMX hardware. template -constexpr bool is_combination_valid_xmx16(int sM, int sN, int sK) { +constexpr bool is_combination_valid_xmx16(size_t sM, size_t sN, size_t sK) { if ((std::is_same_v && std::is_same_v && - std::is_same_v && (sM == 1 || sM == 2 || sM == 4 || sM == 8) && - sN == 16 && sK == 32) || + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 16 && + sK == 32) || (std::is_same_v && std::is_same_v && - std::is_same_v && (sM == 1 || sM == 2 || sM == 4 || sM == 8) && - sN == 16 && sK == 32) || + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 16 && + sK == 32) || (std::is_same_v && std::is_same_v && - std::is_same_v && (sM == 1 || sM == 2 || sM == 4 || sM == 8) && - sN == 16 && sK == 32) || + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 16 && + sK == 32) || (std::is_same_v && std::is_same_v && - std::is_same_v && (sM == 1 || sM == 2 || sM == 4 || sM == 8) && - sN == 16 && sK == 32) || + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 16 && + sK == 32) || (std::is_same_v && std::is_same_v && - std::is_same_v && - (sM == 1 || sM == 2 || sM == 4 || sM == 8) && sN == 16 && sK == 16) || + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 16 && + sK == 16) || (std::is_same_v && std::is_same_v && std::is_same_v && - (sM == 1 || sM == 2 || sM == 4 || sM == 8) && sN == 16 && sK == 16)) + (sM >= 1 && sM <= 8) && sN == 16 && sK == 16)) return true; else return false; @@ -458,72 +510,19 @@ constexpr bool are_types_valid_xmx16() { return false; } -// General Query -// specialization for when types are not given --> no default values -template -struct tpu_params { - static constexpr std::size_t M = -1; // depends on the type - static constexpr std::size_t N = -1; - static constexpr std::size_t K = -1; - - uint32_t numtiles = -1; // does not apply for XMX - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); - - struct combination { - uint32_t max_msize; - uint32_t max_nsize; - uint32_t max_ksize; - matrix_type atype; - matrix_type btype; - matrix_type accumulatortype; - uint32_t 
msize; - uint32_t nsize; - uint32_t ksize; - }; - using mt = matrix_type; - static constexpr combination combinations[] = { - {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 1, 16, 32}, - {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 2, 16, 32}, - {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 4, 16, 32}, - {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 8, 16, 32}, - {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 1, 16, 32}, - {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 2, 16, 32}, - {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 4, 16, 32}, - {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 8, 16, 32}, - {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 1, 16, 32}, - {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 2, 16, 32}, - {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 4, 16, 32}, - {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 8, 16, 32}, - {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 1, 16, 32}, - {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 2, 16, 32}, - {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 4, 16, 32}, - {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 8, 16, 32}, - {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 1, 16, 16}, - {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 2, 16, 16}, - {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 4, 16, 16}, - {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 8, 16, 16}, - {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 1, 16, 16}, - {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 2, 16, 16}, - {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 4, 16, 16}, - {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 8, 16, 16}, - }; - static constexpr int num_combinations = - sizeof(combinations) / sizeof(combination); -}; - -// Sizes-only query: +// Default values query: // Specialization for when only types are given, need to query only sizes -template -struct tpu_params && - !std::is_same_v && - !std::is_same_v)>::type> { +template +struct matrix_params< + architecture::intel_gpu_pvc, Ta, Tb, Tc, Td, 0, 0, 0, + typename std::enable_if<(!std::is_same_v && + !std::is_same_v && + !std::is_same_v)>::type> { static_assert((are_types_valid_xmx16()), - "Invalid types for XMX16, supported types are int8_t, uint8_t, " - "half, and bf16 (Note that unsigned short should be used in the" - "DPC++ code to implement bf16)"); + "Invalid types for architecture::intel_gpu_pvc, supported " + "types are int8_t, uint8_t, " + "half, and bf16"); // construct the matrices using the default sizes @@ -536,68 +535,42 @@ struct tpu_params using joint_matrix_b = joint_matrix; template - using joint_matrix_accumulator = - joint_matrix; - - uint32_t numtiles = -1; // does not apply for XMX - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); - struct combination { - uint32_t max_msize; - uint32_t max_nsize; - uint32_t max_ksize; - matrix_type atype; - matrix_type btype; - matrix_type accumulatortype; - uint32_t msize; - uint32_t nsize; - uint32_t ksize; - }; - using mt = matrix_type; - static constexpr combination combinations[] = { - // The types used in the initialization below are fake and not used. In - // this case, users already chose the types, they are only looking for - // the - // sizes - {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 1, 16, (sizeof(Ta) == 1) ? 32 : 16}, - {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 2, 16, (sizeof(Ta) == 1) ? 32 : 16}, - {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 4, 16, (sizeof(Ta) == 1) ? 32 : 16}, - {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 8, 16, (sizeof(Ta) == 1) ? 
32 : 16}, - }; - static constexpr int num_combinations = - sizeof(combinations) / sizeof(combination); + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; }; -// Valid or not: +// Validation query: // Specialization when both types and sizes are given -template -struct tpu_params< - tpu::xmx16, Ta, Tb, Tc, sM, sN, sK, - typename std::enable_if<((!std::is_same_v && sM != 0))>::type> { +template +struct matrix_params< + architecture::intel_gpu_pvc, Ta, Tb, Tc, Td, sM, sN, sK, + typename std::enable_if<( + !std::is_same_v && !std::is_same_v && + !std::is_same_v && sM != 0 && sN != 0 && sK != 0)>::type> { // Validate that parameters are supported - static_assert((sM == 0 && sN == 0 && sK == 0) || - (is_combination_valid_xmx16(sM, sN, sK)), - "Invalid parameters for XMX16, query valid combinations " - "using: tpu_params myparams; and then check out " - "myparams.combinations array"); + static_assert( + (sM == 0 && sN == 0 && sK == 0) || + (is_combination_valid_xmx16(sM, sN, sK)), + "Invalid parameters for architecture::intel_gpu_pvc, query valid " + "combinations " + "using: " + "q.get_device().get_info()"); // if combination is valid, construct the matrices - static constexpr std::size_t M = (sM != 0) ? sM : 8; - static constexpr std::size_t N = (sN != 0) ? sN : 8; - static constexpr std::size_t K = - (sK != 0) ? sK : ((sizeof(Ta) == 1) ? 32 : 16); + static constexpr std::size_t M = sM; + static constexpr std::size_t N = sN; + static constexpr std::size_t K = sK; template using joint_matrix_a = joint_matrix; template using joint_matrix_b = joint_matrix; template - using joint_matrix_accumulator = - joint_matrix; - - uint32_t numtiles = -1; // does not apply for XMX16 - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; }; } // namespace experimental::matrix } // namespace oneapi diff --git a/sycl/test-e2e/Matrix/joint_matrix_query_default.cpp b/sycl/test-e2e/Matrix/joint_matrix_query_default.cpp index 048aed6341f6..ccf50a5a76da 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_query_default.cpp +++ b/sycl/test-e2e/Matrix/joint_matrix_query_default.cpp @@ -39,7 +39,8 @@ void matrix_multiply(big_matrix &C, size_t K = NUM_COLS_A; assert(NUM_ROWS_C == NUM_ROWS_A && NUM_COLS_A == NUM_ROWS_B * 4); - using myparams2 = tpu_params; + using myparams2 = + matrix_params; constexpr int TM = myparams2::M; constexpr int TN = myparams2::N; constexpr int TK = myparams2::K; @@ -80,7 +81,7 @@ void matrix_multiply(big_matrix &C, myparams2::joint_matrix_b< sub_group, ext::intel::experimental::matrix::layout::packed> sub_b; - myparams2::joint_matrix_accumulator sub_c; + myparams2::joint_matrix_c sub_c; joint_matrix_load( sg, sub_c, diff --git a/sycl/test/matrix/query-use.cpp b/sycl/test/matrix/query-use.cpp index 9afc8e117304..e39361bbeead 100644 --- a/sycl/test/matrix/query-use.cpp +++ b/sycl/test/matrix/query-use.cpp @@ -1,162 +1,146 @@ -// RUN: %clangxx -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4 -fsycl -o query-use %s +// RUN: %clangxx -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4 -fsycl -o compile-time-query %s #include #include using namespace sycl; +using namespace sycl::ext::oneapi::experimental; using namespace sycl::ext::oneapi::experimental::matrix; -void query_amx() { +void query_amx_spr() { // generates combination assert - // using myparams = tpu_params; + // using myparams = matrix_params; // generates types assert - // 
using myparams2 = tpu_params; + // using myparams2 = matrix_params; // tells whether a combination is valid or not, if valid, those will be set as // default - using myparams = tpu_params; + using myparams = matrix_params; size_t dmsize = myparams::M; size_t dnsize = myparams::N; size_t dksize = myparams::K; - std::cout << "sizes of AMX tpu_params chosen by the user are: M " << dmsize + std::cout << "sizes of AMX matrix_params chosen by the user are: M " << dmsize << " N " << dnsize << " K " << dksize << std::endl; // Sizes-only query: types are given, generate default sizes - using myparams2 = tpu_params; + using myparams2 = + matrix_params; myparams2 p; dmsize = myparams2::M; dnsize = myparams2::N; dksize = myparams2::K; - std::cout << "default AMX sizes tpu_params are: M " << dmsize << " N " - << dnsize << " K " << dksize << "\n AMX int8 num combinations is " - << p.num_combinations << std::endl; - - // general query: types are not given - tpu_params myparams3; - - if (myparams3.num_scopes > 0) - if (myparams3.scopes[0] == scope_t::sub_group) - std::cout << "There are " << myparams3.num_scopes - << " Scopes that are supported by AMX implementation and " - "subgroup is one of them " - << std::endl; - - std::cout << "AMX query num combinations: " << myparams3.num_combinations - << std::endl; - - if (myparams3.combinations[0].msize != 0) // this is a max params hardware - return; - constexpr int msize = myparams3.combinations[0].max_msize; - constexpr int nsize = myparams3.combinations[0].max_nsize; - constexpr int ksize = myparams3.combinations[0].max_ksize; - std::cout << "AMX query sizes are: M " << msize << " N " << nsize << " K " - << ksize << std::endl; - - size_t NDRangeM = 1024 / msize; - size_t NDRangeN = 1024 / nsize; - queue q; - q.submit([&](handler &cgh) { - cgh.parallel_for( - nd_range<2>({NDRangeM, NDRangeN}, {1, 1}), - [msize, ksize, nsize](nd_item<2> spmd_item) { - sub_group sg = spmd_item.get_sub_group(); - myparams2::joint_matrix_a sub_a1; - myparams2::joint_matrix_b< - sub_group, sycl::ext::intel::experimental::matrix::layout::packed> - sub_b1; - myparams2::joint_matrix_accumulator sub_c1; - - joint_matrix sub_a; - joint_matrix sub_b; - joint_matrix sub_c; - }); - }); + std::cout << "default AMX sizes matrix_params are: M " << dmsize << " N " + << dnsize << " K " << dksize << std::endl; + return; } -void query_xmx8() { +void query_xmx_dg() { // generates combination assert - // using myparams = tpu_params; + // using myparams = matrix_params; // generate combination of type assert - // using myparams = tpu_params; + // using myparams = matrix_params; // tells whether a combination is valid or not, if valid, those will be set as // default - using myparams = tpu_params; + using myparams = matrix_params; size_t dmsize = myparams::M; size_t dnsize = myparams::N; size_t dksize = myparams::K; - std::cout << "sizes of XMX8 tpu_params chosen by the user are: M " << dmsize - << " N " << dnsize << " K " << dksize << std::endl; + std::cout << "sizes of Intel XMX of architecture::intel_gpu_dg1 " + "matrix_params chosen by the user are: M " + << dmsize << " N " << dnsize << " K " << dksize << std::endl; // sizes-only query: types are given, generate default sizes - using myparams2 = tpu_params; - myparams2 p; + using myparams2 = + matrix_params; dmsize = myparams2::M; dnsize = myparams2::N; dksize = myparams2::K; - std::cout << "Default XMX8 sizes are: M " << dmsize << " N " << dnsize - << " K " << dksize << "\n XMX8 int8 num combinations is " - << p.num_combinations << std::endl; 
- - dmsize = myparams2::combinations[0].msize; - dnsize = myparams2::combinations[0].nsize; - dksize = myparams2::combinations[0].ksize; - std::cout << "one of XMX8 combination sizes is: M " << dmsize << " N " - << dnsize << " K " << dksize << std::endl; + std::cout << "Default Intel XMX of architecture::intel_gpu_dg1 sizes are: M " + << dmsize << " N " << dnsize << " K " << dksize << std::endl; + return; +} + +void query_xmx_ats() { + + // generates combination assert + // using myparams = matrix_params; + + // generate combination of type assert + // using myparams = matrix_params; - // general query: types are not given - tpu_params myparams3; - - if (myparams3.num_scopes > 0) - if (myparams3.scopes[0] == scope_t::sub_group) - std::cout << "There are " << myparams3.num_scopes - << " Scopes that are supported by XMX8 implementation and " - "subgroup is one of them " - << std::endl; - - std::cout << "XMX8 query num combinations: " << myparams3.num_combinations - << std::endl; - - if (myparams3.combinations[0].msize == 0) // this is not a max params hardware - return; - constexpr int msize = myparams3.combinations[0].msize; - constexpr int nsize = myparams3.combinations[0].nsize; - constexpr int ksize = myparams3.combinations[0].ksize; - std::cout << "XMX8 query sizes are: M " << msize << " N " << nsize << " K " - << ksize << std::endl; - std::cout << "XMX8 query max sizes are: M " - << myparams3.combinations[0].max_msize << " N " - << myparams3.combinations[0].max_nsize << " K " - << myparams3.combinations[0].max_ksize << std::endl; - - size_t NDRangeM = 1024 / msize; - size_t NDRangeN = 1024 / nsize; - queue q; - q.submit([&](handler &cgh) { - cgh.parallel_for( - nd_range<2>({NDRangeM, NDRangeN}, {1, 1}), - [msize, ksize, nsize](nd_item<2> spmd_item) { - sub_group sg = spmd_item.get_sub_group(); - myparams2::joint_matrix_a sub_a1; - myparams2::joint_matrix_b< - sub_group, sycl::ext::intel::experimental::matrix::layout::packed> - sub_b1; - myparams2::joint_matrix_accumulator sub_c1; - - joint_matrix sub_a; - joint_matrix sub_b; - joint_matrix sub_c; - }); - }); + // tells whether a combination is valid or not, if valid, those will be set as + // default + using myparams = matrix_params; + + size_t dmsize = myparams::M; + size_t dnsize = myparams::N; + size_t dksize = myparams::K; + std::cout << "sizes of Intel XMX of architecture::intel_gpu_dg2_g10 " + "matrix_params chosen by the user are: M " + << dmsize << " N " << dnsize << " K " << dksize << std::endl; + + // sizes-only query: types are given, generate default sizes + using myparams2 = + matrix_params; + dmsize = myparams2::M; + dnsize = myparams2::N; + dksize = myparams2::K; + std::cout + << "Default Intel XMX of architecture::intel_gpu_dg2_g10 sizes are: M " + << dmsize << " N " << dnsize << " K " << dksize << std::endl; + return; +} + +void query_xmx_pvc() { + + // generates combination assert + // using myparams = matrix_params; + + // generate combination of type assert + // using myparams = matrix_params; + + // tells whether a combination is valid or not, if valid, those will be set as + // default + using myparams = matrix_params; + + size_t dmsize = myparams::M; + size_t dnsize = myparams::N; + size_t dksize = myparams::K; + std::cout << "sizes of architecture::intel_gpu_pvc matrix_params chosen by " + "the user are: M " + << dmsize << " N " << dnsize << " K " << dksize << std::endl; + + // sizes-only query: types are given, generate default sizes + using myparams2 = + matrix_params; + dmsize = myparams2::M; + dnsize = 
myparams2::N; + dksize = myparams2::K; + std::cout << "Default Intel XMX of architecture::intel_gpu_pvc sizes are: M " + << dmsize << " N " << dnsize << " K " << dksize << std::endl; + return; } int main() { - query_amx(); - query_xmx8(); + query_amx_spr(); + query_xmx_dg(); + query_xmx_ats(); + query_xmx_pvc(); return 0; } diff --git a/sycl/test/matrix/runtime-query.cpp b/sycl/test/matrix/runtime-query.cpp new file mode 100644 index 000000000000..64090faad465 --- /dev/null +++ b/sycl/test/matrix/runtime-query.cpp @@ -0,0 +1,50 @@ +// RUN: %clangxx -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4 -fsycl -o runtime-query %s +// XFAIL: * + +#include +#include + +using namespace sycl; +using namespace sycl::ext::oneapi::experimental; +using namespace sycl::ext::oneapi::experimental::matrix; + +template +void matrix_runtime_query(queue q) { + + std::vector combinations = + q.get_device().get_info(); + + std::cout << "The matrix hardware implementation in this device provides " + "this number of combinations: " + << combinations.size() << std::endl; + + bool max_sizes; + if (combinations[0].maxsize == 0) + max_sizes = true; // this is a max params hardware + else + max_sizes = false; + for (int i = 0; i < combinations.size(); i++) { + if (Ta == combinations[i].atype && Tb == combinations[i].btype && + Tc == combinations[i].ctype && Td == combinations[i].dtype) { + // joint matrix GEMM kernel can be called using these sizes + if (max_sizes) + std::cout << "The matrix hardware implementation in this device " + "provides the following max sizes are: M " + << combinations[i].max_msize << " N " + << combinations[i].max_nsize << " K " + << combinations[i].max_ksize << std::endl; + else + std::cout << "The matrix hardware implementation in this device " + "provides the following sizes are: M " + << combinations[i].msize << " N " << combinations[i].nsize + << " K " << combinations[i].ksize << std::endl; + } + } +} + +int main() { + queue q; + matrix_runtime_query(q); + return 0; +} From 3c42caca1eb0350ea15cb63843a8003804336fe9 Mon Sep 17 00:00:00 2001 From: Dounia Date: Thu, 31 Aug 2023 10:16:58 -0700 Subject: [PATCH 02/13] Add dg2 names --- .../sycl_ext_oneapi_matrix.asciidoc | 40 ++++++----- .../ext/oneapi/matrix/static-query-use.hpp | 67 +------------------ 2 files changed, 27 insertions(+), 80 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc index 94c2bebe0490..2f215228bbea 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc @@ -875,32 +875,40 @@ table below. ==== Intel XMX Supported Combinations This is currently available in devices with the architecture -`architecture::intel_gpu_pvc` and `architecture::intel_gpu_dg2`. In -these architectures' implementation, the type of the C matrix must be -the same as the type of the D matrix. Therefore, that common type is -shown in a single column in the table below. +`architecture::intel_gpu_pvc`, `architecture::intel_gpu_dg2_g10`, +`architecture::intel_gpu_dg2_g11`, and +`architecture::intel_gpu_dg2_g12`. In these architectures' +implementation, the type of the C matrix must be the same as the type +of the D matrix. Therefore, that common type is shown in a single +column in the table below. 
[frame="none",options="header"] |====================== | A type | B type | C and D type | M | N | K | device | `matrix_type::uint8` | `matrix_type::uint8` | -`matrix_type::int32` | +<=+ 8 | 16 | 32 | architecture::intel_gpu_pvc -| | | | |8||architecture::intel_gpu_dg2 +`matrix_type::int32` | +<=+ 8 | 16 | 32 | `architecture::intel_gpu_pvc` +| | | | |8||`architecture::intel_gpu_dg2_g10, +architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12` | `matrix_type::uint8` | `matrix_type::int8` | -`matrix_type::int32` | +<=+ 8 | 16 | 32 | architecture::intel_gpu_pvc -| | | | |8||architecture::intel_gpu_dg2 +`matrix_type::int32` | +<=+ 8 | 16 | 32 | `architecture::intel_gpu_pvc` +| | | | |8||`architecture::intel_gpu_dg2_g10, +architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12` | `matrix_type::int8` | `matrix_type::uint8` | -`matrix_type::int32` | +<=+ 8 | 16 | 32 | architecture::intel_gpu_pvc -| | | | |8||architecture::intel_gpu_dg2 +`matrix_type::int32` | +<=+ 8 | 16 | 32 | `architecture::intel_gpu_pvc` +| | | | |8||`architecture::intel_gpu_dg2_g10, +architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12` | `matrix_type::int8` | `matrix_type::int8` | -`matrix_type::int32` | +<=+ 8 | 16 | 32 | architecture::intel_gpu_pvc -| | | | |8||architecture::intel_gpu_dg2 +`matrix_type::int32` | +<=+ 8 | 16 | 32 | `architecture::intel_gpu_pvc` +| | | | |8||`architecture::intel_gpu_dg2_g10, +architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12` | `matrix_type::fp16` | `matrix_type::fp16` | -`matrix_type::fp32` | +<=+ 8 | 16 | 16 | architecture::intel_gpu_pvc -| | | | |8|| architecture::intel_gpu_dg2 +`matrix_type::fp32` | +<=+ 8 | 16 | 16 | `architecture::intel_gpu_pvc` +| | | | |8|| `architecture::intel_gpu_dg2_g10, +architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12` | `matrix_type::bf16` | `matrix_type::bf16` | -`matrix_type::fp32` | +<=+ 8 | 16 | 16 | architecture::intel_gpu_pvc -| | | | |8|| architecture::intel_gpu_dg2 +`matrix_type::fp32` | +<=+ 8 | 16 | 16 | `architecture::intel_gpu_pvc` +| | | | |8|| `architecture::intel_gpu_dg2_g10, +architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12` |====================== ==== Nvidia Tensor Cores Supported Combinations diff --git a/sycl/include/sycl/ext/oneapi/matrix/static-query-use.hpp b/sycl/include/sycl/ext/oneapi/matrix/static-query-use.hpp index a3db6eca3ec3..cfc699d86b38 100644 --- a/sycl/include/sycl/ext/oneapi/matrix/static-query-use.hpp +++ b/sycl/include/sycl/ext/oneapi/matrix/static-query-use.hpp @@ -220,67 +220,6 @@ constexpr bool are_types_valid_xmx8() { // Default-values query: // Specialization for when only types are given, need to query only sizes -template -struct matrix_params< - architecture::intel_gpu_dg1, Ta, Tb, Tc, Td, 0, 0, 0, - typename std::enable_if<(!std::is_same_v && - !std::is_same_v && - !std::is_same_v)>::type> { - static_assert((are_types_valid_xmx8()), - "Invalid types for architecture::intel_gpu_dg1, supported " - "types are int8_t, uint8_t, half, and bf16"); - - // construct the matrices using the default sizes - - static constexpr std::size_t M = 8; - static constexpr std::size_t N = 8; - static constexpr std::size_t K = ((sizeof(Ta) == 1) ? 
32 : 16); - - template - using joint_matrix_a = joint_matrix; - template - using joint_matrix_b = joint_matrix; - template - using joint_matrix_c = joint_matrix; - template - using joint_matrix_d = joint_matrix; -}; - -// Validation query: -// Specialization when both types and sizes are given -template -struct matrix_params< - architecture::intel_gpu_dg1, Ta, Tb, Tc, Td, sM, sN, sK, - typename std::enable_if<( - !std::is_same_v && !std::is_same_v && - !std::is_same_v && sM != 0 && sN != 0 && sK != 0)>::type> { - // Validate that parameters are supported - static_assert( - (sM == 0 && sN == 0 && sK == 0) || - (is_combination_valid_xmx8(sM, sN, sK)), - "Invalid parameters for XMX8, query valid combinations " - "using: " - "q.get_device().get_info()"); - - // if combination is valid, construct the matrices - static constexpr std::size_t M = sM; - static constexpr std::size_t N = sN; - static constexpr std::size_t K = sK; - - template - using joint_matrix_a = joint_matrix; - template - using joint_matrix_b = joint_matrix; - template - using joint_matrix_c = joint_matrix; - template - using joint_matrix_d = joint_matrix; -}; - -// Default-values query: -// Specialization for when only types are given, need to query only sizes - template struct matrix_params< architecture::intel_gpu_dg2_g10, Ta, Tb, Tc, Td, 0, 0, 0, @@ -288,7 +227,7 @@ struct matrix_params< !std::is_same_v && !std::is_same_v)>::type> { static_assert((are_types_valid_xmx8()), - "Invalid types for architecture::intel_gpu_dg1, supported " + "Invalid types for architecture::intel_gpu_dg2_g10, supported " "types are int8_t, uint8_t, half, and bf16"); // construct the matrices using the default sizes @@ -349,7 +288,7 @@ struct matrix_params< !std::is_same_v && !std::is_same_v)>::type> { static_assert((are_types_valid_xmx8()), - "Invalid types for architecture::intel_gpu_dg1, supported " + "Invalid types for architecture::intel_gpu_dg2_g11, supported" "types are int8_t, uint8_t, half, and bf16"); // construct the matrices using the default sizes @@ -410,7 +349,7 @@ struct matrix_params< !std::is_same_v && !std::is_same_v)>::type> { static_assert((are_types_valid_xmx8()), - "Invalid types for architecture::intel_gpu_dg1, supported " + "Invalid types for architecture::intel_gpu_dg2_g12, supported " "types are int8_t, uint8_t, half, and bf16"); // construct the matrices using the default sizes From 3ca9bfdd0ea8fe90f30d725e460ab923d39feecf Mon Sep 17 00:00:00 2001 From: Dounia Date: Thu, 7 Sep 2023 11:31:28 -0700 Subject: [PATCH 03/13] Address Greg's comment: merge blank cells --- .../sycl_ext_oneapi_matrix.asciidoc | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc index 2f215228bbea..734f7858d0ac 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc @@ -885,29 +885,29 @@ column in the table below. 
[frame="none",options="header"] |====================== | A type | B type | C and D type | M | N | K | device -| `matrix_type::uint8` | `matrix_type::uint8` | -`matrix_type::int32` | +<=+ 8 | 16 | 32 | `architecture::intel_gpu_pvc` -| | | | |8||`architecture::intel_gpu_dg2_g10, +.2+| `matrix_type::uint8` .2+| `matrix_type::uint8` .2+| +`matrix_type::int32` .2+| +<=+ 8 | 16 .2+| 32 +|`architecture::intel_gpu_pvc`|8|`architecture::intel_gpu_dg2_g10, architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12` -| `matrix_type::uint8` | `matrix_type::int8` | -`matrix_type::int32` | +<=+ 8 | 16 | 32 | `architecture::intel_gpu_pvc` -| | | | |8||`architecture::intel_gpu_dg2_g10, +.2+| `matrix_type::uint8` .2+| `matrix_type::int8` .2+| +`matrix_type::int32` .2+| +<=+ 8 | 16 .2+| 32 | +`architecture::intel_gpu_pvc`|8|`architecture::intel_gpu_dg2_g10, architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12` -| `matrix_type::int8` | `matrix_type::uint8` | -`matrix_type::int32` | +<=+ 8 | 16 | 32 | `architecture::intel_gpu_pvc` -| | | | |8||`architecture::intel_gpu_dg2_g10, +.2+| `matrix_type::int8` .2+| `matrix_type::uint8` .2+| +`matrix_type::int32` .2+| +<=+ 8 | 16 .2+| 32 | +`architecture::intel_gpu_pvc`|8|`architecture::intel_gpu_dg2_g10, architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12` -| `matrix_type::int8` | `matrix_type::int8` | -`matrix_type::int32` | +<=+ 8 | 16 | 32 | `architecture::intel_gpu_pvc` -| | | | |8||`architecture::intel_gpu_dg2_g10, +.2+| `matrix_type::int8` .2+| `matrix_type::int8` .2+| +`matrix_type::int32` .2+| +<=+ 8 | 16 .2+| 32 | +`architecture::intel_gpu_pvc`|8|`architecture::intel_gpu_dg2_g10, architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12` -| `matrix_type::fp16` | `matrix_type::fp16` | -`matrix_type::fp32` | +<=+ 8 | 16 | 16 | `architecture::intel_gpu_pvc` -| | | | |8|| `architecture::intel_gpu_dg2_g10, +.2+|`matrix_type::fp16` .2+| `matrix_type::fp16` .2+| +`matrix_type::fp32` .2+| +<=+ 8 | 16 .2+| 16 | +`architecture::intel_gpu_pvc`|8| `architecture::intel_gpu_dg2_g10, architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12` -| `matrix_type::bf16` | `matrix_type::bf16` | -`matrix_type::fp32` | +<=+ 8 | 16 | 16 | `architecture::intel_gpu_pvc` -| | | | |8|| `architecture::intel_gpu_dg2_g10, +.2+| `matrix_type::bf16` .2+| `matrix_type::bf16` .2+| +`matrix_type::fp32` .2+| +<=+ 8 | 16 .2+| 16 | +`architecture::intel_gpu_pvc` |8| `architecture::intel_gpu_dg2_g10, architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12` |====================== From 833b8da5523f0ad1f64c6e832df6751add589ff2 Mon Sep 17 00:00:00 2001 From: Dounia Date: Thu, 14 Sep 2023 09:02:41 -0700 Subject: [PATCH 04/13] fix typo in XMX and NVIDIA tables: matrix_type::int32 should be matrix_type::sint32 --- .../sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc index 734f7858d0ac..3d277c71ec7a 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc @@ -886,19 +886,19 @@ column in the table below. 
|====================== | A type | B type | C and D type | M | N | K | device .2+| `matrix_type::uint8` .2+| `matrix_type::uint8` .2+| -`matrix_type::int32` .2+| +<=+ 8 | 16 .2+| 32 +`matrix_type::sint32` .2+| +<=+ 8 | 16 .2+| 32 |`architecture::intel_gpu_pvc`|8|`architecture::intel_gpu_dg2_g10, architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12` .2+| `matrix_type::uint8` .2+| `matrix_type::int8` .2+| -`matrix_type::int32` .2+| +<=+ 8 | 16 .2+| 32 | +`matrix_type::sint32` .2+| +<=+ 8 | 16 .2+| 32 | `architecture::intel_gpu_pvc`|8|`architecture::intel_gpu_dg2_g10, architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12` .2+| `matrix_type::int8` .2+| `matrix_type::uint8` .2+| -`matrix_type::int32` .2+| +<=+ 8 | 16 .2+| 32 | +`matrix_type::sint32` .2+| +<=+ 8 | 16 .2+| 32 | `architecture::intel_gpu_pvc`|8|`architecture::intel_gpu_dg2_g10, architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12` .2+| `matrix_type::int8` .2+| `matrix_type::int8` .2+| -`matrix_type::int32` .2+| +<=+ 8 | 16 .2+| 32 | +`matrix_type::sint32` .2+| +<=+ 8 | 16 .2+| 32 | `architecture::intel_gpu_pvc`|8|`architecture::intel_gpu_dg2_g10, architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12` .2+|`matrix_type::fp16` .2+| `matrix_type::fp16` .2+| @@ -940,11 +940,11 @@ supported parameter combination is specified in the following table. |16 |16 |16 |8 |32 |16 |32 |8 |16 -.3+| `matrix_type::int8` .3+| `matrix_type::int32` +.3+| `matrix_type::int8` .3+| `matrix_type::sint32` |16 |16 |16 .6+| sm_72 |8 |32 |16 |32 |8 |16 -.3+|`matrix_type::uint8` .3+|`matrix_type::int32` +.3+|`matrix_type::uint8` .3+|`matrix_type::sint32` |16 |16 |16 |8 |32 |16 |32 |8 |16 From afe6471b588faea56aa0ad386defe071f0f1f7aa Mon Sep 17 00:00:00 2001 From: Dounia Date: Fri, 15 Sep 2023 08:56:15 -0700 Subject: [PATCH 05/13] Add clarification about SPR enumerator AOT limitation and rename matrix::combinations to matrix_combinations --- .../sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc | 10 +++++----- .../sycl_ext_oneapi_device_architecture.asciidoc | 5 +++++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc index 3d277c71ec7a..3ca5a8831bb9 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc @@ -741,12 +741,12 @@ descriptors that can be queried using `get_info` API. [frame="none",options="header"] |====================== | Device descriptors | Return type| Description -|`ext::oneapi::experimental::info::device::matrix::combinations` | +|`ext::oneapi::experimental::info::device::matrix_combinations` | `std::vector`| tells the set of supported matrix sizes and types on this device |====================== -The runtime query returns a vector of `combinations` of `combination` +The runtime query returns a vector of `matrix_combinations` of `combination` type. Each combination includes the sizes and the types for the matrices A, B, C, and D. 
Note that for each matrix hardware, the query returns `max_msize, max_nsize, max_ksize` or `msize, nsize, @@ -790,7 +790,7 @@ struct combination { } // namespace sycl::ext::oneapi::experimental::matrix ``` -Each combination of the `combinations` vector composes the types and +Each combination of the `matrix_combinations` vector composes the types and sizes of A, B, C, and D matrices supported by the device implementation. The table below provides a description of each member of the `combination` struct. @@ -832,7 +832,7 @@ the `T` template parameter as follows: + ```c++ // Ta, Tb, Tc, and Td are the types used in applications std::vector combinations = - device.get_info(); + device.get_info(); for (int i = 0; sizeof(combinations); i++) { if (Ta == combinations[i].atype && Tb == combinations[i].btype && @@ -849,7 +849,7 @@ for (int i = 0; sizeof(combinations); i++) { The table below provides a list of the combinations that `joint_matrix` implementations support on each of Intel AMX and Intel XMX hardware. Note that these can be returned using -`ext::oneapi::experimental::info::device::matrix::combinations`. +`ext::oneapi::experimental::info::device::matrix_combinations`. ==== Intel AMX Supported Combinations This is currently available in devices with the architecture diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc index cd8f878f56f7..993f8874fd57 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc @@ -196,6 +196,10 @@ of these enumerators, and it provides a brief description of their meanings. |- |Any CPU device with the x86_64 instruction set. +|`intel_cpu_spr` +|- +|Intel Xeon processor codenamed Sapphire Rapids. + |`intel_gpu_bdw` |- |Broadwell Intel graphics architecture. @@ -589,6 +593,7 @@ feature, the application must be compiled in ahead-of-time (AOT) mode using "special target values" listed in the link:../../UsersManual.md[users manual] description of the `-fsycl-targets` option. These are the target names of the form "intel_gpu_*", "nvidia_gpu_*", or "amd_gpu_*". +The support of AOT compilation for `intel_cpu_spr` is currently unavailable. 
== Future direction From 1092032119273e15724ed73baff0ab35e771d5d3 Mon Sep 17 00:00:00 2001 From: Dounia Date: Fri, 15 Sep 2023 09:01:39 -0700 Subject: [PATCH 06/13] minor: change test name --- sycl/{test/matrix/query-use.cpp => compile-time-query.cpp} | 0 sycl/test/matrix/runtime-query.cpp | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename sycl/{test/matrix/query-use.cpp => compile-time-query.cpp} (100%) diff --git a/sycl/test/matrix/query-use.cpp b/sycl/compile-time-query.cpp similarity index 100% rename from sycl/test/matrix/query-use.cpp rename to sycl/compile-time-query.cpp diff --git a/sycl/test/matrix/runtime-query.cpp b/sycl/test/matrix/runtime-query.cpp index 64090faad465..d6ccedf24b87 100644 --- a/sycl/test/matrix/runtime-query.cpp +++ b/sycl/test/matrix/runtime-query.cpp @@ -12,7 +12,7 @@ template void matrix_runtime_query(queue q) { std::vector combinations = - q.get_device().get_info(); + q.get_device().get_info(); std::cout << "The matrix hardware implementation in this device provides " "this number of combinations: " From ca43df65b481c0c582243b00297103bd24b33378 Mon Sep 17 00:00:00 2001 From: Dounia Date: Fri, 15 Sep 2023 09:03:31 -0700 Subject: [PATCH 07/13] minor: fix directory accident change --- sycl/{ => test/matrix}/compile-time-query.cpp | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename sycl/{ => test/matrix}/compile-time-query.cpp (100%) diff --git a/sycl/compile-time-query.cpp b/sycl/test/matrix/compile-time-query.cpp similarity index 100% rename from sycl/compile-time-query.cpp rename to sycl/test/matrix/compile-time-query.cpp From 7164c664d56db99aeb9a97e31c3896510349fab2 Mon Sep 17 00:00:00 2001 From: Dounia Date: Fri, 15 Sep 2023 09:23:51 -0700 Subject: [PATCH 08/13] fix format --- sycl/test/matrix/compile-time-query.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl/test/matrix/compile-time-query.cpp b/sycl/test/matrix/compile-time-query.cpp index e39361bbeead..ccab39f90eba 100644 --- a/sycl/test/matrix/compile-time-query.cpp +++ b/sycl/test/matrix/compile-time-query.cpp @@ -1,4 +1,5 @@ -// RUN: %clangxx -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4 -fsycl -o compile-time-query %s +// RUN: %clangxx -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4 -fsycl -o compile-time-query +// %s #include #include From 06b2debeb474774e4f1189d19387dd969f40c29c Mon Sep 17 00:00:00 2001 From: Dounia Date: Fri, 15 Sep 2023 10:01:55 -0700 Subject: [PATCH 09/13] test name change again to avoid format issue --- sycl/test/matrix/{compile-time-query.cpp => compile-query.cpp} | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) rename sycl/test/matrix/{compile-time-query.cpp => compile-query.cpp} (99%) diff --git a/sycl/test/matrix/compile-time-query.cpp b/sycl/test/matrix/compile-query.cpp similarity index 99% rename from sycl/test/matrix/compile-time-query.cpp rename to sycl/test/matrix/compile-query.cpp index ccab39f90eba..133c9b8eb5ec 100644 --- a/sycl/test/matrix/compile-time-query.cpp +++ b/sycl/test/matrix/compile-query.cpp @@ -1,5 +1,4 @@ -// RUN: %clangxx -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4 -fsycl -o compile-time-query -// %s +// RUN: %clangxx -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4 -fsycl -o compile-query %s #include #include From 7afef97391db8c8e43457b9d91626f0a1b93f8d5 Mon Sep 17 00:00:00 2001 From: Dounia Date: Mon, 18 Sep 2023 07:00:35 -0700 Subject: [PATCH 10/13] Address Greg's comments and remove ATS from the test case because for names, dg2 is used --- ...cl_ext_oneapi_device_architecture.asciidoc | 15 ++++-- sycl/test/matrix/compile-query.cpp | 50 
+++---------------- 2 files changed, 19 insertions(+), 46 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc index 993f8874fd57..50d8b6c8b77c 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc @@ -198,7 +198,9 @@ of these enumerators, and it provides a brief description of their meanings. |`intel_cpu_spr` |- -|Intel Xeon processor codenamed Sapphire Rapids. +|Intel Xeon processor codenamed Sapphire Rapids. The utility of this +enumeration is currently limited. See the section "Limitations with +the experimental version" for details. |`intel_gpu_bdw` |- @@ -251,7 +253,7 @@ of these enumerators, and it provides a brief description of their meanings. |`intel_gpu_adl_s` + `intel_gpu_rpl_s` |- -|Alder Lake S Intel graphics architecture or Raptor Lake Intel graphics +|Alder Lake S Intel graphics architecture or Raptor Lake Intel graphics architecture. |`intel_gpu_adl_p` @@ -593,7 +595,14 @@ feature, the application must be compiled in ahead-of-time (AOT) mode using "special target values" listed in the link:../../UsersManual.md[users manual] description of the `-fsycl-targets` option. These are the target names of the form "intel_gpu_*", "nvidia_gpu_*", or "amd_gpu_*". -The support of AOT compilation for `intel_cpu_spr` is currently unavailable. +The architecture enumeration `intel_cpu_spr` does not currently work +with any of the APIs described in this extension. It cannot be used +with the `if_architecture_is` function, the +`device::ext_oneapi_architecture_is` function, or the +`info::device::architecture` query descriptor. It currently exists +only for use with the +link:sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc[sycl_ext_oneapi_matrix] +extension. 
== Future direction diff --git a/sycl/test/matrix/compile-query.cpp b/sycl/test/matrix/compile-query.cpp index 133c9b8eb5ec..a430c0c6e228 100644 --- a/sycl/test/matrix/compile-query.cpp +++ b/sycl/test/matrix/compile-query.cpp @@ -39,53 +39,19 @@ void query_amx_spr() { return; } -void query_xmx_dg() { +void query_xmx_dg2() { // generates combination assert - // using myparams = matrix_params; + // using myparams = matrix_params; // generate combination of type assert - // using myparams = matrix_params; + // using myparams = matrix_params; // tells whether a combination is valid or not, if valid, those will be set as // default - using myparams = matrix_params; - size_t dmsize = myparams::M; - size_t dnsize = myparams::N; - size_t dksize = myparams::K; - std::cout << "sizes of Intel XMX of architecture::intel_gpu_dg1 " - "matrix_params chosen by the user are: M " - << dmsize << " N " << dnsize << " K " << dksize << std::endl; - - // sizes-only query: types are given, generate default sizes - using myparams2 = - matrix_params; - dmsize = myparams2::M; - dnsize = myparams2::N; - dksize = myparams2::K; - std::cout << "Default Intel XMX of architecture::intel_gpu_dg1 sizes are: M " - << dmsize << " N " << dnsize << " K " << dksize << std::endl; - return; -} - -void query_xmx_ats() { - - // generates combination assert - // using myparams = matrix_params; - - // generate combination of type assert - // using myparams = matrix_params; - - // tells whether a combination is valid or not, if valid, those will be set as - // default - using myparams = matrix_params; - size_t dmsize = myparams::M; size_t dnsize = myparams::N; size_t dksize = myparams::K; @@ -99,9 +65,8 @@ void query_xmx_ats() { dmsize = myparams2::M; dnsize = myparams2::N; dksize = myparams2::K; - std::cout - << "Default Intel XMX of architecture::intel_gpu_dg2_g10 sizes are: M " - << dmsize << " N " << dnsize << " K " << dksize << std::endl; + std::cout << "Default Intel XMX of architecture::intel_gpu_dg2_g10 sizes are: M " + << dmsize << " N " << dnsize << " K " << dksize << std::endl; return; } @@ -139,8 +104,7 @@ void query_xmx_pvc() { int main() { query_amx_spr(); - query_xmx_dg(); - query_xmx_ats(); + query_xmx_dg2(); query_xmx_pvc(); return 0; } From 430b66638a26f498e72172de277a1d25dc205ccf Mon Sep 17 00:00:00 2001 From: Dounia Date: Mon, 18 Sep 2023 07:04:53 -0700 Subject: [PATCH 11/13] fix formatting --- sycl/test/matrix/compile-query.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/sycl/test/matrix/compile-query.cpp b/sycl/test/matrix/compile-query.cpp index a430c0c6e228..e110eef22a38 100644 --- a/sycl/test/matrix/compile-query.cpp +++ b/sycl/test/matrix/compile-query.cpp @@ -42,15 +42,17 @@ void query_amx_spr() { void query_xmx_dg2() { // generates combination assert - // using myparams = matrix_params; + // using myparams = matrix_params; // generate combination of type assert - // using myparams = matrix_params; + // using myparams = matrix_params; // tells whether a combination is valid or not, if valid, those will be set as // default - using myparams = matrix_params; + using myparams = matrix_params; size_t dmsize = myparams::M; size_t dnsize = myparams::N; @@ -65,8 +67,9 @@ void query_xmx_dg2() { dmsize = myparams2::M; dnsize = myparams2::N; dksize = myparams2::K; - std::cout << "Default Intel XMX of architecture::intel_gpu_dg2_g10 sizes are: M " - << dmsize << " N " << dnsize << " K " << dksize << std::endl; + std::cout + << "Default Intel XMX of architecture::intel_gpu_dg2_g10 
sizes are: M " + << dmsize << " N " << dnsize << " K " << dksize << std::endl; return; } From 07673579e80be7e9536f90df4c0410811e8507c5 Mon Sep 17 00:00:00 2001 From: Dounia Date: Tue, 19 Sep 2023 09:44:52 -0700 Subject: [PATCH 12/13] Address Dmitry comment --- .../sycl_ext_oneapi_matrix.asciidoc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc index 3ca5a8831bb9..d7f2ab8d985f 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc @@ -863,11 +863,11 @@ table below. | A type | B type | C and D type | M | N | K | `matrix_type::uint8` | `matrix_type::uint8` | `matrix_type::sint32` | +<=+ 16 | +<=+ 16 | +<=+ 64 -| `matrix_type::uint8` | `matrix_type::int8` | +| `matrix_type::uint8` | `matrix_type::sint8` | `matrix_type::sint32` | +<=+ 16 | +<=+ 16 | +<=+ 64 -| `matrix_type::int8` | `matrix_type::uint8` | +| `matrix_type::sint8` | `matrix_type::uint8` | `matrix_type::sint32` | +<=+ 16 | +<=+ 16 | +<=+ 64 -| `matrix_type::int8` | `matrix_type::int8` | +| `matrix_type::sint8` | `matrix_type::sint8` | `matrix_type::sint32` | +<=+ 16 | +<=+ 16 | +<=+ 64 | `matrix_type::bf16` | `matrix_type::bf16` | `matrix_type::fp32` | +<=+ 16 | +<=+ 16 | +<=+ 32 @@ -889,15 +889,15 @@ column in the table below. `matrix_type::sint32` .2+| +<=+ 8 | 16 .2+| 32 |`architecture::intel_gpu_pvc`|8|`architecture::intel_gpu_dg2_g10, architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12` -.2+| `matrix_type::uint8` .2+| `matrix_type::int8` .2+| +.2+| `matrix_type::uint8` .2+| `matrix_type::sint8` .2+| `matrix_type::sint32` .2+| +<=+ 8 | 16 .2+| 32 | `architecture::intel_gpu_pvc`|8|`architecture::intel_gpu_dg2_g10, architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12` -.2+| `matrix_type::int8` .2+| `matrix_type::uint8` .2+| +.2+| `matrix_type::sint8` .2+| `matrix_type::uint8` .2+| `matrix_type::sint32` .2+| +<=+ 8 | 16 .2+| 32 | `architecture::intel_gpu_pvc`|8|`architecture::intel_gpu_dg2_g10, architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12` -.2+| `matrix_type::int8` .2+| `matrix_type::int8` .2+| +.2+| `matrix_type::sint8` .2+| `matrix_type::sint8` .2+| `matrix_type::sint32` .2+| +<=+ 8 | 16 .2+| 32 | `architecture::intel_gpu_pvc`|8|`architecture::intel_gpu_dg2_g10, architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12` @@ -940,7 +940,7 @@ supported parameter combination is specified in the following table. 
|16 |16 |16 |8 |32 |16 |32 |8 |16 -.3+| `matrix_type::int8` .3+| `matrix_type::sint32` +.3+| `matrix_type::sint8` .3+| `matrix_type::sint32` |16 |16 |16 .6+| sm_72 |8 |32 |16 |32 |8 |16 From 81ace9033ad9c9ee569be0eabc4d88bedb7d1133 Mon Sep 17 00:00:00 2001 From: Dounia Date: Wed, 20 Sep 2023 12:06:09 -0700 Subject: [PATCH 13/13] Add blank line for SPR limitation paragraph --- .../experimental/sycl_ext_oneapi_device_architecture.asciidoc | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc index 50d8b6c8b77c..e6a51ed9d103 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc @@ -595,6 +595,7 @@ feature, the application must be compiled in ahead-of-time (AOT) mode using "special target values" listed in the link:../../UsersManual.md[users manual] description of the `-fsycl-targets` option. These are the target names of the form "intel_gpu_*", "nvidia_gpu_*", or "amd_gpu_*". + The architecture enumeration `intel_cpu_spr` does not currently work with any of the APIs described in this extension. It cannot be used with the `if_architecture_is` function, the
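As an end-to-end illustration of the compile-time query that this series renames from `tpu_params` to `matrix_params`, here is a minimal sketch in the spirit of the `compile-query.cpp` test above. The `int8_t`/`int` element types, the PVC shape 8 x 16 x 32 (taken from the supported-combinations table in the spec diff), and the single `<sycl/sycl.hpp>` include are assumptions of the sketch, not requirements stated by the patches.

```c++
// Built like the test above: clang++ -fsycl -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4 <file>
#include <iostream>
#include <sycl/sycl.hpp>

namespace syclex = sycl::ext::oneapi::experimental;
using namespace sycl::ext::oneapi::experimental::matrix;

int main() {
  // Default-values query: only the element types are supplied; the sizes are
  // filled in with defaults valid for Intel AMX (architecture::intel_cpu_spr).
  using amx_params = matrix_params<syclex::architecture::intel_cpu_spr, int8_t,
                                   int8_t, int, int>;
  constexpr std::size_t M = amx_params::M;
  constexpr std::size_t N = amx_params::N;
  constexpr std::size_t K = amx_params::K;
  std::cout << "AMX int8 defaults: M=" << M << " N=" << N << " K=" << K << "\n";

  // Validation query: element types and sizes are supplied; an unsupported
  // combination is rejected at compile time by the static_assert inside
  // matrix_params, and the joint_matrix_a/b/c/d aliases can then be used in a kernel.
  using pvc_params = matrix_params<syclex::architecture::intel_gpu_pvc, int8_t,
                                   int8_t, int, int, 8, 16, 32>;
  static_assert(pvc_params::M == 8 && pvc_params::N == 16 && pvc_params::K == 32,
                "unexpected shape");
  return 0;
}
```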