Commit 6a6c0fd

[tuner] use python binding to build td specs for attention (#2596)
This PR uses the Python bindings to build TD specs for the attention op. The bindings are exposed by IREE PR iree-org/iree#22311. All of the temporary-solution code and the string-based spec generation code has been removed; the SpecBuilder classes can now be used to build TD specs through the Python bindings. --------- Signed-off-by: Bangtian Liu <[email protected]>
1 parent e138986 commit 6a6c0fd
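For context, a minimal sketch of the binding-backed flow this commit moves to, based only on the call sites changed below. The helper name build_attention_spec is hypothetical, and op_info, config_list, and tuner_ctx stand in for values produced elsewhere in the tuner.

# Minimal sketch, assuming sharktuner's spec_builder module and values produced
# elsewhere in the tuner (op_info from the dispatch parser, config_list of
# common.TuningConfiguration objects, tuner_ctx as the active TunerContext).
from sharktuner import spec_builder


def build_attention_spec(tuner_ctx, op_info, config_list):
    # The builder wraps the new IREE Python bindings (iree-org/iree#22311)
    # instead of emitting the transform-dialect spec as a string.
    builder = spec_builder.AttentionSpecBuilder(op_info)
    return builder.build_td_spec(tuner_ctx, config_list)  # returns an ir.Module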

File tree

7 files changed: +290 -339 lines changed


sharktuner/sharktuner/candidate_gen.py

Lines changed: 9 additions & 14 deletions

@@ -102,16 +102,15 @@ def supports_root_op(cls, root_op: ir.Operation) -> bool:
 
     def get_constraint_generator(self) -> constraint_generator.ConstraintGenerator:
         return constraint_generator.ContractionOpInterfaceConstraintGenerator(
-            self.get_root_op(), self.get_op_info()
+            self.get_op_info()
         )
 
     def get_td_spec(
         self,
         config_list: list[common.TuningConfiguration],
     ) -> ir.Module:
-        return spec_builder.build_contraction_td_spec(
-            self._tuner_ctx, self.get_op_info(), config_list
-        )
+        builder = spec_builder.ContractionSpecBuilder(self.get_op_info())
+        return builder.build_td_spec(self._tuner_ctx, config_list)
 
     @classmethod
     def get_dispatch_kind(cls) -> common.DispatchKind:
@@ -149,16 +148,15 @@ def supports_root_op(cls, root_op: ir.Operation) -> bool:
 
     def get_constraint_generator(self) -> constraint_generator.ConstraintGenerator:
         return constraint_generator.ConvolutionOpInterfaceConstraintGenerator(
-            self.get_root_op(), self.get_op_info()
+            self.get_op_info()
         )
 
     def get_td_spec(
         self,
         config_list: list[common.TuningConfiguration],
     ) -> ir.Module:
-        return spec_builder.build_convolution_td_spec(
-            self._tuner_ctx, self.get_op_info(), config_list
-        )
+        builder = spec_builder.ConvolutionSpecBuilder(self.get_op_info())
+        return builder.build_td_spec(self._tuner_ctx, config_list)
 
     @classmethod
     def get_dispatch_kind(cls) -> common.DispatchKind:
@@ -183,18 +181,15 @@ def supports_root_op(cls, root_op: ir.Operation) -> bool:
 
     def get_constraint_generator(self) -> constraint_generator.ConstraintGenerator:
         return constraint_generator.AttentionOpInterfaceConstraintGenerator(
-            self.get_root_op()
+            self.get_op_info()
        )
 
     def get_td_spec(
         self,
         config_list: list[common.TuningConfiguration],
     ) -> ir.Module:
-        attention_op = self.get_root_op()
-        func_name = spec_builder.get_matcher_named_sequence_name(attention_op)
-        return spec_builder.build_td_spec(
-            attention_op.context, attention_op, config_list, func_name
-        )
+        builder = spec_builder.AttentionSpecBuilder(self.get_op_info())
+        return builder.build_td_spec(self._tuner_ctx, config_list)
 
     @classmethod
     def get_dispatch_kind(cls) -> common.DispatchKind:
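After this change, all three dispatch kinds follow the same pattern: construct a builder from the parsed op info and call build_td_spec. The protocol below is a hypothetical illustration of that shared surface and is not part of this diff; the concrete classes live in spec_builder.py.

from typing import Any, Protocol


class TdSpecBuilder(Protocol):
    # Hypothetical protocol, shown only to record the surface that
    # candidate_gen.py relies on. The concrete classes are
    # ContractionSpecBuilder, ConvolutionSpecBuilder, and AttentionSpecBuilder,
    # each constructed from the parsed op_info.
    def build_td_spec(self, tuner_ctx: Any, config_list: list[Any]) -> Any:
        """Return the tuning spec as an ir.Module built via IREE's Python bindings."""
        ...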

sharktuner/sharktuner/common.py

Lines changed: 0 additions & 10 deletions

@@ -202,16 +202,6 @@ class MatmulShapeType:
     acc_type: ir.IntegerType | ir.FloatType
 
 
-@dataclass
-class AttentionOpInfo:
-    domain_rank: int
-    batch_dims: list[int]
-    m_dims: list[int]
-    n_dims: list[int]
-    k1_dims: list[int]
-    k2_dims: list[int]
-
-
 @dataclass
 class LLVMGPUVectorDistributeContractionKnobs(KnobAssignment):
     # Z3 numeric selections.
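The dataclass removed here is superseded by dispatch_parser.AttentionOpInfo, which the constraint-generator changes below read from. The sketch below is inferred only from those field accesses; the real definition lives in dispatch_parser.py, which is not part of this diff, and the class name used here is a placeholder.

from dataclasses import dataclass
from typing import Any


@dataclass
class AttentionOpInfoSketch:
    # Dimension structure (previously common.AttentionOpInfo).
    domain_rank: int
    batch_dims: list[int]
    m_dims: list[int]
    n_dims: list[int]
    k1_dims: list[int]
    k2_dims: list[int]
    # Shape/type metadata and layout flags (previously derived in
    # AttentionOpInterfaceConstraintGenerator.__init__).
    qk_matmul: Any  # common.MatmulShapeType for Q @ K^T
    pv_matmul: Any  # common.MatmulShapeType for P @ V
    transposed_q: bool
    transposed_k: bool
    transposed_v: bool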

sharktuner/sharktuner/constraint_generator.py

Lines changed: 34 additions & 144 deletions
@@ -284,12 +284,7 @@ def set_cdim_tile_sizes(tile_sizes, contraction_dims, csizes):
 def generate_attention_solutions(
     tuner_ctx: common.TunerContext,
     gpu_target_info: iree_gpu.TargetInfo,
-    opinfo: common.AttentionOpInfo,
-    qk_matmul: common.MatmulShapeType,
-    pv_matmul: common.MatmulShapeType,
-    transposed_q: bool,
-    transposed_k: bool,
-    transposed_v: bool,
+    op_info: dispatch_parser.AttentionOpInfo,
     dispatch_kind: common.DispatchKind,
     codegen_pipeline: iree_codegen.DispatchLoweringPassPipeline = iree_codegen.DispatchLoweringPassPipeline.LLVMGPUVectorDistribute,
     num_subgroups: int = 4,
@@ -332,11 +327,11 @@ def generate_attention_solutions(
 
     solver = z3.Solver()
     constraints = dispatch_constraints.generate_attention_vector_distribute_constraints(
-        qk_matmul,
-        pv_matmul,
-        transposed_q,
-        transposed_k,
-        transposed_v,
+        op_info.qk_matmul,
+        op_info.pv_matmul,
+        op_info.transposed_q,
+        op_info.transposed_k,
+        op_info.transposed_v,
         [m_var, n_var, k_var],
         num_subgroups,
         subgroup_size,
@@ -360,10 +355,10 @@
         lookup(qk_intrinsic_k),
     )
     qk_mma_attr = dispatch_constraints.getMMAAttr(
-        qk_matmul.acc_type,
+        op_info.qk_matmul.acc_type,
         *qk_intrinsic_mnk_shape,
-        qk_matmul.lhs_type,
-        qk_matmul.rhs_type,
+        op_info.qk_matmul.lhs_type,
+        op_info.qk_matmul.rhs_type,
         gpu_target_info.mma_intrinsics,
     )
 
@@ -373,38 +368,38 @@
         lookup(pv_intrinsic_k),
     )
     pv_mma_attr = dispatch_constraints.getMMAAttr(
-        pv_matmul.acc_type,
+        op_info.pv_matmul.acc_type,
         *pv_intrinsic_mnk_shape,
-        pv_matmul.lhs_type,
-        pv_matmul.rhs_type,
+        op_info.pv_matmul.lhs_type,
+        op_info.pv_matmul.rhs_type,
         gpu_target_info.mma_intrinsics,
     )
 
     # Get workgroup tile sizes.
-    workgroup_tile_sizes = [0] * opinfo.domain_rank
-    reduction_tile_sizes = [0] * opinfo.domain_rank
+    workgroup_tile_sizes = [0] * op_info.domain_rank
+    reduction_tile_sizes = [0] * op_info.domain_rank
 
-    for b in opinfo.batch_dims:
+    for b in op_info.batch_dims:
         workgroup_tile_sizes[b] = 1
-    for m in opinfo.m_dims[:-1]:
+    for m in op_info.m_dims[:-1]:
         workgroup_tile_sizes[m] = 1
-    for n in opinfo.n_dims[:-1]:
+    for n in op_info.n_dims[:-1]:
         workgroup_tile_sizes[n] = 1
-    for k2 in opinfo.k2_dims[:-1]:
+    for k2 in op_info.k2_dims[:-1]:
         reduction_tile_sizes[k2] = 1
 
-    workgroup_tile_sizes[opinfo.m_dims[-1]] = lookup(m_var)
-    workgroup_tile_sizes[opinfo.n_dims[-1]] = lookup(n_var)
-    reduction_tile_sizes[opinfo.k2_dims[-1]] = lookup(k_var)
+    workgroup_tile_sizes[op_info.m_dims[-1]] = lookup(m_var)
+    workgroup_tile_sizes[op_info.n_dims[-1]] = lookup(n_var)
+    reduction_tile_sizes[op_info.k2_dims[-1]] = lookup(k_var)
 
-    subgroup_basis_counts = [1] * opinfo.domain_rank
-    subgroup_basis_mapping = list(range(opinfo.domain_rank))
-    subgroup_basis_counts[opinfo.m_dims[-1]] = lookup(sg_m_cnt)
-    subgroup_basis_counts[opinfo.n_dims[-1]] = lookup(sg_n_cnt)
+    subgroup_basis_counts = [1] * op_info.domain_rank
+    subgroup_basis_mapping = list(range(op_info.domain_rank))
+    subgroup_basis_counts[op_info.m_dims[-1]] = lookup(sg_m_cnt)
+    subgroup_basis_counts[op_info.n_dims[-1]] = lookup(sg_n_cnt)
     qk_basis_mapping = [
         mapping
         for i, mapping in enumerate(subgroup_basis_mapping)
-        if i not in opinfo.n_dims
+        if i not in op_info.n_dims
     ]
     qk_config = {
         "mma_kind": qk_mma_attr,
@@ -419,7 +414,7 @@
     pv_basis_mapping = [
         mapping
         for i, mapping in enumerate(subgroup_basis_mapping)
-        if i not in opinfo.k1_dims
+        if i not in op_info.k1_dims
     ]
     pv_config = {
         "mma_kind": pv_mma_attr,
@@ -504,13 +499,7 @@ def generate_solutions(
 
 
 class ContractionOpInterfaceConstraintGenerator(ConstraintGenerator):
-    def __init__(
-        self, root_op: ir.Operation, op_info: dispatch_parser.ContractionOpInfo
-    ):
-        # TODO(Bangtian): Both root_op and op_info are kept as a temporary solution.
-        # Once convolution and attention ops are supported using the same structure,
-        # only op_info will be needed as it contains all necessary information.
-        self.root_op = root_op
+    def __init__(self, op_info: dispatch_parser.ContractionOpInfo):
         self.op_info = op_info
 
     def generate_solutions(
@@ -535,13 +524,7 @@ def generate_solutions(
 
 
 class ConvolutionOpInterfaceConstraintGenerator(ConstraintGenerator):
-    def __init__(
-        self, root_op: ir.Operation, op_info: dispatch_parser.ConvolutionOpInfo
-    ):
-        # TODO(Bangtian): Both root_op and op_info are kept as a temporary solution.
-        # Once all ops are supported using the same structure, only op_info will be
-        # needed as it contains all necessary information.
-        self.root_op = root_op
+    def __init__(self, op_info: dispatch_parser.ConvolutionOpInfo):
         self.op_info = op_info
 
     def generate_solutions(
@@ -569,102 +552,14 @@ class AttentionOpInterfaceConstraintGenerator(ConstraintGenerator):
     """
     Constraint generator for the IREE LinalgExt AttentionOp.
 
-    This class extracts structure information from the attention op and generates
-    constraints for exploring valid configurations to generate tuning specs. IREE
-    decomposes the operation into two matrix multiplications for the purpose of
-    Tiling:
-    - QK^T : Q @ K.T (producing scores)
-    - PV : P @ V (projected output after softmax)
-
-    Assumed operand shapes:
-    - Q : [B, M, K1]
-    - K : [B, K2, K1]
-    - V : [B, K2, N]
-    - O : [B, M, N]
+    Generates tuning configurations for attention operations.
 
     Attributes:
-        transposed_q (bool): True if Q is logically transposed (k1 dim is not last in map).
-        transposed_k (bool): True if K is logically transposed (k1 dim is not last in map).
-        transposed_v (bool): True if V is logically transposed (k2 dim is not last in map).
-        qk_matmul (MatmulShapeType): Shape metadata for Q @ K^T.
-        pv_matmul (MatmulShapeType): Shape metadata for P @ V.
-        opinfo: dimensions info for attention op.
+        op_info: AttentionOpInfo containing all attention operation metadata.
     """
 
-    def __init__(self, root_op: ir.Operation):
-        self.root_op = root_op
-        indexing_maps_attr = root_op.attributes["indexing_maps"]
-        indexing_maps = [attr.value for attr in indexing_maps_attr]
-        q_map = indexing_maps[0]
-        k_map = indexing_maps[1]
-        v_map = indexing_maps[2]
-        o_map = indexing_maps[-1]
-
-        raw_opinfo = iree_codegen.get_attention_op_detail(q_map, k_map, v_map, o_map)
-        assert raw_opinfo, "no attention info"
-
-        self.opinfo = common.AttentionOpInfo(
-            domain_rank=raw_opinfo.domain_rank,
-            batch_dims=raw_opinfo.batch_dims,
-            m_dims=raw_opinfo.m_dims,
-            n_dims=raw_opinfo.n_dims,
-            k1_dims=raw_opinfo.k1_dims,
-            k2_dims=raw_opinfo.k2_dims,
-        )
-
-        q_type = ir.RankedTensorType(root_op.operands[0].type)
-        k_type = ir.RankedTensorType(root_op.operands[1].type)
-        v_type = ir.RankedTensorType(root_op.operands[2].type)
-        q_shape = q_type.shape
-        k_shape = k_type.shape
-        v_shape = v_type.shape
-        # QK matmul uses f32 as the accumulator type to match IREE's internal assumption.
-        # PV matmul derives the accumulator type from the output tensor's element type.
-        f32_type = ir.F32Type.get()
-        output_type = root_op.results[0].type.element_type
-
-        mDim = self.opinfo.m_dims[-1]
-        k1Dim = self.opinfo.k1_dims[-1]
-        k2Dim = self.opinfo.k2_dims[-1]
-        nDim = self.opinfo.n_dims[-1]
-
-        q_last_expr = q_map.results[-1]
-        k_last_expr = k_map.results[-1]
-        v_last_expr = v_map.results[-1]
-
-        q_dim_expr = ir.AffineDimExpr(q_last_expr)
-        k_dim_expr = ir.AffineDimExpr(k_last_expr)
-        v_dim_expr = ir.AffineDimExpr(v_last_expr)
-
-        self.transposed_k = k1Dim != k_dim_expr.position
-        self.transposed_v = k2Dim != v_dim_expr.position
-        self.transposed_q = k1Dim != q_dim_expr.position
-
-        q_dims = common.get_map_result_dim_positions(q_map)
-        k_dims = common.get_map_result_dim_positions(k_map)
-        v_dims = common.get_map_result_dim_positions(v_map)
-
-        assert q_dims, "no query dims from attention op"
-        assert k_dims, "no key dims from attention op"
-        assert v_dims, "no value dims from attention op"
-
-        self.qk_matmul = common.MatmulShapeType(
-            m=q_shape[q_dims.index(mDim)],
-            n=k_shape[k_dims.index(k2Dim)],
-            k=q_shape[q_dims.index(k1Dim)],
-            lhs_type=q_type.element_type,
-            rhs_type=k_type.element_type,
-            acc_type=f32_type,
-        )
-
-        self.pv_matmul = common.MatmulShapeType(
-            m=q_shape[q_dims.index(mDim)],
-            n=v_shape[v_dims.index(nDim)],
-            k=v_shape[v_dims.index(k2Dim)],
-            lhs_type=v_type.element_type,
-            rhs_type=v_type.element_type,
-            acc_type=output_type,
-        )
+    def __init__(self, op_info: dispatch_parser.AttentionOpInfo):
+        self.op_info = op_info
 
     def generate_solutions(
         self,
@@ -676,12 +571,7 @@ def generate_solutions(
         return generate_attention_solutions(
             tuner_ctx=tuner_context,
             gpu_target_info=gpu_target_info,
-            opinfo=self.opinfo,
-            qk_matmul=self.qk_matmul,
-            pv_matmul=self.pv_matmul,
-            transposed_q=self.transposed_q,
-            transposed_k=self.transposed_k,
-            transposed_v=self.transposed_v,
+            op_info=self.op_info,
             dispatch_kind=common.DispatchKind.attention,
             codegen_pipeline=codegen_pipeline,
             **pipeline_constraint_options,
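Putting the pieces together, a hedged sketch of how the simplified attention path is driven. The helper name is hypothetical and the argument order for generate_solutions is inferred from the keyword arguments visible in this hunk; the exact signature is not shown in the diff.

# Sketch only: assumes the sharktuner modules referenced throughout this diff.
from sharktuner import constraint_generator


def propose_attention_configs(op_info, tuner_context, gpu_target_info):
    # The generator now needs only the parsed op_info; it forwards it to
    # generate_attention_solutions with dispatch_kind=common.DispatchKind.attention.
    generator = constraint_generator.AttentionOpInterfaceConstraintGenerator(op_info)
    return generator.generate_solutions(
        tuner_context,     # assumed positional order; real parameter names may differ
        gpu_target_info,
    )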
