diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
index 8b5765b7f8dba..f43217f6f27ae 100644
--- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
+++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
@@ -917,7 +917,7 @@ static std::map<int64_t, unsigned> getNumOccurences(ArrayRef<int64_t> vals) {
 /// This accounts for cases where there are multiple unit-dims, but only a
 /// subset of those are dropped. For MemRefTypes these can be disambiguated
 /// using the strides. If a dimension is dropped the stride must be dropped too.
-static std::optional<llvm::SmallBitVector>
+static FailureOr<llvm::SmallBitVector>
 computeMemRefRankReductionMask(MemRefType originalType, MemRefType reducedType,
                                ArrayRef<OpFoldResult> sizes) {
   llvm::SmallBitVector unusedDims(originalType.getRank());
@@ -941,7 +941,7 @@ computeMemRefRankReductionMask(MemRefType originalType, MemRefType reducedType,
           getStridesAndOffset(originalType, originalStrides, originalOffset)) ||
       failed(
           getStridesAndOffset(reducedType, candidateStrides, candidateOffset)))
-    return std::nullopt;
+    return failure();
 
   // For memrefs, a dimension is truly dropped if its corresponding stride is
   // also dropped. This is particularly important when more than one of the dims
@@ -976,22 +976,22 @@ computeMemRefRankReductionMask(MemRefType originalType, MemRefType reducedType,
         candidateStridesNumOccurences[originalStride]) {
       // This should never happen. Cant have a stride in the reduced rank type
       // that wasnt in the original one.
-      return std::nullopt;
+      return failure();
     }
   }
 
   if ((int64_t)unusedDims.count() + reducedType.getRank() !=
       originalType.getRank())
-    return std::nullopt;
+    return failure();
   return unusedDims;
 }
 
 llvm::SmallBitVector SubViewOp::getDroppedDims() {
   MemRefType sourceType = getSourceType();
   MemRefType resultType = getType();
-  std::optional<llvm::SmallBitVector> unusedDims =
+  FailureOr<llvm::SmallBitVector> unusedDims =
       computeMemRefRankReductionMask(sourceType, resultType, getMixedSizes());
-  assert(unusedDims && "unable to find unused dims of subview");
+  assert(succeeded(unusedDims) && "unable to find unused dims of subview");
   return *unusedDims;
 }
 
@@ -2745,7 +2745,7 @@ void SubViewOp::build(OpBuilder &b, OperationState &result, Value source,
 /// For ViewLikeOpInterface.
 Value SubViewOp::getViewSource() { return getSource(); }
 
-/// Return true if t1 and t2 have equal offsets (both dynamic or of same
+/// Return true if `t1` and `t2` have equal offsets (both dynamic or of same
 /// static value).
 static bool haveCompatibleOffsets(MemRefType t1, MemRefType t2) {
   int64_t t1Offset, t2Offset;
@@ -2755,56 +2755,41 @@ static bool haveCompatibleOffsets(MemRefType t1, MemRefType t2) {
   return succeeded(res1) && succeeded(res2) && t1Offset == t2Offset;
 }
 
-/// Checks if `original` Type type can be rank reduced to `reduced` type.
-/// This function is slight variant of `is subsequence` algorithm where
-/// not matching dimension must be 1.
-static SliceVerificationResult
-isRankReducedMemRefType(MemRefType originalType,
-                        MemRefType candidateRankReducedType,
-                        ArrayRef<OpFoldResult> sizes) {
-  auto partialRes = isRankReducedType(originalType, candidateRankReducedType);
-  if (partialRes != SliceVerificationResult::Success)
-    return partialRes;
-
-  auto optionalUnusedDimsMask = computeMemRefRankReductionMask(
-      originalType, candidateRankReducedType, sizes);
-
-  // Sizes cannot be matched in case empty vector is returned.
-  if (!optionalUnusedDimsMask)
-    return SliceVerificationResult::LayoutMismatch;
-
-  if (originalType.getMemorySpace() !=
-      candidateRankReducedType.getMemorySpace())
-    return SliceVerificationResult::MemSpaceMismatch;
-
-  // No amount of stride dropping can reconcile incompatible offsets.
-  if (!haveCompatibleOffsets(originalType, candidateRankReducedType))
-    return SliceVerificationResult::LayoutMismatch;
-
-  return SliceVerificationResult::Success;
+/// Return true if `t1` and `t2` have equal strides (both dynamic or of same
+/// static value).
+static bool haveCompatibleStrides(MemRefType t1, MemRefType t2) {
+  int64_t t1Offset, t2Offset;
+  SmallVector<int64_t> t1Strides, t2Strides;
+  auto res1 = getStridesAndOffset(t1, t1Strides, t1Offset);
+  auto res2 = getStridesAndOffset(t2, t2Strides, t2Offset);
+  if (failed(res1) || failed(res2))
+    return false;
+  for (auto [s1, s2] : llvm::zip_equal(t1Strides, t2Strides))
+    if (s1 != s2)
+      return false;
+  return true;
 }
 
-template <typename OpTy>
 static LogicalResult produceSubViewErrorMsg(SliceVerificationResult result,
-                                            OpTy op, Type expectedType) {
+                                            Operation *op, Type expectedType) {
   auto memrefType = llvm::cast<ShapedType>(expectedType);
   switch (result) {
   case SliceVerificationResult::Success:
     return success();
   case SliceVerificationResult::RankTooLarge:
-    return op.emitError("expected result rank to be smaller or equal to ")
+    return op->emitError("expected result rank to be smaller or equal to ")
            << "the source rank. ";
   case SliceVerificationResult::SizeMismatch:
-    return op.emitError("expected result type to be ")
+    return op->emitError("expected result type to be ")
            << expectedType
            << " or a rank-reduced version. (mismatch of result sizes) ";
   case SliceVerificationResult::ElemTypeMismatch:
-    return op.emitError("expected result element type to be ")
+    return op->emitError("expected result element type to be ")
            << memrefType.getElementType();
   case SliceVerificationResult::MemSpaceMismatch:
-    return op.emitError("expected result and source memory spaces to match.");
+    return op->emitError("expected result and source memory spaces to match.");
   case SliceVerificationResult::LayoutMismatch:
-    return op.emitError("expected result type to be ")
+    return op->emitError("expected result type to be ")
            << expectedType
            << " or a rank-reduced version. (mismatch of result layout) ";
   }
@@ -2826,13 +2811,46 @@ LogicalResult SubViewOp::verify() {
   if (!isStrided(baseType))
     return emitError("base type ") << baseType << " is not strided";
 
-  // Verify result type against inferred type.
-  auto expectedType = SubViewOp::inferResultType(
-      baseType, getStaticOffsets(), getStaticSizes(), getStaticStrides());
+  // Compute the expected result type, assuming that there are no rank
+  // reductions.
+  auto expectedType = cast<MemRefType>(SubViewOp::inferResultType(
+      baseType, getStaticOffsets(), getStaticSizes(), getStaticStrides()));
+
+  // Verify all properties of a shaped type: rank, element type and dimension
+  // sizes. This takes into account potential rank reductions.
+  auto shapedTypeVerification = isRankReducedType(
+      /*originalType=*/expectedType, /*candidateReducedType=*/subViewType);
+  if (shapedTypeVerification != SliceVerificationResult::Success)
+    return produceSubViewErrorMsg(shapedTypeVerification, *this, expectedType);
+
+  // Make sure that the memory space did not change.
+  if (expectedType.getMemorySpace() != subViewType.getMemorySpace())
+    return produceSubViewErrorMsg(SliceVerificationResult::MemSpaceMismatch,
+                                  *this, expectedType);
+
+  // Verify the offset of the layout map.
+  if (!haveCompatibleOffsets(expectedType, subViewType))
+    return produceSubViewErrorMsg(SliceVerificationResult::LayoutMismatch,
+                                  *this, expectedType);
+
+  // The only thing that's left to verify now are the strides. First, compute
+  // the unused dimensions due to rank reductions. We have to look at sizes and
+  // strides to decide which dimensions were dropped. This function also
+  // partially verifies strides in case of rank reductions.
+  auto unusedDims = computeMemRefRankReductionMask(expectedType, subViewType,
+                                                   getMixedSizes());
+  if (failed(unusedDims))
+    return produceSubViewErrorMsg(SliceVerificationResult::LayoutMismatch,
+                                  *this, expectedType);
+
+  // Strides must match if there are no rank reductions.
+  // TODO: Verify strides when there are rank reductions. Strides are partially
+  // checked in `computeMemRefRankReductionMask`.
+  if (unusedDims->none() && !haveCompatibleStrides(expectedType, subViewType))
+    return produceSubViewErrorMsg(SliceVerificationResult::LayoutMismatch,
+                                  *this, expectedType);
 
-  auto result = isRankReducedMemRefType(llvm::cast<MemRefType>(expectedType),
-                                        subViewType, getMixedSizes());
-  return produceSubViewErrorMsg(result, *this, expectedType);
+  return success();
 }
 
 raw_ostream &mlir::operator<<(raw_ostream &os, const Range &range) {
@@ -2882,11 +2900,9 @@ static MemRefType getCanonicalSubViewResultType(
     ArrayRef<OpFoldResult> mixedSizes, ArrayRef<OpFoldResult> mixedStrides) {
   auto nonRankReducedType = llvm::cast<MemRefType>(SubViewOp::inferResultType(
       sourceType, mixedOffsets, mixedSizes, mixedStrides));
-  std::optional<llvm::SmallBitVector> unusedDims =
-      computeMemRefRankReductionMask(currentSourceType, currentResultType,
-                                     mixedSizes);
-  // Return nullptr as failure mode.
-  if (!unusedDims)
+  FailureOr<llvm::SmallBitVector> unusedDims = computeMemRefRankReductionMask(
+      currentSourceType, currentResultType, mixedSizes);
+  if (failed(unusedDims))
     return nullptr;
 
   auto layout = llvm::cast<StridedLayoutAttr>(nonRankReducedType.getLayout());
diff --git a/mlir/test/Dialect/GPU/decompose-memrefs.mlir b/mlir/test/Dialect/GPU/decompose-memrefs.mlir
index d714010d0f254..56fc9a66b7ace 100644
--- a/mlir/test/Dialect/GPU/decompose-memrefs.mlir
+++ b/mlir/test/Dialect/GPU/decompose-memrefs.mlir
@@ -119,7 +119,7 @@ func.func @decompose_subview(%arg0 : memref<?x?x?xf32>) {
 // CHECK: %[[IDX1:.*]] = affine.apply #[[MAP1]]()[%[[STRIDES]]#1]
 // CHECK: %[[IDX2:.*]] = affine.apply #[[MAP2]]()[%[[TX]], %[[STRIDES]]#0, %[[TY]], %[[STRIDES]]#1, %[[TZ]]]
 // CHECK: %[[PTR:.*]] = memref.reinterpret_cast %[[BASE]] to offset: [%[[IDX2]]], sizes: [%{{.*}}, %{{.*}}, %{{.*}}], strides: [%[[IDX]], %[[IDX1]], 4]
-// CHECK: "test.test"(%[[PTR]]) : (memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>) -> ()
+// CHECK: "test.test"(%[[PTR]]) : (memref<?x?x?xf32, strided<[?, ?, 4], offset: ?>>) -> ()
 func.func @decompose_subview_strided(%arg0 : memref<?x?x?xf32>) {
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
@@ -129,8 +129,8 @@ func.func @decompose_subview_strided(%arg0 : memref<?x?x?xf32>) {
   %block_dim2 = memref.dim %arg0, %c2 : memref<?x?x?xf32>
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1, %grid_z = %c1)
              threads(%tx, %ty, %tz) in (%block_x = %block_dim0, %block_y = %block_dim1, %block_z = %block_dim2) {
-    %res = memref.subview %arg0[%tx, %ty, %tz] [%c2, %c2, %c2] [2, 3, 4] : memref<?x?x?xf32> to memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
-    "test.test"(%res) : (memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>) -> ()
+    %res = memref.subview %arg0[%tx, %ty, %tz] [%c2, %c2, %c2] [2, 3, 4] : memref<?x?x?xf32> to memref<?x?x?xf32, strided<[?, ?, 4], offset: ?>>
+    "test.test"(%res) : (memref<?x?x?xf32, strided<[?, ?, 4], offset: ?>>) -> ()
     gpu.terminator
   }
   return
diff --git a/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir b/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir
index 96b72e042b9e0..3407bdbc7c8f9 100644
--- a/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir
+++ b/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir
@@ -595,9 +595,9 @@ func.func @subview_of_subview(%m: memref<1x1024xf32, 3>, %pos: index) {
   %0 = memref.subview %m[3, %pos] [1, 2] [1, 1]
       : memref<1x1024xf32, 3>
-        to memref<1x2xf32, strided<[1024, 2], offset: ?>, 3>
+        to memref<1x2xf32, strided<[1024, 1], offset: ?>, 3>
   %1 = memref.subview %0[1, 2] [1, 1] [1, 1]
-      : memref<1x2xf32, strided<[1024, 2], offset: ?>, 3>
+      : memref<1x2xf32, strided<[1024, 1], offset: ?>, 3>
         to memref<f32, strided<[], offset: ?>, 3>
   return %1 : memref<f32, strided<[], offset: ?>, 3>
 }
@@ -675,9 +675,9 @@ func.func @fold_gpu_subgroup_mma_store_matrix_1d(%dst: memref<?xvector<4xf32>>,
 // CHECK-LABEL: func.func @fold_gpu_subgroup_mma_load_matrix_2d
 // CHECK-SAME: %[[SRC:.+]]: memref<128x128xf32>
 func.func @fold_gpu_subgroup_mma_load_matrix_2d(%arg0 : memref<128x128xf32>, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index) -> !gpu.mma_matrix<16x16xf16, "COp"> {
-  %subview = memref.subview %arg0[%arg1, %arg2][64, 32][2, 1] : memref<128x128xf32> to memref<64x32xf32, strided<[64, 1], offset: ?>>
+  %subview = memref.subview %arg0[%arg1, %arg2][64, 32][2, 1] : memref<128x128xf32> to memref<64x32xf32, strided<[256, 1], offset: ?>>
   // CHECK: gpu.subgroup_mma_load_matrix %[[SRC]][{{.+}}] {leadDimension = 32 : index} : memref<128x128xf32> -> !gpu.mma_matrix<16x16xf16, "COp">
-  %matrix = gpu.subgroup_mma_load_matrix %subview[%arg3, %arg4] {leadDimension = 32 : index} : memref<64x32xf32, strided<[64, 1], offset: ?>> -> !gpu.mma_matrix<16x16xf16, "COp">
+  %matrix = gpu.subgroup_mma_load_matrix %subview[%arg3, %arg4] {leadDimension = 32 : index} : memref<64x32xf32, strided<[256, 1], offset: ?>> -> !gpu.mma_matrix<16x16xf16, "COp">
   return %matrix : !gpu.mma_matrix<16x16xf16, "COp">
 }
 
@@ -686,9 +686,9 @@ func.func @fold_gpu_subgroup_mma_load_matrix_2d(%arg0 : memref<128x128xf32>, %ar
 // CHECK-LABEL: func.func @fold_gpu_subgroup_mma_load_matrix_2d
 // CHECK-SAME: %[[DST:.+]]: memref<128x128xf32>
 func.func @fold_gpu_subgroup_mma_load_matrix_2d(%arg0 : memref<128x128xf32>, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index, %matrix: !gpu.mma_matrix<16x16xf16, "COp">) {
-  %subview = memref.subview %arg0[%arg1, %arg2][64, 32][2, 1] : memref<128x128xf32> to memref<64x32xf32, strided<[64, 1], offset: ?>>
+  %subview = memref.subview %arg0[%arg1, %arg2][64, 32][2, 1] : memref<128x128xf32> to memref<64x32xf32, strided<[256, 1], offset: ?>>
   // CHECK: gpu.subgroup_mma_store_matrix %{{.+}}, %[[DST]][{{.+}}] {leadDimension = 32 : index} : !gpu.mma_matrix<16x16xf16, "COp">, memref<128x128xf32>
-  gpu.subgroup_mma_store_matrix %matrix, %subview[%arg3, %arg4] {leadDimension = 32 : index} : !gpu.mma_matrix<16x16xf16, "COp">, memref<64x32xf32, strided<[64, 1], offset: ?>>
+  gpu.subgroup_mma_store_matrix %matrix, %subview[%arg3, %arg4] {leadDimension = 32 : index} : !gpu.mma_matrix<16x16xf16, "COp">, memref<64x32xf32, strided<[256, 1], offset: ?>>
   return
 }
 
diff --git a/mlir/test/Dialect/MemRef/invalid.mlir b/mlir/test/Dialect/MemRef/invalid.mlir
index 7bb7a2affcbd1..be60a3dcb1b20 100644
--- a/mlir/test/Dialect/MemRef/invalid.mlir
+++ b/mlir/test/Dialect/MemRef/invalid.mlir
@@ -1073,3 +1073,12 @@ func.func @dim_0_ranked(%arg : memref<f32>, %arg1 : index) {
   memref.dim %arg, %arg1 : memref<f32> // expected-error {{'memref.dim' op operand #0 must be unranked.memref of any type values or non-0-ranked.memref of any type values, but got 'memref<f32>'}}
   return
 }
+
+// -----
+
+func.func @subview_invalid_strides(%m: memref<7x22x333x4444xi32>) {
+  // expected-error @below{{expected result type to be 'memref<7x11x333x4444xi32, strided<[32556744, 2959704, 4444, 1]>>' or a rank-reduced version. (mismatch of result layout)}}
+  %subview = memref.subview %m[0, 0, 0, 0] [7, 11, 333, 4444] [1, 2, 1, 1]
+      : memref<7x22x333x4444xi32> to memref<7x11x333x4444xi32>
+  return
+}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_sort_coo.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_sort_coo.mlir
index 3773cca9c8d69..e7dd0ad32a243 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_sort_coo.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_sort_coo.mlir
@@ -88,10 +88,10 @@ module {
     // Prepare a buffer for x0, x1, x2, y0 and a buffer for y1.
    %xys = memref.alloc() : memref<20xi32>
    %xy = memref.cast %xys : memref<20xi32> to memref<?xi32>
-    %x0 = memref.subview %xy[%i0][%i5][%i4] : memref<?xi32> to memref<?xi32, strided<[4], offset: ?>>
-    %x1 = memref.subview %xy[%i1][%i5][%i4] : memref<?xi32> to memref<?xi32, strided<[4], offset: ?>>
-    %x2 = memref.subview %xy[%i2][%i5][%i4] : memref<?xi32> to memref<?xi32, strided<[4], offset: ?>>
-    %y0 = memref.subview %xy[%i3][%i5][%i4] : memref<?xi32> to memref<?xi32, strided<[4], offset: ?>>
+    %x0 = memref.subview %xy[%i0][%i5][4] : memref<?xi32> to memref<?xi32, strided<[4], offset: ?>>
+    %x1 = memref.subview %xy[%i1][%i5][4] : memref<?xi32> to memref<?xi32, strided<[4], offset: ?>>
+    %x2 = memref.subview %xy[%i2][%i5][4] : memref<?xi32> to memref<?xi32, strided<[4], offset: ?>>
+    %y0 = memref.subview %xy[%i3][%i5][4] : memref<?xi32> to memref<?xi32, strided<[4], offset: ?>>
    %y1s = memref.alloc() : memref<7xi32>
    %y1 = memref.cast %y1s : memref<7xi32> to memref<?xi32>
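
Illustrative sketch (not part of the patch; the function name and types below are made up for illustration): with the stricter SubViewOp verifier above, a memref.subview with no rank reduction must spell out exactly the layout that SubViewOp::inferResultType computes, i.e. the source strides multiplied by the subview strides together with the inferred offset, mirroring the new @subview_invalid_strides test.

// Assumed source: memref<8x16xf32> with strides [16, 1] and offset 0; with
// subview strides [1, 2] the inferred result layout is strided<[16, 2]>.
func.func @subview_stride_example(%m: memref<8x16xf32>) {
  // Accepted: the result type matches the inferred layout exactly.
  %ok = memref.subview %m[0, 0] [8, 8] [1, 2]
      : memref<8x16xf32> to memref<8x8xf32, strided<[16, 2]>>
  // Rejected by the new stride check ("mismatch of result layout"), since the
  // inferred inner stride is 2, not 1:
  // %bad = memref.subview %m[0, 0] [8, 8] [1, 2]
  //     : memref<8x16xf32> to memref<8x8xf32, strided<[16, 1]>>
  return
}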