@@ -53,7 +53,7 @@ func.func @non_unit_trailing_dim(%in: memref<1x1x8x?xf32, strided<[3072, 8, 1, 1
5353// CHECK-NOT: vector.shape_cast
5454
5555// Same as the top example within this split, but with a scalable unit dim in
56- // the output vector - not supported
56+ // the output vector - not supported (scalable 1 is _not_ a unit dimension).
5757
5858func.func @negative_scalable_unit_dim (%in: memref <1 x1 x8 x1 xf32 , strided <[3072 , 8 , 1 , 1 ], offset : ?>>) -> vector <1 x8 x[1 ]xf32 >{
5959 %c0 = arith.constant 0 : index
@@ -67,13 +67,13 @@ func.func @negative_scalable_unit_dim(%in: memref<1x1x8x1xf32, strided<[3072, 8,
6767
6868// -----
6969
70- func.func @contiguous_outer_dyn_inner_most (%a: index , %b: index , %memref: memref <?x?x8 x1 xf32 >) -> vector <8 x1 xf32 > {
70+ func.func @contiguous_inner_most_dynamic_outer (%a: index , %b: index , %memref: memref <?x?x8 x1 xf32 >) -> vector <8 x1 xf32 > {
7171 %c0 = arith.constant 0 : index
7272 %pad = arith.constant 0.0 : f32
7373 %v = vector.transfer_read %memref [%a , %b , %c0 , %c0 ], %pad {in_bounds = [true , true ]} : memref <?x?x8 x1 xf32 >, vector <8 x1 xf32 >
7474 return %v : vector <8 x1 xf32 >
7575}
76- // CHECK: func.func @contiguous_outer_dyn_inner_most(
76+ // CHECK: func.func @contiguous_inner_most_dynamic_outer
7777// CHECK-SAME: %[[IDX0:[a-zA-Z0-9]+]]
7878// CHECK-SAME: %[[IDX1:[a-zA-Z0-9]+]]
7979// CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]]
@@ -89,68 +89,154 @@ func.func @contiguous_outer_dyn_inner_most(%a: index, %b: index, %memref: memref
8989// CHECK: %[[RESULT:.+]] = vector.shape_cast %[[VEC]]
9090// CHECK: return %[[RESULT]]
9191
92+ // Same as the top example within this split, but with the outer (leading) vector
93+ // dim scalable: the trailing unit dim is still dropped (vector<[8]x1xf32> -> vector<[8]xf32>).
94+ // This only makes sense when "8 = [8]" (i.e. vscale = 1), which is assumed (implicitly) via the `in_bounds` attribute.
95+
96+ func.func @contiguous_inner_most_outer_dim_dyn_scalable_inner_dim (%a: index , %b: index , %memref: memref <?x?x8 x1 xf32 >) -> vector <[8 ]x1 xf32 > {
97+ %c0 = arith.constant 0 : index
98+ %pad = arith.constant 0.0 : f32
99+ %v = vector.transfer_read %memref [%a , %b , %c0 , %c0 ], %pad {in_bounds = [true , true ]} : memref <?x?x8 x1 xf32 >, vector <[8 ]x1 xf32 >
100+ return %v : vector <[8 ]x1 xf32 >
101+ }
102+ // CHECK-LABEL: func @contiguous_inner_most_outer_dim_dyn_scalable_inner_dim
103+ // CHECK-SAME: %[[IDX0:[a-zA-Z0-9]+]]
104+ // CHECK-SAME: %[[IDX1:[a-zA-Z0-9]+]]
105+ // CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]]
106+ // CHECK: %[[VIEW:.+]] = memref.subview %[[SRC]]{{.*}} memref<?x?x8x1xf32> to memref<?x?x8xf32, strided<[?, 8, 1], offset: ?>>
107+ // CHECK: %[[VEC_READ:.+]] = vector.transfer_read %[[VIEW]]
108+ // CHECK-SAME: {in_bounds = [true]}
109+ // CHECK-SAME: memref<?x?x8xf32, strided<[?, 8, 1], offset: ?>>, vector<[8]xf32>
110+ // CHECK: vector.shape_cast %[[VEC_READ]]
111+
92112// -----
93113
94- func.func @contiguous_inner_most_dim (%A: memref <16 x1 xf32 >, %i:index , %j:index ) -> (vector <8 x1 xf32 >) {
114+ func.func @contiguous_inner_most_dim_non_zero_idxs (%A: memref <16 x1 xf32 >, %i:index , %j:index ) -> (vector <8 x1 xf32 >) {
95115 %c0 = arith.constant 0 : index
96116 %f0 = arith.constant 0.0 : f32
97117 %1 = vector.transfer_read %A [%i , %j ], %f0 : memref <16 x1 xf32 >, vector <8 x1 xf32 >
98118 return %1 : vector <8 x1 xf32 >
99119}
100- // CHECK: func @contiguous_inner_most_dim (%[[SRC:.+]]: memref<16x1xf32>, %[[I:.+]]: index, %[[J:.+]]: index) -> vector<8x1xf32>
120+ // CHECK: func @contiguous_inner_most_dim_non_zero_idxs (%[[SRC:.+]]: memref<16x1xf32>, %[[I:.+]]: index, %[[J:.+]]: index) -> vector<8x1xf32>
101121// CHECK: %[[SRC_0:.+]] = memref.subview %[[SRC]]
102122// CHECK-SAME: memref<16x1xf32> to memref<16xf32, strided<[1]>>
103123// CHECK: %[[V:.+]] = vector.transfer_read %[[SRC_0]]
104- // CHECK: %[[RESULT]] = vector.shape_cast %[[V]] : vector<8xf32> to vector<8x1xf32>
124+ // CHECK: %[[RESULT:.+ ]] = vector.shape_cast %[[V]] : vector<8xf32> to vector<8x1xf32>
105125// CHECK: return %[[RESULT]]
106126
127+ // Same as the top example within this split, but with the outer (leading) vector
128+ // dim scalable: the trailing unit dim is still dropped (vector<[8]x1xf32> -> vector<[8]xf32>).
129+ // This only makes sense when "8 = [8]" (i.e. vscale = 1). NOTE(review): the read below sets no `in_bounds` attribute, so this assumption is not stated in the IR - confirm.
130+
131+ func.func @contiguous_inner_most_dim_non_zero_idxs_scalable_inner_dim (%A: memref <16 x1 xf32 >, %i:index , %j:index ) -> (vector <[8 ]x1 xf32 >) {
132+ %c0 = arith.constant 0 : index
133+ %f0 = arith.constant 0.0 : f32
134+ %1 = vector.transfer_read %A [%i , %j ], %f0 : memref <16 x1 xf32 >, vector <[8 ]x1 xf32 >
135+ return %1 : vector <[8 ]x1 xf32 >
136+ }
137+ // CHECK-LABEL: func @contiguous_inner_most_dim_non_zero_idxs_scalable_inner_dim(
138+ // CHECK-SAME: %[[SRC:.+]]: memref<16x1xf32>, %[[I:.+]]: index, %[[J:.+]]: index) -> vector<[8]x1xf32>
139+ // CHECK: %[[SRC_0:.+]] = memref.subview %[[SRC]]
140+ // CHECK-SAME: memref<16x1xf32> to memref<16xf32, strided<[1]>>
141+ // CHECK: %[[V:.+]] = vector.transfer_read %[[SRC_0]]
142+ // CHECK: %[[RESULT:.+]] = vector.shape_cast %[[V]] : vector<[8]xf32> to vector<[8]x1xf32>
143+ // CHECK: return %[[RESULT]]
144+
107145// -----
108146
109- func.func @contiguous_inner_most_dim_bounds (%A: memref <1000 x1 xf32 >, %i:index , %ii:index ) -> (vector <4 x1 xf32 >) {
147+ func.func @contiguous_inner_most_dim_with_subview (%A: memref <1000 x1 xf32 >, %i:index , %ii:index ) -> (vector <4 x1 xf32 >) {
110148 %c0 = arith.constant 0 : index
111149 %cst = arith.constant 0.0 : f32
112150 %0 = memref.subview %A [%i , 0 ] [40 , 1 ] [1 , 1 ] : memref <1000 x1 xf32 > to memref <40 x1 xf32 , strided <[1 , 1 ], offset : ?>>
113151 %1 = vector.transfer_read %0 [%ii , %c0 ], %cst {in_bounds = [true , true ]} : memref <40 x1 xf32 , strided <[1 , 1 ], offset : ?>>, vector <4 x1 xf32 >
114152 return %1 : vector <4 x1 xf32 >
115153}
116- // CHECK: func @contiguous_inner_most_dim_bounds (%[[SRC:.+]]: memref<1000x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<4x1xf32>
154+ // CHECK: func @contiguous_inner_most_dim_with_subview (%[[SRC:.+]]: memref<1000x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<4x1xf32>
117155// CHECK: %[[SRC_0:.+]] = memref.subview %[[SRC]]
118156// CHECK: %[[SRC_1:.+]] = memref.subview %[[SRC_0]]
119157// CHECK: %[[V:.+]] = vector.transfer_read %[[SRC_1]]
120158// CHECK-SAME: {in_bounds = [true]}
121159// CHECK-SAME: vector<4xf32>
122160
161+ // Same as the top example within this split, but with the outer (leading) vector
162+ // dim scalable: the trailing unit dim is still dropped (vector<[4]x1xf32> -> vector<[4]xf32>).
163+ // This only makes sense when "4 = [4]" (i.e. vscale = 1), which is assumed (implicitly) via the `in_bounds` attribute.
164+
165+ func.func @contiguous_inner_most_dim_with_subview_scalable_inner_dim (%A: memref <1000 x1 xf32 >, %i:index , %ii:index ) -> (vector <[4 ]x1 xf32 >) {
166+ %c0 = arith.constant 0 : index
167+ %cst = arith.constant 0.0 : f32
168+ %0 = memref.subview %A [%i , 0 ] [40 , 1 ] [1 , 1 ] : memref <1000 x1 xf32 > to memref <40 x1 xf32 , strided <[1 , 1 ], offset : ?>>
169+ %1 = vector.transfer_read %0 [%ii , %c0 ], %cst {in_bounds = [true , true ]} : memref <40 x1 xf32 , strided <[1 , 1 ], offset : ?>>, vector <[4 ]x1 xf32 >
170+ return %1 : vector <[4 ]x1 xf32 >
171+ }
172+ // CHECK-LABEL: func @contiguous_inner_most_dim_with_subview_scalable_inner_dim
173+ // CHECK-SAME: %[[SRC:.+]]: memref<1000x1xf32>
174+ // CHECK: %[[SRC_0:.+]] = memref.subview %[[SRC]]
175+ // CHECK: %[[V:.+]] = vector.transfer_read %[[SRC_0]]
176+ // CHECK-SAME: {in_bounds = [true]}
177+ // CHECK-SAME: vector<[4]xf32>
178+
123179// -----
124180
125- func.func @contiguous_inner_most_dim_bounds_2d (%A: memref <1000 x1 x1 xf32 >, %i:index , %ii:index ) -> (vector <4 x1 x1 xf32 >) {
181+ func.func @contiguous_inner_most_dim_with_subview_2d (%A: memref <1000 x1 x1 xf32 >, %i:index , %ii:index ) -> (vector <4 x1 x1 xf32 >) {
126182 %c0 = arith.constant 0 : index
127183 %cst = arith.constant 0.0 : f32
128184 %0 = memref.subview %A [%i , 0 , 0 ] [40 , 1 , 1 ] [1 , 1 , 1 ] : memref <1000 x1 x1 xf32 > to memref <40 x1 x1 xf32 , strided <[1 , 1 , 1 ], offset : ?>>
129185 %1 = vector.transfer_read %0 [%ii , %c0 , %c0 ], %cst {in_bounds = [true , true , true ]} : memref <40 x1 x1 xf32 , strided <[1 , 1 , 1 ], offset : ?>>, vector <4 x1 x1 xf32 >
130186 return %1 : vector <4 x1 x1 xf32 >
131187}
132- // CHECK: func @contiguous_inner_most_dim_bounds_2d (%[[SRC:.+]]: memref<1000x1x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<4x1x1xf32>
188+ // CHECK: func @contiguous_inner_most_dim_with_subview_2d (%[[SRC:.+]]: memref<1000x1x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<4x1x1xf32>
133189// CHECK: %[[SRC_0:.+]] = memref.subview %[[SRC]]
134190// CHECK: %[[SRC_1:.+]] = memref.subview %[[SRC_0]]
135191// CHECK: %[[V:.+]] = vector.transfer_read %[[SRC_1]]
136192// CHECK-SAME: {in_bounds = [true]}
137193// CHECK-SAME: vector<4xf32>
138194
195+ // Same as the top example within this split, but with the outer (leading) vector
196+ // dim scalable: both trailing unit dims are still dropped (vector<[4]x1x1xf32> -> vector<[4]xf32>).
197+ // This only makes sense when "4 = [4]" (i.e. vscale = 1), which is assumed (implicitly) via the `in_bounds` attribute.
198+
199+ func.func @contiguous_inner_most_dim_with_subview_2d_scalable_inner_dim (%A: memref <1000 x1 x1 xf32 >, %i:index , %ii:index ) -> (vector <[4 ]x1 x1 xf32 >) {
200+ %c0 = arith.constant 0 : index
201+ %cst = arith.constant 0.0 : f32
202+ %0 = memref.subview %A [%i , 0 , 0 ] [40 , 1 , 1 ] [1 , 1 , 1 ] : memref <1000 x1 x1 xf32 > to memref <40 x1 x1 xf32 , strided <[1 , 1 , 1 ], offset : ?>>
203+ %1 = vector.transfer_read %0 [%ii , %c0 , %c0 ], %cst {in_bounds = [true , true , true ]} : memref <40 x1 x1 xf32 , strided <[1 , 1 , 1 ], offset : ?>>, vector <[4 ]x1 x1 xf32 >
204+ return %1 : vector <[4 ]x1 x1 xf32 >
205+ }
206+ // CHECK-LABEL: func @contiguous_inner_most_dim_with_subview_2d_scalable_inner_dim(
207+ // CHECK-SAME: %[[SRC:.+]]: memref<1000x1x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<[4]x1x1xf32>
208+ // CHECK: %[[SRC_0:.+]] = memref.subview %[[SRC]]
209+ // CHECK: %[[SRC_1:.+]] = memref.subview %[[SRC_0]]
210+ // CHECK: %[[V:.+]] = vector.transfer_read %[[SRC_1]]
211+ // CHECK-SAME: {in_bounds = [true]}
212+ // CHECK-SAME: vector<[4]xf32>
213+ // CHECK: vector.shape_cast %[[V]]
214+
139215// -----
140216
141- func.func @contiguous_inner_most_dim_out_of_bounds_2d (%arg0: memref <1 x1 xf32 >) -> vector <4 x8 xf32 > {
217+ // NOTE: This is an out-of-bounds access.
218+
219+ func.func @negative_non_unit_inner_vec_dim (%arg0: memref <4 x1 xf32 >) -> vector <4 x8 xf32 > {
142220 %c0 = arith.constant 0 : index
143221 %cst = arith.constant 0.000000e+00 : f32
144- %0 = vector.transfer_read %arg0 [%c0 , %c0 ], %cst : memref <1 x 1 x f32 >, vector <4 x8 xf32 >
222+ %0 = vector.transfer_read %arg0 [%c0 , %c0 ], %cst : memref <4 x 1 x f32 >, vector <4 x8 xf32 >
145223 return %0 : vector <4 x8 xf32 >
146224}
147- // The inner most unit dim can not be dropped. In this context, we do not
148- // generate rank-reduced memref.subview ops.
149- // CHECK: func.func @contiguous_inner_most_dim_out_of_bounds_2d
150- // CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]]
225+ // CHECK: func.func @negative_non_unit_inner_vec_dim
226+ // CHECK-NOT: memref.subview
227+ // CHECK: vector.transfer_read
228+
229+ // -----
230+
231+ func.func @negative_non_unit_inner_memref_dim (%arg0: memref <4 x8 xf32 >) -> vector <4 x1 xf32 > {
232+ %c0 = arith.constant 0 : index
233+ %cst = arith.constant 0.000000e+00 : f32
234+ %0 = vector.transfer_read %arg0 [%c0 , %c0 ], %cst : memref <4 x8 xf32 >, vector <4 x1 xf32 >
235+ return %0 : vector <4 x1 xf32 >
236+ }
237+ // CHECK: func.func @negative_non_unit_inner_memref_dim
151238// CHECK-NOT: memref.subview
152- // CHECK: %[[READ:.+]] = vector.transfer_read %[[SRC]]
153- // CHECK: return %[[READ]] : vector<4x8xf32>
239+ // CHECK: vector.transfer_read
154240
155241// -----
156242
@@ -232,20 +318,6 @@ func.func @non_unit_strides(%arg0: memref<512x16x1xf32, strided<[8192, 16, 4], o
232318
233319// -----
234320
235- // Negative test: [1] (scalable 1) is _not_ a unit dimension.
236- func.func @trailing_scalable_one_dim_transfer_read (%dest : memref <24 x1 xf32 >) -> vector <4 x[1 ]xf32 > {
237- %c0 = arith.constant 0 : index
238- %pad = arith.constant 0.0 : f32
239- %0 = vector.transfer_read %dest [%c0 , %c0 ], %pad {in_bounds = [true , true ]} : memref <24 x1 xf32 >, vector <4 x[1 ]xf32 >
240- return %0 : vector <4 x[1 ]xf32 >
241- }
242- // CHECK: func.func @trailing_scalable_one_dim_transfer_read
243- // CHECK-NOT: vector.shape_cast
244- // CHECK: vector.transfer_read {{.*}} : memref<24x1xf32>, vector<4x[1]xf32>
245- // CHECK-NOT: vector.shape_cast
246-
247- // -----
248-
249321func.func @leading_scalable_dimension_transfer_write (%dest : memref <24 x1 xf32 >, %vec: vector <[4 ]x1 xf32 >) {
250322 %c0 = arith.constant 0 : index
251323 vector.transfer_write %vec , %dest [%c0 , %c0 ] {in_bounds = [true , true ]} : vector <[4 ]x1 xf32 >, memref <24 x1 xf32 >
0 commit comments