@@ -947,26 +947,26 @@ tt.func @padded_shared_layout_size() {
947947 // expected-remark @+2 {{offset = 0, size = 510}}
948948 // 255 * 2B = 510B
949949 %alloc0 = ttg.local_alloc : () -> !ttg.memdesc <1 x255 xf16 , #PADDED_SHARED_0 , #ttg.shared_memory , mutable >
950- // expected-remark @+2 {{offset = 0, size = 528 }}
951- // ( 256 + 8) * 2B = 528B
950+ // expected-remark @+2 {{offset = 0, size = 512 }}
951+ // 256 * 2B = 512B (no padding yet: the 8-element pad is first counted at 257 elements, see %alloc2)
952952 %alloc1 = ttg.local_alloc : () -> !ttg.memdesc <1 x256 xf16 , #PADDED_SHARED_0 , #ttg.shared_memory , mutable >
953953 // expected-remark @+2 {{offset = 0, size = 530}}
954954 // (257 + 8) * 2B = 530B
955955 %alloc2 = ttg.local_alloc : () -> !ttg.memdesc <1 x257 xf16 , #PADDED_SHARED_0 , #ttg.shared_memory , mutable >
956956 // expected-remark @+2 {{offset = 0, size = 1038}}
957957 // (511 + 8) * 2B = 1038B
958958 %alloc3 = ttg.local_alloc : () -> !ttg.memdesc <1 x511 xf16 , #PADDED_SHARED_0 , #ttg.shared_memory , mutable >
959- // expected-remark @+2 {{offset = 0, size = 1056 }}
960- // (512 + 8 * 2 ) * 2B = 1056B
959+ // expected-remark @+2 {{offset = 0, size = 1040 }}
960+ // (512 + 8 * 1) * 2B = 1040B
961961 %alloc4 = ttg.local_alloc : () -> !ttg.memdesc <1 x512 xf16 , #PADDED_SHARED_0 , #ttg.shared_memory , mutable >
962962 // expected-remark @+2 {{offset = 0, size = 1058}}
963963 // (513 + 8 * 2) * 2B = 1058B
964964 %alloc5 = ttg.local_alloc : () -> !ttg.memdesc <1 x513 xf16 , #PADDED_SHARED_0 , #ttg.shared_memory , mutable >
965- // expected-remark @+2 {{offset = 0, size = 528 }}
966- // ( 16 * 16 + 8) * 2B = 528B
965+ // expected-remark @+2 {{offset = 0, size = 512 }}
966+ // 16 * 16 * 2B = 512B (256 elements in total, so no padding, same as %alloc1)
967967 %alloc6 = ttg.local_alloc : () -> !ttg.memdesc <16 x16 xf16 , #PADDED_SHARED_0 , #ttg.shared_memory , mutable >
968- // expected-remark @+2 {{offset = 0, size = 1056 }}
969- // (16 * 32 + 8 * 2 ) * 2B = 1056B
968+ // expected-remark @+2 {{offset = 0, size = 1040 }}
969+ // (16 * 32 + 8 * 1) * 2B = 1040B
970970 %alloc7 = ttg.local_alloc : () -> !ttg.memdesc <16 x32 xf16 , #PADDED_SHARED_0 , #ttg.shared_memory , mutable >
971971 // expected-remark @+2 {{offset = 0, size = 1008}}
972972 // (31 * 16 + 8) * 2B = 1008B
@@ -975,28 +975,28 @@ tt.func @padded_shared_layout_size() {
975975}
976976
977977// expected-remark @below {{padded_shared_layout_element_type}}
978- // expected-remark @below {{size = 16896 }}
978+ // expected-remark @below {{size = 16864 }}
979979tt.func @padded_shared_layout_element_type () {
980- // expected-remark @+2 {{offset = 0, size = 4224 }}
981- // (16 * 256 + 8 * 16 ) * 1B = 4224B
980+ // expected-remark @+2 {{offset = 0, size = 4216 }}
981+ // (16 * 256 + 8 * 15) * 1B = 4216B (16 runs of 256 elements; a pad is counted between runs, not after the last)
982982 %alloc0 = ttg.local_alloc : () -> !ttg.memdesc <16 x256 xi8 , #PADDED_SHARED_0 , #ttg.shared_memory , mutable >
983- // expected-remark @+2 {{offset = 0, size = 8448 }}
984- // (16 * 256 + 8 * 16 ) * 2B = 8448B
983+ // expected-remark @+2 {{offset = 0, size = 8432 }}
984+ // (16 * 256 + 8 * 15) * 2B = 8432B
985985 %alloc1 = ttg.local_alloc : () -> !ttg.memdesc <16 x256 xf16 , #PADDED_SHARED_0 , #ttg.shared_memory , mutable >
986- // expected-remark @+2 {{offset = 0, size = 16896 }}
987- // (16 * 256 + 8 * 16 ) * 4B = 16896B
986+ // expected-remark @+2 {{offset = 0, size = 16864 }}
987+ // (16 * 256 + 8 * 15) * 4B = 16864B
988988 %alloc2 = ttg.local_alloc : () -> !ttg.memdesc <16 x256 xf32 , #PADDED_SHARED_0 , #ttg.shared_memory , mutable >
989989 tt.return
990990}
991991
992992// expected-remark @below {{padded_shared_layout_multi_tier}}
993- // expected-remark @below {{size = 4480 }}
993+ // expected-remark @below {{size = 4466 }}
994994tt.func @padded_shared_layout_multi_tier () {
995- // expected-remark @+2 {{offset = 0, size = 4352 }}
996- // (16 * 256 + 4 * 32 + 8 * 16 ) * 1B = 4352B
995+ // expected-remark @+2 {{offset = 0, size = 4340 }}
996+ // (16 * 256 + 4 * 31 + 8 * 15) * 1B = 4340B
997997 %alloc0 = ttg.local_alloc : () -> !ttg.memdesc <16 x256 xi8 , #PADDED_SHARED_1 , #ttg.shared_memory , mutable >
998- // expected-remark @+2 {{offset = 0, size = 4480 }}
999- // (16 * 256 + 2 * 64 + 4 * 32 + 8 * 16 ) * 1B = 4480B
998+ // expected-remark @+2 {{offset = 0, size = 4466 }}
999+ // (16 * 256 + 2 * 63 + 4 * 31 + 8 * 15) * 1B = 4466B
10001000 %alloc1 = ttg.local_alloc : () -> !ttg.memdesc <16 x256 xi8 , #PADDED_SHARED_2 , #ttg.shared_memory , mutable >
10011001 tt.return
10021002}
0 commit comments