Skip to content

Commit 6c93fd4

Browse files
committed
Fix allocation for padded shared layout
1 parent 24285b9 commit 6c93fd4

File tree

2 files changed

+22
-21
lines changed

2 files changed

+22
-21
lines changed

lib/Dialect/TritonGPU/IR/Dialect.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1781,7 +1781,8 @@ int64_t PaddedSharedEncodingAttr::getPaddedSize(ArrayRef<int64_t> shape) const {
17811781
paddingSize += (unpaddedSize >> llvm::Log2_32(interval))
17821782
<< llvm::Log2_32(padding);
17831783
// There is no need for padding after the last element
1784-
paddingSize -= padding;
1784+
if (unpaddedSize % interval == 0)
1785+
paddingSize -= padding;
17851786
}
17861787
return unpaddedSize + paddingSize;
17871788
}

test/Analysis/test-allocation.mlir

Lines changed: 20 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -947,26 +947,26 @@ tt.func @padded_shared_layout_size() {
947947
// expected-remark @+2 {{offset = 0, size = 510}}
948948
// 255 * 2B = 510B
949949
%alloc0 = ttg.local_alloc : () -> !ttg.memdesc<1x255xf16, #PADDED_SHARED_0, #ttg.shared_memory, mutable>
950-
// expected-remark @+2 {{offset = 0, size = 528}}
951-
// (256 + 8) * 2B = 528B
950+
// expected-remark @+2 {{offset = 0, size = 512}}
951+
// 256 * 2B = 512B
952952
%alloc1 = ttg.local_alloc : () -> !ttg.memdesc<1x256xf16, #PADDED_SHARED_0, #ttg.shared_memory, mutable>
953953
// expected-remark @+2 {{offset = 0, size = 530}}
954954
// (257 + 8) * 2B = 530B
955955
%alloc2 = ttg.local_alloc : () -> !ttg.memdesc<1x257xf16, #PADDED_SHARED_0, #ttg.shared_memory, mutable>
956956
// expected-remark @+2 {{offset = 0, size = 1038}}
957957
// (511 + 8) * 2B = 1038B
958958
%alloc3 = ttg.local_alloc : () -> !ttg.memdesc<1x511xf16, #PADDED_SHARED_0, #ttg.shared_memory, mutable>
959-
// expected-remark @+2 {{offset = 0, size = 1056}}
960-
// (512 + 8 * 2) * 2B = 1056B
959+
// expected-remark @+2 {{offset = 0, size = 1040}}
960+
// (512 + 8 * 1) * 2B = 1040B
961961
%alloc4 = ttg.local_alloc : () -> !ttg.memdesc<1x512xf16, #PADDED_SHARED_0, #ttg.shared_memory, mutable>
962962
// expected-remark @+2 {{offset = 0, size = 1058}}
963963
// (513 + 8 * 2) * 2B = 1058B
964964
%alloc5 = ttg.local_alloc : () -> !ttg.memdesc<1x513xf16, #PADDED_SHARED_0, #ttg.shared_memory, mutable>
965-
// expected-remark @+2 {{offset = 0, size = 528}}
966-
// (16 * 16 + 8) * 2B = 528B
965+
// expected-remark @+2 {{offset = 0, size = 512}}
966+
// 16 * 16 * 2B = 512B
967967
%alloc6 = ttg.local_alloc : () -> !ttg.memdesc<16x16xf16, #PADDED_SHARED_0, #ttg.shared_memory, mutable>
968-
// expected-remark @+2 {{offset = 0, size = 1056}}
969-
// (16 * 32 + 8 * 2) * 2B = 1056B
968+
// expected-remark @+2 {{offset = 0, size = 1040}}
969+
// (16 * 32 + 8 * 1) * 2B = 1040B
970970
%alloc7 = ttg.local_alloc : () -> !ttg.memdesc<16x32xf16, #PADDED_SHARED_0, #ttg.shared_memory, mutable>
971971
// expected-remark @+2 {{offset = 0, size = 1008}}
972972
// (31 * 16 + 8) * 2B = 1008B
@@ -975,28 +975,28 @@ tt.func @padded_shared_layout_size() {
975975
}
976976

977977
// expected-remark @below {{padded_shared_layout_element_type}}
978-
// expected-remark @below {{size = 16896}}
978+
// expected-remark @below {{size = 16864}}
979979
tt.func @padded_shared_layout_element_type() {
980-
// expected-remark @+2 {{offset = 0, size = 4224}}
981-
// (16 * 256 + 8 * 16) * 1B = 4224B
980+
// expected-remark @+2 {{offset = 0, size = 4216}}
981+
// (16 * 256 + 8 * 15) * 1B = 4216B
982982
%alloc0 = ttg.local_alloc : () -> !ttg.memdesc<16x256xi8, #PADDED_SHARED_0, #ttg.shared_memory, mutable>
983-
// expected-remark @+2 {{offset = 0, size = 8448}}
984-
// (16 * 256 + 8 * 16) * 2B = 8448B
983+
// expected-remark @+2 {{offset = 0, size = 8432}}
984+
// (16 * 256 + 8 * 15) * 2B = 8432B
985985
%alloc1 = ttg.local_alloc : () -> !ttg.memdesc<16x256xf16, #PADDED_SHARED_0, #ttg.shared_memory, mutable>
986-
// expected-remark @+2 {{offset = 0, size = 16896}}
987-
// (16 * 256 + 8 * 16) * 4B = 16896B
986+
// expected-remark @+2 {{offset = 0, size = 16864}}
987+
// (16 * 256 + 8 * 15) * 4B = 16864B
988988
%alloc2 = ttg.local_alloc : () -> !ttg.memdesc<16x256xf32, #PADDED_SHARED_0, #ttg.shared_memory, mutable>
989989
tt.return
990990
}
991991

992992
// expected-remark @below {{padded_shared_layout_multi_tier}}
993-
// expected-remark @below {{size = 4480}}
993+
// expected-remark @below {{size = 4466}}
994994
tt.func @padded_shared_layout_multi_tier() {
995-
// expected-remark @+2 {{offset = 0, size = 4352}}
996-
// (16 * 256 + 4 * 32 + 8 * 16) * 1B = 4352B
995+
// expected-remark @+2 {{offset = 0, size = 4340}}
996+
// (16 * 256 + 4 * 31 + 8 * 15) * 1B = 4340B
997997
%alloc0 = ttg.local_alloc : () -> !ttg.memdesc<16x256xi8, #PADDED_SHARED_1, #ttg.shared_memory, mutable>
998-
// expected-remark @+2 {{offset = 0, size = 4480}}
999-
// (16 * 256 + 2 * 64 + 4 * 32 + 8 * 16) * 1B = 4480B
998+
// expected-remark @+2 {{offset = 0, size = 4466}}
999+
// (16 * 256 + 2 * 63 + 4 * 31 + 8 * 15) * 1B = 4466B
10001000
%alloc1 = ttg.local_alloc : () -> !ttg.memdesc<16x256xi8, #PADDED_SHARED_2, #ttg.shared_memory, mutable>
10011001
tt.return
10021002
}

0 commit comments

Comments
 (0)