Skip to content

Commit 74f23b4

Browse files
authored
Backport: cpu: x64: fix perf issue for f32 conv (#4108)
1 parent 481afcd commit 74f23b4

1 file changed

Lines changed: 11 additions & 5 deletions

File tree

src/cpu/x64/brgemm/jit_brgemm_kernel.cpp

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2432,12 +2432,18 @@ void jit_brgemm_kernel_t<Wmm>::gemm_microkernel(dim_t bd_block2,
24322432
= B_offset(prefetch_count_B++, rd)
24332433
+ static_cast<dim_t>(brg.LDB) * brg.rd_block
24342434
* brg.typesize_B;
2435-
if (is_superset(brg.isa_impl, avx512_core)) {
2436-
prefetcht0(EVEX_compress_addr_safe(reg_aux_B,
2437-
prefetch_offset, reg_tmp_microkernel));
2435+
// Only use EVEX_compress_addr_safe/make_safe_addr
2436+
// when prefetch_offset > INT_MAX, for perf purposes
2437+
if (prefetch_offset <= INT_MAX) {
2438+
prefetcht0(ptr[reg_aux_B + prefetch_offset]);
24382439
} else {
2439-
prefetcht0(make_safe_addr(reg_aux_B, prefetch_offset,
2440-
reg_tmp_microkernel));
2440+
if (is_superset(brg.isa_impl, avx512_core)) {
2441+
prefetcht0(EVEX_compress_addr_safe(reg_aux_B,
2442+
prefetch_offset, reg_tmp_microkernel));
2443+
} else {
2444+
prefetcht0(make_safe_addr(reg_aux_B,
2445+
prefetch_offset, reg_tmp_microkernel));
2446+
}
24412447
}
24422448
}
24432449
for (dim_t ld = 0; ld < ld_block2; ld++) {

0 commit comments

Comments
 (0)