Skip to content

Commit 46d00a0

Browse files
committed
cpu: x64: fix perf issue for f32 conv
1 parent 78e781f commit 46d00a0

1 file changed

Lines changed: 13 additions & 5 deletions

File tree

src/cpu/x64/brgemm/jit_brgemm_kernel.cpp

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2432,12 +2432,20 @@ void jit_brgemm_kernel_t<Wmm>::gemm_microkernel(dim_t bd_block2,
24322432
= B_offset(prefetch_count_B++, rd)
24332433
+ static_cast<dim_t>(brg.LDB) * brg.rd_block
24342434
* brg.typesize_B;
2435-
if (is_superset(brg.isa_impl, avx512_core)) {
2436-
prefetcht0(EVEX_compress_addr_safe(reg_aux_B,
2437-
prefetch_offset, reg_tmp_microkernel));
2435+
// Only use EVEX_compress_addr_safe/make_safe_addr
2436+
// when prefetch_offset > INT_MAX, for performance reasons
2437+
if (prefetch_offset <= INT_MAX) {
2438+
prefetcht0(ptr[reg_aux_B
2439+
+ B_offset(prefetch_count_B++, rd)
2440+
+ brg.LDB * brg.rd_block * brg.typesize_B]);
24382441
} else {
2439-
prefetcht0(make_safe_addr(reg_aux_B, prefetch_offset,
2440-
reg_tmp_microkernel));
2442+
if (is_superset(brg.isa_impl, avx512_core)) {
2443+
prefetcht0(EVEX_compress_addr_safe(reg_aux_B,
2444+
prefetch_offset, reg_tmp_microkernel));
2445+
} else {
2446+
prefetcht0(make_safe_addr(reg_aux_B,
2447+
prefetch_offset, reg_tmp_microkernel));
2448+
}
24412449
}
24422450
}
24432451
for (dim_t ld = 0; ld < ld_block2; ld++) {

0 commit comments

Comments
 (0)