# Mutable, module-level task count. It is a `Ref` holding an `Int`, initialised
# to 1; whoever changes it does so by assigning `OCTAVIAN_NUM_TASKS[] = n`.
const OCTAVIAN_NUM_TASKS = Ref(1)

# Return the value currently stored in `OCTAVIAN_NUM_TASKS`.
function _nthreads()
    return OCTAVIAN_NUM_TASKS[]
end
44
5- @generated function calc_factors (:: Union{Val{nc},StaticInt{nc}} = snum_cores ()) where {nc}
5+ @generated function calc_factors (:: Union{Val{nc},StaticInt{nc}} = num_cores ()) where {nc}
66 t = Expr (:tuple )
77 for i ∈ nc: - 1 : 1
88 d, r = divrem (nc, i)
@@ -12,87 +12,58 @@ _nthreads() = OCTAVIAN_NUM_TASKS[]
1212end
1313# const CORE_FACTORS = calc_factors()
1414
15- @generated function MᵣW_mul_factor ()
16- f = VectorizationBase. has_feature (" x86_64_avx512f" ) ? 4 : 9
17- Expr (:call , Expr (:curly , :StaticInt , f))
18- end
# `MᵣW_mul_factor` yields a compile-time integer chosen by whether the
# `x86_64_avx512f` feature is reported: 4 with AVX-512F, 9 without.
# The selection is done by dispatching on the static boolean (`True`/`False`)
# returned by `has_feature`, so no `@generated` function is needed.
function MᵣW_mul_factor(::False)
    return StaticInt{9}()
end

function MᵣW_mul_factor(::True)
    return StaticInt{4}()
end

function MᵣW_mul_factor()
    avx512f = has_feature(Val(:x86_64_avx512f))
    return MᵣW_mul_factor(avx512f)
end
1918
20- @generated function W₁Default ()
21- w = if VectorizationBase. has_feature (" x86_64_avx512f" )
22- 0.006089395198610773
23- elseif (Sys. CPU_NAME === " znver2" ) || (Sys. CPU_NAME === " znver3" ) # these are znver2 values, I'm assuming they're better for znver3 than generic
24- 0.1
25- elseif Sys. CPU_NAME === " znver1"
26- 0.053918949422353986
27- else
28- 0.1
29- end
30- Expr (:call , Expr (:curly , :StaticFloat , w))
31- end
32- @generated function W₂Default ()
33- w = if VectorizationBase. has_feature (" x86_64_avx512f" )
34- 0.7979822724696168
35- elseif (Sys. CPU_NAME === " znver2" ) || (Sys. CPU_NAME === " znver3" ) # these are znver2 values, I'm assuming they're better for znver3 than generic
36- 0.993489411720157
37- elseif Sys. CPU_NAME === " znver1"
38- 0.3013238122374886
39- else
40- 0.15989396641218157
41- end
42- Expr (:call , Expr (:curly , :StaticFloat , w))
43- end
44- @generated function R₁Default ()
45- w = if VectorizationBase. has_feature (" x86_64_avx512f" )
46- 0.5900561503730485
47- elseif (Sys. CPU_NAME === " znver2" ) || (Sys. CPU_NAME === " znver3" ) # these are znver2 values, I'm assuming they're better for znver3 than generic
48- 0.6052218809954467
49- elseif Sys. CPU_NAME === " znver1"
50- 0.6077103834481342
51- else
52- 0.4203583148344484
53- end
54- Expr (:call , Expr (:curly , :StaticFloat , w))
55- end
56- @generated function R₂Default ()
57- w = if VectorizationBase. has_feature (" x86_64_avx512f" )
58- 0.762152930709678
59- elseif (Sys. CPU_NAME === " znver2" ) || (Sys. CPU_NAME === " znver3" ) # these are znver2 values, I'm assuming they're better for znver3 than generic
60- 0.7594052633561165
61- elseif Sys. CPU_NAME === " znver1"
62- 0.8775382433240162
63- else
64- 0.6344856142604789
65- end
66- Expr (:call , Expr (:curly , :StaticFloat , w))
67- end
# Defaults returned when the `x86_64_avx512f` feature check yields `True`.
# Each method returns its constant as a `StaticFloat` type parameter.
# NOTE(review): what W₁/W₂/R₁/R₂ parameterise is not visible in this part of
# the file; confirm against the code that consumes them.
function W₁Default(::True)
    return StaticFloat{0.006089395198610773}()
end

function W₂Default(::True)
    return StaticFloat{0.7979822724696168}()
end

function R₁Default(::True)
    return StaticFloat{0.5900561503730485}()
end

function R₂Default(::True)
    return StaticFloat{0.762152930709678}()
end
23+
# Per-architecture defaults for W₁, W₂, R₁ and R₂.
#
# Each `*Default_arch` function takes a single argument and dispatches on it:
# a `Val{:znver1}`, `Val{:znver2}` or `Val{:znver3}` selects the values tuned
# for that CPU, and any other argument falls through to the generic `(_)`
# methods. Every method returns its constant as a `StaticFloat` type parameter.
# NOTE(review): what W₁/W₂/R₁/R₂ parameterise is not visible in this part of
# the file; confirm against the code that consumes them.

# znver1
W₁Default_arch(::Val{:znver1}) = StaticFloat{0.053918949422353986}()
W₂Default_arch(::Val{:znver1}) = StaticFloat{0.3013238122374886}()
R₁Default_arch(::Val{:znver1}) = StaticFloat{0.6077103834481342}()
R₂Default_arch(::Val{:znver1}) = StaticFloat{0.8775382433240162}()

# znver2 and znver3 share one set of values. These are znver2 values; they are
# assumed to suit znver3 better than the generic ones.
W₁Default_arch(::Union{Val{:znver2},Val{:znver3}}) = StaticFloat{0.1}()
W₂Default_arch(::Union{Val{:znver2},Val{:znver3}}) = StaticFloat{0.993489411720157}()
R₁Default_arch(::Union{Val{:znver2},Val{:znver3}}) = StaticFloat{0.6052218809954467}()
R₂Default_arch(::Union{Val{:znver2},Val{:znver3}}) = StaticFloat{0.7594052633561165}()

# Generic fallback for every other CPU name.
W₁Default_arch(_) = StaticFloat{0.1}()
W₂Default_arch(_) = StaticFloat{0.15989396641218157}()
R₁Default_arch(_) = StaticFloat{0.4203583148344484}()
# Generic R₂ is 0.6344856142604789, the value used by the `else` branch of the
# `@generated` R₂Default this replaces. It must not repeat the znver1 value
# (0.8775382433240162), which would silently change tuning on all non-Zen,
# non-AVX-512 CPUs.
R₂Default_arch(_) = StaticFloat{0.6344856142604789}()
38+
# Without AVX-512F (`False`), defer to the per-architecture table, keyed by
# whatever `VectorizationBase.cpu_name()` returns.
# NOTE(review): the `*Default_arch` methods dispatch on `Val{:name}`, so
# `cpu_name()` presumably returns a `Val` of the CPU name symbol; confirm.
function W₁Default(::False)
    return W₁Default_arch(VectorizationBase.cpu_name())
end

function W₂Default(::False)
    return W₂Default_arch(VectorizationBase.cpu_name())
end

function R₁Default(::False)
    return R₁Default_arch(VectorizationBase.cpu_name())
end

function R₂Default(::False)
    return R₂Default_arch(VectorizationBase.cpu_name())
end

# Zero-argument entry points: query the `x86_64_avx512f` feature once and
# dispatch on the resulting static boolean.
function W₁Default()
    avx512f = has_feature(Val(:x86_64_avx512f))
    return W₁Default(avx512f)
end

function W₂Default()
    avx512f = has_feature(Val(:x86_64_avx512f))
    return W₂Default(avx512f)
end

function R₁Default()
    avx512f = has_feature(Val(:x86_64_avx512f))
    return R₁Default(avx512f)
end

function R₂Default()
    avx512f = has_feature(Val(:x86_64_avx512f))
    return R₂Default(avx512f)
end
48+
49+
50+
51+
# Index of the cache level treated as the "first" cache: level 2 when
# `num_cache_levels()` is greater than 2, otherwise level 1.
function first_cache()
    more_than_two_levels = gt(num_cache_levels(), StaticInt{2}())
    return ifelse(more_than_two_levels, StaticInt{2}(), StaticInt{1}())
end

# Index of the "second" cache: always the level directly after `first_cache()`.
function second_cache()
    return first_cache() + One()
end
54+
# Usable size of the first cache, given the size `fcs` reported for it.
# When the first cache is level 2 and level 2 is reported as inclusive, the
# size of level 1 is subtracted; otherwise `fcs` is returned unchanged.
# Both `ifelse` arguments are evaluated before the selection is made.
function _first_cache_size(fcs::StaticInt)
    is_level_two = eq(first_cache(), StaticInt(2))
    subtract_l1 = is_level_two & cache_inclusive(StaticInt(2))
    return ifelse(subtract_l1, fcs - cache_size(One()), fcs)
end

# Fallback used when the size lookup produced `nothing`: 262144.
function _first_cache_size(::Nothing)
    return StaticInt(262144)
end

# Look up the size of `first_cache()` and normalise it via `_first_cache_size`.
function first_cache_size()
    reported = cache_size(first_cache())
    return _first_cache_size(reported)
end
58+
# Usable size of the second cache, given the size `scs` reported for it.
# When the second cache is reported as inclusive, the size of the first cache
# is subtracted; otherwise `scs` is returned unchanged.
# Both `ifelse` arguments are evaluated before the selection is made.
function _second_cache_size(scs::StaticInt)
    inclusive = cache_inclusive(second_cache())
    return ifelse(inclusive, scs - cache_size(first_cache()), scs)
end

# Fallback used when the size lookup produced `nothing`: 3145728.
function _second_cache_size(::Nothing)
    return StaticInt(3145728)
end

# Look up the size of `second_cache()` and normalise it via `_second_cache_size`.
function second_cache_size()
    reported = cache_size(second_cache())
    return _second_cache_size(reported)
end
6862
69- first_cache () = StaticInt {1} () + (snum_cache_levels () > StaticInt {2} () ? One () : Zero ())
70- second_cache () = StaticInt {2} () + (snum_cache_levels () > StaticInt {2} () ? One () : Zero ())
71-
72- function first_cache_size ()
73- fcs = scache_size (first_cache ())
74- if fcs === Zero ()
75- return StaticInt (262144 )
76- elseif (first_cache () === StaticInt (2 )) && cache_inclusivity ()[2 ]
77- return fcs - scache_size (One ())
78- else
79- return fcs
80- end
81- end
82- function second_cache_size ()
83- scs = scache_size (second_cache ())
84- if scs === Zero ()
85- return StaticInt (3145728 )
86- elseif cache_inclusivity ()[second_cache ()]
87- return scs - scache_size (first_cache ())
88- else
89- return scs
90- end
91- end
# Type-parameterised variants: integer-divide the zero-argument cache size by
# `static_sizeof(T)`.
# NOTE(review): presumably this converts a byte count into a count of `T`
# elements; the unit of the zero-argument result is not shown here, so confirm.
function first_cache_size(::Type{T}) where {T}
    return first_cache_size() ÷ static_sizeof(T)
end

function second_cache_size(::Type{T}) where {T}
    return second_cache_size() ÷ static_sizeof(T)
end
9465
95- bcache_count () = VectorizationBase. scache_count (second_cache ())
# Result of `VectorizationBase.num_cache` for the level given by `second_cache()`.
# NOTE(review): presumably the number of caches at that level; confirm.
function bcache_count()
    level = second_cache()
    return VectorizationBase.num_cache(level)
end
9667
# Module-level pointer slot, initialised to the null pointer.
# NOTE(review): presumably filled in later with the address of a buffer; the
# code that assigns it is not visible here.
const BCACHEPTR = Ref{Ptr{Cvoid}}(C_NULL)

# Atomic unsigned word, initialised to zero.
# NOTE(review): the name suggests it guards what `BCACHEPTR` points to; the
# locking protocol is not visible here, so confirm against its users.
const BCACHE_LOCK = Threads.Atomic{UInt}(UInt(0))
0 commit comments