Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions src/llvm-cpufeatures.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,7 @@ Optional<bool> always_have_fma(Function &intr) {
auto intr_name = intr.getName();
auto typ = intr_name.substr(strlen("julia.cpu.have_fma."));

#if defined(_OS_WINDOWS_)
// FMA on Windows is weirdly broken (#43088)
return false;
#elif defined(_CPU_AARCH64_)
#if defined(_CPU_AARCH64_)
return typ == "f32" || typ == "f64";
#else
(void)typ;
Expand Down
104 changes: 104 additions & 0 deletions src/runtime_intrinsics.c
Original file line number Diff line number Diff line change
Expand Up @@ -1175,8 +1175,112 @@ bi_fintrinsic(div,div_float)
bi_fintrinsic(frem,rem_float)

// ternary operators //
// runtime fma is broken on windows, define julia_fma(f) ourself with fma_emulated as reference.
#if defined(_OS_WINDOWS_)
// reinterpret(UInt64, ::Float64)
uint64_t bitcast_d2u(double d) {
uint64_t r;
memcpy(&r, &d, 8);
return r;
}
// reinterpret(Float64, ::UInt64)
double bitcast_u2d(uint64_t d) {
double r;
memcpy(&r, &d, 8);
return r;
}
// Base.splitbits(::Float64)
void splitbits(double *hi, double *lo, double d) {
*hi = bitcast_u2d(bitcast_d2u(d) & 0xfffffffff8000000);
*lo = d - *hi;
}
// Base.exponent(::Float64)
int exponent(double a) {
int e;
frexp(a, &e);
return e - 1;
}
// Base.fma_emulated(::Float32, ::Float32, ::Float32)
float julia_fmaf(float a, float b, float c) {
double ab, res;
ab = (double)a * b;
res = ab + (double)c;
if ((bitcast_d2u(res) & 0x1fffffff) == 0x10000000){
double reslo = fabsf(c) > fabs(ab) ? ab-(res - c) : c-(res - ab);
if (reslo != 0)
res = nextafter(res, copysign(1.0/0.0, reslo));
}
return (float)res;
}
// Base.twomul(::Float64, ::Float64)
void two_mul(double *abhi, double *ablo, double a, double b) {
double ahi, alo, bhi, blo, blohi, blolo;
splitbits(&ahi, &alo, a);
splitbits(&bhi, &blo, b);
splitbits(&blohi, &blolo, blo);
*abhi = a*b;
*ablo = alo*blohi - (((*abhi - ahi*bhi) - alo*bhi) - ahi*blo) + blolo*alo;
}
// Base.issubnormal(::Float64) (Win32's fpclassify seems broken)
int issubnormal(double d) {
uint64_t y = bitcast_d2u(d);
return ((y & 0x7ff0000000000000) == 0) & ((y & 0x000fffffffffffff) != 0);
}
#if defined(_WIN32)
// Win32 needs volatile (avoid over optimization?)
#define VDOUBLE volatile double
#else
#define VDOUBLE double
#endif

// Base.fma_emulated(::Float64, ::Float64, ::Float64)
double julia_fma(double a, double b, double c) {
double abhi, ablo, r, s;
two_mul(&abhi, &ablo, a, b);
if (!isfinite(abhi+c) || fabs(abhi) < 2.0041683600089732e-292 ||
issubnormal(a) || issubnormal(b)) {
int aandbfinite = isfinite(a) && isfinite(b);
if (!(aandbfinite && isfinite(c)))
return aandbfinite ? c : abhi+c;
if (a == 0 || b == 0)
return abhi+c;
int bias = exponent(a) + exponent(b);
VDOUBLE c_denorm = ldexp(c, -bias);
if (isfinite(c_denorm)) {
if (issubnormal(a))
a *= 4.503599627370496e15;
if (issubnormal(b))
b *= 4.503599627370496e15;
a = bitcast_u2d((bitcast_d2u(a) & 0x800fffffffffffff) | 0x3ff0000000000000);
b = bitcast_u2d((bitcast_d2u(b) & 0x800fffffffffffff) | 0x3ff0000000000000);
c = c_denorm;
two_mul(&abhi, &ablo, a, b);
r = abhi+c;
s = (fabs(abhi) > fabs(c)) ? (abhi-r+c+ablo) : (c-r+abhi+ablo);
double sumhi = r+s;
if (issubnormal(ldexp(sumhi, bias))) {
double sumlo = r-sumhi+s;
int bits_lost = -bias-exponent(sumhi)-1022;
if ((bits_lost != 1) ^ ((bitcast_d2u(sumhi)&1) == 1))
if (sumlo != 0)
sumhi = nextafter(sumhi, copysign(1.0/0.0, sumlo));
}
return ldexp(sumhi, bias);
}
if (isinf(abhi) && signbit(c) == signbit(a*b))
return abhi;
}
r = abhi+c;
s = (fabs(abhi) > fabs(c)) ? (abhi-r+c+ablo) : (c-r+abhi+ablo);
return r+s;
}
#define fma(a, b, c) \
sizeof(a) == sizeof(float) ? julia_fmaf(a, b, c) : julia_fma(a, b, c)
#else // On other systems use fma(f) directly
#define fma(a, b, c) \
sizeof(a) == sizeof(float) ? fmaf(a, b, c) : fma(a, b, c)
#endif

#define muladd(a, b, c) a * b + c
ter_fintrinsic(fma,fma_float)
ter_fintrinsic(muladd,muladd_float)
Expand Down
46 changes: 24 additions & 22 deletions test/math.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1289,29 +1289,31 @@ end
end

@testset "fma" begin
if !(@static Sys.iswindows() && Int===Int64) # windows fma currently seems broken somehow.
for func in (fma, Base.fma_emulated)
@test func(nextfloat(1.),nextfloat(1.),-1.0) === 4.440892098500626e-16
@test func(nextfloat(1f0),nextfloat(1f0),-1f0) === 2.3841858f-7
@testset "$T" for T in (Float32, Float64)
@test func(floatmax(T), T(2), -floatmax(T)) === floatmax(T)
@test func(floatmax(T), T(1), eps(floatmax((T)))) === T(Inf)
@test func(T(Inf), T(Inf), T(Inf)) === T(Inf)
@test func(floatmax(T), floatmax(T), -T(Inf)) === -T(Inf)
@test func(floatmax(T), -floatmax(T), T(Inf)) === T(Inf)
@test isnan_type(T, func(T(Inf), T(1), -T(Inf)))
@test isnan_type(T, func(T(Inf), T(0), -T(0)))
@test func(-zero(T), zero(T), -zero(T)) === -zero(T)
for _ in 1:2^18
a, b, c = reinterpret.(T, rand(Base.uinttype(T), 3))
@test isequal(func(a, b, c), fma(a, b, c)) || (a,b,c)
end
fma_list = (fma, Base.fma_emulated)
if !(Sys.islinux() && Int == Int32) # test runtime fma (skip linux32)
fma_list = (fma_list..., Base.fma_float)
end
for func in fma_list
@test func(nextfloat(1.),nextfloat(1.),-1.0) === 4.440892098500626e-16
@test func(nextfloat(1f0),nextfloat(1f0),-1f0) === 2.3841858f-7
@testset "$T" for T in (Float32, Float64)
@test func(floatmax(T), T(2), -floatmax(T)) === floatmax(T)
@test func(floatmax(T), T(1), eps(floatmax((T)))) === T(Inf)
@test func(T(Inf), T(Inf), T(Inf)) === T(Inf)
@test func(floatmax(T), floatmax(T), -T(Inf)) === -T(Inf)
@test func(floatmax(T), -floatmax(T), T(Inf)) === T(Inf)
@test isnan_type(T, func(T(Inf), T(1), -T(Inf)))
@test isnan_type(T, func(T(Inf), T(0), -T(0)))
@test func(-zero(T), zero(T), -zero(T)) === -zero(T)
for _ in 1:2^18
a, b, c = reinterpret.(T, rand(Base.uinttype(T), 3))
@test isequal(func(a, b, c), fma(a, b, c)) || (a,b,c)
end
@test func(floatmax(Float64), nextfloat(1.0), -floatmax(Float64)) === 3.991680619069439e292
@test func(floatmax(Float32), nextfloat(1f0), -floatmax(Float32)) === 4.0564817f31
@test func(1.6341681540852291e308, -2., floatmax(Float64)) == -1.4706431733081426e308 # case where inv(a)*c*a == Inf
@test func(-2., 1.6341681540852291e308, floatmax(Float64)) == -1.4706431733081426e308 # case where inv(b)*c*b == Inf
@test func(-1.9369631f13, 2.1513551f-7, -1.7354427f-24) == -4.1670958f6
end
@test func(floatmax(Float64), nextfloat(1.0), -floatmax(Float64)) === 3.991680619069439e292
@test func(floatmax(Float32), nextfloat(1f0), -floatmax(Float32)) === 4.0564817f31
@test func(1.6341681540852291e308, -2., floatmax(Float64)) == -1.4706431733081426e308 # case where inv(a)*c*a == Inf
@test func(-2., 1.6341681540852291e308, floatmax(Float64)) == -1.4706431733081426e308 # case where inv(b)*c*b == Inf
@test func(-1.9369631f13, 2.1513551f-7, -1.7354427f-24) == -4.1670958f6
end
end