Try to fix fma on Windows

N5N3 · N5N3 · commit b7abf7b7f1c0 · 2021-12-23T17:55:53.000+08:00
diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl
@@ -1563,6 +1563,8 @@ function builtin_nothrow(@nospecialize(f), argtypes::Array{Any, 1}, @nospecializ
     return _builtin_nothrow(f, argtypes, rt)
 end
 
+julia_fma(x, y, z) = error()
+
 function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtypes::Array{Any,1},
                            sv::Union{InferenceState,Nothing})
     if f === tuple
@@ -1572,7 +1574,11 @@ function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtyp
         if is_pure_intrinsic_infer(f) && _all(@nospecialize(a) -> isa(a, Const), argtypes)
             argvals = anymap(@nospecialize(a) -> (a::Const).val, argtypes)
             try
-                return Const(f(argvals...))
+                if f === Intrinsics.fma_float
+                    return Const(julia_fma(argvals...))
+                else
+                    return Const(f(argvals...))
+                end
             catch
             end
         end
diff --git a/base/floatfuncs.jl b/base/floatfuncs.jl
@@ -415,6 +415,14 @@ fma_llvm(x::Float64, y::Float64, z::Float64) = fma_float(x, y, z)
 fma(x::Float32, y::Float32, z::Float32) = Core.Intrinsics.have_fma(Float32) ? fma_llvm(x,y,z) : fma_emulated(x,y,z)
 fma(x::Float64, y::Float64, z::Float64) = Core.Intrinsics.have_fma(Float64) ? fma_llvm(x,y,z) : fma_emulated(x,y,z)
 
+@static if Sys.iswindows()
+    Core.Compiler.julia_fma(x::Float32, y::Float32, z::Float32) = fma_emulated(x,y,z)
+    Core.Compiler.julia_fma(x::Float64, y::Float64, z::Float64) = fma_emulated(x,y,z)
+else
+    Core.Compiler.julia_fma(x::Float32, y::Float32, z::Float32) = fma_float(x,y,z)
+    Core.Compiler.julia_fma(x::Float64, y::Float64, z::Float64) = fma_float(x,y,z)
+end
+
 function fma(a::Float16, b::Float16, c::Float16)
     Float16(muladd(Float32(a), Float32(b), Float32(c))) #don't use fma if the hardware doesn't have it.
 end
diff --git a/src/llvm-cpufeatures.cpp b/src/llvm-cpufeatures.cpp
@@ -36,10 +36,10 @@ Optional<bool> always_have_fma(Function &intr) {
     auto intr_name = intr.getName();
     auto typ = intr_name.substr(strlen("julia.cpu.have_fma."));
 
-#if defined(_OS_WINDOWS_)
+// #if defined(_OS_WINDOWS_)
     // FMA on Windows is weirdly broken (#43088)
-    return false;
-#elif defined(_CPU_AARCH64_)
+    // return false;
+#if defined(_CPU_AARCH64_)
     return typ == "f32" || typ == "f64";
 #else
     (void)typ;
diff --git a/test/math.jl b/test/math.jl
@@ -1289,29 +1289,32 @@ end
 end
 
 @testset "fma" begin
-    if !(@static Sys.iswindows() && Int===Int64) # windows fma currently seems broken somehow.
-        for func in (fma, Base.fma_emulated)
-            @test func(nextfloat(1.),nextfloat(1.),-1.0) === 4.440892098500626e-16
-            @test func(nextfloat(1f0),nextfloat(1f0),-1f0) === 2.3841858f-7
-            @testset "$T" for T in (Float32, Float64)
-                @test func(floatmax(T), T(2), -floatmax(T)) === floatmax(T)
-                @test func(floatmax(T), T(1), eps(floatmax((T)))) === T(Inf)
-                @test func(T(Inf), T(Inf), T(Inf)) === T(Inf)
-                @test func(floatmax(T), floatmax(T), -T(Inf)) === -T(Inf)
-                @test func(floatmax(T), -floatmax(T), T(Inf)) === T(Inf)
-                @test isnan_type(T, func(T(Inf), T(1), -T(Inf)))
-                @test isnan_type(T, func(T(Inf), T(0), -T(0)))
-                @test func(-zero(T), zero(T), -zero(T)) === -zero(T)
-                for _ in 1:2^18
-                    a, b, c = reinterpret.(T, rand(Base.uinttype(T), 3))
-                    @test isequal(func(a, b, c), fma(a, b, c)) || (a,b,c)
-                end
+    for func in (fma, Base.fma_emulated)
+        @test func(nextfloat(1.),nextfloat(1.),-1.0) === 4.440892098500626e-16
+        @test func(nextfloat(1f0),nextfloat(1f0),-1f0) === 2.3841858f-7
+        @testset "$T" for T in (Float32, Float64)
+            @test func(floatmax(T), T(2), -floatmax(T)) === floatmax(T)
+            @test func(floatmax(T), T(1), eps(floatmax((T)))) === T(Inf)
+            @test func(T(Inf), T(Inf), T(Inf)) === T(Inf)
+            @test func(floatmax(T), floatmax(T), -T(Inf)) === -T(Inf)
+            @test func(floatmax(T), -floatmax(T), T(Inf)) === T(Inf)
+            @test isnan_type(T, func(T(Inf), T(1), -T(Inf)))
+            @test isnan_type(T, func(T(Inf), T(0), -T(0)))
+            @test func(-zero(T), zero(T), -zero(T)) === -zero(T)
+            for _ in 1:2^18
+                a, b, c = reinterpret.(T, rand(Base.uinttype(T), 3))
+                @test isequal(func(a, b, c), fma(a, b, c)) || (a,b,c)
             end
-            @test func(floatmax(Float64), nextfloat(1.0), -floatmax(Float64)) === 3.991680619069439e292
-            @test func(floatmax(Float32), nextfloat(1f0), -floatmax(Float32)) === 4.0564817f31
-            @test func(1.6341681540852291e308, -2., floatmax(Float64)) == -1.4706431733081426e308 # case where inv(a)*c*a == Inf
-            @test func(-2., 1.6341681540852291e308, floatmax(Float64)) == -1.4706431733081426e308 # case where inv(b)*c*b == Inf
-            @test func(-1.9369631f13, 2.1513551f-7, -1.7354427f-24) == -4.1670958f6
         end
+        @test func(floatmax(Float64), nextfloat(1.0), -floatmax(Float64)) === 3.991680619069439e292
+        @test func(floatmax(Float32), nextfloat(1f0), -floatmax(Float32)) === 4.0564817f31
+        @test func(1.6341681540852291e308, -2., floatmax(Float64)) == -1.4706431733081426e308 # case where inv(a)*c*a == Inf
+        @test func(-2., 1.6341681540852291e308, floatmax(Float64)) == -1.4706431733081426e308 # case where inv(b)*c*b == Inf
+        @test func(-1.9369631f13, 2.1513551f-7, -1.7354427f-24) == -4.1670958f6
+    end
+    @static if Sys.iswindows()
+        # TODO: if this test passes one day, we can remove the fma hack on Windows
+        error = @eval Base.fma_float(-1.9369631f13, 2.1513551f-7, -1.7354427f-24)
+        @test_broken error == fma(-1.9369631f13, 2.1513551f-7, -1.7354427f-24)
     end
 end