diff --git a/base/float.jl b/base/float.jl index 8a611cf38f8e8..4d65e56be4ded 100644 --- a/base/float.jl +++ b/base/float.jl @@ -1,5 +1,7 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +const IEEEFloat = Union{Float16, Float32, Float64} + ## floating point traits ## """ @@ -605,7 +607,7 @@ uabs(x::Signed) = unsigned(abs(x)) The result of `n` iterative applications of `nextfloat` to `x` if `n >= 0`, or `-n` applications of `prevfloat` if `n < 0`. """ -function nextfloat(f::Union{Float16,Float32,Float64}, d::Integer) +function nextfloat(f::IEEEFloat, d::Integer) F = typeof(f) fumax = reinterpret(Unsigned, F(Inf)) U = typeof(fumax) @@ -711,12 +713,12 @@ end Test whether a floating point number is subnormal. """ -function issubnormal end +function issubnormal(x::T) where {T<:IEEEFloat} + y = reinterpret(Unsigned, x) + (y & exponent_mask(T) == 0) & (y & significand_mask(T) != 0) +end @eval begin - issubnormal(x::Float32) = (abs(x) < $(bitcast(Float32, 0x00800000))) & (x!=0) - issubnormal(x::Float64) = (abs(x) < $(bitcast(Float64, 0x0010000000000000))) & (x!=0) - typemin(::Type{Float16}) = $(bitcast(Float16, 0xfc00)) typemax(::Type{Float16}) = $(Inf16) typemin(::Type{Float32}) = $(-Inf32) @@ -864,32 +866,11 @@ exponent_half(::Type{Float16}) = 0x3800 significand_mask(::Type{Float16}) = 0x03ff # integer size of float -fpinttype(::Type{Float64}) = UInt64 -fpinttype(::Type{Float32}) = UInt32 -fpinttype(::Type{Float16}) = UInt16 - -## TwicePrecision utilities -# The numeric constants are half the number of bits in the mantissa -for (F, T, n) in ((Float16, UInt16, 5), (Float32, UInt32, 12), (Float64, UInt64, 26)) - @eval begin - function truncbits(x::$F, nb) - @_inline_meta - truncmask(x, typemax($T) << nb) - end - function truncmask(x::$F, mask) - @_inline_meta - reinterpret($F, mask & reinterpret($T, x)) - end - function splitprec(x::$F) - @_inline_meta - hi = truncmask(x, typemax($T) << $n) - hi, x-hi - end - end -end +uinttype(::Type{Float64}) = UInt64 +uinttype(::Type{Float32}) = UInt32 +uinttype(::Type{Float16}) = UInt16 -truncbits(x, nb) = x -truncmask(x, mask) = x +Base.iszero(x::Float16) = reinterpret(UInt16, x) & ~sign_mask(Float16) == 0x0000 ## Array operations on floating point numbers ## @@ -904,7 +885,8 @@ end float(r::StepRange) = float(r.start):float(r.step):float(last(r)) float(r::UnitRange) = float(r.start):float(last(r)) -float(r::StepRangeLen) = StepRangeLen(float(r.ref), float(r.step), length(r), r.offset) +float(r::StepRangeLen{T}) where {T} = + StepRangeLen{typeof(float(T(r.ref)))}(float(r.ref), float(r.step), length(r), r.offset) function float(r::LinSpace) LinSpace(float(r.start), float(r.stop), length(r)) end diff --git a/base/math.jl b/base/math.jl index 64f957d7a1467..b90caf4574315 100644 --- a/base/math.jl +++ b/base/math.jl @@ -21,11 +21,11 @@ import Base: log, exp, sin, cos, tan, sinh, cosh, tanh, asin, exp10, expm1, log1p using Base: sign_mask, exponent_mask, exponent_one, - exponent_half, fpinttype, significand_mask + exponent_half, uinttype, significand_mask using Core.Intrinsics: sqrt_llvm -const IEEEFloat = Union{Float16, Float32, Float64} +using Base.IEEEFloat @noinline function throw_complex_domainerror(f, x) throw(DomainError(x, string("$f will only return a complex result if called with a ", @@ -588,7 +588,7 @@ function ldexp(x::T, e::Integer) where T<:IEEEFloat return flipsign(T(Inf), x) end if k > 0 # normal case - xu = (xu & ~exponent_mask(T)) | (rem(k, fpinttype(T)) << significand_bits(T)) + xu = (xu & ~exponent_mask(T)) | (rem(k, uinttype(T)) << significand_bits(T)) return reinterpret(T, xu) else # subnormal case if k <= -significand_bits(T) # underflow @@ -598,7 +598,7 @@ function ldexp(x::T, e::Integer) where T<:IEEEFloat end k += significand_bits(T) z = T(2.0)^-significand_bits(T) - xu = (xu & ~exponent_mask(T)) | (rem(k, fpinttype(T)) << significand_bits(T)) + xu = (xu & ~exponent_mask(T)) | (rem(k, uinttype(T)) << significand_bits(T)) return z*reinterpret(T, xu) end end diff --git a/base/range.jl b/base/range.jl index 381efd9ea5f2f..5bf8a2896767c 100644 --- a/base/range.jl +++ b/base/range.jl @@ -205,6 +205,8 @@ end StepRangeLen(ref::R, step::S, len::Integer, offset::Integer = 1) where {R,S} = StepRangeLen{typeof(ref+0*step),R,S}(ref, step, len, offset) +StepRangeLen{T}(ref::R, step::S, len::Integer, offset::Integer = 1) where {T,R,S} = + StepRangeLen{T,R,S}(ref, step, len, offset) ## linspace and logspace @@ -370,9 +372,12 @@ julia> step(linspace(2.5,10.9,85)) """ step(r::StepRange) = r.step step(r::AbstractUnitRange) = 1 -step(r::StepRangeLen) = r.step +step(r::StepRangeLen{T}) where {T} = T(r.step) step(r::LinSpace) = (last(r)-first(r))/r.lendiv +step_hp(r::StepRangeLen) = r.step +step_hp(r::Range) = step(r) + unsafe_length(r::Range) = length(r) # generic fallback function unsafe_length(r::StepRange) @@ -455,10 +460,9 @@ done(r::StepRange, i) = isempty(r) | (i < min(r.start, r.stop)) | (i > max(r.sta done(r::StepRange, i::Integer) = isempty(r) | (i == oftype(i, r.stop) + r.step) -# see also twiceprecision.jl -start(r::StepRangeLen) = (unsafe_getindex(r, 1), 1) -next(r::StepRangeLen{T}, s) where {T} = s[1], (T(s[1]+r.step), s[2]+1) -done(r::StepRangeLen, s) = s[2] > length(r) +start(r::StepRangeLen) = 1 +next(r::StepRangeLen{T}, i) where {T} = unsafe_getindex(r, i), i+1 +done(r::StepRangeLen, i) = i > length(r) start(r::UnitRange{T}) where {T} = oftype(r.start + oneunit(T), r.start) next(r::AbstractUnitRange{T}, i) where {T} = (convert(T, i), i + oneunit(T)) @@ -496,7 +500,7 @@ end function getindex(v::Range{T}, i::Integer) where T @_inline_meta - ret = convert(T, first(v) + (i - 1)*step(v)) + ret = convert(T, first(v) + (i - 1)*step_hp(v)) ok = ifelse(step(v) > zero(step(v)), (ret <= v.stop) & (ret >= v.start), (ret <= v.start) & (ret >= v.stop)) @@ -516,6 +520,11 @@ function unsafe_getindex(r::StepRangeLen{T}, i::Integer) where T T(r.ref + u*r.step) end +function _getindex_hiprec(r::StepRangeLen, i::Integer) # without rounding by T + u = i - r.offset + r.ref + u*r.step +end + function unsafe_getindex(r::LinSpace, i::Integer) lerpi.(i-1, r.lendiv, r.start, r.stop) end @@ -556,11 +565,14 @@ function getindex(r::StepRange, s::Range{<:Integer}) range(st, step(r)*step(s), length(s)) end -function getindex(r::StepRangeLen, s::OrdinalRange{<:Integer}) +function getindex(r::StepRangeLen{T}, s::OrdinalRange{<:Integer}) where {T} @_inline_meta @boundscheck checkbounds(r, s) - vfirst = unsafe_getindex(r, first(s)) - return StepRangeLen(vfirst, r.step*step(s), length(s)) + # Find closest approach to offset by s + ind = linearindices(s) + offset = max(min(1 + round(Int, (r.offset - first(s))/step(s)), last(ind)), first(ind)) + ref = _getindex_hiprec(r, first(s) + (offset-1)*step(s)) + return StepRangeLen{T}(ref, r.step*step(s), length(s), offset) end function getindex(r::LinSpace, s::OrdinalRange{<:Integer}) @@ -725,16 +737,17 @@ end ## linear operations on ranges ## -(r::OrdinalRange) = range(-first(r), -step(r), length(r)) --(r::StepRangeLen) = StepRangeLen(-r.ref, -r.step, length(r), r.offset) +-(r::StepRangeLen{T,R,S}) where {T,R,S} = + StepRangeLen{T,R,S}(-r.ref, -r.step, length(r), r.offset) -(r::LinSpace) = LinSpace(-r.start, -r.stop, length(r)) +(x::Real, r::AbstractUnitRange) = range(x + first(r), length(r)) # For #18336 we need to prevent promotion of the step type: +(x::Number, r::AbstractUnitRange) = range(x + first(r), step(r), length(r)) +(x::Number, r::Range) = (x+first(r)):step(r):(x+last(r)) -function +(x::Number, r::StepRangeLen) +function +(x::Number, r::StepRangeLen{T}) where T newref = x + r.ref - StepRangeLen{eltype(newref),typeof(newref),typeof(r.step)}(newref, r.step, length(r), r.offset) + StepRangeLen{typeof(T(r.ref) + x)}(newref, r.step, length(r), r.offset) end function +(x::Number, r::LinSpace) LinSpace(x + r.start, x + r.stop, r.len) @@ -750,15 +763,18 @@ end -(r::Range, x::Number) = +(-x, r) *(x::Number, r::Range) = range(x*first(r), x*step(r), length(r)) -*(x::Number, r::StepRangeLen) = StepRangeLen(x*r.ref, x*r.step, length(r), r.offset) +*(x::Number, r::StepRangeLen{T}) where {T} = + StepRangeLen{typeof(x*T(r.ref))}(x*r.ref, x*r.step, length(r), r.offset) *(x::Number, r::LinSpace) = LinSpace(x * r.start, x * r.stop, r.len) # separate in case of noncommutative multiplication *(r::Range, x::Number) = range(first(r)*x, step(r)*x, length(r)) -*(r::StepRangeLen, x::Number) = StepRangeLen(r.ref*x, r.step*x, length(r), r.offset) +*(r::StepRangeLen{T}, x::Number) where {T} = + StepRangeLen{typeof(T(r.ref)*x)}(r.ref*x, r.step*x, length(r), r.offset) *(r::LinSpace, x::Number) = LinSpace(r.start * x, r.stop * x, r.len) /(r::Range, x::Number) = range(first(r)/x, step(r)/x, length(r)) -/(r::StepRangeLen, x::Number) = StepRangeLen(r.ref/x, r.step/x, length(r), r.offset) +/(r::StepRangeLen{T}, x::Number) where {T} = + StepRangeLen{typeof(T(r.ref)/x)}(r.ref/x, r.step/x, length(r), r.offset) /(r::LinSpace, x::Number) = LinSpace(r.start / x, r.stop / x, r.len) /(x::Number, r::Range) = [ x/y for y=r ] diff --git a/base/special/exp.jl b/base/special/exp.jl index e49bfb5ed942b..ccabe295597db 100644 --- a/base/special/exp.jl +++ b/base/special/exp.jl @@ -117,11 +117,11 @@ function exp(x::T) where T<:Union{Float32,Float64} if k > -significand_bits(T) # multiply by 2.0 first to prevent overflow, which helps extends the range k == exponent_max(T) && return y * T(2.0) * T(2.0)^(exponent_max(T) - 1) - twopk = reinterpret(T, rem(exponent_bias(T) + k, fpinttype(T)) << significand_bits(T)) + twopk = reinterpret(T, rem(exponent_bias(T) + k, uinttype(T)) << significand_bits(T)) return y*twopk else # add significand_bits(T) + 1 to lift the range outside the subnormals - twopk = reinterpret(T, rem(exponent_bias(T) + significand_bits(T) + 1 + k, fpinttype(T)) << significand_bits(T)) + twopk = reinterpret(T, rem(exponent_bias(T) + significand_bits(T) + 1 + k, uinttype(T)) << significand_bits(T)) return y * twopk * T(2.0)^(-significand_bits(T) - 1) end elseif xa < reinterpret(Unsigned, exp_small_thres(T)) # |x| < exp_small_thres diff --git a/base/special/exp10.jl b/base/special/exp10.jl index d8ce280468b6c..711da3b04549b 100644 --- a/base/special/exp10.jl +++ b/base/special/exp10.jl @@ -119,11 +119,11 @@ function exp10(x::T) where T<:Union{Float32,Float64} if k > -significand_bits(T) # multiply by 2.0 first to prevent overflow, extending the range k == exponent_max(T) && return y * T(2.0) * T(2.0)^(exponent_max(T) - 1) - twopk = reinterpret(T, rem(exponent_bias(T) + k, fpinttype(T)) << significand_bits(T)) + twopk = reinterpret(T, rem(exponent_bias(T) + k, uinttype(T)) << significand_bits(T)) return y*twopk else # add significand_bits(T) + 1 to lift the range outside the subnormals - twopk = reinterpret(T, rem(exponent_bias(T) + significand_bits(T) + 1 + k, fpinttype(T)) << significand_bits(T)) + twopk = reinterpret(T, rem(exponent_bias(T) + significand_bits(T) + 1 + k, uinttype(T)) << significand_bits(T)) return y * twopk * T(2.0)^(-significand_bits(T) - 1) end elseif xa < reinterpret(Unsigned, exp10_small_thres(T)) # |x| < exp10_small_thres diff --git a/base/sysimg.jl b/base/sysimg.jl index de68ed222632b..8b180d773cdef 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -84,7 +84,6 @@ include("tuple.jl") include("pair.jl") include("traits.jl") include("range.jl") -include("twiceprecision.jl") include("expr.jl") include("error.jl") @@ -135,6 +134,7 @@ include("hashing.jl") include("rounding.jl") importall .Rounding include("float.jl") +include("twiceprecision.jl") include("complex.jl") include("rational.jl") include("multinverses.jl") diff --git a/base/twiceprecision.jl b/base/twiceprecision.jl index 8a849dcb9af68..1baf1b9a921bd 100644 --- a/base/twiceprecision.jl +++ b/base/twiceprecision.jl @@ -6,6 +6,157 @@ # that return r[3] == 0.3. Otherwise, we have roundoff error due to # 0.1 + 2*0.1 = 0.30000000000000004 +""" + hi, lo = splitprec(F::Type{<:AbstractFloat}, i::Integer) + +Represent an integer `i` as a pair of floating-point numbers `hi` and +`lo` (of type `F`) such that: +- `widen(hi) + widen(lo) ≈ i`. It is exact if 1.5 * (number of precision bits in `F`) is greater than the number of bits in `i`. +- all bits in `hi` are more significant than any of the bits in `lo` +- `hi` can be exactly multiplied by the `hi` component of another call to `splitprec`. + +In particular, while `convert(Float64, i)` can be lossy since Float64 +has only 53 bits of precision, `splitprec(Float64, i)` is exact for +any Int64/UInt64. +""" +function splitprec(::Type{F}, i::Integer) where {F<:AbstractFloat} + hi = truncbits(F(i), cld(precision(F), 2)) + ihi = oftype(i, hi) + hi, F(i - ihi) +end + +function truncmask(x::F, mask) where {F<:IEEEFloat} + reinterpret(F, mask & reinterpret(uinttype(F), x)) +end +truncmask(x, mask) = x + +function truncbits(x::F, nb) where {F<:IEEEFloat} + truncmask(x, typemax(uinttype(F)) << nb) +end +truncbits(x, nb) = x + + +## Dekker arithmetic + +""" + hi, lo = canonicalize2(big, little) + +Generate a representation where all the nonzero bits in `hi` are more +significant than any of the nonzero bits in `lo`. `big` must be larger +in absolute value than `little`. +""" +function canonicalize2(big, little) + h = big+little + h, (big - h) + little +end + +""" + zhi, zlo = add12(x, y) + +A high-precision representation of `x + y` for floating-point +numbers. Mathematically, `zhi + zlo = x + y`, where `zhi` contains the +most significant bits and `zlo` the least significant. + +Because of the way floating-point numbers are printed, `lo` may not +look the way you might expect from the standpoint of decimal +representation, even though it is exact from the standpoint of binary +representation. + +Example: +```julia +julia> 1.0 + 1.0001e-15 +1.000000000000001 + +julia> big(1.0) + big(1.0001e-15) +1.000000000000001000100000000000020165767380775934141445417482375879192346701529 + +julia> hi, lo = Base.add12(1.0, 1.0001e-15) +(1.000000000000001, -1.1012302462515652e-16) + +julia> big(hi) + big(lo) +1.000000000000001000100000000000020165767380775934141445417482375879192346701529 +``` + +`lo` differs from 1.0e-19 because `hi` is not exactly equal to +the first 16 decimal digits of the answer. +""" +function add12(x::T, y::T) where {T} + x, y = ifelse(abs(y) > abs(x), (y, x), (x, y)) + canonicalize2(x, y) +end +add12(x, y) = add12(promote_noncircular(x, y)...) + +""" + zhi, zlo = mul12(x, y) + +A high-precision representation of `x * y` for floating-point +numbers. Mathematically, `zhi + zlo = x * y`, where `zhi` contains the +most significant bits and `zlo` the least significant. + +Example: +```julia +julia> x = Float32(π) +3.1415927f0 + +julia> x * x +9.869605f0 + +julia> Float64(x) * Float64(x) +9.869604950382893 + +julia> hi, lo = Base.mul12(x, x) +(9.869605f0, -1.140092f-7) + +julia> Float64(hi) + Float64(lo) +9.869604950382893 +``` +""" +function mul12(x::T, y::T) where {T<:AbstractFloat} + h = x * y + ifelse(iszero(h) | !isfinite(h), (h, h), canonicalize2(h, fma(x, y, -h))) +end +mul12(x::T, y::T) where {T} = (p = x * y; (p, zero(p))) +mul12(x, y) = mul12(promote_noncircular(x, y)...) + +""" + zhi, zlo = div12(x, y) + +A high-precision representation of `x / y` for floating-point +numbers. Mathematically, `zhi + zlo ≈ x / y`, where `zhi` contains the +most significant bits and `zlo` the least significant. + +Example: +```julia +julia> x, y = Float32(π), 3.1f0 +(3.1415927f0, 3.1f0) + +julia> x / y +1.013417f0 + +julia> Float64(x) / Float64(y) +1.0134170444063078 + +julia> hi, lo = Base.div12(x, y) +(1.013417f0, 3.8867366f-8) + +julia> Float64(hi) + Float64(lo) +1.0134170444063066 +""" +function div12(x::T, y::T) where {T<:AbstractFloat} + # We lose precision if any intermediate calculation results in a subnormal. + # To prevent this from happening, standardize the values. + xs, xe = frexp(x) + ys, ye = frexp(y) + r = xs / ys + rh, rl = canonicalize2(r, -fma(r, ys, -xs)/ys) + ifelse(iszero(r) | !isfinite(r), (r, r), (ldexp(rh, xe-ye), ldexp(rl, xe-ye))) +end +div12(x::T, y::T) where {T} = (p = x / y; (p, zero(p))) +div12(x, y) = div12(promote_noncircular(x, y)...) + + +## TwicePrecision + """ TwicePrecision{T}(hi::T, lo::T) TwicePrecision{T}((num, denom)) @@ -16,7 +167,7 @@ Float64`. `hi` represents the high bits (most significant bits) and `num//denom` can be approximated conveniently using the syntax `TwicePrecision{T}((num, denom))`. -When used with `T<:AbstractFloat` to construct an exact +When used with `T<:Union{Float16,Float32,Float64}` to construct an "exact" `StepRangeLen`, `ref` should be the range element with smallest magnitude and `offset` set to the corresponding index. For efficiency, multiplication of `step` by the index is not performed at @@ -35,30 +186,52 @@ struct TwicePrecision{T} lo::T # least significant bits end -function TwicePrecision{T}(nd::Tuple{I,I}) where {T,I} +TwicePrecision{T}(x::T) where {T} = TwicePrecision{T}(x, zero(T)) + +function TwicePrecision{T}(x) where {T} + xT = convert(T, x) + Δx = x - xT + TwicePrecision{T}(xT, T(Δx)) +end + +TwicePrecision{T}(i::Integer) where {T<:AbstractFloat} = + TwicePrecision{T}(canonicalize2(splitprec(T, i)...)...) + +TwicePrecision(x) = TwicePrecision{typeof(x)}(x) + +# Numerator/Denominator constructors +function TwicePrecision{T}(nd::Tuple{Integer,Integer}) where {T<:Union{Float16,Float32}} n, d = nd - TwicePrecision{T}(n, zero(T)) / d + TwicePrecision{T}(n/d) +end + +function TwicePrecision{T}(nd::Tuple{Any,Any}) where {T} + n, d = nd + TwicePrecision{T}(n) / d end function TwicePrecision{T}(nd::Tuple{I,I}, nb::Integer) where {T,I} twiceprecision(TwicePrecision{T}(nd), nb) end -function twiceprecision(val::T, nb::Integer) where T<:Number +# Truncating constructors. Useful for generating values that can be +# exactly multiplied by small integers. +function twiceprecision(val::T, nb::Integer) where {T<:IEEEFloat} hi = truncbits(val, nb) TwicePrecision{T}(hi, val - hi) end -function twiceprecision(val::TwicePrecision{T}, nb::Integer) where T<:Number +function twiceprecision(val::TwicePrecision{T}, nb::Integer) where {T<:IEEEFloat} hi = truncbits(val.hi, nb) TwicePrecision{T}(hi, (val.hi - hi) + val.lo) end nbitslen(r::StepRangeLen) = nbitslen(eltype(r), length(r), r.offset) -nbitslen(::Type{Float64}, len, offset) = min(26, nbitslen(len, offset)) -nbitslen(::Type{Float32}, len, offset) = min(12, nbitslen(len, offset)) -nbitslen(::Type{Float16}, len, offset) = min(5, nbitslen(len, offset)) -nbitslen(len, offset) = len < 2 ? 0 : ceil(Int, log2(max(offset-1, len-offset))) +nbitslen(::Type{T}, len, offset) where {T<:IEEEFloat} = + min(cld(precision(T), 2), nbitslen(len, offset)) +# The +1 here is for safety, because the precision of the significand +# is 1 bit higher than the number that are explicitly stored. +nbitslen(len, offset) = len < 2 ? 0 : ceil(Int, log2(max(offset-1, len-offset))) + 1 eltype(::Type{TwicePrecision{T}}) where {T} = T @@ -83,25 +256,109 @@ big(x::TwicePrecision) = big(x.hi) + big(x.lo) zero(::Type{TwicePrecision{T}}) where {T} = TwicePrecision{T}(0, 0) +# Arithmetic + +function +(x::TwicePrecision, y::Number) + s_hi, s_lo = add12(x.hi, y) + TwicePrecision(canonicalize2(s_hi, s_lo+x.lo)...) +end ++(x::Number, y::TwicePrecision) = y+x + +function +(x::TwicePrecision{T}, y::TwicePrecision{T}) where T + r = x.hi + y.hi + s = abs(x.hi) > abs(y.hi) ? (((x.hi - r) + y.hi) + y.lo) + x.lo : (((y.hi - r) + x.hi) + x.lo) + y.lo + TwicePrecision(canonicalize2(r, s)...) +end ++(x::TwicePrecision, y::TwicePrecision) = +(promote_noncircular(x, y)...) + +-(x::TwicePrecision, y::TwicePrecision) = x + (-y) +-(x::TwicePrecision, y::Number) = x + (-y) +-(x::Number, y::TwicePrecision) = x + (-y) + +function *(x::TwicePrecision, v::Number) + v == 0 && return TwicePrecision(x.hi*v, x.lo*v) + x * TwicePrecision{typeof(x.hi*v)}(v) +end +function *(x::TwicePrecision{<:IEEEFloat}, v::Integer) + v == 0 && return TwicePrecision(x.hi*v, x.lo*v) + nb = ceil(Int, log2(abs(v))) + u = truncbits(x.hi, nb) + TwicePrecision(canonicalize2(u*v, ((x.hi-u) + x.lo)*v)...) +end +*(v::Number, x::TwicePrecision) = x*v + +function *(x::TwicePrecision{T}, y::TwicePrecision{T}) where {T} + zh, zl = mul12(x.hi, y.hi) + ret = TwicePrecision{T}(canonicalize2(zh, (x.hi * y.lo + x.lo * y.hi) + zl)...) + ifelse(iszero(zh) | !isfinite(zh), TwicePrecision{T}(zh, zh), ret) +end +*(x::TwicePrecision, y::TwicePrecision) = *(promote_noncircular(x, y)...) + +function /(x::TwicePrecision, v::Number) + x / TwicePrecision{typeof(x.hi/v)}(v) +end + +function /(x::TwicePrecision, y::TwicePrecision) + hi = x.hi / y.hi + uh, ul = mul12(hi, y.hi) + lo = ((((x.hi - uh) - ul) + x.lo) - hi*y.lo)/y.hi + ret = TwicePrecision(canonicalize2(hi, lo)...) + ifelse(iszero(hi) | !isfinite(hi), TwicePrecision(hi, hi), ret) +end + ## StepRangeLen -# If using TwicePrecision numbers, deliberately force user to specify offset -StepRangeLen(ref::TwicePrecision{T}, step::TwicePrecision{T}, len::Integer, offset::Integer) where {T} = +# Use TwicePrecision only for Float64; use Float64 for T<:Union{Float16,Float32} +# Ratio-of-integers constructors +function steprangelen_hp(::Type{Float64}, ref::Tuple{Integer,Integer}, + step::Tuple{Integer,Integer}, nb::Integer, + len::Integer, offset::Integer) + StepRangeLen(TwicePrecision{Float64}(ref), + TwicePrecision{Float64}(step, nb), Int(len), offset) +end + +function steprangelen_hp(::Type{T}, ref::Tuple{Integer,Integer}, + step::Tuple{Integer,Integer}, nb::Integer, + len::Integer, offset::Integer) where {T<:IEEEFloat} + StepRangeLen{T}(ref[1]/ref[2], step[1]/step[2], Int(len), offset) +end + +# AbstractFloat constructors (can supply a single number or a 2-tuple +const F_or_FF = Union{AbstractFloat, Tuple{AbstractFloat,AbstractFloat}} +asF64(x::AbstractFloat) = Float64(x) +asF64(x::Tuple{AbstractFloat,AbstractFloat}) = Float64(x[1]) + Float64(x[2]) + +function steprangelen_hp(::Type{Float64}, ref::F_or_FF, + step::F_or_FF, nb::Integer, + len::Integer, offset::Integer) + StepRangeLen(TwicePrecision{Float64}(ref...), + twiceprecision(TwicePrecision{Float64}(step...), nb), Int(len), offset) +end + +function steprangelen_hp(::Type{T}, ref::F_or_FF, + step::F_or_FF, nb::Integer, + len::Integer, offset::Integer) where {T<:IEEEFloat} + StepRangeLen{T}(asF64(ref), + asF64(step), Int(len), offset) +end + + + +StepRangeLen(ref::TwicePrecision{T}, step::TwicePrecision{T}, + len::Integer, offset::Integer=1) where {T} = StepRangeLen{T,TwicePrecision{T},TwicePrecision{T}}(ref, step, len, offset) # Construct range for rational start=start_n/den, step=step_n/den function floatrange(::Type{T}, start_n::Integer, step_n::Integer, len::Integer, den::Integer) where T if len < 2 - return StepRangeLen(TwicePrecision{T}((start_n, den)), - TwicePrecision{T}((step_n, den)), Int(len), 1) + return steprangelen_hp(T, (start_n, den), (step_n, den), 0, Int(len), 1) end # index of smallest-magnitude value imin = clamp(round(Int, -start_n/step_n+1), 1, Int(len)) # Compute smallest-magnitude element to 2x precision ref_n = start_n+(imin-1)*step_n # this shouldn't overflow, so don't check nb = nbitslen(T, len, imin) - StepRangeLen(TwicePrecision{T}((ref_n, den)), - TwicePrecision{T}((step_n, den), nb), Int(len), imin) + steprangelen_hp(T, (ref_n, den), (step_n, den), nb, Int(len), imin) end function floatrange(a::AbstractFloat, st::AbstractFloat, len::Real, divisor::AbstractFloat) @@ -116,8 +373,7 @@ function floatrange(a::AbstractFloat, st::AbstractFloat, len::Real, divisor::Abs end # Fallback (misses the opportunity to set offset different from 1, # but otherwise this is still high-precision) - StepRangeLen(TwicePrecision{T}((a,divisor)), - TwicePrecision{T}((st,divisor), nbitslen(T, len, 1)), Int(len), 1) + steprangelen_hp(T, (a,divisor), (st,divisor), nbitslen(T, len, 1), Int(len), 1) end function colon(start::T, step::T, stop::T) where T<:Union{Float16,Float32,Float64} @@ -158,7 +414,7 @@ function colon(start::T, step::T, stop::T) where T<:Union{Float16,Float32,Float6 # if we've overshot the end, subtract one: len -= (start < stop < stop′) + (start > stop > stop′) end - StepRangeLen(TwicePrecision(start, zero(T)), twiceprecision(step, nbitslen(T, len, 1)), len) + steprangelen_hp(T, start, step, 0, len, 1) end function range(a::T, st::T, len::Integer) where T<:Union{Float16,Float32,Float64} @@ -175,38 +431,28 @@ function range(a::T, st::T, len::Integer) where T<:Union{Float16,Float32,Float64 return floatrange(T, start_n, step_n, len, den) end end - StepRangeLen(TwicePrecision(a, zero(T)), TwicePrecision(st, zero(T)), len) -end - -step(r::StepRangeLen{T,R,S}) where {T,R,S<:TwicePrecision} = convert(eltype(S), r.step) - -start(r::StepRangeLen{<:Any,<:TwicePrecision,<:TwicePrecision}) = 1 -done(r::StepRangeLen{<:Any,<:TwicePrecision,<:TwicePrecision}, i::Int) = length(r) < i -function next(r::StepRangeLen{<:Any,<:TwicePrecision,<:TwicePrecision}, i::Int) - @_inline_meta - unsafe_getindex(r, i), i+1 + steprangelen_hp(T, a, st, 0, len, 1) end # This assumes that r.step has already been split so that (0:len-1)*r.step.hi is exact function unsafe_getindex(r::StepRangeLen{T,<:TwicePrecision,<:TwicePrecision}, i::Integer) where T - # Very similar to _getindex_hiprec, but optimized to avoid a 2nd call to add2 + # Very similar to _getindex_hiprec, but optimized to avoid a 2nd call to add12 @_inline_meta u = i - r.offset shift_hi, shift_lo = u*r.step.hi, u*r.step.lo - x_hi, x_lo = add2(r.ref.hi, shift_hi) + x_hi, x_lo = add12(r.ref.hi, shift_hi) T(x_hi + (x_lo + (shift_lo + r.ref.lo))) end function _getindex_hiprec(r::StepRangeLen{<:Any,<:TwicePrecision,<:TwicePrecision}, i::Integer) u = i - r.offset shift_hi, shift_lo = u*r.step.hi, u*r.step.lo - x_hi, x_lo = add2(r.ref.hi, shift_hi) - x_hi, x_lo = add2(x_hi, x_lo + (shift_lo + r.ref.lo)) + x_hi, x_lo = add12(r.ref.hi, shift_hi) + x_hi, x_lo = add12(x_hi, x_lo + (shift_lo + r.ref.lo)) TwicePrecision(x_hi, x_lo) end function getindex(r::StepRangeLen{T,<:TwicePrecision,<:TwicePrecision}, s::OrdinalRange{<:Integer}) where T - @_inline_meta @boundscheck checkbounds(r, s) soffset = 1 + round(Int, (r.offset - first(s))/step(s)) soffset = clamp(soffset, 1, length(s)) @@ -234,11 +480,15 @@ convert(::Type{StepRangeLen{T,R,S}}, r::StepRangeLen{T,R,S}) where {T<:AbstractF convert(::Type{StepRangeLen{T,R,S}}, r::StepRangeLen) where {T<:AbstractFloat,R<:TwicePrecision,S<:TwicePrecision} = _convertSRL(StepRangeLen{T,R,S}, r) -convert(::Type{StepRangeLen{T}}, r::StepRangeLen) where {T<:Union{Float16,Float32,Float64}} = - _convertSRL(StepRangeLen{T,TwicePrecision{T},TwicePrecision{T}}, r) +convert(::Type{StepRangeLen{Float64}}, r::StepRangeLen) = + _convertSRL(StepRangeLen{Float64,TwicePrecision{Float64},TwicePrecision{Float64}}, r) +convert(::Type{StepRangeLen{T}}, r::StepRangeLen) where {T<:IEEEFloat} = + _convertSRL(StepRangeLen{T,Float64,Float64}, r) -convert(::Type{StepRangeLen{T}}, r::Range) where {T<:Union{Float16,Float32,Float64}} = - _convertSRL(StepRangeLen{T,TwicePrecision{T},TwicePrecision{T}}, r) +convert(::Type{StepRangeLen{Float64}}, r::Range) = + _convertSRL(StepRangeLen{Float64,TwicePrecision{Float64},TwicePrecision{Float64}}, r) +convert(::Type{StepRangeLen{T}}, r::Range) where {T<:IEEEFloat} = + _convertSRL(StepRangeLen{T,Float64,Float64}, r) function _convertSRL(::Type{StepRangeLen{T,R,S}}, r::StepRangeLen{<:Integer}) where {T,R,S} StepRangeLen{T,R,S}(R(r.ref), S(r.step), length(r), r.offset) @@ -285,12 +535,12 @@ function sum(r::StepRangeLen) sp, sn = sumpair(np), sumpair(nn) tp = _prod(r.step, sp[1], sp[2]) tn = _prod(r.step, sn[1], sn[2]) - s_hi, s_lo = add2(tp.hi, -tn.hi) + s_hi, s_lo = add12(tp.hi, -tn.hi) s_lo += tp.lo - tn.lo # Add in contributions of ref ref = r.ref * l - sm_hi, sm_lo = add2(s_hi, ref.hi) - add2(sm_hi, sm_lo + ref.lo)[1] + sm_hi, sm_lo = add12(s_hi, ref.hi) + add12(sm_hi, sm_lo + ref.lo)[1] end # sum(1:n) as a product of two integers @@ -316,10 +566,10 @@ end ## LinSpace # For Float16, Float32, and Float64, linspace returns a StepRangeLen -function linspace(start::T, stop::T, len::Integer) where T<:Union{Float16,Float32,Float64} +function linspace(start::T, stop::T, len::Integer) where {T<:IEEEFloat} len < 2 && return _linspace1(T, start, stop, len) if start == stop - return StepRangeLen(TwicePrecision(start,zero(T)), TwicePrecision(zero(T),zero(T)), len) + return steprangelen_hp(T, start, zero(T), 0, len, 1) end # Attempt to find exact rational approximations start_n, start_d = rat(start) @@ -338,15 +588,15 @@ function linspace(start::T, stop::T, len::Integer) where T<:Union{Float16,Float3 _linspace(start, stop, len) end -function _linspace(start::T, stop::T, len::Integer) where T<:Union{Float16,Float32,Float64} +function _linspace(start::T, stop::T, len::Integer) where {T<:IEEEFloat} (isfinite(start) && isfinite(stop)) || throw(ArgumentError("start and stop must be finite, got $start and $stop")) # Find the index that returns the smallest-magnitude element Δ, Δfac = stop-start, 1 if !isfinite(Δ) # handle overflow for large endpoints Δ, Δfac = stop/len - start/len, Int(len) end - tmin = -(start/Δ)/Δfac # interpolation t such that return value is 0 - imin = round(Int, tmin*(len-1)+1) + tmin = -(start/Δ)/Δfac # t such that (1-t)*start + t*stop == 0 + imin = round(Int, tmin*(len-1)+1) # index approximately corresponding to t if 1 < imin < len # The smallest-magnitude element is in the interior t = (imin-1)/(len-1) @@ -364,8 +614,7 @@ function _linspace(start::T, stop::T, len::Integer) where T<:Union{Float16,Float if len == 2 && !isfinite(step) # For very large endpoints where step overflows, exploit the # split-representation to handle the overflow - return StepRangeLen(TwicePrecision(start, zero(T)), - TwicePrecision(-start, stop), 2) + return steprangelen_hp(T, start, (-start, stop), 0, 2, 1) end # 2x calculations to get high precision endpoint matching while also # preventing overflow in ref_hi+(i-offset)*step_hi @@ -373,33 +622,27 @@ function _linspace(start::T, stop::T, len::Integer) where T<:Union{Float16,Float step_hi_pre = clamp(step, max(-(m+ref)/k, (-m+ref)/k), min((m-ref)/k, (m+ref)/k)) nb = nbitslen(T, len, imin) step_hi = truncbits(step_hi_pre, nb) - x1_hi, x1_lo = add2((1-imin)*step_hi, ref) - x2_hi, x2_lo = add2((len-imin)*step_hi, ref) + x1_hi, x1_lo = add12((1-imin)*step_hi, ref) + x2_hi, x2_lo = add12((len-imin)*step_hi, ref) a, b = (start - x1_hi) - x1_lo, (stop - x2_hi) - x2_lo step_lo = (b - a)/(len - 1) ref_lo = a - (1 - imin)*step_lo - StepRangeLen(TwicePrecision(ref, ref_lo), TwicePrecision(step_hi, step_lo), Int(len), imin) + steprangelen_hp(T, (ref, ref_lo), (step_hi, step_lo), 0, Int(len), imin) end # linspace for rational numbers, start = start_n/den, stop = stop_n/den # Note this returns a StepRangeLen function linspace(::Type{T}, start_n::Integer, stop_n::Integer, len::Integer, den::Integer) where T len < 2 && return _linspace1(T, start_n/den, stop_n/den, len) - start_n == stop_n && return StepRangeLen(TwicePrecision{T}((start_n, den)), zero(TwicePrecision{T}), len) + start_n == stop_n && return steprangelen_hp(T, (start_n, den), (zero(start_n), den), 0, len) tmin = -start_n/(Float64(stop_n) - Float64(start_n)) imin = round(Int, tmin*(len-1)+1) imin = clamp(imin, 1, Int(len)) - # Compute (1-t)*a and t*b separately in 2x precision (itp = interpolant)... - dent = (den, len-1) # represent products as a tuple to eliminate risk of overflow - start_itp = proddiv(T, (len-imin, start_n), dent) - stop_itp = proddiv(T, (imin-1, stop_n), dent) - # ...and then combine them to make ref - ref = start_itp + stop_itp - # Compute step to 2x precision without risking overflow... - rend = proddiv(T, (stop_n,), dent) - rbeg = proddiv(T, (-start_n,), dent) - step = twiceprecision(rbeg + rend, nbitslen(T, len, imin)) # ...and truncate hi-bits as needed - StepRangeLen(ref, step, Int(len), imin) + ref_num = Int128(len-imin) * start_n + Int128(imin-1) * stop_n + ref_denom = Int128(len-1) * den + ref = (ref_num, ref_denom) + step_full = (Int128(stop_n) - Int128(start_n), ref_denom) + steprangelen_hp(T, ref, step_full, nbitslen(T, len, imin), Int(len), imin) end # For len < 2 @@ -440,82 +683,12 @@ narrow(::Type{Float64}) = Float32 narrow(::Type{Float32}) = Float16 narrow(::Type{Float16}) = Float16 -function add2(u::T, v::T) where T<:Number - @_inline_meta - u, v = ifelse(abs(v) > abs(u), (v, u), (u, v)) - w = u + v - w, (u-w) + v -end - -add2(u, v) = _add2(promote(u, v)...) -_add2(u::T, v::T) where {T<:Number} = add2(u, v) -_add2(u, v) = error("$u::$(typeof(u)) and $v::$(typeof(v)) cannot be promoted to a common type") - -function +(x::TwicePrecision, y::Number) - s_hi, s_lo = add2(x.hi, y) - TwicePrecision(s_hi, s_lo+x.lo) -end -+(x::Number, y::TwicePrecision) = y+x - -function +(x::TwicePrecision{T}, y::TwicePrecision{T}) where T - r = x.hi + y.hi - s = abs(x.hi) > abs(y.hi) ? (((x.hi - r) + y.hi) + y.lo) + x.lo : (((y.hi - r) + x.hi) + x.lo) + y.lo - TwicePrecision(r, s) -end -+(x::TwicePrecision, y::TwicePrecision) = _add2(promote(x, y)...) -_add2(x::T, y::T) where {T<:TwicePrecision} = x + y -_add2(x::TwicePrecision, y::TwicePrecision) = TwicePrecision(x.hi+y.hi, x.lo+y.lo) - -function *(x::TwicePrecision, v::Integer) - v == 0 && return TwicePrecision(x.hi*v, x.lo*v) - nb = ceil(Int, log2(abs(v))) - u = truncbits(x.hi, nb) - y_hi, y_lo = add2(u*v, ((x.hi-u) + x.lo)*v) - TwicePrecision(y_hi, y_lo) -end - -function _mul2(x::TwicePrecision{T}, v::T) where T<:Union{Float16,Float32,Float64} - v == 0 && return TwicePrecision(T(0), T(0)) - xhh, xhl = splitprec(x.hi) - vh, vl = splitprec(v) - y_hi, y_lo = add2(xhh*vh, xhh*vl + xhl*vh) - TwicePrecision(y_hi, y_lo + xhl*vl + x.lo*v) -end - -_mul2(x::TwicePrecision, v::Number) = TwicePrecision(x.hi*v, x.lo*v) - -function *(x::TwicePrecision{R}, v::S) where R where S<:Number - T = promote_type(R, S) - _mul2(convert(TwicePrecision{T}, x), convert(T, v)) -end - -*(v::Number, x::TwicePrecision) = x*v - -function /(x::TwicePrecision, v::Number) - hi = x.hi/v - w = TwicePrecision(hi, zero(hi)) * v - lo = (((x.hi - w.hi) - w.lo) + x.lo)/v - y_hi, y_lo = add2(hi, lo) - TwicePrecision(y_hi, y_lo) -end - -# hi-precision version of prod(num)/prod(den) -# num and den are tuples to avoid risk of overflow -function proddiv(T, num, den) - @_inline_meta - t = TwicePrecision(T(num[1]), zero(T)) - t = _prod(t, tail(num)...) - _divt(t, den...) -end function _prod(t::TwicePrecision, x, y...) @_inline_meta _prod(t * x, y...) end _prod(t::TwicePrecision) = t -function _divt(t::TwicePrecision, x, y...) - @_inline_meta - _divt(t / x, y...) -end -_divt(t::TwicePrecision) = t +<(x::TwicePrecision{T}, y::TwicePrecision{T}) where {T} = + x.hi < y.hi || ((x.hi == y.hi) & (x.lo < y.lo)) isbetween(a, x, b) = a <= x <= b || b <= x <= a diff --git a/test/ranges.jl b/test/ranges.jl index 1b04b345d8eab..a953599223ad2 100644 --- a/test/ranges.jl +++ b/test/ranges.jl @@ -1,6 +1,188 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -# ranges +# Because ranges rely on high precision arithmetic, test those utilities first +for (I, T) in ((Int16, Float16), (Int32, Float32), (Int64, Float64)), i = 1:10^3 + i = rand(I) >> 1 # test large values below + hi, lo = Base.splitprec(T, i) + @test widen(hi) + widen(lo) == i + @test endswith(bits(hi), repeat('0', Base.Math.significand_bits(T) ÷ 2)) +end +for (I, T) in ((Int16, Float16), (Int32, Float32), (Int64, Float64)) + x = T(typemax(I)) + Δi = ceil(I, eps(x)) + for i = typemax(I)-2Δi:typemax(I)-Δi + hi, lo = Base.splitprec(T, i) + @test widen(hi) + widen(lo) == i + @test endswith(bits(hi), repeat('0', Base.Math.significand_bits(T) ÷ 2)) + end + for i = typemin(I):typemin(I)+Δi + hi, lo = Base.splitprec(T, i) + @test widen(hi) + widen(lo) == i + @test endswith(bits(hi), repeat('0', Base.Math.significand_bits(T) ÷ 2)) + end +end + +# Compare precision in a manner sensitive to subnormals, which lose +# precision compared to widening. +function cmp_sn(w, hi, lo, slopbits=0) + if !isfinite(hi) + if abs(w) > realmax(typeof(hi)) + return isinf(hi) && sign(w) == sign(hi) + end + if isnan(w) && isnan(hi) + return true + end + return w == hi + end + if abs(w) < subnormalmin(typeof(hi)) + return (hi == zero(hi) || abs(w - widen(hi)) < abs(w)) && lo == zero(hi) + end + # Compare w == hi + lo unless `lo` issubnormal + z = widen(hi) + widen(lo) + if !issubnormal(lo) && lo != 0 + if slopbits == 0 + return z == w + end + wr, zr = roundshift(w, slopbits), roundshift(z, slopbits) + return max(wr-1, zero(wr)) <= zr <= wr+1 + end + # round w to the same number of bits as z + zu = asbits(z) + wu = asbits(w) + lastbit = false + while zu > 0 && !isodd(zu) + lastbit = isodd(wu) + zu = zu >> 1 + wu = wu >> 1 + end + return wu <= zu <= wu + lastbit +end + +asbits(x) = reinterpret(Base.uinttype(typeof(x)), x) + +function roundshift(x, n) + xu = asbits(x) + lastbit = false + for i = 1:n + lastbit = isodd(xu) + xu = xu >> 1 + end + xu + lastbit +end + +subnormalmin(::Type{T}) where T = reinterpret(T, Base.uinttype(T)(1)) + +function highprec_pair(x, y) + slopbits = (Base.Math.significand_bits(typeof(widen(x))) + 1) - + 2*(Base.Math.significand_bits(typeof(x)) + 1) + hi, lo = Base.add12(x, y) + @test cmp_sn(widen(x) + widen(y), hi, lo) + hi, lo = Base.mul12(x, y) + @test cmp_sn(widen(x) * widen(y), hi, lo) + y == 0 && return nothing + hi, lo = Base.div12(x, y) + @test cmp_sn(widen(x) / widen(y), hi, lo, slopbits) + nothing +end + +# # This tests every possible pair of Float16s. It takes too long for +# # ordinary use, which is why it's commented out. +# function pair16() +# for yu in 0x0000:0xffff +# for xu in 0x0000:0xffff +# x, y = reinterpret(Float16, xu), reinterpret(Float16, yu) +# highprec_pair(x, y) +# end +# end +# end + +for T in (Float16, Float32) # skip Float64 (bit representation of BigFloat is not available) + for i = 1:10^5 + x, y = rand(T), rand(T) + highprec_pair(x, y) + highprec_pair(-x, y) + highprec_pair(x, -y) + highprec_pair(-x, -y) + end + # Make sure we test dynamic range too + for i = 1:10^5 + x, y = rand(T), rand(T) + x == 0 || y == 0 && continue + x, y = log(x), log(y) + highprec_pair(x, y) + end +end + +asww(x) = widen(widen(x.hi)) + widen(widen(x.lo)) +astuple(x) = (x.hi, x.lo) + +function cmp_sn2(w, hi, lo, slopbits=0) + if !isfinite(hi) + if abs(w) > realmax(typeof(hi)) + return isinf(hi) && sign(w) == sign(hi) + end + if isnan(w) && isnan(hi) + return true + end + return w == hi + end + if abs(w) < subnormalmin(typeof(hi)) + return (hi == zero(hi) || abs(w - widen(hi)) < abs(w)) && lo == zero(hi) + end + z = widen(hi) + widen(lo) + zu, wu = asbits(z), asbits(w) + while zu > 0 && !isodd(zu) + zu = zu >> 1 + wu = wu >> 1 + end + zu = zu >> slopbits + wu = wu >> slopbits + return wu - 1 <= zu <= wu + 1 +end + +# TwicePrecision test. These routines lose accuracy if you form +# intermediate subnormals; with Float16, this happens so frequently, +# let's only test Float32. +T = Float32 +Tw = widen(T) +slopbits = (Base.Math.significand_bits(Tw) + 1) - + 2*(Base.Math.significand_bits(T) + 1) +for i = 1:10^5 + x = Base.TwicePrecision{T}(rand()) + y = Base.TwicePrecision{T}(rand()) + xw, yw = asww(x), asww(y) + @test cmp_sn2(Tw(xw+yw), astuple(x+y)..., slopbits) + @test cmp_sn2(Tw(xw-yw), astuple(x-y)..., slopbits) + @test cmp_sn2(Tw(xw*yw), astuple(x*y)..., slopbits) + @test cmp_sn2(Tw(xw/yw), astuple(x/y)..., slopbits) + y = rand(T) + yw = widen(widen(y)) + @test cmp_sn2(Tw(xw+yw), astuple(x+y)..., slopbits) + @test cmp_sn2(Tw(xw-yw), astuple(x-y)..., slopbits) + @test cmp_sn2(Tw(xw*yw), astuple(x*y)..., slopbits) + @test cmp_sn2(Tw(xw/yw), astuple(x/y)..., slopbits) +end + +x1 = Base.TwicePrecision{Float64}(1) +x0 = Base.TwicePrecision{Float64}(0) +xinf = Base.TwicePrecision{Float64}(Inf) +@test Float64(x1+x0) == 1 +@test Float64(x1+0) == 1 +@test Float64(x1+0.0) == 1 +@test Float64(x1*x0) == 0 +@test Float64(x1*0) == 0 +@test Float64(x1*0.0) == 0 +@test Float64(x1/x0) == Inf +@test Float64(x1/0) == Inf +@test Float64(xinf*x1) == Inf +@test isnan(Float64(xinf*x0)) +@test isnan(Float64(xinf*0)) +@test isnan(Float64(xinf*0.0)) +@test isnan(Float64(x0/x0)) +@test isnan(Float64(x0/0)) +@test isnan(Float64(x0/0.0)) + +## ranges @test size(10:1:0) == (0,) @test length(1:.2:2) == 6 @test length(1.:.2:2.) == 6 @@ -338,14 +520,14 @@ end for T = (Float32, Float64,),# BigFloat), a = -5:25, s = [-5:-1;1:25;], d = 1:25, n = -1:15 - den = convert(T,d) - start = convert(T,a)/den - step = convert(T,s)/den - stop = convert(T,(a+(n-1)*s))/den - vals = T[a:s:a+(n-1)*s;]./den - r = start:step:stop + denom = convert(T,d) + strt = convert(T,a)/denom + Δ = convert(T,s)/denom + stop = convert(T,(a+(n-1)*s))/denom + vals = T[a:s:a+(n-1)*s;]./denom + r = strt:Δ:stop @test [r;] == vals - @test [linspace(start, stop, length(r));] == vals + @test [linspace(strt, stop, length(r));] == vals # issue #7420 n = length(r) @test [r[1:n];] == [r;] @@ -362,27 +544,31 @@ end @test [-3*0.05:-0.05:-0.2;] == [linspace(-3*0.05,-0.2,2);] == [-3*0.05,-0.2] @test [-0.2:0.05:-3*0.05;] == [linspace(-0.2,-3*0.05,2);] == [-0.2,-3*0.05] -for T = (Float32, Float64,), i = 1:2^15, n = 1:5 - start, step = randn(T), randn(T) - step == 0 && continue - stop = start + (n-1)*step - # `n` is not necessarily unique s.t. `start + (n-1)*step == stop` - # so test that `length(start:step:stop)` satisfies this identity - # and is the closest value to `(stop-start)/step` to do so - lo = hi = n - while start + (lo-1)*step == stop; lo -= 1; end - while start + (hi-1)*step == stop; hi += 1; end - m = clamp(round(Int, (stop-start)/step) + 1, lo+1, hi-1) - r = start:step:stop - @test m == length(r) - # FIXME: these fail some small portion of the time - @test_skip start == first(r) - @test_skip stop == last(r) - l = linspace(start,stop,n) - @test n == length(l) - # FIXME: these fail some small portion of the time - @test_skip start == first(l) - @test_skip stop == last(l) +function range_fuzztests(::Type{T}, niter, nrange) where {T} + for i = 1:niter, n in nrange + strt, Δ = randn(T), randn(T) + Δ == 0 && continue + stop = strt + (n-1)*Δ + # `n` is not necessarily unique s.t. `strt + (n-1)*Δ == stop` + # so test that `length(strt:Δ:stop)` satisfies this identity + # and is the closest value to `(stop-strt)/Δ` to do so + lo = hi = n + while strt + (lo-1)*Δ == stop; lo -= 1; end + while strt + (hi-1)*Δ == stop; hi += 1; end + m = clamp(round(Int, (stop-strt)/Δ) + 1, lo+1, hi-1) + r = strt:Δ:stop + @test m == length(r) + @test strt == first(r) + @test Δ == step(r) + @test_skip stop == last(r) + l = linspace(strt,stop,n) + @test n == length(l) + @test strt == first(l) + @test stop == last(l) + end +end +for T = (Float32, Float64,) + range_fuzztests(T, 2^15, 1:5) end # Inexact errors on 32 bit architectures. #22613 @@ -892,6 +1078,11 @@ for i = 2:4 @test r[i] == x end +# issue #23178 +r = linspace(Float16(0.1094), Float16(0.9697), 300) +@test r[1] == Float16(0.1094) +@test r[end] == Float16(0.9697) + # issue #20382 r = @inferred(colon(big(1.0),big(2.0),big(5.0))) @test eltype(r) == BigFloat @@ -958,7 +1149,7 @@ end end end -# Issue #23300 +# issue #23300 x = -5:big(1.0):5 @test map(Float64, x) === -5.0:1.0:5.0 @test map(Float32, x) === -5.0f0:1.0f0:5.0f0