diff --git a/base/float.jl b/base/float.jl
index 8a611cf38f8e8..4d65e56be4ded 100644
--- a/base/float.jl
+++ b/base/float.jl
@@ -1,5 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+const IEEEFloat = Union{Float16, Float32, Float64}
+
 ## floating point traits ##
 
 """
@@ -605,7 +607,7 @@ uabs(x::Signed) = unsigned(abs(x))
 The result of `n` iterative applications of `nextfloat` to `x` if `n >= 0`, or `-n`
 applications of `prevfloat` if `n < 0`.
 """
-function nextfloat(f::Union{Float16,Float32,Float64}, d::Integer)
+function nextfloat(f::IEEEFloat, d::Integer)
     F = typeof(f)
     fumax = reinterpret(Unsigned, F(Inf))
     U = typeof(fumax)
@@ -711,12 +713,12 @@ end
 
 Test whether a floating point number is subnormal.
 """
-function issubnormal end
+function issubnormal(x::T) where {T<:IEEEFloat}
+    y = reinterpret(Unsigned, x)
+    (y & exponent_mask(T) == 0) & (y & significand_mask(T) != 0)
+end
 
 @eval begin
-    issubnormal(x::Float32) = (abs(x) < $(bitcast(Float32, 0x00800000))) & (x!=0)
-    issubnormal(x::Float64) = (abs(x) < $(bitcast(Float64, 0x0010000000000000))) & (x!=0)
-
     typemin(::Type{Float16}) = $(bitcast(Float16, 0xfc00))
     typemax(::Type{Float16}) = $(Inf16)
     typemin(::Type{Float32}) = $(-Inf32)
@@ -864,32 +866,11 @@ exponent_half(::Type{Float16}) =    0x3800
 significand_mask(::Type{Float16}) = 0x03ff
 
 # integer size of float
-fpinttype(::Type{Float64}) = UInt64
-fpinttype(::Type{Float32}) = UInt32
-fpinttype(::Type{Float16}) = UInt16
-
-## TwicePrecision utilities
-# The numeric constants are half the number of bits in the mantissa
-for (F, T, n) in ((Float16, UInt16, 5), (Float32, UInt32, 12), (Float64, UInt64, 26))
-    @eval begin
-        function truncbits(x::$F, nb)
-            @_inline_meta
-            truncmask(x, typemax($T) << nb)
-        end
-        function truncmask(x::$F, mask)
-            @_inline_meta
-            reinterpret($F, mask & reinterpret($T, x))
-        end
-        function splitprec(x::$F)
-            @_inline_meta
-            hi = truncmask(x, typemax($T) << $n)
-            hi, x-hi
-        end
-    end
-end
+uinttype(::Type{Float64}) = UInt64
+uinttype(::Type{Float32}) = UInt32
+uinttype(::Type{Float16}) = UInt16
 
-truncbits(x, nb) = x
-truncmask(x, mask) = x
+Base.iszero(x::Float16) = reinterpret(UInt16, x) & ~sign_mask(Float16) == 0x0000
 
 ## Array operations on floating point numbers ##
 
@@ -904,7 +885,8 @@ end
 
 float(r::StepRange) = float(r.start):float(r.step):float(last(r))
 float(r::UnitRange) = float(r.start):float(last(r))
-float(r::StepRangeLen) = StepRangeLen(float(r.ref), float(r.step), length(r), r.offset)
+float(r::StepRangeLen{T}) where {T} =
+    StepRangeLen{typeof(float(T(r.ref)))}(float(r.ref), float(r.step), length(r), r.offset)
 function float(r::LinSpace)
     LinSpace(float(r.start), float(r.stop), length(r))
 end
diff --git a/base/math.jl b/base/math.jl
index 64f957d7a1467..b90caf4574315 100644
--- a/base/math.jl
+++ b/base/math.jl
@@ -21,11 +21,11 @@ import Base: log, exp, sin, cos, tan, sinh, cosh, tanh, asin,
              exp10, expm1, log1p
 
 using Base: sign_mask, exponent_mask, exponent_one,
-            exponent_half, fpinttype, significand_mask
+            exponent_half, uinttype, significand_mask
 
 using Core.Intrinsics: sqrt_llvm
 
-const IEEEFloat = Union{Float16, Float32, Float64}
+using Base.IEEEFloat
 
 @noinline function throw_complex_domainerror(f, x)
     throw(DomainError(x, string("$f will only return a complex result if called with a ",
@@ -588,7 +588,7 @@ function ldexp(x::T, e::Integer) where T<:IEEEFloat
         return flipsign(T(Inf), x)
     end
     if k > 0 # normal case
-        xu = (xu & ~exponent_mask(T)) | (rem(k, fpinttype(T)) << significand_bits(T))
+        xu = (xu & ~exponent_mask(T)) | (rem(k, uinttype(T)) << significand_bits(T))
         return reinterpret(T, xu)
     else # subnormal case
         if k <= -significand_bits(T) # underflow
@@ -598,7 +598,7 @@ function ldexp(x::T, e::Integer) where T<:IEEEFloat
         end
         k += significand_bits(T)
         z = T(2.0)^-significand_bits(T)
-        xu = (xu & ~exponent_mask(T)) | (rem(k, fpinttype(T)) << significand_bits(T))
+        xu = (xu & ~exponent_mask(T)) | (rem(k, uinttype(T)) << significand_bits(T))
         return z*reinterpret(T, xu)
     end
 end
diff --git a/base/range.jl b/base/range.jl
index 381efd9ea5f2f..5bf8a2896767c 100644
--- a/base/range.jl
+++ b/base/range.jl
@@ -205,6 +205,8 @@ end
 
 StepRangeLen(ref::R, step::S, len::Integer, offset::Integer = 1) where {R,S} =
     StepRangeLen{typeof(ref+0*step),R,S}(ref, step, len, offset)
+StepRangeLen{T}(ref::R, step::S, len::Integer, offset::Integer = 1) where {T,R,S} =
+    StepRangeLen{T,R,S}(ref, step, len, offset)
 
 ## linspace and logspace
 
@@ -370,9 +372,12 @@ julia> step(linspace(2.5,10.9,85))
 """
 step(r::StepRange) = r.step
 step(r::AbstractUnitRange) = 1
-step(r::StepRangeLen) = r.step
+step(r::StepRangeLen{T}) where {T} = T(r.step)
 step(r::LinSpace) = (last(r)-first(r))/r.lendiv
 
+step_hp(r::StepRangeLen) = r.step
+step_hp(r::Range) = step(r)
+
 unsafe_length(r::Range) = length(r)  # generic fallback
 
 function unsafe_length(r::StepRange)
@@ -455,10 +460,9 @@ done(r::StepRange, i) = isempty(r) | (i < min(r.start, r.stop)) | (i > max(r.sta
 done(r::StepRange, i::Integer) =
     isempty(r) | (i == oftype(i, r.stop) + r.step)
 
-# see also twiceprecision.jl
-start(r::StepRangeLen) = (unsafe_getindex(r, 1), 1)
-next(r::StepRangeLen{T}, s) where {T} = s[1], (T(s[1]+r.step), s[2]+1)
-done(r::StepRangeLen, s) = s[2] > length(r)
+start(r::StepRangeLen) = 1
+next(r::StepRangeLen{T}, i) where {T} = unsafe_getindex(r, i), i+1
+done(r::StepRangeLen, i) = i > length(r)
 
 start(r::UnitRange{T}) where {T} = oftype(r.start + oneunit(T), r.start)
 next(r::AbstractUnitRange{T}, i) where {T} = (convert(T, i), i + oneunit(T))
@@ -496,7 +500,7 @@ end
 
 function getindex(v::Range{T}, i::Integer) where T
     @_inline_meta
-    ret = convert(T, first(v) + (i - 1)*step(v))
+    ret = convert(T, first(v) + (i - 1)*step_hp(v))
     ok = ifelse(step(v) > zero(step(v)),
                 (ret <= v.stop) & (ret >= v.start),
                 (ret <= v.start) & (ret >= v.stop))
@@ -516,6 +520,11 @@ function unsafe_getindex(r::StepRangeLen{T}, i::Integer) where T
     T(r.ref + u*r.step)
 end
 
+function _getindex_hiprec(r::StepRangeLen, i::Integer)  # without rounding by T
+    u = i - r.offset
+    r.ref + u*r.step
+end
+
 function unsafe_getindex(r::LinSpace, i::Integer)
     lerpi.(i-1, r.lendiv, r.start, r.stop)
 end
@@ -556,11 +565,14 @@ function getindex(r::StepRange, s::Range{<:Integer})
     range(st, step(r)*step(s), length(s))
 end
 
-function getindex(r::StepRangeLen, s::OrdinalRange{<:Integer})
+function getindex(r::StepRangeLen{T}, s::OrdinalRange{<:Integer}) where {T}
     @_inline_meta
     @boundscheck checkbounds(r, s)
-    vfirst = unsafe_getindex(r, first(s))
-    return StepRangeLen(vfirst, r.step*step(s), length(s))
+    # Find closest approach to offset by s
+    ind = linearindices(s)
+    offset = max(min(1 + round(Int, (r.offset - first(s))/step(s)), last(ind)), first(ind))
+    ref = _getindex_hiprec(r, first(s) + (offset-1)*step(s))
+    return StepRangeLen{T}(ref, r.step*step(s), length(s), offset)
 end
 
 function getindex(r::LinSpace, s::OrdinalRange{<:Integer})
@@ -725,16 +737,17 @@ end
 ## linear operations on ranges ##
 
 -(r::OrdinalRange) = range(-first(r), -step(r), length(r))
--(r::StepRangeLen) = StepRangeLen(-r.ref, -r.step, length(r), r.offset)
+-(r::StepRangeLen{T,R,S}) where {T,R,S} =
+    StepRangeLen{T,R,S}(-r.ref, -r.step, length(r), r.offset)
 -(r::LinSpace) = LinSpace(-r.start, -r.stop, length(r))
 
 +(x::Real, r::AbstractUnitRange) = range(x + first(r), length(r))
 # For #18336 we need to prevent promotion of the step type:
 +(x::Number, r::AbstractUnitRange) = range(x + first(r), step(r), length(r))
 +(x::Number, r::Range) = (x+first(r)):step(r):(x+last(r))
-function +(x::Number, r::StepRangeLen)
+function +(x::Number, r::StepRangeLen{T}) where T
     newref = x + r.ref
-    StepRangeLen{eltype(newref),typeof(newref),typeof(r.step)}(newref, r.step, length(r), r.offset)
+    StepRangeLen{typeof(T(r.ref) + x)}(newref, r.step, length(r), r.offset)
 end
 function +(x::Number, r::LinSpace)
     LinSpace(x + r.start, x + r.stop, r.len)
@@ -750,15 +763,18 @@ end
 -(r::Range, x::Number) = +(-x, r)
 
 *(x::Number, r::Range)        = range(x*first(r), x*step(r), length(r))
-*(x::Number, r::StepRangeLen) = StepRangeLen(x*r.ref, x*r.step, length(r), r.offset)
+*(x::Number, r::StepRangeLen{T}) where {T} =
+    StepRangeLen{typeof(x*T(r.ref))}(x*r.ref, x*r.step, length(r), r.offset)
 *(x::Number, r::LinSpace)     = LinSpace(x * r.start, x * r.stop, r.len)
 # separate in case of noncommutative multiplication
 *(r::Range, x::Number)        = range(first(r)*x, step(r)*x, length(r))
-*(r::StepRangeLen, x::Number) = StepRangeLen(r.ref*x, r.step*x, length(r), r.offset)
+*(r::StepRangeLen{T}, x::Number) where {T} =
+    StepRangeLen{typeof(T(r.ref)*x)}(r.ref*x, r.step*x, length(r), r.offset)
 *(r::LinSpace, x::Number)     = LinSpace(r.start * x, r.stop * x, r.len)
 
 /(r::Range, x::Number)        = range(first(r)/x, step(r)/x, length(r))
-/(r::StepRangeLen, x::Number) = StepRangeLen(r.ref/x, r.step/x, length(r), r.offset)
+/(r::StepRangeLen{T}, x::Number) where {T} =
+    StepRangeLen{typeof(T(r.ref)/x)}(r.ref/x, r.step/x, length(r), r.offset)
 /(r::LinSpace, x::Number)     = LinSpace(r.start / x, r.stop / x, r.len)
 
 /(x::Number, r::Range) = [ x/y for y=r ]
diff --git a/base/special/exp.jl b/base/special/exp.jl
index e49bfb5ed942b..ccabe295597db 100644
--- a/base/special/exp.jl
+++ b/base/special/exp.jl
@@ -117,11 +117,11 @@ function exp(x::T) where T<:Union{Float32,Float64}
         if k > -significand_bits(T)
             # multiply by 2.0 first to prevent overflow, which helps extends the range
             k == exponent_max(T) && return y * T(2.0) * T(2.0)^(exponent_max(T) - 1)
-            twopk = reinterpret(T, rem(exponent_bias(T) + k, fpinttype(T)) << significand_bits(T))
+            twopk = reinterpret(T, rem(exponent_bias(T) + k, uinttype(T)) << significand_bits(T))
             return y*twopk
         else
             # add significand_bits(T) + 1 to lift the range outside the subnormals
-            twopk = reinterpret(T, rem(exponent_bias(T) + significand_bits(T) + 1 + k, fpinttype(T)) << significand_bits(T))
+            twopk = reinterpret(T, rem(exponent_bias(T) + significand_bits(T) + 1 + k, uinttype(T)) << significand_bits(T))
             return y * twopk * T(2.0)^(-significand_bits(T) - 1)
         end
     elseif xa < reinterpret(Unsigned, exp_small_thres(T)) # |x| < exp_small_thres
diff --git a/base/special/exp10.jl b/base/special/exp10.jl
index d8ce280468b6c..711da3b04549b 100644
--- a/base/special/exp10.jl
+++ b/base/special/exp10.jl
@@ -119,11 +119,11 @@ function exp10(x::T) where T<:Union{Float32,Float64}
         if k > -significand_bits(T)
             # multiply by 2.0 first to prevent overflow, extending the range
             k == exponent_max(T) && return y * T(2.0) * T(2.0)^(exponent_max(T) - 1)
-            twopk = reinterpret(T, rem(exponent_bias(T) + k, fpinttype(T)) << significand_bits(T))
+            twopk = reinterpret(T, rem(exponent_bias(T) + k, uinttype(T)) << significand_bits(T))
             return y*twopk
         else
             # add significand_bits(T) + 1 to lift the range outside the subnormals
-            twopk = reinterpret(T, rem(exponent_bias(T) + significand_bits(T) + 1 + k, fpinttype(T)) << significand_bits(T))
+            twopk = reinterpret(T, rem(exponent_bias(T) + significand_bits(T) + 1 + k, uinttype(T)) << significand_bits(T))
             return y * twopk * T(2.0)^(-significand_bits(T) - 1)
         end
     elseif xa < reinterpret(Unsigned, exp10_small_thres(T))  # |x| < exp10_small_thres
diff --git a/base/sysimg.jl b/base/sysimg.jl
index de68ed222632b..8b180d773cdef 100644
--- a/base/sysimg.jl
+++ b/base/sysimg.jl
@@ -84,7 +84,6 @@ include("tuple.jl")
 include("pair.jl")
 include("traits.jl")
 include("range.jl")
-include("twiceprecision.jl")
 include("expr.jl")
 include("error.jl")
 
@@ -135,6 +134,7 @@ include("hashing.jl")
 include("rounding.jl")
 importall .Rounding
 include("float.jl")
+include("twiceprecision.jl")
 include("complex.jl")
 include("rational.jl")
 include("multinverses.jl")
diff --git a/base/twiceprecision.jl b/base/twiceprecision.jl
index 8a849dcb9af68..1baf1b9a921bd 100644
--- a/base/twiceprecision.jl
+++ b/base/twiceprecision.jl
@@ -6,6 +6,157 @@
 # that return r[3] == 0.3.  Otherwise, we have roundoff error due to
 #     0.1 + 2*0.1 = 0.30000000000000004
 
+"""
+    hi, lo = splitprec(F::Type{<:AbstractFloat}, i::Integer)
+
+Represent an integer `i` as a pair of floating-point numbers `hi` and
+`lo` (of type `F`) such that:
+- `widen(hi) + widen(lo) ≈ i`. It is exact if 1.5 * (number of precision bits in `F`) is greater than the number of bits in `i`.
+- all bits in `hi` are more significant than any of the bits in `lo`
+- `hi` can be exactly multiplied by the `hi` component of another call to `splitprec`.
+
+In particular, while `convert(Float64, i)` can be lossy since Float64
+has only 53 bits of precision, `splitprec(Float64, i)` is exact for
+any Int64/UInt64.
+"""
+function splitprec(::Type{F}, i::Integer) where {F<:AbstractFloat}
+    hi = truncbits(F(i), cld(precision(F), 2))
+    ihi = oftype(i, hi)
+    hi, F(i - ihi)
+end
+
+function truncmask(x::F, mask) where {F<:IEEEFloat}
+    reinterpret(F, mask & reinterpret(uinttype(F), x))
+end
+truncmask(x, mask) = x
+
+function truncbits(x::F, nb) where {F<:IEEEFloat}
+    truncmask(x, typemax(uinttype(F)) << nb)
+end
+truncbits(x, nb) = x
+
+
+## Dekker arithmetic
+
+"""
+    hi, lo = canonicalize2(big, little)
+
+Generate a representation where all the nonzero bits in `hi` are more
+significant than any of the nonzero bits in `lo`. `big` must be larger
+in absolute value than `little`.
+"""
+function canonicalize2(big, little)
+    h = big+little
+    h, (big - h) + little
+end
+
+"""
+    zhi, zlo = add12(x, y)
+
+A high-precision representation of `x + y` for floating-point
+numbers. Mathematically, `zhi + zlo = x + y`, where `zhi` contains the
+most significant bits and `zlo` the least significant.
+
+Because of the way floating-point numbers are printed, `lo` may not
+look the way you might expect from the standpoint of decimal
+representation, even though it is exact from the standpoint of binary
+representation.
+
+Example:
+```julia
+julia> 1.0 + 1.0001e-15
+1.000000000000001
+
+julia> big(1.0) + big(1.0001e-15)
+1.000000000000001000100000000000020165767380775934141445417482375879192346701529
+
+julia> hi, lo = Base.add12(1.0, 1.0001e-15)
+(1.000000000000001, -1.1012302462515652e-16)
+
+julia> big(hi) + big(lo)
+1.000000000000001000100000000000020165767380775934141445417482375879192346701529
+```
+
+`lo` differs from 1.0e-19 because `hi` is not exactly equal to
+the first 16 decimal digits of the answer.
+"""
+function add12(x::T, y::T) where {T}
+    x, y = ifelse(abs(y) > abs(x), (y, x), (x, y))
+    canonicalize2(x, y)
+end
+add12(x, y) = add12(promote_noncircular(x, y)...)
+
+"""
+    zhi, zlo = mul12(x, y)
+
+A high-precision representation of `x * y` for floating-point
+numbers. Mathematically, `zhi + zlo = x * y`, where `zhi` contains the
+most significant bits and `zlo` the least significant.
+
+Example:
+```julia
+julia> x = Float32(π)
+3.1415927f0
+
+julia> x * x
+9.869605f0
+
+julia> Float64(x) * Float64(x)
+9.869604950382893
+
+julia> hi, lo = Base.mul12(x, x)
+(9.869605f0, -1.140092f-7)
+
+julia> Float64(hi) + Float64(lo)
+9.869604950382893
+```
+"""
+function mul12(x::T, y::T) where {T<:AbstractFloat}
+    h = x * y
+    ifelse(iszero(h) | !isfinite(h), (h, h), canonicalize2(h, fma(x, y, -h)))
+end
+mul12(x::T, y::T) where {T} = (p = x * y; (p, zero(p)))
+mul12(x, y) = mul12(promote_noncircular(x, y)...)
+
+"""
+    zhi, zlo = div12(x, y)
+
+A high-precision representation of `x / y` for floating-point
+numbers. Mathematically, `zhi + zlo ≈ x / y`, where `zhi` contains the
+most significant bits and `zlo` the least significant.
+
+Example:
+```julia
+julia> x, y = Float32(π), 3.1f0
+(3.1415927f0, 3.1f0)
+
+julia> x / y
+1.013417f0
+
+julia> Float64(x) / Float64(y)
+1.0134170444063078
+
+julia> hi, lo = Base.div12(x, y)
+(1.013417f0, 3.8867366f-8)
+
+julia> Float64(hi) + Float64(lo)
+1.0134170444063066
+"""
+function div12(x::T, y::T) where {T<:AbstractFloat}
+    # We lose precision if any intermediate calculation results in a subnormal.
+    # To prevent this from happening, standardize the values.
+    xs, xe = frexp(x)
+    ys, ye = frexp(y)
+    r = xs / ys
+    rh, rl = canonicalize2(r, -fma(r, ys, -xs)/ys)
+    ifelse(iszero(r) | !isfinite(r), (r, r), (ldexp(rh, xe-ye), ldexp(rl, xe-ye)))
+end
+div12(x::T, y::T) where {T} = (p = x / y; (p, zero(p)))
+div12(x, y) = div12(promote_noncircular(x, y)...)
+
+
+## TwicePrecision
+
 """
     TwicePrecision{T}(hi::T, lo::T)
     TwicePrecision{T}((num, denom))
@@ -16,7 +167,7 @@ Float64`. `hi` represents the high bits (most significant bits) and
 `num//denom` can be approximated conveniently using the syntax
 `TwicePrecision{T}((num, denom))`.
 
-When used with `T<:AbstractFloat` to construct an exact
+When used with `T<:Union{Float16,Float32,Float64}` to construct an "exact"
 `StepRangeLen`, `ref` should be the range element with smallest
 magnitude and `offset` set to the corresponding index.  For
 efficiency, multiplication of `step` by the index is not performed at
@@ -35,30 +186,52 @@ struct TwicePrecision{T}
     lo::T    # least significant bits
 end
 
-function TwicePrecision{T}(nd::Tuple{I,I}) where {T,I}
+TwicePrecision{T}(x::T) where {T} = TwicePrecision{T}(x, zero(T))
+
+function TwicePrecision{T}(x) where {T}
+    xT = convert(T, x)
+    Δx = x - xT
+    TwicePrecision{T}(xT, T(Δx))
+end
+
+TwicePrecision{T}(i::Integer) where {T<:AbstractFloat} =
+    TwicePrecision{T}(canonicalize2(splitprec(T, i)...)...)
+
+TwicePrecision(x) = TwicePrecision{typeof(x)}(x)
+
+# Numerator/Denominator constructors
+function TwicePrecision{T}(nd::Tuple{Integer,Integer}) where {T<:Union{Float16,Float32}}
     n, d = nd
-    TwicePrecision{T}(n, zero(T)) / d
+    TwicePrecision{T}(n/d)
+end
+
+function TwicePrecision{T}(nd::Tuple{Any,Any}) where {T}
+    n, d = nd
+    TwicePrecision{T}(n) / d
 end
 
 function TwicePrecision{T}(nd::Tuple{I,I}, nb::Integer) where {T,I}
     twiceprecision(TwicePrecision{T}(nd), nb)
 end
 
-function twiceprecision(val::T, nb::Integer) where T<:Number
+# Truncating constructors. Useful for generating values that can be
+# exactly multiplied by small integers.
+function twiceprecision(val::T, nb::Integer) where {T<:IEEEFloat}
     hi = truncbits(val, nb)
     TwicePrecision{T}(hi, val - hi)
 end
 
-function twiceprecision(val::TwicePrecision{T}, nb::Integer) where T<:Number
+function twiceprecision(val::TwicePrecision{T}, nb::Integer) where {T<:IEEEFloat}
     hi = truncbits(val.hi, nb)
     TwicePrecision{T}(hi, (val.hi - hi) + val.lo)
 end
 
 nbitslen(r::StepRangeLen) = nbitslen(eltype(r), length(r), r.offset)
-nbitslen(::Type{Float64}, len, offset) = min(26, nbitslen(len, offset))
-nbitslen(::Type{Float32}, len, offset) = min(12, nbitslen(len, offset))
-nbitslen(::Type{Float16}, len, offset) = min(5,  nbitslen(len, offset))
-nbitslen(len, offset) = len < 2 ? 0 : ceil(Int, log2(max(offset-1, len-offset)))
+nbitslen(::Type{T}, len, offset) where {T<:IEEEFloat} =
+    min(cld(precision(T), 2), nbitslen(len, offset))
+# The +1 here is for safety, because the precision of the significand
+# is 1 bit higher than the number that are explicitly stored.
+nbitslen(len, offset) = len < 2 ? 0 : ceil(Int, log2(max(offset-1, len-offset))) + 1
 
 eltype(::Type{TwicePrecision{T}}) where {T} = T
 
@@ -83,25 +256,109 @@ big(x::TwicePrecision) = big(x.hi) + big(x.lo)
 
 zero(::Type{TwicePrecision{T}}) where {T} = TwicePrecision{T}(0, 0)
 
+# Arithmetic
+
+function +(x::TwicePrecision, y::Number)
+    s_hi, s_lo = add12(x.hi, y)
+    TwicePrecision(canonicalize2(s_hi, s_lo+x.lo)...)
+end
++(x::Number, y::TwicePrecision) = y+x
+
+function +(x::TwicePrecision{T}, y::TwicePrecision{T}) where T
+    r = x.hi + y.hi
+    s = abs(x.hi) > abs(y.hi) ? (((x.hi - r) + y.hi) + y.lo) + x.lo : (((y.hi - r) + x.hi) + x.lo) + y.lo
+    TwicePrecision(canonicalize2(r, s)...)
+end
++(x::TwicePrecision, y::TwicePrecision) = +(promote_noncircular(x, y)...)
+
+-(x::TwicePrecision, y::TwicePrecision) = x + (-y)
+-(x::TwicePrecision, y::Number) = x + (-y)
+-(x::Number, y::TwicePrecision) = x + (-y)
+
+function *(x::TwicePrecision, v::Number)
+    v == 0 && return TwicePrecision(x.hi*v, x.lo*v)
+    x * TwicePrecision{typeof(x.hi*v)}(v)
+end
+function *(x::TwicePrecision{<:IEEEFloat}, v::Integer)
+    v == 0 && return TwicePrecision(x.hi*v, x.lo*v)
+    nb = ceil(Int, log2(abs(v)))
+    u = truncbits(x.hi, nb)
+    TwicePrecision(canonicalize2(u*v, ((x.hi-u) + x.lo)*v)...)
+end
+*(v::Number, x::TwicePrecision) = x*v
+
+function *(x::TwicePrecision{T}, y::TwicePrecision{T}) where {T}
+    zh, zl = mul12(x.hi, y.hi)
+    ret = TwicePrecision{T}(canonicalize2(zh, (x.hi * y.lo + x.lo * y.hi) + zl)...)
+    ifelse(iszero(zh) | !isfinite(zh), TwicePrecision{T}(zh, zh), ret)
+end
+*(x::TwicePrecision, y::TwicePrecision) = *(promote_noncircular(x, y)...)
+
+function /(x::TwicePrecision, v::Number)
+    x / TwicePrecision{typeof(x.hi/v)}(v)
+end
+
+function /(x::TwicePrecision, y::TwicePrecision)
+    hi = x.hi / y.hi
+    uh, ul = mul12(hi, y.hi)
+    lo = ((((x.hi - uh) - ul) + x.lo) - hi*y.lo)/y.hi
+    ret = TwicePrecision(canonicalize2(hi, lo)...)
+    ifelse(iszero(hi) | !isfinite(hi), TwicePrecision(hi, hi), ret)
+end
+
 ## StepRangeLen
 
-# If using TwicePrecision numbers, deliberately force user to specify offset
-StepRangeLen(ref::TwicePrecision{T}, step::TwicePrecision{T}, len::Integer, offset::Integer) where {T} =
+# Use TwicePrecision only for Float64; use Float64 for T<:Union{Float16,Float32}
+# Ratio-of-integers constructors
+function steprangelen_hp(::Type{Float64}, ref::Tuple{Integer,Integer},
+                         step::Tuple{Integer,Integer}, nb::Integer,
+                         len::Integer, offset::Integer)
+    StepRangeLen(TwicePrecision{Float64}(ref),
+                 TwicePrecision{Float64}(step, nb), Int(len), offset)
+end
+
+function steprangelen_hp(::Type{T}, ref::Tuple{Integer,Integer},
+                         step::Tuple{Integer,Integer}, nb::Integer,
+                         len::Integer, offset::Integer) where {T<:IEEEFloat}
+    StepRangeLen{T}(ref[1]/ref[2], step[1]/step[2], Int(len), offset)
+end
+
+# AbstractFloat constructors (can supply a single number or a 2-tuple
+const F_or_FF = Union{AbstractFloat, Tuple{AbstractFloat,AbstractFloat}}
+asF64(x::AbstractFloat) = Float64(x)
+asF64(x::Tuple{AbstractFloat,AbstractFloat}) = Float64(x[1]) + Float64(x[2])
+
+function steprangelen_hp(::Type{Float64}, ref::F_or_FF,
+                         step::F_or_FF, nb::Integer,
+                         len::Integer, offset::Integer)
+    StepRangeLen(TwicePrecision{Float64}(ref...),
+                 twiceprecision(TwicePrecision{Float64}(step...), nb), Int(len), offset)
+end
+
+function steprangelen_hp(::Type{T}, ref::F_or_FF,
+                         step::F_or_FF, nb::Integer,
+                         len::Integer, offset::Integer) where {T<:IEEEFloat}
+    StepRangeLen{T}(asF64(ref),
+                    asF64(step), Int(len), offset)
+end
+
+
+
+StepRangeLen(ref::TwicePrecision{T}, step::TwicePrecision{T},
+             len::Integer, offset::Integer=1) where {T} =
     StepRangeLen{T,TwicePrecision{T},TwicePrecision{T}}(ref, step, len, offset)
 
 # Construct range for rational start=start_n/den, step=step_n/den
 function floatrange(::Type{T}, start_n::Integer, step_n::Integer, len::Integer, den::Integer) where T
     if len < 2
-        return StepRangeLen(TwicePrecision{T}((start_n, den)),
-                            TwicePrecision{T}((step_n, den)), Int(len), 1)
+        return steprangelen_hp(T, (start_n, den), (step_n, den), 0, Int(len), 1)
     end
     # index of smallest-magnitude value
     imin = clamp(round(Int, -start_n/step_n+1), 1, Int(len))
     # Compute smallest-magnitude element to 2x precision
     ref_n = start_n+(imin-1)*step_n  # this shouldn't overflow, so don't check
     nb = nbitslen(T, len, imin)
-    StepRangeLen(TwicePrecision{T}((ref_n, den)),
-                 TwicePrecision{T}((step_n, den), nb), Int(len), imin)
+    steprangelen_hp(T, (ref_n, den), (step_n, den), nb, Int(len), imin)
 end
 
 function floatrange(a::AbstractFloat, st::AbstractFloat, len::Real, divisor::AbstractFloat)
@@ -116,8 +373,7 @@ function floatrange(a::AbstractFloat, st::AbstractFloat, len::Real, divisor::Abs
     end
     # Fallback (misses the opportunity to set offset different from 1,
     # but otherwise this is still high-precision)
-    StepRangeLen(TwicePrecision{T}((a,divisor)),
-                 TwicePrecision{T}((st,divisor), nbitslen(T, len, 1)), Int(len), 1)
+    steprangelen_hp(T, (a,divisor), (st,divisor), nbitslen(T, len, 1), Int(len), 1)
 end
 
 function colon(start::T, step::T, stop::T) where T<:Union{Float16,Float32,Float64}
@@ -158,7 +414,7 @@ function colon(start::T, step::T, stop::T) where T<:Union{Float16,Float32,Float6
         # if we've overshot the end, subtract one:
         len -= (start < stop < stop′) + (start > stop > stop′)
     end
-    StepRangeLen(TwicePrecision(start, zero(T)), twiceprecision(step, nbitslen(T, len, 1)), len)
+    steprangelen_hp(T, start, step, 0, len, 1)
 end
 
 function range(a::T, st::T, len::Integer) where T<:Union{Float16,Float32,Float64}
@@ -175,38 +431,28 @@ function range(a::T, st::T, len::Integer) where T<:Union{Float16,Float32,Float64
             return floatrange(T, start_n, step_n, len, den)
         end
     end
-    StepRangeLen(TwicePrecision(a, zero(T)), TwicePrecision(st, zero(T)), len)
-end
-
-step(r::StepRangeLen{T,R,S}) where {T,R,S<:TwicePrecision} = convert(eltype(S), r.step)
-
-start(r::StepRangeLen{<:Any,<:TwicePrecision,<:TwicePrecision}) = 1
-done(r::StepRangeLen{<:Any,<:TwicePrecision,<:TwicePrecision}, i::Int) = length(r) < i
-function next(r::StepRangeLen{<:Any,<:TwicePrecision,<:TwicePrecision}, i::Int)
-    @_inline_meta
-    unsafe_getindex(r, i), i+1
+    steprangelen_hp(T, a, st, 0, len, 1)
 end
 
 # This assumes that r.step has already been split so that (0:len-1)*r.step.hi is exact
 function unsafe_getindex(r::StepRangeLen{T,<:TwicePrecision,<:TwicePrecision}, i::Integer) where T
-    # Very similar to _getindex_hiprec, but optimized to avoid a 2nd call to add2
+    # Very similar to _getindex_hiprec, but optimized to avoid a 2nd call to add12
     @_inline_meta
     u = i - r.offset
     shift_hi, shift_lo = u*r.step.hi, u*r.step.lo
-    x_hi, x_lo = add2(r.ref.hi, shift_hi)
+    x_hi, x_lo = add12(r.ref.hi, shift_hi)
     T(x_hi + (x_lo + (shift_lo + r.ref.lo)))
 end
 
 function _getindex_hiprec(r::StepRangeLen{<:Any,<:TwicePrecision,<:TwicePrecision}, i::Integer)
     u = i - r.offset
     shift_hi, shift_lo = u*r.step.hi, u*r.step.lo
-    x_hi, x_lo = add2(r.ref.hi, shift_hi)
-    x_hi, x_lo = add2(x_hi, x_lo + (shift_lo + r.ref.lo))
+    x_hi, x_lo = add12(r.ref.hi, shift_hi)
+    x_hi, x_lo = add12(x_hi, x_lo + (shift_lo + r.ref.lo))
     TwicePrecision(x_hi, x_lo)
 end
 
 function getindex(r::StepRangeLen{T,<:TwicePrecision,<:TwicePrecision}, s::OrdinalRange{<:Integer}) where T
-    @_inline_meta
     @boundscheck checkbounds(r, s)
     soffset = 1 + round(Int, (r.offset - first(s))/step(s))
     soffset = clamp(soffset, 1, length(s))
@@ -234,11 +480,15 @@ convert(::Type{StepRangeLen{T,R,S}}, r::StepRangeLen{T,R,S}) where {T<:AbstractF
 convert(::Type{StepRangeLen{T,R,S}}, r::StepRangeLen) where {T<:AbstractFloat,R<:TwicePrecision,S<:TwicePrecision} =
     _convertSRL(StepRangeLen{T,R,S}, r)
 
-convert(::Type{StepRangeLen{T}}, r::StepRangeLen) where {T<:Union{Float16,Float32,Float64}} =
-    _convertSRL(StepRangeLen{T,TwicePrecision{T},TwicePrecision{T}}, r)
+convert(::Type{StepRangeLen{Float64}}, r::StepRangeLen) =
+    _convertSRL(StepRangeLen{Float64,TwicePrecision{Float64},TwicePrecision{Float64}}, r)
+convert(::Type{StepRangeLen{T}}, r::StepRangeLen) where {T<:IEEEFloat} =
+    _convertSRL(StepRangeLen{T,Float64,Float64}, r)
 
-convert(::Type{StepRangeLen{T}}, r::Range) where {T<:Union{Float16,Float32,Float64}} =
-    _convertSRL(StepRangeLen{T,TwicePrecision{T},TwicePrecision{T}}, r)
+convert(::Type{StepRangeLen{Float64}}, r::Range) =
+    _convertSRL(StepRangeLen{Float64,TwicePrecision{Float64},TwicePrecision{Float64}}, r)
+convert(::Type{StepRangeLen{T}}, r::Range) where {T<:IEEEFloat} =
+    _convertSRL(StepRangeLen{T,Float64,Float64}, r)
 
 function _convertSRL(::Type{StepRangeLen{T,R,S}}, r::StepRangeLen{<:Integer}) where {T,R,S}
     StepRangeLen{T,R,S}(R(r.ref), S(r.step), length(r), r.offset)
@@ -285,12 +535,12 @@ function sum(r::StepRangeLen)
     sp, sn = sumpair(np), sumpair(nn)
     tp = _prod(r.step, sp[1], sp[2])
     tn = _prod(r.step, sn[1], sn[2])
-    s_hi, s_lo = add2(tp.hi, -tn.hi)
+    s_hi, s_lo = add12(tp.hi, -tn.hi)
     s_lo += tp.lo - tn.lo
     # Add in contributions of ref
     ref = r.ref * l
-    sm_hi, sm_lo = add2(s_hi, ref.hi)
-    add2(sm_hi, sm_lo + ref.lo)[1]
+    sm_hi, sm_lo = add12(s_hi, ref.hi)
+    add12(sm_hi, sm_lo + ref.lo)[1]
 end
 
 # sum(1:n) as a product of two integers
@@ -316,10 +566,10 @@ end
 ## LinSpace
 
 # For Float16, Float32, and Float64, linspace returns a StepRangeLen
-function linspace(start::T, stop::T, len::Integer) where T<:Union{Float16,Float32,Float64}
+function linspace(start::T, stop::T, len::Integer) where {T<:IEEEFloat}
     len < 2 && return _linspace1(T, start, stop, len)
     if start == stop
-        return StepRangeLen(TwicePrecision(start,zero(T)), TwicePrecision(zero(T),zero(T)), len)
+        return steprangelen_hp(T, start, zero(T), 0, len, 1)
     end
     # Attempt to find exact rational approximations
     start_n, start_d = rat(start)
@@ -338,15 +588,15 @@ function linspace(start::T, stop::T, len::Integer) where T<:Union{Float16,Float3
     _linspace(start, stop, len)
 end
 
-function _linspace(start::T, stop::T, len::Integer) where T<:Union{Float16,Float32,Float64}
+function _linspace(start::T, stop::T, len::Integer) where {T<:IEEEFloat}
     (isfinite(start) && isfinite(stop)) || throw(ArgumentError("start and stop must be finite, got $start and $stop"))
     # Find the index that returns the smallest-magnitude element
     Δ, Δfac = stop-start, 1
     if !isfinite(Δ)   # handle overflow for large endpoints
         Δ, Δfac = stop/len - start/len, Int(len)
     end
-    tmin = -(start/Δ)/Δfac            # interpolation t such that return value is 0
-    imin = round(Int, tmin*(len-1)+1)
+    tmin = -(start/Δ)/Δfac            # t such that (1-t)*start + t*stop == 0
+    imin = round(Int, tmin*(len-1)+1) # index approximately corresponding to t
     if 1 < imin < len
         # The smallest-magnitude element is in the interior
         t = (imin-1)/(len-1)
@@ -364,8 +614,7 @@ function _linspace(start::T, stop::T, len::Integer) where T<:Union{Float16,Float
     if len == 2 && !isfinite(step)
         # For very large endpoints where step overflows, exploit the
         # split-representation to handle the overflow
-        return StepRangeLen(TwicePrecision(start, zero(T)),
-                            TwicePrecision(-start, stop), 2)
+        return steprangelen_hp(T, start, (-start, stop), 0, 2, 1)
     end
     # 2x calculations to get high precision endpoint matching while also
     # preventing overflow in ref_hi+(i-offset)*step_hi
@@ -373,33 +622,27 @@ function _linspace(start::T, stop::T, len::Integer) where T<:Union{Float16,Float
     step_hi_pre = clamp(step, max(-(m+ref)/k, (-m+ref)/k), min((m-ref)/k, (m+ref)/k))
     nb = nbitslen(T, len, imin)
     step_hi = truncbits(step_hi_pre, nb)
-    x1_hi, x1_lo = add2((1-imin)*step_hi, ref)
-    x2_hi, x2_lo = add2((len-imin)*step_hi, ref)
+    x1_hi, x1_lo = add12((1-imin)*step_hi, ref)
+    x2_hi, x2_lo = add12((len-imin)*step_hi, ref)
     a, b = (start - x1_hi) - x1_lo, (stop - x2_hi) - x2_lo
     step_lo = (b - a)/(len - 1)
     ref_lo = a - (1 - imin)*step_lo
-    StepRangeLen(TwicePrecision(ref, ref_lo), TwicePrecision(step_hi, step_lo), Int(len), imin)
+    steprangelen_hp(T, (ref, ref_lo), (step_hi, step_lo), 0, Int(len), imin)
 end
 
 # linspace for rational numbers, start = start_n/den, stop = stop_n/den
 # Note this returns a StepRangeLen
 function linspace(::Type{T}, start_n::Integer, stop_n::Integer, len::Integer, den::Integer) where T
     len < 2 && return _linspace1(T, start_n/den, stop_n/den, len)
-    start_n == stop_n && return StepRangeLen(TwicePrecision{T}((start_n, den)), zero(TwicePrecision{T}), len)
+    start_n == stop_n && return steprangelen_hp(T, (start_n, den), (zero(start_n), den), 0, len)
     tmin = -start_n/(Float64(stop_n) - Float64(start_n))
     imin = round(Int, tmin*(len-1)+1)
     imin = clamp(imin, 1, Int(len))
-    # Compute (1-t)*a and t*b separately in 2x precision (itp = interpolant)...
-    dent = (den, len-1)  # represent products as a tuple to eliminate risk of overflow
-    start_itp = proddiv(T, (len-imin, start_n), dent)
-    stop_itp = proddiv(T, (imin-1, stop_n), dent)
-    # ...and then combine them to make ref
-    ref = start_itp + stop_itp
-    # Compute step to 2x precision without risking overflow...
-    rend = proddiv(T, (stop_n,), dent)
-    rbeg = proddiv(T, (-start_n,), dent)
-    step = twiceprecision(rbeg + rend, nbitslen(T, len, imin)) # ...and truncate hi-bits as needed
-    StepRangeLen(ref, step, Int(len), imin)
+    ref_num = Int128(len-imin) * start_n + Int128(imin-1) * stop_n
+    ref_denom = Int128(len-1) * den
+    ref = (ref_num, ref_denom)
+    step_full = (Int128(stop_n) - Int128(start_n), ref_denom)
+    steprangelen_hp(T, ref, step_full,  nbitslen(T, len, imin), Int(len), imin)
 end
 
 # For len < 2
@@ -440,82 +683,12 @@ narrow(::Type{Float64}) = Float32
 narrow(::Type{Float32}) = Float16
 narrow(::Type{Float16}) = Float16
 
-function add2(u::T, v::T) where T<:Number
-    @_inline_meta
-    u, v = ifelse(abs(v) > abs(u), (v, u), (u, v))
-    w = u + v
-    w, (u-w) + v
-end
-
-add2(u, v) = _add2(promote(u, v)...)
-_add2(u::T, v::T) where {T<:Number} = add2(u, v)
-_add2(u, v) = error("$u::$(typeof(u)) and $v::$(typeof(v)) cannot be promoted to a common type")
-
-function +(x::TwicePrecision, y::Number)
-    s_hi, s_lo = add2(x.hi, y)
-    TwicePrecision(s_hi, s_lo+x.lo)
-end
-+(x::Number, y::TwicePrecision) = y+x
-
-function +(x::TwicePrecision{T}, y::TwicePrecision{T}) where T
-    r = x.hi + y.hi
-    s = abs(x.hi) > abs(y.hi) ? (((x.hi - r) + y.hi) + y.lo) + x.lo : (((y.hi - r) + x.hi) + x.lo) + y.lo
-    TwicePrecision(r, s)
-end
-+(x::TwicePrecision, y::TwicePrecision) = _add2(promote(x, y)...)
-_add2(x::T, y::T) where {T<:TwicePrecision} = x + y
-_add2(x::TwicePrecision, y::TwicePrecision) = TwicePrecision(x.hi+y.hi, x.lo+y.lo)
-
-function *(x::TwicePrecision, v::Integer)
-    v == 0 && return TwicePrecision(x.hi*v, x.lo*v)
-    nb = ceil(Int, log2(abs(v)))
-    u = truncbits(x.hi, nb)
-    y_hi, y_lo = add2(u*v, ((x.hi-u) + x.lo)*v)
-    TwicePrecision(y_hi, y_lo)
-end
-
-function _mul2(x::TwicePrecision{T}, v::T) where T<:Union{Float16,Float32,Float64}
-    v == 0 && return TwicePrecision(T(0), T(0))
-    xhh, xhl = splitprec(x.hi)
-    vh, vl = splitprec(v)
-    y_hi, y_lo = add2(xhh*vh, xhh*vl + xhl*vh)
-    TwicePrecision(y_hi, y_lo + xhl*vl + x.lo*v)
-end
-
-_mul2(x::TwicePrecision, v::Number) = TwicePrecision(x.hi*v, x.lo*v)
-
-function *(x::TwicePrecision{R}, v::S) where R where S<:Number
-    T = promote_type(R, S)
-    _mul2(convert(TwicePrecision{T}, x), convert(T, v))
-end
-
-*(v::Number, x::TwicePrecision) = x*v
-
-function /(x::TwicePrecision, v::Number)
-    hi = x.hi/v
-    w = TwicePrecision(hi, zero(hi)) * v
-    lo = (((x.hi - w.hi) - w.lo) + x.lo)/v
-    y_hi, y_lo = add2(hi, lo)
-    TwicePrecision(y_hi, y_lo)
-end
-
-# hi-precision version of prod(num)/prod(den)
-# num and den are tuples to avoid risk of overflow
-function proddiv(T, num, den)
-    @_inline_meta
-    t = TwicePrecision(T(num[1]), zero(T))
-    t = _prod(t, tail(num)...)
-    _divt(t, den...)
-end
 function _prod(t::TwicePrecision, x, y...)
     @_inline_meta
     _prod(t * x, y...)
 end
 _prod(t::TwicePrecision) = t
-function _divt(t::TwicePrecision, x, y...)
-    @_inline_meta
-    _divt(t / x, y...)
-end
-_divt(t::TwicePrecision) = t
+<(x::TwicePrecision{T}, y::TwicePrecision{T}) where {T} =
+    x.hi < y.hi || ((x.hi == y.hi) & (x.lo < y.lo))
 
 isbetween(a, x, b) = a <= x <= b || b <= x <= a
diff --git a/test/ranges.jl b/test/ranges.jl
index 1b04b345d8eab..a953599223ad2 100644
--- a/test/ranges.jl
+++ b/test/ranges.jl
@@ -1,6 +1,188 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# ranges
+# Because ranges rely on high precision arithmetic, test those utilities first
+for (I, T) in ((Int16, Float16), (Int32, Float32), (Int64, Float64)), i = 1:10^3
+    i = rand(I) >> 1  # test large values below
+    hi, lo = Base.splitprec(T, i)
+    @test widen(hi) + widen(lo) == i
+    @test endswith(bits(hi), repeat('0', Base.Math.significand_bits(T) ÷ 2))
+end
+for (I, T) in ((Int16, Float16), (Int32, Float32), (Int64, Float64))
+    x = T(typemax(I))
+    Δi = ceil(I, eps(x))
+    for i = typemax(I)-2Δi:typemax(I)-Δi
+        hi, lo = Base.splitprec(T, i)
+        @test widen(hi) + widen(lo) == i
+        @test endswith(bits(hi), repeat('0', Base.Math.significand_bits(T) ÷ 2))
+    end
+    for i = typemin(I):typemin(I)+Δi
+        hi, lo = Base.splitprec(T, i)
+        @test widen(hi) + widen(lo) == i
+        @test endswith(bits(hi), repeat('0', Base.Math.significand_bits(T) ÷ 2))
+    end
+end
+
+# Compare precision in a manner sensitive to subnormals, which lose
+# precision compared to widening.
+function cmp_sn(w, hi, lo, slopbits=0)
+    if !isfinite(hi)
+        if abs(w) > realmax(typeof(hi))
+            return isinf(hi) && sign(w) == sign(hi)
+        end
+        if isnan(w) && isnan(hi)
+            return true
+        end
+        return w == hi
+    end
+    if abs(w) < subnormalmin(typeof(hi))
+        return (hi == zero(hi) || abs(w - widen(hi)) < abs(w)) && lo == zero(hi)
+    end
+    # Compare w == hi + lo unless `lo` issubnormal
+    z = widen(hi) + widen(lo)
+    if !issubnormal(lo) && lo != 0
+        if slopbits == 0
+            return z == w
+        end
+        wr, zr = roundshift(w, slopbits), roundshift(z, slopbits)
+        return max(wr-1, zero(wr)) <= zr <= wr+1
+    end
+    # round w to the same number of bits as z
+    zu = asbits(z)
+    wu = asbits(w)
+    lastbit = false
+    while zu > 0 && !isodd(zu)
+        lastbit = isodd(wu)
+        zu = zu >> 1
+        wu = wu >> 1
+    end
+    return wu <= zu <= wu + lastbit
+end
+
+asbits(x) = reinterpret(Base.uinttype(typeof(x)), x)
+
+function roundshift(x, n)
+    xu = asbits(x)
+    lastbit = false
+    for i = 1:n
+        lastbit = isodd(xu)
+        xu = xu >> 1
+    end
+    xu + lastbit
+end
+
+subnormalmin(::Type{T}) where T = reinterpret(T, Base.uinttype(T)(1))
+
+function highprec_pair(x, y)
+    slopbits = (Base.Math.significand_bits(typeof(widen(x))) + 1) -
+        2*(Base.Math.significand_bits(typeof(x)) + 1)
+    hi, lo = Base.add12(x, y)
+    @test cmp_sn(widen(x) + widen(y), hi, lo)
+    hi, lo = Base.mul12(x, y)
+    @test cmp_sn(widen(x) * widen(y), hi, lo)
+    y == 0 && return nothing
+    hi, lo = Base.div12(x, y)
+    @test cmp_sn(widen(x) / widen(y), hi, lo, slopbits)
+    nothing
+end
+
+# # This tests every possible pair of Float16s. It takes too long for
+# # ordinary use, which is why it's commented out.
+# function pair16()
+#     for yu in 0x0000:0xffff
+#         for xu in 0x0000:0xffff
+#             x, y = reinterpret(Float16, xu), reinterpret(Float16, yu)
+#             highprec_pair(x, y)
+#         end
+#     end
+# end
+
+for T in (Float16, Float32) # skip Float64 (bit representation of BigFloat is not available)
+    for i = 1:10^5
+        x, y = rand(T), rand(T)
+        highprec_pair(x, y)
+        highprec_pair(-x, y)
+        highprec_pair(x, -y)
+        highprec_pair(-x, -y)
+    end
+    # Make sure we test dynamic range too
+    for i = 1:10^5
+        x, y = rand(T), rand(T)
+        x == 0 || y == 0 && continue
+        x, y = log(x), log(y)
+        highprec_pair(x, y)
+    end
+end
+
+asww(x) = widen(widen(x.hi)) + widen(widen(x.lo))
+astuple(x) = (x.hi, x.lo)
+
+function cmp_sn2(w, hi, lo, slopbits=0)
+    if !isfinite(hi)
+        if abs(w) > realmax(typeof(hi))
+            return isinf(hi) && sign(w) == sign(hi)
+        end
+        if isnan(w) && isnan(hi)
+            return true
+        end
+        return w == hi
+    end
+    if abs(w) < subnormalmin(typeof(hi))
+        return (hi == zero(hi) || abs(w - widen(hi)) < abs(w)) && lo == zero(hi)
+    end
+    z = widen(hi) + widen(lo)
+    zu, wu = asbits(z), asbits(w)
+    while zu > 0 && !isodd(zu)
+        zu = zu >> 1
+        wu = wu >> 1
+    end
+    zu = zu >> slopbits
+    wu = wu >> slopbits
+    return wu - 1 <= zu <= wu + 1
+end
+
+# TwicePrecision test. These routines lose accuracy if you form
+# intermediate subnormals; with Float16, this happens so frequently,
+# let's only test Float32.
+T = Float32
+Tw = widen(T)
+slopbits = (Base.Math.significand_bits(Tw) + 1) -
+    2*(Base.Math.significand_bits(T) + 1)
+for i = 1:10^5
+    x = Base.TwicePrecision{T}(rand())
+    y = Base.TwicePrecision{T}(rand())
+    xw, yw = asww(x), asww(y)
+    @test cmp_sn2(Tw(xw+yw), astuple(x+y)..., slopbits)
+    @test cmp_sn2(Tw(xw-yw), astuple(x-y)..., slopbits)
+    @test cmp_sn2(Tw(xw*yw), astuple(x*y)..., slopbits)
+    @test cmp_sn2(Tw(xw/yw), astuple(x/y)..., slopbits)
+    y = rand(T)
+    yw = widen(widen(y))
+    @test cmp_sn2(Tw(xw+yw), astuple(x+y)..., slopbits)
+    @test cmp_sn2(Tw(xw-yw), astuple(x-y)..., slopbits)
+    @test cmp_sn2(Tw(xw*yw), astuple(x*y)..., slopbits)
+    @test cmp_sn2(Tw(xw/yw), astuple(x/y)..., slopbits)
+end
+
+x1 = Base.TwicePrecision{Float64}(1)
+x0 = Base.TwicePrecision{Float64}(0)
+xinf = Base.TwicePrecision{Float64}(Inf)
+@test Float64(x1+x0)  == 1
+@test Float64(x1+0)   == 1
+@test Float64(x1+0.0) == 1
+@test Float64(x1*x0)  == 0
+@test Float64(x1*0)   == 0
+@test Float64(x1*0.0) == 0
+@test Float64(x1/x0)  == Inf
+@test Float64(x1/0)   == Inf
+@test Float64(xinf*x1) == Inf
+@test isnan(Float64(xinf*x0))
+@test isnan(Float64(xinf*0))
+@test isnan(Float64(xinf*0.0))
+@test isnan(Float64(x0/x0))
+@test isnan(Float64(x0/0))
+@test isnan(Float64(x0/0.0))
+
+## ranges
 @test size(10:1:0) == (0,)
 @test length(1:.2:2) == 6
 @test length(1.:.2:2.) == 6
@@ -338,14 +520,14 @@ end
 
 for T = (Float32, Float64,),# BigFloat),
     a = -5:25, s = [-5:-1;1:25;], d = 1:25, n = -1:15
-    den   = convert(T,d)
-    start = convert(T,a)/den
-    step  = convert(T,s)/den
-    stop  = convert(T,(a+(n-1)*s))/den
-    vals  = T[a:s:a+(n-1)*s;]./den
-    r = start:step:stop
+    denom = convert(T,d)
+    strt = convert(T,a)/denom
+    Δ     = convert(T,s)/denom
+    stop  = convert(T,(a+(n-1)*s))/denom
+    vals  = T[a:s:a+(n-1)*s;]./denom
+    r = strt:Δ:stop
     @test [r;] == vals
-    @test [linspace(start, stop, length(r));] == vals
+    @test [linspace(strt, stop, length(r));] == vals
     # issue #7420
     n = length(r)
     @test [r[1:n];] == [r;]
@@ -362,27 +544,31 @@ end
 @test [-3*0.05:-0.05:-0.2;] == [linspace(-3*0.05,-0.2,2);] == [-3*0.05,-0.2]
 @test [-0.2:0.05:-3*0.05;]  == [linspace(-0.2,-3*0.05,2);] == [-0.2,-3*0.05]
 
-for T = (Float32, Float64,), i = 1:2^15, n = 1:5
-    start, step = randn(T), randn(T)
-    step == 0 && continue
-    stop = start + (n-1)*step
-    # `n` is not necessarily unique s.t. `start + (n-1)*step == stop`
-    # so test that `length(start:step:stop)` satisfies this identity
-    # and is the closest value to `(stop-start)/step` to do so
-    lo = hi = n
-    while start + (lo-1)*step == stop; lo -= 1; end
-    while start + (hi-1)*step == stop; hi += 1; end
-    m = clamp(round(Int, (stop-start)/step) + 1, lo+1, hi-1)
-    r = start:step:stop
-    @test m == length(r)
-    # FIXME: these fail some small portion of the time
-    @test_skip start == first(r)
-    @test_skip stop  == last(r)
-    l = linspace(start,stop,n)
-    @test n == length(l)
-    # FIXME: these fail some small portion of the time
-    @test_skip start == first(l)
-    @test_skip stop  == last(l)
+function range_fuzztests(::Type{T}, niter, nrange) where {T}
+    for i = 1:niter, n in nrange
+        strt, Δ = randn(T), randn(T)
+        Δ == 0 && continue
+        stop = strt + (n-1)*Δ
+        # `n` is not necessarily unique s.t. `strt + (n-1)*Δ == stop`
+        # so test that `length(strt:Δ:stop)` satisfies this identity
+        # and is the closest value to `(stop-strt)/Δ` to do so
+        lo = hi = n
+        while strt + (lo-1)*Δ == stop; lo -= 1; end
+        while strt + (hi-1)*Δ == stop; hi += 1; end
+        m = clamp(round(Int, (stop-strt)/Δ) + 1, lo+1, hi-1)
+        r = strt:Δ:stop
+        @test m == length(r)
+        @test strt == first(r)
+        @test Δ == step(r)
+        @test_skip stop == last(r)
+        l = linspace(strt,stop,n)
+        @test n == length(l)
+        @test strt == first(l)
+        @test stop  == last(l)
+    end
+end
+for T = (Float32, Float64,)
+    range_fuzztests(T, 2^15, 1:5)
 end
 
 # Inexact errors on 32 bit architectures. #22613
@@ -892,6 +1078,11 @@ for i = 2:4
     @test r[i] == x
 end
 
+# issue #23178
+r = linspace(Float16(0.1094), Float16(0.9697), 300)
+@test r[1] == Float16(0.1094)
+@test r[end] == Float16(0.9697)
+
 # issue #20382
 r = @inferred(colon(big(1.0),big(2.0),big(5.0)))
 @test eltype(r) == BigFloat
@@ -958,7 +1149,7 @@ end
     end
 end
 
-# Issue #23300
+# issue #23300
 x = -5:big(1.0):5
 @test map(Float64, x) === -5.0:1.0:5.0
 @test map(Float32, x) === -5.0f0:1.0f0:5.0f0