diff --git a/runtime/druntime/src/core/int128.d b/runtime/druntime/src/core/int128.d index e2c9debe77..12731fca90 100644 --- a/runtime/druntime/src/core/int128.d +++ b/runtime/druntime/src/core/int128.d @@ -40,6 +40,28 @@ else else private enum Cent_alignment = (size_t.sizeof * 2); } +version (LDC) +{ + pragma(inline, true) + private Cent ldc_binop(string op)(Cent a, Cent b) pure @trusted + { + import ldc.llvmasm : __ir_pure; + + enum int alignI = Cent_alignment; // => no suffix for .stringof + enum alignSuffix = ", align " ~ alignI.stringof ~ "\n"; + + Cent r = void; + __ir_pure!( + "%a = load i128, ptr %0" ~ alignSuffix ~ + "%b = load i128, ptr %1" ~ alignSuffix ~ + "%r = " ~ op ~ " i128 %a, %b\n" ~ + "store i128 %r, ptr %2" ~ alignSuffix, void)(&a, &b, &r); + return r; + } +} + +pragma(inline, true): // LDC + /** * 128 bit integer type. * See_also: $(REF Int128, std,int128). @@ -193,9 +215,13 @@ Cent sar1(Cent c) */ pure Cent shl(Cent c, uint n) +in (n < Ubits * 2) { - if (n >= Ubits * 2) - return Zero; + version (LDC) + { + if (!__ctfe) + return ldc_binop!"shl"(c, Cent(n)); + } if (n >= Ubits) { @@ -220,9 +246,13 @@ Cent shl(Cent c, uint n) */ pure Cent shr(Cent c, uint n) +in (n < Ubits * 2) { - if (n >= Ubits * 2) - return Zero; + version (LDC) + { + if (!__ctfe) + return ldc_binop!"lshr"(c, Cent(n)); + } if (n >= Ubits) { @@ -247,18 +277,19 @@ Cent shr(Cent c, uint n) */ pure Cent sar(Cent c, uint n) +in (n < Ubits * 2) { + version (LDC) + { + if (!__ctfe) + return ldc_binop!"ashr"(c, Cent(n)); + } + const signmask = -(c.hi >> (Ubits - 1)); const signshift = (Ubits * 2) - n; c = shr(c, n); - // Sign extend all bits beyond the precision of Cent. - if (n >= Ubits * 2) - { - c.hi = signmask; - c.lo = signmask; - } - else if (signshift >= Ubits * 2) + if (signshift == Ubits * 2) { } else if (signshift >= Ubits) @@ -321,6 +352,8 @@ pure Cent rol(Cent c, uint n) { n &= Ubits * 2 - 1; + if (n == 0) + return c; Cent l = shl(c, n); Cent r = shr(c, Ubits * 2 - n); return or(l, r); @@ -338,6 +371,8 @@ pure Cent ror(Cent c, uint n) { n &= Ubits * 2 - 1; + if (n == 0) + return c; Cent r = shr(c, n); Cent l = shl(c, Ubits * 2 - n); return or(r, l); @@ -354,6 +389,12 @@ Cent ror(Cent c, uint n) pure Cent and(Cent c1, Cent c2) { + version (LDC) + { + if (!__ctfe) + return ldc_binop!"and"(c1, c2); + } + const Cent ret = { lo:c1.lo & c2.lo, hi:c1.hi & c2.hi }; return ret; } @@ -369,6 +410,12 @@ Cent and(Cent c1, Cent c2) pure Cent or(Cent c1, Cent c2) { + version (LDC) + { + if (!__ctfe) + return ldc_binop!"or"(c1, c2); + } + const Cent ret = { lo:c1.lo | c2.lo, hi:c1.hi | c2.hi }; return ret; } @@ -384,6 +431,12 @@ Cent or(Cent c1, Cent c2) pure Cent xor(Cent c1, Cent c2) { + version (LDC) + { + if (!__ctfe) + return ldc_binop!"xor"(c1, c2); + } + const Cent ret = { lo:c1.lo ^ c2.lo, hi:c1.hi ^ c2.hi }; return ret; } @@ -399,6 +452,12 @@ Cent xor(Cent c1, Cent c2) pure Cent add(Cent c1, Cent c2) { + version (LDC) + { + if (!__ctfe) + return ldc_binop!"add"(c1, c2); + } + U r = cast(U)(c1.lo + c2.lo); const Cent ret = { lo:r, hi:cast(U)(c1.hi + c2.hi + (r < c1.lo)) }; return ret; @@ -415,6 +474,12 @@ Cent add(Cent c1, Cent c2) pure Cent sub(Cent c1, Cent c2) { + version (LDC) + { + if (!__ctfe) + return ldc_binop!"sub"(c1, c2); + } + return add(c1, neg(c2)); } @@ -429,6 +494,12 @@ Cent sub(Cent c1, Cent c2) pure Cent mul(Cent c1, Cent c2) { + version (LDC) + { + if (!__ctfe) + return ldc_binop!"mul"(c1, c2); + } + enum mulmask = (1UL << (Ubits / 2)) - 1; enum mulshift = Ubits / 2; @@ -474,6 +545,12 @@ Cent mul(Cent c1, Cent c2) pure Cent udiv(Cent c1, Cent c2) { + version (none) // prefer upstream udivmod implementation + { + if (!__ctfe) + return ldc_binop!"udiv"(c1, c2); + } + Cent modulus; return udivmod(c1, c2, modulus); } @@ -490,67 +567,20 @@ Cent udiv(Cent c1, Cent c2) pure Cent udivmod(Cent c1, Cent c2, out Cent modulus) { - //printf("udiv c1(%llx,%llx) c2(%llx,%llx)\n", c1.lo, c1.hi, c2.lo, c2.hi); - // Based on "Unsigned Doubleword Division" in Hacker's Delight - import core.bitop; - - // Divides a 128-bit dividend by a 64-bit divisor. - // The result must fit in 64 bits. - static U udivmod128_64(Cent c1, U c2, out U modulus) + version (none) // prefer upstream implementation { - // We work in base 2^^32 - enum base = 1UL << 32; - enum divmask = (1UL << (Ubits / 2)) - 1; - enum divshift = Ubits / 2; - - // Check for overflow and divide by 0 - if (c1.hi >= c2) - { - modulus = 0UL; - return ~0UL; - } - - // Computes [num1 num0] / den - static uint udiv96_64(U num1, uint num0, U den) + if (!__ctfe) { - // Extract both digits of the denominator - const den1 = cast(uint)(den >> divshift); - const den0 = cast(uint)(den & divmask); - // Estimate ret as num1 / den1, and then correct it - U ret = num1 / den1; - const t2 = (num1 % den1) * base + num0; - const t1 = ret * den0; - if (t1 > t2) - ret -= (t1 - t2 > den) ? 2 : 1; - return cast(uint)ret; + const r = ldc_binop!"udiv"(c1, c2); + modulus = ldc_binop!"urem"(c1, c2); + return r; } - - // Determine the normalization factor. We multiply c2 by this, so that its leading - // digit is at least half base. In binary this means just shifting left by the number - // of leading zeros, so that there's a 1 in the MSB. - // We also shift number by the same amount. This cannot overflow because c1.hi < c2. - const shift = (Ubits - 1) - bsr(c2); - c2 <<= shift; - U num2 = c1.hi; - num2 <<= shift; - num2 |= (c1.lo >> (-shift & 63)) & (-cast(I)shift >> 63); - c1.lo <<= shift; - - // Extract the low digits of the numerator (after normalizing) - const num1 = cast(uint)(c1.lo >> divshift); - const num0 = cast(uint)(c1.lo & divmask); - - // Compute q1 = [num2 num1] / c2 - const q1 = udiv96_64(num2, num1, c2); - // Compute the true (partial) remainder - const rem = num2 * base + num1 - q1 * c2; - // Compute q0 = [rem num0] / c2 - const q0 = udiv96_64(rem, num0, c2); - - modulus = (rem * base + num0 - q0 * c2) >> shift; - return (cast(U)q1 << divshift) | q0; } + //printf("udiv c1(%llx,%llx) c2(%llx,%llx)\n", c1.lo, c1.hi, c2.lo, c2.hi); + // Based on "Unsigned Doubleword Division" in Hacker's Delight + import core.bitop; + // Special cases if (!tst(c2)) { @@ -580,7 +610,7 @@ Cent udivmod(Cent c1, Cent c2, out Cent modulus) if (q1) c1.hi = c1.hi % c2.lo; Cent rem; - const q0 = udivmod128_64(c1, c2.lo, rem.lo); + const q0 = udivmod(c1, c2.lo, rem.lo); modulus = rem; const Cent ret = { lo:q0, hi:q1 }; return ret; @@ -601,7 +631,7 @@ Cent udivmod(Cent c1, Cent c2, out Cent modulus) // Get quotient from divide unsigned operation. U rem_ignored; - const Cent q1 = { lo:udivmod128_64(u1, v1, rem_ignored) }; + const Cent q1 = { lo:udivmod(u1, v1, rem_ignored) }; // Undo normalization and division of c1 by 2. Cent quotient = shr(shl(q1, shift), 63); @@ -628,6 +658,113 @@ Cent udivmod(Cent c1, Cent c2, out Cent modulus) return quotient; } +version (X86_64) +{ + version (GNU) version = GNU_OR_LDC_X86_64; + version (LDC) version = GNU_OR_LDC_X86_64; +} + +/**************************** + * Unsigned divide 128-bit c1 / 64-bit c2. The result must fit in 64 bits. + * The remainder after division is stored to modulus. + * Params: + * c1 = dividend + * c2 = divisor + * modulus = set to c1 % c2 + * Returns: + * quotient c1 / c2 + */ +pure +U udivmod(Cent c1, U c2, out U modulus) +{ + import core.bitop; + + if (!__ctfe) + { + version (GNU_OR_LDC_X86_64) + { + U ret = void; + asm pure @trusted nothrow @nogc + { + "divq %4" + : "=a"(ret), "=d"(modulus) + : "a"(c1.lo), "d"(c1.hi), "r"(c2) + : "cc"; + } + return ret; + } + else version (D_InlineAsm_X86_64) + { + const lo = c1.lo; + const hi = c1.hi; + U mod = void; + U ret = void; + asm pure @trusted nothrow @nogc + { + mov RAX, lo; + mov RDX, hi; + div c2; + mov mod, RDX; // DMD bug: cannot use modulus directly + mov ret, RAX; + } + modulus = mod; + return ret; + } + } + + // We work in base 2^^32 + enum base = 1UL << 32; + enum divmask = (1UL << (Ubits / 2)) - 1; + enum divshift = Ubits / 2; + + // Check for overflow and divide by 0 + if (c1.hi >= c2) + { + modulus = 0UL; + return ~0UL; + } + + // Computes [num1 num0] / den + static uint udiv96_64(U num1, uint num0, U den) + { + // Extract both digits of the denominator + const den1 = cast(uint)(den >> divshift); + const den0 = cast(uint)(den & divmask); + // Estimate ret as num1 / den1, and then correct it + U ret = num1 / den1; + const t2 = (num1 % den1) * base + num0; + const t1 = ret * den0; + if (t1 > t2) + ret -= (t1 - t2 > den) ? 2 : 1; + return cast(uint)ret; + } + + // Determine the normalization factor. We multiply c2 by this, so that its leading + // digit is at least half base. In binary this means just shifting left by the number + // of leading zeros, so that there's a 1 in the MSB. + // We also shift number by the same amount. This cannot overflow because c1.hi < c2. + const shift = (Ubits - 1) - bsr(c2); + c2 <<= shift; + U num2 = c1.hi; + num2 <<= shift; + num2 |= (c1.lo >> (-shift & 63)) & (-cast(I)shift >> 63); + c1.lo <<= shift; + + // Extract the low digits of the numerator (after normalizing) + const num1 = cast(uint)(c1.lo >> divshift); + const num0 = cast(uint)(c1.lo & divmask); + + // Compute q1 = [num2 num1] / c2 + const q1 = udiv96_64(num2, num1, c2); + // Compute the true (partial) remainder + const rem = num2 * base + num1 - q1 * c2; + // Compute q0 = [rem num0] / c2 + const q0 = udiv96_64(rem, num0, c2); + + modulus = (rem * base + num0 - q0 * c2) >> shift; + return (cast(U)q1 << divshift) | q0; +} + /**************************** * Signed divide c1 / c2. @@ -640,6 +777,12 @@ Cent udivmod(Cent c1, Cent c2, out Cent modulus) pure Cent div(Cent c1, Cent c2) { + version (none) // prefer upstream udivmod implementation + { + if (!__ctfe) + return ldc_binop!"sdiv"(c1, c2); + } + Cent modulus; return divmod(c1, c2, modulus); } @@ -656,6 +799,16 @@ Cent div(Cent c1, Cent c2) pure Cent divmod(Cent c1, Cent c2, out Cent modulus) { + version (none) // prefer upstream udivmod implementation + { + if (!__ctfe) + { + const r = ldc_binop!"sdiv"(c1, c2); + modulus = ldc_binop!"srem"(c1, c2); + return r; + } + } + /* Muck about with the signs so we can use the unsigned divide */ if (cast(I)c1.hi < 0) @@ -897,18 +1050,14 @@ unittest assert(shl(C10,0) == C10); assert(shl(C10,Ubits) == C10_0); assert(shl(C10,1) == C20); - assert(shl(C10,Ubits * 2) == C0); assert(shr(C10_0,0) == C10_0); assert(shr(C10_0,Ubits) == C10); assert(shr(C10_0,Ubits - 1) == C20); assert(shr(C10_0,Ubits + 1) == C5); - assert(shr(C10_0,Ubits * 2) == C0); assert(sar(C10_0,0) == C10_0); assert(sar(C10_0,Ubits) == C10); assert(sar(C10_0,Ubits - 1) == C20); assert(sar(C10_0,Ubits + 1) == C5); - assert(sar(C10_0,Ubits * 2) == C0); - assert(sar(Cm1,Ubits * 2) == Cm1); assert(shl1(C10) == C20); assert(shr1(C10_0) == C5_0); @@ -920,7 +1069,6 @@ unittest assert(udiv(C10,C2) == C5); assert(udivmod(C10,C2, modulus) == C5); assert(modulus == C0); assert(udivmod(C10,C3, modulus) == C3); assert(modulus == C1); - assert(udivmod(C10,C0, modulus) == Cm1); assert(modulus == C0); assert(udivmod(C2,C90_30, modulus) == C0); assert(modulus == C2); assert(udiv(mul(C90_30, C2), C2) == C90_30); assert(udiv(mul(C90_30, C2), C90_30) == C2); @@ -965,4 +1113,6 @@ unittest assert(rol(ror(C7_9, 5), 5) == C7_9); assert(rol(C7_9, 1) == rol1(C7_9)); assert(ror(C7_9, 1) == ror1(C7_9)); + assert(rol(C7_9, 0) == C7_9); + assert(ror(C7_9, 0) == C7_9); } diff --git a/tests/codegen/int128.d b/tests/codegen/int128.d new file mode 100644 index 0000000000..040e270846 --- /dev/null +++ b/tests/codegen/int128.d @@ -0,0 +1,35 @@ +// REQUIRES: target_X86 + +// RUN: %ldc -output-s -mtriple=x86_64-linux-gnu -O -of=%t.s %s && FileCheck %s < %t.s + +import core.int128; + +// CHECK: _D6int1285mulhiFmmZm: +ulong mulhi(ulong a, ulong b) +{ + // CHECK-NEXT: .cfi_startproc + // CHECK-NEXT: movq %rsi, %rax + // CHECK-NEXT: mulq %rdi + // CHECK-NEXT: movq %rdx, %rax + // CHECK-NEXT: retq + + return mul(Cent(a), Cent(b)).hi; +} + +// CHECK: _D6int12810mul_divmodFmmmJmZm: +ulong mul_divmod(ulong a, ulong b, ulong c, out ulong modulus) +{ + // CHECK-NEXT: .cfi_startproc + // CHECK-NEXT: movq %rdx, %r8 + // CHECK-NEXT: movq %rsi, %rax + // CHECK-NEXT: mulq %rdi + // CHECK-NEXT: movq $0, (%rcx) + // CHECK-NEXT: #APP + // CHECK-NEXT: divq %r8 + // CHECK-NEXT: #NO_APP + // CHECK-NEXT: movq %rdx, (%rcx) + // CHECK-NEXT: retq + + const product128 = mul(Cent(a), Cent(b)); + return udivmod(product128, c, modulus); +}