diff --git a/runtime/druntime/src/core/int128.d b/runtime/druntime/src/core/int128.d
index e2c9debe77..12731fca90 100644
--- a/runtime/druntime/src/core/int128.d
+++ b/runtime/druntime/src/core/int128.d
@@ -40,6 +40,28 @@ else
     else             private enum Cent_alignment = (size_t.sizeof * 2);
 }
 
+version (LDC) // LDC only: helper mapping Cent binary operations onto LLVM i128 instructions
+{
+    pragma(inline, true)
+    private Cent ldc_binop(string op)(Cent a, Cent b) pure @trusted // op: LLVM instruction name, e.g. "add"; returns `a op b`
+    {
+        import ldc.llvmasm : __ir_pure;
+        // The IR below reads a and b and writes r through the pointers passed as (&a, &b, &r).
+        enum int alignI = Cent_alignment; // typed as int => no literal suffix in .stringof
+        enum alignSuffix = ", align " ~ alignI.stringof ~ "\n"; // appended to every load/store
+
+        Cent r = void; // no default init; written by the IR store below
+        __ir_pure!(
+            "%a = load i128, ptr %0" ~ alignSuffix ~ // a reinterpreted as one i128
+            "%b = load i128, ptr %1" ~ alignSuffix ~ // b reinterpreted as one i128
+            "%r = " ~ op ~ " i128 %a, %b\n" ~        // the requested instruction
+            "store i128 %r, ptr %2" ~ alignSuffix, void)(&a, &b, &r);
+        return r;
+    }
+}
+
+pragma(inline, true): // LDC: request inlining for every declaration that follows in this module
+
 /**
  * 128 bit integer type.
  * See_also: $(REF Int128, std,int128).
@@ -193,9 +215,13 @@ Cent sar1(Cent c)
  */
 pure
 Cent shl(Cent c, uint n)
+in (n < Ubits * 2)
 {
-    if (n >= Ubits * 2)
-        return Zero;
+    version (LDC) if (!__ctfe) // run time: native i128 shl; relies on the in-contract (LLVM shifts by >= 128 bits are not defined)
+    {
+        const Cent count = { lo:n }; // by field name: positional Cent(n) would fill whichever field is declared first
+        return ldc_binop!"shl"(c, count);
+    }
 
     if (n >= Ubits)
     {
@@ -220,9 +246,13 @@ Cent shl(Cent c, uint n)
  */
 pure
 Cent shr(Cent c, uint n)
+in (n < Ubits * 2)
 {
-    if (n >= Ubits * 2)
-        return Zero;
+    version (LDC) if (!__ctfe) // run time: native i128 lshr; relies on the in-contract (LLVM shifts by >= 128 bits are not defined)
+    {
+        const Cent count = { lo:n }; // by field name: positional Cent(n) would fill whichever field is declared first
+        return ldc_binop!"lshr"(c, count);
+    }
 
     if (n >= Ubits)
     {
@@ -247,18 +277,19 @@ Cent shr(Cent c, uint n)
  */
 pure
 Cent sar(Cent c, uint n)
+in (n < Ubits * 2)
 {
+    version (LDC) if (!__ctfe) // run time: native i128 ashr; relies on the in-contract (LLVM shifts by >= 128 bits are not defined)
+    {
+        const Cent count = { lo:n }; // by field name: positional Cent(n) would fill whichever field is declared first
+        return ldc_binop!"ashr"(c, count);
+    }
+
     const signmask = -(c.hi >> (Ubits - 1));
     const signshift = (Ubits * 2) - n;
     c = shr(c, n);
 
-    // Sign extend all bits beyond the precision of Cent.
-    if (n >= Ubits * 2)
-    {
-        c.hi = signmask;
-        c.lo = signmask;
-    }
-    else if (signshift >= Ubits * 2)
+    if (signshift == Ubits * 2)
     {
     }
     else if (signshift >= Ubits)
@@ -321,6 +352,8 @@ pure
 Cent rol(Cent c, uint n)
 {
     n &= Ubits * 2 - 1;
+    if (n == 0) // identity rotation; also keeps shr(c, Ubits * 2 - n) below within shr's n < Ubits * 2 contract
+        return c;
     Cent l = shl(c, n);
     Cent r = shr(c, Ubits * 2 - n);
     return or(l, r);
@@ -338,6 +371,8 @@ pure
 Cent ror(Cent c, uint n)
 {
     n &= Ubits * 2 - 1;
+    if (n == 0) // identity rotation; also keeps shl(c, Ubits * 2 - n) below within shl's n < Ubits * 2 contract
+        return c;
     Cent r = shr(c, n);
     Cent l = shl(c, Ubits * 2 - n);
     return or(r, l);
@@ -354,6 +389,12 @@ Cent ror(Cent c, uint n)
 pure
 Cent and(Cent c1, Cent c2)
 {
+    version (LDC) // LDC: use a native LLVM i128 `and`
+    {
+        if (!__ctfe) // run time only; CTFE falls through to the portable code below
+            return ldc_binop!"and"(c1, c2);
+    }
+
     const Cent ret = { lo:c1.lo & c2.lo, hi:c1.hi & c2.hi };
     return ret;
 }
@@ -369,6 +410,12 @@ Cent and(Cent c1, Cent c2)
 pure
 Cent or(Cent c1, Cent c2)
 {
+    version (LDC) // LDC: use a native LLVM i128 `or`
+    {
+        if (!__ctfe) // run time only; CTFE falls through to the portable code below
+            return ldc_binop!"or"(c1, c2);
+    }
+
     const Cent ret = { lo:c1.lo | c2.lo, hi:c1.hi | c2.hi };
     return ret;
 }
@@ -384,6 +431,12 @@ Cent or(Cent c1, Cent c2)
 pure
 Cent xor(Cent c1, Cent c2)
 {
+    version (LDC) // LDC: use a native LLVM i128 `xor`
+    {
+        if (!__ctfe) // run time only; CTFE falls through to the portable code below
+            return ldc_binop!"xor"(c1, c2);
+    }
+
     const Cent ret = { lo:c1.lo ^ c2.lo, hi:c1.hi ^ c2.hi };
     return ret;
 }
@@ -399,6 +452,12 @@ Cent xor(Cent c1, Cent c2)
 pure
 Cent add(Cent c1, Cent c2)
 {
+    version (LDC)
+    {
+        if (!__ctfe)
+            return ldc_binop!"add"(c1, c2);
+    }
+
     U r = cast(U)(c1.lo + c2.lo);
     const Cent ret = { lo:r, hi:cast(U)(c1.hi + c2.hi + (r < c1.lo)) };
     return ret;
@@ -415,6 +474,12 @@ Cent add(Cent c1, Cent c2)
 pure
 Cent sub(Cent c1, Cent c2)
 {
+    version (LDC) // LDC: use a native LLVM i128 `sub`
+    {
+        if (!__ctfe) // run time only; CTFE falls through to add(c1, neg(c2)) below
+            return ldc_binop!"sub"(c1, c2);
+    }
+
     return add(c1, neg(c2));
 }
 
@@ -429,6 +494,12 @@ Cent sub(Cent c1, Cent c2)
 pure
 Cent mul(Cent c1, Cent c2)
 {
+    version (LDC)
+    {
+        if (!__ctfe)
+            return ldc_binop!"mul"(c1, c2);
+    }
+
     enum mulmask = (1UL << (Ubits / 2)) - 1;
     enum mulshift = Ubits / 2;
 
@@ -474,6 +545,12 @@ Cent mul(Cent c1, Cent c2)
 pure
 Cent udiv(Cent c1, Cent c2)
 {
+    version (none) // never compiled, kept for reference: prefer upstream udivmod implementation
+    {
+        if (!__ctfe)
+            return ldc_binop!"udiv"(c1, c2); // LLVM i128 unsigned division
+    }
+
     Cent modulus;
     return udivmod(c1, c2, modulus);
 }
@@ -490,67 +567,20 @@ Cent udiv(Cent c1, Cent c2)
 pure
 Cent udivmod(Cent c1, Cent c2, out Cent modulus)
 {
-    //printf("udiv c1(%llx,%llx) c2(%llx,%llx)\n", c1.lo, c1.hi, c2.lo, c2.hi);
-    // Based on "Unsigned Doubleword Division" in Hacker's Delight
-    import core.bitop;
-
-    // Divides a 128-bit dividend by a 64-bit divisor.
-    // The result must fit in 64 bits.
-    static U udivmod128_64(Cent c1, U c2, out U modulus)
+    version (none) // prefer upstream implementation
     {
-        // We work in base 2^^32
-        enum base = 1UL << 32;
-        enum divmask = (1UL << (Ubits / 2)) - 1;
-        enum divshift = Ubits / 2;
-
-        // Check for overflow and divide by 0
-        if (c1.hi >= c2)
-        {
-            modulus = 0UL;
-            return ~0UL;
-        }
-
-        // Computes [num1 num0] / den
-        static uint udiv96_64(U num1, uint num0, U den)
+        if (!__ctfe)
         {
-            // Extract both digits of the denominator
-            const den1 = cast(uint)(den >> divshift);
-            const den0 = cast(uint)(den & divmask);
-            // Estimate ret as num1 / den1, and then correct it
-            U ret = num1 / den1;
-            const t2 = (num1 % den1) * base + num0;
-            const t1 = ret * den0;
-            if (t1 > t2)
-                ret -= (t1 - t2 > den) ? 2 : 1;
-            return cast(uint)ret;
+            const r = ldc_binop!"udiv"(c1, c2);
+            modulus = ldc_binop!"urem"(c1, c2);
+            return r;
         }
-
-        // Determine the normalization factor. We multiply c2 by this, so that its leading
-        // digit is at least half base. In binary this means just shifting left by the number
-        // of leading zeros, so that there's a 1 in the MSB.
-        // We also shift number by the same amount. This cannot overflow because c1.hi < c2.
-        const shift = (Ubits - 1) - bsr(c2);
-        c2 <<= shift;
-        U num2 = c1.hi;
-        num2 <<= shift;
-        num2 |= (c1.lo >> (-shift & 63)) & (-cast(I)shift >> 63);
-        c1.lo <<= shift;
-
-        // Extract the low digits of the numerator (after normalizing)
-        const num1 = cast(uint)(c1.lo >> divshift);
-        const num0 = cast(uint)(c1.lo & divmask);
-
-        // Compute q1 = [num2 num1] / c2
-        const q1 = udiv96_64(num2, num1, c2);
-        // Compute the true (partial) remainder
-        const rem = num2 * base + num1 - q1 * c2;
-        // Compute q0 = [rem num0] / c2
-        const q0 = udiv96_64(rem, num0, c2);
-
-        modulus = (rem * base + num0 - q0 * c2) >> shift;
-        return (cast(U)q1 << divshift) | q0;
     }
 
+    //printf("udiv c1(%llx,%llx) c2(%llx,%llx)\n", c1.lo, c1.hi, c2.lo, c2.hi);
+    // Based on "Unsigned Doubleword Division" in Hacker's Delight
+    import core.bitop;
+
     // Special cases
     if (!tst(c2))
     {
@@ -580,7 +610,7 @@ Cent udivmod(Cent c1, Cent c2, out Cent modulus)
         if (q1)
             c1.hi = c1.hi % c2.lo;
         Cent rem;
-        const q0 = udivmod128_64(c1, c2.lo, rem.lo);
+        const q0 = udivmod(c1, c2.lo, rem.lo);
         modulus = rem;
         const Cent ret = { lo:q0, hi:q1 };
         return ret;
@@ -601,7 +631,7 @@ Cent udivmod(Cent c1, Cent c2, out Cent modulus)
 
     // Get quotient from divide unsigned operation.
     U rem_ignored;
-    const Cent q1 = { lo:udivmod128_64(u1, v1, rem_ignored) };
+    const Cent q1 = { lo:udivmod(u1, v1, rem_ignored) };
 
     // Undo normalization and division of c1 by 2.
     Cent quotient = shr(shl(q1, shift), 63);
@@ -628,6 +658,113 @@ Cent udivmod(Cent c1, Cent c2, out Cent modulus)
     return quotient;
 }
 
+version (X86_64) // defines GNU_OR_LDC_X86_64 for GDC or LDC on x86_64; it selects the string-template asm in udivmod below
+{
+    version (GNU) version = GNU_OR_LDC_X86_64;
+    version (LDC) version = GNU_OR_LDC_X86_64;
+}
+
+/****************************
+ * Unsigned divide 128-bit c1 / 64-bit c2. The result must fit in 64 bits.
+ * The remainder after division is stored to modulus.
+ * Params:
+ *      c1 = dividend
+ *      c2 = divisor; must be greater than c1.hi, otherwise the quotient does not fit in 64 bits
+ *      modulus = set to c1 % c2
+ * Returns:
+ *      quotient c1 / c2 (only the non-asm fallback checks c1.hi >= c2; it then returns ~0 and sets modulus to 0)
+ */
+pure
+U udivmod(Cent c1, U c2, out U modulus)
+{
+    import core.bitop;
+    // Run time: use an inline-asm divide where one of the x86_64 asm flavours is available.
+    if (!__ctfe)
+    {
+        version (GNU_OR_LDC_X86_64)
+        {
+            U ret = void;
+            asm pure @trusted nothrow @nogc
+            {
+                "divq %4" // %4 = c2; inputs: RAX = c1.lo, RDX = c1.hi; outputs: RAX -> ret, RDX -> modulus
+                : "=a"(ret), "=d"(modulus)
+                : "a"(c1.lo), "d"(c1.hi), "r"(c2)
+                : "cc";
+            }
+            return ret;
+        }
+        else version (D_InlineAsm_X86_64)
+        {
+            const lo = c1.lo;
+            const hi = c1.hi;
+            U mod = void;
+            U ret = void;
+            asm pure @trusted nothrow @nogc
+            {
+                mov RAX, lo;
+                mov RDX, hi;
+                div c2;
+                mov mod, RDX; // DMD bug: cannot use modulus directly
+                mov ret, RAX;
+            }
+            modulus = mod;
+            return ret;
+        }
+    }
+    // Portable fallback: reached during CTFE, or at run time when neither asm path above is compiled in.
+    // We work in base 2^^32
+    enum base = 1UL << 32;
+    enum divmask = (1UL << (Ubits / 2)) - 1;
+    enum divshift = Ubits / 2;
+
+    // Check for overflow and divide by 0
+    if (c1.hi >= c2)
+    {
+        modulus = 0UL;
+        return ~0UL;
+    }
+
+    // Computes [num1 num0] / den
+    static uint udiv96_64(U num1, uint num0, U den)
+    {
+        // Extract both digits of the denominator
+        const den1 = cast(uint)(den >> divshift);
+        const den0 = cast(uint)(den & divmask);
+        // Estimate ret as num1 / den1, and then correct it
+        U ret = num1 / den1;
+        const t2 = (num1 % den1) * base + num0;
+        const t1 = ret * den0;
+        if (t1 > t2)
+            ret -= (t1 - t2 > den) ? 2 : 1;
+        return cast(uint)ret;
+    }
+
+    // Determine the normalization factor. We multiply c2 by this, so that its leading
+    // digit is at least half base. In binary this means just shifting left by the number
+    // of leading zeros, so that there's a 1 in the MSB.
+    // We also shift number by the same amount. This cannot overflow because c1.hi < c2.
+    const shift = (Ubits - 1) - bsr(c2);
+    c2 <<= shift;
+    U num2 = c1.hi;
+    num2 <<= shift;
+    num2 |= (c1.lo >> (-shift & 63)) & (-cast(I)shift >> 63); // second factor is a mask: 0 when shift == 0 (presumably I is signed 64-bit)
+    c1.lo <<= shift;
+
+    // Extract the low digits of the numerator (after normalizing)
+    const num1 = cast(uint)(c1.lo >> divshift);
+    const num0 = cast(uint)(c1.lo & divmask);
+
+    // Compute q1 = [num2 num1] / c2
+    const q1 = udiv96_64(num2, num1, c2);
+    // Compute the true (partial) remainder
+    const rem = num2 * base + num1 - q1 * c2;
+    // Compute q0 = [rem num0] / c2
+    const q0 = udiv96_64(rem, num0, c2);
+
+    modulus = (rem * base + num0 - q0 * c2) >> shift; // shift back to undo the normalization
+    return (cast(U)q1 << divshift) | q0; // combine the two 32-bit quotient digits
+}
+
 
 /****************************
  * Signed divide c1 / c2.
@@ -640,6 +777,12 @@ Cent udivmod(Cent c1, Cent c2, out Cent modulus)
 pure
 Cent div(Cent c1, Cent c2)
 {
+    version (none) // never compiled, kept for reference: prefer upstream udivmod implementation
+    {
+        if (!__ctfe)
+            return ldc_binop!"sdiv"(c1, c2); // LLVM i128 signed division
+    }
+
     Cent modulus;
     return divmod(c1, c2, modulus);
 }
@@ -656,6 +799,16 @@ Cent div(Cent c1, Cent c2)
 pure
 Cent divmod(Cent c1, Cent c2, out Cent modulus)
 {
+    version (none) // prefer upstream udivmod implementation
+    {
+        if (!__ctfe)
+        {
+            const r = ldc_binop!"sdiv"(c1, c2);
+            modulus = ldc_binop!"srem"(c1, c2);
+            return r;
+        }
+    }
+
     /* Muck about with the signs so we can use the unsigned divide
      */
     if (cast(I)c1.hi < 0)
@@ -897,18 +1050,14 @@ unittest
     assert(shl(C10,0) == C10);
     assert(shl(C10,Ubits) == C10_0);
     assert(shl(C10,1) == C20);
-    assert(shl(C10,Ubits * 2) == C0);
     assert(shr(C10_0,0) == C10_0);
     assert(shr(C10_0,Ubits) == C10);
     assert(shr(C10_0,Ubits - 1) == C20);
     assert(shr(C10_0,Ubits + 1) == C5);
-    assert(shr(C10_0,Ubits * 2) == C0);
     assert(sar(C10_0,0) == C10_0);
     assert(sar(C10_0,Ubits) == C10);
     assert(sar(C10_0,Ubits - 1) == C20);
     assert(sar(C10_0,Ubits + 1) == C5);
-    assert(sar(C10_0,Ubits * 2) == C0);
-    assert(sar(Cm1,Ubits * 2) == Cm1);
 
     assert(shl1(C10) == C20);
     assert(shr1(C10_0) == C5_0);
@@ -920,7 +1069,6 @@ unittest
     assert(udiv(C10,C2) == C5);
     assert(udivmod(C10,C2, modulus) ==  C5);   assert(modulus == C0);
     assert(udivmod(C10,C3, modulus) ==  C3);   assert(modulus == C1);
-    assert(udivmod(C10,C0, modulus) == Cm1);   assert(modulus == C0);
     assert(udivmod(C2,C90_30, modulus) == C0); assert(modulus == C2);
     assert(udiv(mul(C90_30, C2), C2) == C90_30);
     assert(udiv(mul(C90_30, C2), C90_30) == C2);
@@ -965,4 +1113,6 @@ unittest
     assert(rol(ror(C7_9, 5), 5) == C7_9);
     assert(rol(C7_9, 1) == rol1(C7_9));
     assert(ror(C7_9, 1) == ror1(C7_9));
+    assert(rol(C7_9, 0) == C7_9);
+    assert(ror(C7_9, 0) == C7_9);
 }
diff --git a/tests/codegen/int128.d b/tests/codegen/int128.d
new file mode 100644
index 0000000000..040e270846
--- /dev/null
+++ b/tests/codegen/int128.d
@@ -0,0 +1,35 @@
+// REQUIRES: target_X86
+
+// RUN: %ldc -output-s -mtriple=x86_64-linux-gnu -O -of=%t.s %s && FileCheck %s < %t.s
+
+import core.int128;
+
+// CHECK: _D6int1285mulhiFmmZm:
+ulong mulhi(ulong a, ulong b)
+{
+    // CHECK-NEXT: .cfi_startproc
+    // CHECK-NEXT: movq	%rsi, %rax
+    // CHECK-NEXT: mulq	%rdi
+    // CHECK-NEXT: movq	%rdx, %rax
+    // CHECK-NEXT: retq
+    // Expected lowering (directives above): one widening MULQ, with the high half returned from RDX.
+    return mul(Cent(a), Cent(b)).hi;
+}
+
+// CHECK: _D6int12810mul_divmodFmmmJmZm:
+ulong mul_divmod(ulong a, ulong b, ulong c, out ulong modulus)
+{
+    // CHECK-NEXT: .cfi_startproc
+    // CHECK-NEXT: movq	%rdx, %r8
+    // CHECK-NEXT: movq	%rsi, %rax
+    // CHECK-NEXT: mulq	%rdi
+    // CHECK-NEXT: movq	$0, (%rcx)
+    // CHECK-NEXT: #APP
+    // CHECK-NEXT: divq	%r8
+    // CHECK-NEXT: #NO_APP
+    // CHECK-NEXT: movq	%rdx, (%rcx)
+    // CHECK-NEXT: retq
+    // Expected lowering: MULQ, the zero-initialisation of the `out` modulus, then udivmod's inline-asm DIVQ.
+    const product128 = mul(Cent(a), Cent(b));
+    return udivmod(product128, c, modulus);
+}