diff --git a/src/cgutils.cpp b/src/cgutils.cpp
index 2303ddb286971..b31dd36bd4337 100644
--- a/src/cgutils.cpp
+++ b/src/cgutils.cpp
@@ -1488,12 +1488,19 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j
     if (type_is_ghost(elty))
         return ghostValue(jltype);
     AllocaInst *intcast = NULL;
-    if (!isboxed && Order != AtomicOrdering::NotAtomic && !elty->isIntOrPtrTy() && !elty->isFloatingPointTy()) {
+    if (!isboxed && Order != AtomicOrdering::NotAtomic && !elty->isIntOrPtrTy()) {
        const DataLayout &DL = jl_data_layout;
        unsigned nb = DL.getTypeSizeInBits(elty);
        intcast = ctx.builder.CreateAlloca(elty);
        elty = Type::getIntNTy(jl_LLVMContext, nb);
     }
+    Type *realelty = elty;
+    if (Order != AtomicOrdering::NotAtomic && isa<IntegerType>(elty)) {
+        unsigned nb = cast<IntegerType>(elty)->getBitWidth();
+        unsigned nb2 = PowerOf2Ceil(nb);
+        if (nb != nb2)
+            elty = Type::getIntNTy(jl_LLVMContext, nb2);
+    }
     Type *ptrty = PointerType::get(elty, ptr->getType()->getPointerAddressSpace());
     Value *data;
     if (ptr->getType() != ptrty)
@@ -1502,7 +1509,7 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j
         data = ptr;
     if (idx_0based)
         data = ctx.builder.CreateInBoundsGEP(elty, data, idx_0based);
-    Instruction *load;
+    Value *instr;
     // TODO: can only lazy load if we can create a gc root for ptr for the lifetime of elt
     //if (elty->isAggregateType() && tbaa == tbaa_immut && !alignment) { // can lazy load on demand, no copy needed
     //    elt = data;
@@ -1512,20 +1519,23 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j
            alignment = sizeof(void*);
        else if (!alignment)
            alignment = julia_alignment(jltype);
-        load = ctx.builder.CreateAlignedLoad(data, Align(alignment), false);
-        cast<LoadInst>(load)->setOrdering(Order);
+        LoadInst *load = ctx.builder.CreateAlignedLoad(data, Align(alignment), false);
+        load->setOrdering(Order);
         if (aliasscope)
             load->setMetadata("alias.scope", aliasscope);
         if (isboxed)
-            load = maybe_mark_load_dereferenceable(load, true, jltype);
+            maybe_mark_load_dereferenceable(load, true, jltype);
         if (tbaa)
-            load = tbaa_decorate(tbaa, load);
+            tbaa_decorate(tbaa, load);
+        instr = load;
+        if (elty != realelty)
+            instr = ctx.builder.CreateTrunc(instr, realelty);
         if (intcast) {
-            ctx.builder.CreateStore(load, ctx.builder.CreateBitCast(intcast, load->getType()->getPointerTo()));
-            load = ctx.builder.CreateLoad(intcast);
+            ctx.builder.CreateStore(instr, ctx.builder.CreateBitCast(intcast, instr->getType()->getPointerTo()));
+            instr = ctx.builder.CreateLoad(intcast);
         }
         if (maybe_null_if_boxed) {
-            Value *first_ptr = isboxed ? load : extract_first_ptr(ctx, load);
+            Value *first_ptr = isboxed ? instr : extract_first_ptr(ctx, instr);
             if (first_ptr)
                 null_pointer_check(ctx, first_ptr, nullcheck);
         }
@@ -1535,9 +1545,9 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j
         //load->setMetadata(LLVMContext::MD_range, MDNode::get(jl_LLVMContext, {
         //    ConstantAsMetadata::get(ConstantInt::get(T_int8, 0)),
         //    ConstantAsMetadata::get(ConstantInt::get(T_int8, 2)) }));
-        load = ctx.builder.Insert(CastInst::Create(Instruction::Trunc, load, T_int1));
+        instr = ctx.builder.CreateTrunc(instr, T_int1);
     }
-    return mark_julia_type(ctx, load, isboxed, jltype);
+    return mark_julia_type(ctx, instr, isboxed, jltype);
 }
 
 static jl_cgval_t typed_store(jl_codectx_t &ctx,
@@ -1553,18 +1563,27 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
     if (type_is_ghost(elty))
         return oldval;
     Value *intcast = nullptr;
-    if (!isboxed && Order != AtomicOrdering::NotAtomic && !elty->isIntOrPtrTy() && !elty->isFloatingPointTy()) {
+    if (!isboxed && Order != AtomicOrdering::NotAtomic && !elty->isIntOrPtrTy()) {
        const DataLayout &DL = jl_data_layout;
        unsigned nb = DL.getTypeSizeInBits(elty);
        if (!issetfield)
            intcast = ctx.builder.CreateAlloca(elty);
        elty = Type::getIntNTy(jl_LLVMContext, nb);
     }
+    Type *realelty = elty;
+    if (Order != AtomicOrdering::NotAtomic && isa<IntegerType>(elty)) {
+        unsigned nb = cast<IntegerType>(elty)->getBitWidth();
+        unsigned nb2 = PowerOf2Ceil(nb);
+        if (nb != nb2)
+            elty = Type::getIntNTy(jl_LLVMContext, nb2);
+    }
     Value *r;
     if (!isboxed)
-        r = emit_unbox(ctx, elty, rhs, jltype);
+        r = emit_unbox(ctx, realelty, rhs, jltype);
     else
         r = boxed(ctx, rhs);
+    if (realelty != elty)
+        r = ctx.builder.CreateZExt(r, elty);
     Type *ptrty = PointerType::get(elty, ptr->getType()->getPointerAddressSpace());
     if (ptr->getType() != ptrty)
         ptr = ctx.builder.CreateBitCast(ptr, ptrty);
@@ -1587,18 +1606,19 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
                 instr->setMetadata("noalias", aliasscope);
             if (tbaa)
                 tbaa_decorate(tbaa, instr);
-        }
-        if (isreplacefield) {
-            oldval = mark_julia_type(ctx, instr, isboxed, jltype);
-            Value *first_ptr = nullptr;
-            if (maybe_null_if_boxed)
-                first_ptr = isboxed ? instr : extract_first_ptr(ctx, instr);
-            Success = emit_nullcheck_guard(ctx, first_ptr, [&] {
-                return emit_f_is(ctx, oldval, cmp);
-            });
-            BasicBlock *BB = BasicBlock::Create(jl_LLVMContext, "xchg", ctx.f);
-            ctx.builder.CreateCondBr(Success, BB, DoneBB);
-            ctx.builder.SetInsertPoint(BB);
+            assert(realelty == elty);
+            if (isreplacefield) {
+                oldval = mark_julia_type(ctx, instr, isboxed, jltype);
+                Value *first_ptr = nullptr;
+                if (maybe_null_if_boxed)
+                    first_ptr = isboxed ? instr : extract_first_ptr(ctx, instr);
+                Success = emit_nullcheck_guard(ctx, first_ptr, [&] {
+                    return emit_f_is(ctx, oldval, cmp);
+                });
+                BasicBlock *BB = BasicBlock::Create(jl_LLVMContext, "xchg", ctx.f);
+                ctx.builder.CreateCondBr(Success, BB, DoneBB);
+                ctx.builder.SetInsertPoint(BB);
+            }
         }
         StoreInst *store = ctx.builder.CreateAlignedStore(r, ptr, Align(alignment));
         store->setOrdering(Order);
@@ -1637,7 +1657,9 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
                 Current->addIncoming(instr, SkipBB);
                 ctx.builder.SetInsertPoint(BB);
             }
-            Compare = emit_unbox(ctx, elty, cmp, jltype);
+            Compare = emit_unbox(ctx, realelty, cmp, jltype);
+            if (realelty != elty)
+                Compare = ctx.builder.CreateZExt(Compare, elty);
         }
         else if (cmp.isboxed) {
             Compare = boxed(ctx, cmp);
@@ -1685,21 +1707,26 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
             if (tbaa)
                 tbaa_decorate(tbaa, store);
             instr = ctx.builder.Insert(ExtractValueInst::Create(store, 0));
-            Success = ctx.builder.CreateExtractValue(store, 1);
+            Success = ctx.builder.Insert(ExtractValueInst::Create(store, 1));
             Value *Done = Success;
             if (needloop) {
                 if (isreplacefield) {
+                    Value *realinstr = instr;
+                    if (realelty != elty)
+                        realinstr = ctx.builder.CreateTrunc(instr, realelty);
                     if (intcast) {
-                        ctx.builder.CreateStore(instr, ctx.builder.CreateBitCast(intcast, instr->getType()->getPointerTo()));
+                        ctx.builder.CreateStore(realinstr, ctx.builder.CreateBitCast(intcast, realinstr->getType()->getPointerTo()));
                         oldval = mark_julia_slot(intcast, jltype, NULL, tbaa_stack);
+                        if (maybe_null_if_boxed)
+                            realinstr = ctx.builder.CreateLoad(intcast);
                     }
                     else {
-                        oldval = mark_julia_type(ctx, instr, isboxed, jltype);
+                        oldval = mark_julia_type(ctx, realinstr, isboxed, jltype);
                     }
                     Done = emit_guarded_test(ctx, ctx.builder.CreateNot(Success), false, [&] {
                         Value *first_ptr = nullptr;
                         if (maybe_null_if_boxed)
-                            first_ptr = isboxed ? instr : extract_first_ptr(ctx, instr);
+                            first_ptr = isboxed ? realinstr : extract_first_ptr(ctx, realinstr);
                         return emit_nullcheck_guard(ctx, first_ptr, [&] {
                             return emit_f_is(ctx, oldval, cmp);
                         });
@@ -1756,6 +1783,8 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
         }
     }
     if (!issetfield) {
+        if (realelty != elty)
+            instr = ctx.builder.Insert(CastInst::Create(Instruction::Trunc, instr, realelty));
         if (intcast) {
             ctx.builder.CreateStore(instr, ctx.builder.CreateBitCast(intcast, instr->getType()->getPointerTo()));
             instr = ctx.builder.CreateLoad(intcast);
@@ -2059,6 +2088,9 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
         emit_atomic_error(ctx, "getfield: atomic field cannot be accessed non-atomically");
         return jl_cgval_t(); // unreachable
     }
+    if (order == jl_memory_order_unspecified) {
+        order = isatomic ? jl_memory_order_unordered : jl_memory_order_notatomic;
+    }
     if (jfty == jl_bottom_type) {
         raise_exception(ctx, literal_pointer_val(ctx, jl_undefref_exception));
         return jl_cgval_t(); // unreachable
     }
@@ -2132,7 +2164,7 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
     if (needlock)
         emit_lockstate_value(ctx, strct, true);
     jl_cgval_t ret = typed_load(ctx, addr, NULL, jfty, tbaa, nullptr, false,
-            needlock || order <= jl_memory_order_notatomic ? AtomicOrdering::NotAtomic : get_llvm_atomic_order(order), // TODO: we should use unordered for anything with CountTrackedPointers(elty).count > 0
+            needlock ? AtomicOrdering::NotAtomic : get_llvm_atomic_order(order), // TODO: we should use unordered for anything with CountTrackedPointers(elty).count > 0
             maybe_null, align, nullcheck);
     if (needlock)
         emit_lockstate_value(ctx, strct, false);
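Note: the codegen change above widens atomic loads and stores of non-power-of-2 integer widths to the next power of two (an i24 access becomes an i32 access plus a trunc/zext), since hardware atomics exist only at power-of-2 sizes. Below is a minimal standalone sketch of that rounding rule in plain C; round_up_pow2 is an illustrative stand-in for LLVM's PowerOf2Ceil, not code from this patch.

#include <assert.h>

/* Round a bit width up to the next power of two, as the codegen above
 * does before emitting an atomic load or store. */
static unsigned round_up_pow2(unsigned nb)
{
    unsigned p = 1;
    while (p < nb)
        p <<= 1;
    return p;
}

int main(void)
{
    assert(round_up_pow2(24) == 32); /* i24 is loaded/stored as i32 */
    assert(round_up_pow2(40) == 64); /* 5-byte payloads widen to i64 */
    assert(round_up_pow2(32) == 32); /* power-of-2 widths are unchanged */
    return 0;
}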
diff --git a/src/datatype.c b/src/datatype.c
index 9bad2bc3d3bd8..fbdf1d58d2c0c 100644
--- a/src/datatype.c
+++ b/src/datatype.c
@@ -719,7 +719,53 @@ JL_DLLEXPORT int jl_is_foreign_type(jl_datatype_t *dt)
 #error MAX_POINTERATOMIC_SIZE too large
 #endif
 #if MAX_POINTERATOMIC_SIZE >= 16
+#ifndef _P64
+#error 12 byte GC pool size not implemented for 32-bit
+#endif
 typedef __uint128_t uint128_t;
+typedef uint128_t jl_uatomicmax_t;
+#else
+typedef uint64_t jl_uatomicmax_t;
+#endif
+
+#if BYTE_ORDER != LITTLE_ENDIAN
+#error using masks for atomics (instead of memcpy like nb == 16) assumes little endian
+#endif
+
+static inline uint32_t zext_read32(const jl_value_t *x, size_t nb) JL_NOTSAFEPOINT
+{
+    uint32_t y = *(uint32_t*)x;
+    if (nb == 4)
+        return y;
+    else // if (nb == 3)
+        return 0xffffffu & y;
+}
+
+#if MAX_POINTERATOMIC_SIZE >= 8
+static inline uint64_t zext_read64(const jl_value_t *x, size_t nb) JL_NOTSAFEPOINT
+{
+    uint64_t y = *(uint64_t*)x;
+    if (nb == 8)
+        return y;
+    else if (nb == 7)
+        return 0xffffffffffffffu & y;
+    else if (nb == 6)
+        return 0xffffffffffffu & y;
+    else // if (nb == 5)
+        return 0xffffffffffu & y;
+}
+#endif
+
+#if MAX_POINTERATOMIC_SIZE >= 16
+static inline uint128_t zext_read128(const jl_value_t *x, size_t nb) JL_NOTSAFEPOINT
+{
+    uint128_t y = 0;
+    if (nb == 16)
+        y = *(uint128_t*)x;
+    else
+        memcpy(&y, x, nb);
+    return y;
+}
 #endif
 
 JL_DLLEXPORT jl_value_t *jl_new_bits(jl_value_t *dt, const void *data)
@@ -744,16 +790,7 @@ JL_DLLEXPORT jl_value_t *jl_new_bits(jl_value_t *dt, const void *data)
 
     jl_task_t *ct = jl_current_task;
     jl_value_t *v = jl_gc_alloc(ct->ptls, nb, bt);
-    switch (nb) {
-    case  1: *(uint8_t*) v = *(uint8_t*)data; break;
-    case  2: *(uint16_t*)v = jl_load_unaligned_i16(data); break;
-    case  4: *(uint32_t*)v = jl_load_unaligned_i32(data); break;
-    case  8: *(uint64_t*)v = jl_load_unaligned_i64(data); break;
-    case 16:
-        memcpy(jl_assume_aligned(v, 16), data, 16);
-        break;
-    default: memcpy(v, data, nb);
-    }
+    memcpy(jl_assume_aligned(v, sizeof(void*)), data, nb);
     return v;
 }
 
@@ -778,19 +815,24 @@ JL_DLLEXPORT jl_value_t *jl_atomic_new_bits(jl_value_t *dt, const char *data)
 
     jl_task_t *ct = jl_current_task;
     jl_value_t *v = jl_gc_alloc(ct->ptls, nb, bt);
-    switch (nb) {
-    case  1: *(uint8_t*) v = jl_atomic_load((uint8_t*)data); break;
-    case  2: *(uint16_t*)v = jl_atomic_load((uint16_t*)data); break;
-    case  4: *(uint32_t*)v = jl_atomic_load((uint32_t*)data); break;
+    // data is aligned to the power of two,
+    // we will write too much of v, but the padding should exist
+    if (nb == 1)
+        *(uint8_t*) v = jl_atomic_load((uint8_t*)data);
+    else if (nb <= 2)
+        *(uint16_t*)v = jl_atomic_load((uint16_t*)data);
+    else if (nb <= 4)
+        *(uint32_t*)v = jl_atomic_load((uint32_t*)data);
 #if MAX_POINTERATOMIC_SIZE >= 8
-    case  8: *(uint64_t*)v = jl_atomic_load((uint64_t*)data); break;
+    else if (nb <= 8)
+        *(uint64_t*)v = jl_atomic_load((uint64_t*)data);
 #endif
 #if MAX_POINTERATOMIC_SIZE >= 16
-    case 16: *(uint128_t*)v = jl_atomic_load((uint128_t*)data); break;
+    else if (nb <= 16)
+        *(uint128_t*)v = jl_atomic_load((uint128_t*)data);
 #endif
-    default:
+    else
         abort();
-    }
     return v;
 }
 
@@ -798,20 +840,26 @@ JL_DLLEXPORT void jl_atomic_store_bits(char *dst, const jl_value_t *src, int nb)
 {
     // dst must have the required alignment for an atomic of the given size
     // src must be aligned by the GC
-    switch (nb) {
-    case  0: break;
-    case  1: jl_atomic_store((uint8_t*)dst, *(uint8_t*)src); break;
-    case  2: jl_atomic_store((uint16_t*)dst, *(uint16_t*)src); break;
-    case  4: jl_atomic_store((uint32_t*)dst, *(uint32_t*)src); break;
+    // we may therefore read too much from src, but will zero the excess bits
+    // before the store (so that we can get faster cmpswap later)
+    if (nb == 0)
+        ;
+    else if (nb == 1)
+        jl_atomic_store((uint8_t*)dst, *(uint8_t*)src);
+    else if (nb == 2)
+        jl_atomic_store((uint16_t*)dst, *(uint16_t*)src);
+    else if (nb <= 4)
+        jl_atomic_store((uint32_t*)dst, zext_read32(src, nb));
 #if MAX_POINTERATOMIC_SIZE >= 8
-    case  8: jl_atomic_store((uint64_t*)dst, *(uint64_t*)src); break;
+    else if (nb <= 8)
+        jl_atomic_store((uint64_t*)dst, zext_read64(src, nb));
 #endif
 #if MAX_POINTERATOMIC_SIZE >= 16
-    case 16: jl_atomic_store((uint128_t*)dst, *(uint128_t*)src); break;
+    else if (nb <= 16)
+        jl_atomic_store((uint128_t*)dst, zext_read128(src, nb));
 #endif
-    default:
+    else
         abort();
-    }
 }
 
 JL_DLLEXPORT jl_value_t *jl_atomic_swap_bits(jl_value_t *dt, char *dst, const jl_value_t *src, int nb)
@@ -834,19 +882,22 @@ JL_DLLEXPORT jl_value_t *jl_atomic_swap_bits(jl_value_t *dt, char *dst, const jl
 
     jl_task_t *ct = jl_current_task;
     jl_value_t *v = jl_gc_alloc(ct->ptls, jl_datatype_size(bt), bt);
-    switch (nb) {
-    case  1: *(uint8_t*) v = jl_atomic_exchange((uint8_t*)dst, *(uint8_t*)src); break;
-    case  2: *(uint16_t*)v = jl_atomic_exchange((uint16_t*)dst, *(uint16_t*)src); break;
-    case  4: *(uint32_t*)v = jl_atomic_exchange((uint32_t*)dst, *(uint32_t*)src); break;
+    if (nb == 1)
+        *(uint8_t*)v = jl_atomic_exchange((uint8_t*)dst, *(uint8_t*)src);
+    else if (nb == 2)
+        *(uint16_t*)v = jl_atomic_exchange((uint16_t*)dst, *(uint16_t*)src);
+    else if (nb <= 4)
+        *(uint32_t*)v = jl_atomic_exchange((uint32_t*)dst, zext_read32(src, nb));
 #if MAX_POINTERATOMIC_SIZE >= 8
-    case  8: *(uint64_t*)v = jl_atomic_exchange((uint64_t*)dst, *(uint64_t*)src); break;
+    else if (nb <= 8)
+        *(uint64_t*)v = jl_atomic_exchange((uint64_t*)dst, zext_read64(src, nb));
 #endif
 #if MAX_POINTERATOMIC_SIZE >= 16
-    case 16: *(uint128_t*)v = jl_atomic_exchange((uint128_t*)dst, *(uint128_t*)src); break;
+    else if (nb <= 16)
+        *(uint128_t*)v = jl_atomic_exchange((uint128_t*)dst, zext_read128(src, nb));
 #endif
-    default:
+    else
         abort();
-    }
     return v;
 }
 
@@ -855,41 +906,37 @@ JL_DLLEXPORT int jl_atomic_bool_cmpswap_bits(char *dst, const jl_value_t *expect
     // dst must have the required alignment for an atomic of the given size
     // n.b.: this can spuriously fail if there are padding bits, the caller should deal with that
     int success;
-    switch (nb) {
-    case 0: {
+    if (nb == 0) {
         success = 1;
-        break;
     }
-    case 1: {
+    else if (nb == 1) {
         uint8_t y = *(uint8_t*)expected;
         success = jl_atomic_cmpswap((uint8_t*)dst, &y, *(uint8_t*)src);
-        break;
     }
-    case 2: {
+    else if (nb == 2) {
         uint16_t y = *(uint16_t*)expected;
         success = jl_atomic_cmpswap((uint16_t*)dst, &y, *(uint16_t*)src);
-        break;
     }
-    case 4: {
-        uint32_t y = *(uint32_t*)expected;
-        success = jl_atomic_cmpswap((uint32_t*)dst, &y, *(uint32_t*)src);
-        break;
+    else if (nb <= 4) {
+        uint32_t y = zext_read32(expected, nb);
+        uint32_t z = zext_read32(src, nb);
+        success = jl_atomic_cmpswap((uint32_t*)dst, &y, z);
     }
 #if MAX_POINTERATOMIC_SIZE >= 8
-    case 8: {
-        uint64_t y = *(uint64_t*)expected;
-        success = jl_atomic_cmpswap((uint64_t*)dst, &y, *(uint64_t*)src);
-        break;
+    else if (nb <= 8) {
+        uint64_t y = zext_read64(expected, nb);
+        uint64_t z = zext_read64(src, nb);
+        success = jl_atomic_cmpswap((uint64_t*)dst, &y, z);
     }
 #endif
 #if MAX_POINTERATOMIC_SIZE >= 16
-    case 16: {
-        uint128_t y = *(uint128_t*)expected;
-        success = jl_atomic_cmpswap((uint128_t*)dst, &y, *(uint128_t*)src);
-        break;
+    else if (nb <= 16) {
+        uint128_t y = zext_read128(expected, nb);
+        uint128_t z = zext_read128(src, nb);
+        success = jl_atomic_cmpswap((uint128_t*)dst, &y, z);
     }
 #endif
-    default:
+    else {
         abort();
     }
     return success;
@@ -909,45 +956,42 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, char *dst, co
     jl_value_t *y = jl_gc_alloc(ct->ptls, isptr ? nb : tuptyp->size, isptr ? dt : tuptyp);
     int success;
     jl_datatype_t *et = (jl_datatype_t*)jl_typeof(expected);
-    switch (nb) {
-    case 0: {
+    if (nb == 0) {
         success = (dt == et);
-        break;
     }
-    case 1: {
+    else if (nb == 1) {
         uint8_t *y8 = (uint8_t*)y;
+        assert(!dt->layout->haspadding);
         if (dt == et) {
             *y8 = *(uint8_t*)expected;
-            success = jl_atomic_cmpswap((uint8_t*)dst, y8, *(uint8_t*)src);
+            uint8_t z8 = *(uint8_t*)src;
+            success = jl_atomic_cmpswap((uint8_t*)dst, y8, z8);
         }
         else {
             *y8 = jl_atomic_load((uint8_t*)dst);
             success = 0;
         }
-        break;
     }
-    case 2: {
+    else if (nb == 2) {
         uint16_t *y16 = (uint16_t*)y;
+        assert(!dt->layout->haspadding);
         if (dt == et) {
             *y16 = *(uint16_t*)expected;
-            while (1) {
-                success = jl_atomic_cmpswap((uint16_t*)dst, y16, *(uint16_t*)src);
-                if (success || !dt->layout->haspadding || !jl_egal__bits(y, expected, dt))
-                    break;
-            }
+            uint16_t z16 = *(uint16_t*)src;
+            success = jl_atomic_cmpswap((uint16_t*)dst, y16, z16);
         }
         else {
             *y16 = jl_atomic_load((uint16_t*)dst);
             success = 0;
         }
-        break;
     }
-    case 4: {
+    else if (nb <= 4) {
         uint32_t *y32 = (uint32_t*)y;
         if (dt == et) {
-            *y32 = *(uint32_t*)expected;
+            *y32 = zext_read32(expected, nb);
+            uint32_t z32 = zext_read32(src, nb);
             while (1) {
-                success = jl_atomic_cmpswap((uint32_t*)dst, y32, *(uint32_t*)src);
+                success = jl_atomic_cmpswap((uint32_t*)dst, y32, z32);
                 if (success || !dt->layout->haspadding || !jl_egal__bits(y, expected, dt))
                     break;
             }
@@ -956,15 +1000,15 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, char *dst, co
             *y32 = jl_atomic_load((uint32_t*)dst);
             success = 0;
         }
-        break;
     }
 #if MAX_POINTERATOMIC_SIZE >= 8
-    case 8: {
+    else if (nb <= 8) {
         uint64_t *y64 = (uint64_t*)y;
         if (dt == et) {
-            *y64 = *(uint64_t*)expected;
+            *y64 = zext_read64(expected, nb);
+            uint64_t z64 = zext_read64(src, nb);
             while (1) {
-                success = jl_atomic_cmpswap((uint64_t*)dst, y64, *(uint64_t*)src);
+                success = jl_atomic_cmpswap((uint64_t*)dst, y64, z64);
                 if (success || !dt->layout->haspadding || !jl_egal__bits(y, expected, dt))
                     break;
             }
@@ -973,16 +1017,16 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, char *dst, co
             *y64 = jl_atomic_load((uint64_t*)dst);
             success = 0;
         }
-        break;
     }
 #endif
 #if MAX_POINTERATOMIC_SIZE >= 16
-    case 16: {
+    else if (nb <= 16) {
         uint128_t *y128 = (uint128_t*)y;
         if (dt == et) {
-            *y128 = *(uint128_t*)expected;
+            *y128 = zext_read128(expected, nb);
+            uint128_t z128 = zext_read128(src, nb);
             while (1) {
-                success = jl_atomic_cmpswap((uint128_t*)dst, y128, *(uint128_t*)src);
+                success = jl_atomic_cmpswap((uint128_t*)dst, y128, z128);
                 if (success || !dt->layout->haspadding || !jl_egal__bits(y, expected, dt))
                     break;
             }
@@ -991,10 +1035,9 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, char *dst, co
             *y128 = jl_atomic_load((uint128_t*)dst);
             success = 0;
         }
-        break;
     }
 #endif
-    default:
+    else {
         abort();
     }
     if (isptr) {
@@ -1406,16 +1449,12 @@ static inline void memassign_safe(int hasptr, jl_value_t *parent, char *dst, con
     else {
         // src must be a heap box.
         assert(nb == jl_datatype_size(jl_typeof(src)));
+        if (nb >= 16) {
+            memcpy(dst, jl_assume_aligned(src, 16), nb);
+            return;
+        }
     }
-    switch (nb) {
-    case  0: break;
-    case  1: *(uint8_t*)dst = *(uint8_t*)src; break;
-    case  2: jl_store_unaligned_i16(dst, *(uint16_t*)src); break;
-    case  4: jl_store_unaligned_i32(dst, *(uint32_t*)src); break;
-    case  8: jl_store_unaligned_i64(dst, *(uint64_t*)src); break;
-    case 16: memcpy(dst, jl_assume_aligned(src, 16), 16); break;
-    default: memcpy(dst, src, nb); break;
-    }
+    memcpy(dst, jl_assume_aligned(src, sizeof(void*)), nb);
 }
 
 void set_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *rhs, int isatomic) JL_NOTSAFEPOINT
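Note: the zext_read32/64/128 helpers above are what let the runtime compare-and-swap values whose size is not a power of two: they read the whole power-of-2 window and mask off the dead bytes, which is sound because jl_atomic_store_bits now zeroes the excess bits before every store. Below is a simplified model of that masked cmpswap for a 3-byte payload, assuming GCC/Clang __atomic builtins and a little-endian target; zext_read24 is a hypothetical fixed-size variant of zext_read32, not part of the patch.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Read a 3-byte value through its 4-byte atomic window, masking the
 * padding byte, as zext_read32 does for nb == 3. */
static uint32_t zext_read24(const void *x)
{
    uint32_t y;
    memcpy(&y, x, sizeof(y)); /* caller guarantees 4 readable bytes */
    return y & 0xffffffu;     /* little endian: the high byte is padding */
}

int main(void)
{
    uint32_t slot = 0;                     /* padding bits already zeroed */
    uint8_t expected[4] = {0, 0, 0, 0xff}; /* garbage in the padding byte */
    uint8_t desired[4]  = {1, 2, 3, 0xff};
    uint32_t y = zext_read24(expected);    /* masking hides the garbage */
    uint32_t z = zext_read24(desired);
    int ok = __atomic_compare_exchange_n(&slot, &y, z, 0,
                                         __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
    printf("swapped: %d, slot: 0x%08x\n", ok, slot); /* swapped: 1, slot: 0x00030201 */
    return 0;
}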
diff --git a/src/processor.cpp b/src/processor.cpp
index 9d4ac476ef316..c5e42368412e8 100644
--- a/src/processor.cpp
+++ b/src/processor.cpp
@@ -401,6 +401,8 @@ static inline std::vector<uint8_t> serialize_target_data(llvm::StringRef name,
 {
     std::vector<uint8_t> res;
     auto add_data = [&] (const void *data, size_t sz) {
+        if (sz == 0)
+            return;
         size_t old_sz = res.size();
         res.resize(old_sz + sz);
         memcpy(&res[old_sz], data, sz);
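Note: the test types added below exercise the three padding shapes this patch has to handle: internal padding (PadIntA), a total size that is not a power of two (PadIntB), and integral padding (Int24). A C sketch of the analogous layouts, with illustrative struct names not taken from the patch:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct pad_int_a { int8_t a; int16_t b; };             /* like PadIntA: padding between a and b */
struct pad_int_b { uint8_t a; uint8_t b; uint8_t c; }; /* like PadIntB: 3 bytes, no power-of-2 size */

int main(void)
{
    /* b sits at offset 2, so one padding byte follows a and sizeof is 4 */
    printf("pad_int_a: size %zu, offsetof(b) %zu\n",
           sizeof(struct pad_int_a), offsetof(struct pad_int_a, b));
    /* a 3-byte object: its atomic slot must be widened to 4 bytes */
    printf("pad_int_b: size %zu\n", sizeof(struct pad_int_b));
    return 0;
}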
diff --git a/test/atomics.jl b/test/atomics.jl
index 2a0cbd7357c6e..e4202b5ce1aea 100644
--- a/test/atomics.jl
+++ b/test/atomics.jl
@@ -63,22 +63,43 @@ let (x, y) = (Complex{Int128}(10, 30), Complex{Int128}(20, 40))
     @test sizeof(r) == sizeof(ar) - Int(fieldoffset(typeof(ar), 1))
 end
 
+struct PadIntA <: Number # internal padding
+    a::Int8
+    b::Int16
+    PadIntA(x) = new(82, x)
+end
+struct PadIntB <: Number # external padding
+    a::UInt8
+    b::UInt8
+    c::UInt8
+    PadIntB(x) = new(x & 0xff, (x >> 8) & 0xff, (x >> 16) & 0xff)
+end
+primitive type Int24 <: Signed 24 end # integral padding
+Int24(x::Int) = Core.Intrinsics.trunc_int(Int24, x)
+Base.Int(x::PadIntB) = x.a + (Int(x.b) << 8) + (Int(x.c) << 16)
+Base.:(+)(x::PadIntA, b::Int) = PadIntA(x.b + b)
+Base.:(+)(x::PadIntB, b::Int) = PadIntB(Int(x) + b)
+Base.:(+)(x::Int24, b::Int) = Core.Intrinsics.add_int(x, Int24(b))
+Base.show(io::IO, x::PadIntA) = print(io, "PadIntA(", x.b, ")")
+Base.show(io::IO, x::PadIntB) = print(io, "PadIntB(", Int(x), ")")
+Base.show(io::IO, x::Int24) = print(io, "Int24(", Core.Intrinsics.zext_int(Int, x), ")")
+
 @noinline function _test_field_operators(r)
     r = r[]
     T = typeof(getfield(r, :x))
-    @test getfield(r, :x, :sequentially_consistent) === T(12345_10)
-    @test setfield!(r, :x, T(12345_1), :sequentially_consistent) === T(12345_1)
-    @test getfield(r, :x, :sequentially_consistent) === T(12345_1)
-    @test replacefield!(r, :x, 12345_1 % UInt, T(12345_100), :sequentially_consistent, :sequentially_consistent) === (T(12345_1), false)
-    @test replacefield!(r, :x, T(12345_1), T(12345_100), :sequentially_consistent, :sequentially_consistent) === (T(12345_1), true)
-    @test getfield(r, :x, :sequentially_consistent) === T(12345_100)
-    @test replacefield!(r, :x, T(12345_1), T(12345_1), :sequentially_consistent, :sequentially_consistent) === (T(12345_100), false)
-    @test getfield(r, :x, :sequentially_consistent) === T(12345_100)
-    @test modifyfield!(r, :x, add, 1, :sequentially_consistent) === (T(12345_100), T(12345_101))
-    @test modifyfield!(r, :x, add, 1, :sequentially_consistent) === (T(12345_101), T(12345_102))
-    @test getfield(r, :x, :sequentially_consistent) === T(12345_102)
-    @test swapfield!(r, :x, T(12345_1), :sequentially_consistent) === T(12345_102)
-    @test getfield(r, :x, :sequentially_consistent) === T(12345_1)
+    @test getfield(r, :x, :sequentially_consistent) === T(123_10)
+    @test setfield!(r, :x, T(123_1), :sequentially_consistent) === T(123_1)
+    @test getfield(r, :x, :sequentially_consistent) === T(123_1)
+    @test replacefield!(r, :x, 123_1 % UInt, T(123_30), :sequentially_consistent, :sequentially_consistent) === (T(123_1), false)
+    @test replacefield!(r, :x, T(123_1), T(123_30), :sequentially_consistent, :sequentially_consistent) === (T(123_1), true)
+    @test getfield(r, :x, :sequentially_consistent) === T(123_30)
+    @test replacefield!(r, :x, T(123_1), T(123_1), :sequentially_consistent, :sequentially_consistent) === (T(123_30), false)
+    @test getfield(r, :x, :sequentially_consistent) === T(123_30)
+    @test modifyfield!(r, :x, add, 1, :sequentially_consistent) === (T(123_30), T(123_31))
+    @test modifyfield!(r, :x, add, 1, :sequentially_consistent) === (T(123_31), T(123_32))
+    @test getfield(r, :x, :sequentially_consistent) === T(123_32)
+    @test swapfield!(r, :x, T(123_1), :sequentially_consistent) === T(123_32)
+    @test getfield(r, :x, :sequentially_consistent) === T(123_1)
     nothing
 end
 @noinline function test_field_operators(r)
@@ -86,11 +107,15 @@ end
     _test_field_operators(Ref{Any}(copy(r)))
     nothing
 end
-test_field_operators(ARefxy{Int}(12345_10, 12345_20))
-test_field_operators(ARefxy{Any}(12345_10, 12345_20))
-test_field_operators(ARefxy{Union{Nothing,Int}}(12345_10, nothing))
-test_field_operators(ARefxy{Complex{Int32}}(12345_10, 12345_20))
-test_field_operators(ARefxy{Complex{Int128}}(12345_10, 12345_20))
+test_field_operators(ARefxy{Int}(123_10, 123_20))
+test_field_operators(ARefxy{Any}(123_10, 123_20))
+test_field_operators(ARefxy{Union{Nothing,Int}}(123_10, nothing))
+test_field_operators(ARefxy{Complex{Int32}}(123_10, 123_20))
+test_field_operators(ARefxy{Complex{Int128}}(123_10, 123_20))
+test_field_operators(ARefxy{PadIntA}(123_10, 123_20))
+test_field_operators(ARefxy{PadIntB}(123_10, 123_20))
+#FIXME: test_field_operators(ARefxy{Int24}(123_10, 123_20))
+test_field_operators(ARefxy{Float64}(123_10, 123_20))
 
 @noinline function _test_field_orderings(r, x, y)
     @nospecialize x y
@@ -247,11 +272,13 @@ test_field_orderings(true, false)
 test_field_orderings("hi", "bye")
 test_field_orderings(:hi, :bye)
 test_field_orderings(nothing, nothing)
-test_field_orderings(ARefxy{Any}(12345_10, 12345_20), 12345_10, 12345_20)
+test_field_orderings(ARefxy{Any}(123_10, 123_20), 123_10, 123_20)
 test_field_orderings(ARefxy{Any}(true, false), true, false)
 test_field_orderings(ARefxy{Union{Nothing,Missing}}(nothing, missing), nothing, missing)
-test_field_orderings(ARefxy{Union{Nothing,Int}}(nothing, 12345_1), nothing, 12345_1)
+test_field_orderings(ARefxy{Union{Nothing,Int}}(nothing, 123_1), nothing, 123_1)
 test_field_orderings(Complex{Int128}(10, 30), Complex{Int128}(20, 40))
+test_field_orderings(10.0, 20.0)
+test_field_orderings(NaN, Inf)
 
 struct UndefComplex{T}
     re::T