Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 9 additions & 11 deletions llvm/lib/Analysis/ScalarEvolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6855,25 +6855,23 @@ const ConstantRange &ScalarEvolution::getRangeRef(
if (U->getType()->isPointerTy() && SignHint == HINT_RANGE_UNSIGNED) {
// Strengthen the range if the underlying IR value is a
// global/alloca/heap allocation using the size of the object.
ObjectSizeOpts Opts;
Opts.RoundToAlign = false;
Opts.NullIsUnknownSize = true;
uint64_t ObjSize;
if ((isa<GlobalVariable>(V) || isa<AllocaInst>(V) ||
isAllocationFn(V, &TLI)) &&
getObjectSize(V, ObjSize, DL, &TLI, Opts) && ObjSize > 1) {
// The highest address the object can start is ObjSize bytes before the
// end (unsigned max value). If this value is not a multiple of the
bool CanBeNull, CanBeFreed;
uint64_t DerefBytes =
V->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
if (DerefBytes > 1) {
// The highest address the object can start is DerefBytes bytes before
// the end (unsigned max value). If this value is not a multiple of the
// alignment, the last possible start value is the next lowest multiple
// of the alignment. Note: The computations below cannot overflow,
// because if they would there's no possible start address for the
// object.
APInt MaxVal = APInt::getMaxValue(BitWidth) - APInt(BitWidth, ObjSize);
APInt MaxVal =
APInt::getMaxValue(BitWidth) - APInt(BitWidth, DerefBytes);
uint64_t Align = U->getValue()->getPointerAlignment(DL).value();
uint64_t Rem = MaxVal.urem(Align);
MaxVal -= APInt(BitWidth, Rem);
APInt MinVal = APInt::getZero(BitWidth);
if (llvm::isKnownNonZero(V, DL))
if (!CanBeNull || llvm::isKnownNonZero(V, DL))
MinVal = Align;
ConservativeResult = ConservativeResult.intersectWith(
ConstantRange::getNonEmpty(MinVal, MaxVal + 1), RangeType);
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/IR/Value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -909,7 +909,7 @@ uint64_t Value::getPointerDereferenceableBytes(const DataLayout &DL,
if (!AI->isArrayAllocation()) {
DerefBytes =
DL.getTypeStoreSize(AI->getAllocatedType()).getKnownMinValue();
CanBeNull = false;
CanBeNull = AI->getType()->getPointerAddressSpace() != 0;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this PR should be part of this change. Note that there is also a similar case for globals below.

I'm also not entirely sure this change should be made at all. There is a subtle distinction in semantics between:

  • Can return an allocation or null. This means that the pointer is not always dereferenceable.
  • Can return an allocation that may also be located at the null address. This means that the pointer is always dereferenceable, even if it is null.

The doc comment for this method is phrased vaguely enough that it's not super clear what the intended semantics are:

  /// If CanBeNull is set by this function the pointer can either be null or be
  /// dereferenceable up to the returned number of bytes.

But there are clearly some uses of the API where the current implementation is fine, for example

uint64_t DerefBytes = UnderlyingPtrOp->getPointerDereferenceableBytes(
doesn't care whether the pointer is null, as long as it is dereferenceable.

The use here does care. I think what I would go for is to leave this method alone and drop the !CanBeNull || check (maybe with a comment on why we don't use it).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair enough and thanks for explaining the subtle distinction! I've reverted the changes.

CanBeFreed = false;
}
} else if (auto *GV = dyn_cast<GlobalVariable>(this)) {
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,7 @@ define void @test_05(i32 %N) {
; CHECK-NEXT: %"alloca point" = bitcast i32 0 to i32
; CHECK-NEXT: --> 0 U: [0,1) S: [0,1)
; CHECK-NEXT: %tmp = getelementptr [1000 x i32], ptr @A, i32 0, i32 %i.0
; CHECK-NEXT: --> {(8 + @A)<nuw><nsw>,+,4}<nw><%bb3> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: (408 + @A) LoopDispositions: { %bb3: Computable }
; CHECK-NEXT: --> {(8 + @A)<nuw><nsw>,+,4}<nw><%bb3> U: [40,-3623) S: [-9223372036854775808,9223372036854775805) Exits: (408 + @A)<nuw> LoopDispositions: { %bb3: Computable }
; CHECK-NEXT: %tmp2 = add i32 %i.0, 1
; CHECK-NEXT: --> {3,+,1}<nuw><nsw><%bb3> U: [3,104) S: [3,104) Exits: 103 LoopDispositions: { %bb3: Computable }
; CHECK-NEXT: %i.0 = phi i32 [ 2, %entry ], [ %tmp2, %bb ]
Expand Down
37 changes: 35 additions & 2 deletions llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -183,15 +183,15 @@ define void @f3(ptr %x_addr, ptr %y_addr, ptr %tmp_addr) {
; CHECK-NEXT: %s3.zext = zext i8 %s3 to i16
; CHECK-NEXT: --> (1 + (zext i8 (4 + (32 * %x) + (36 * %y)) to i16))<nuw><nsw> U: [1,254) S: [1,257)
; CHECK-NEXT: %ptr = bitcast ptr @z_addr to ptr
; CHECK-NEXT: --> @z_addr U: [0,-3) S: [-9223372036854775808,9223372036854775805)
; CHECK-NEXT: --> @z_addr U: [4,-19) S: [-9223372036854775808,9223372036854775805)
; CHECK-NEXT: %int0 = ptrtoint ptr %ptr to i32
; CHECK-NEXT: --> (trunc i64 (ptrtoint ptr @z_addr to i64) to i32) U: [0,-3) S: [-2147483648,2147483645)
; CHECK-NEXT: %int5 = add i32 %int0, 5
; CHECK-NEXT: --> (5 + (trunc i64 (ptrtoint ptr @z_addr to i64) to i32)) U: [5,2) S: [-2147483643,-2147483646)
; CHECK-NEXT: %int.zext = zext i32 %int5 to i64
; CHECK-NEXT: --> (1 + (zext i32 (4 + (trunc i64 (ptrtoint ptr @z_addr to i64) to i32)) to i64))<nuw><nsw> U: [1,4294967294) S: [1,4294967297)
; CHECK-NEXT: %ptr_noalign = bitcast ptr @z_addr_noalign to ptr
; CHECK-NEXT: --> @z_addr_noalign U: full-set S: full-set
; CHECK-NEXT: --> @z_addr_noalign U: [1,-16) S: full-set
; CHECK-NEXT: %int0_na = ptrtoint ptr %ptr_noalign to i32
; CHECK-NEXT: --> (trunc i64 (ptrtoint ptr @z_addr_noalign to i64) to i32) U: full-set S: full-set
; CHECK-NEXT: %int5_na = add i32 %int0_na, 5
Expand Down Expand Up @@ -362,3 +362,36 @@ loop:
exit2:
ret i1 false
}


define void @dereferenceable_arg(ptr dereferenceable(128) %len_addr, ptr dereferenceable(128) align(8) %len_addr2) {
; CHECK-LABEL: 'dereferenceable_arg'
; CHECK-NEXT: Classifying expressions for: @dereferenceable_arg
; CHECK-NEXT: %ptr = bitcast ptr %len_addr to ptr
; CHECK-NEXT: --> %len_addr U: [1,-128) S: full-set
; CHECK-NEXT: %ptr2 = bitcast ptr %len_addr2 to ptr
; CHECK-NEXT: --> %len_addr2 U: [8,-135) S: [-9223372036854775808,9223372036854775801)
; CHECK-NEXT: Determining loop execution counts for: @dereferenceable_arg
;
entry:
%ptr = bitcast ptr %len_addr to ptr
%ptr2 = bitcast ptr %len_addr2 to ptr

ret void
}

define void @dereferenceable_or_null_arg(ptr dereferenceable_or_null(128) %len_addr, ptr dereferenceable_or_null(128) align(8) %len_addr2) {
; CHECK-LABEL: 'dereferenceable_or_null_arg'
; CHECK-NEXT: Classifying expressions for: @dereferenceable_or_null_arg
; CHECK-NEXT: %ptr = bitcast ptr %len_addr to ptr
; CHECK-NEXT: --> %len_addr U: [0,-128) S: full-set
; CHECK-NEXT: %ptr2 = bitcast ptr %len_addr2 to ptr
; CHECK-NEXT: --> %len_addr2 U: [0,-135) S: [-9223372036854775808,9223372036854775801)
; CHECK-NEXT: Determining loop execution counts for: @dereferenceable_or_null_arg
;
entry:
%ptr = bitcast ptr %len_addr to ptr
%ptr2 = bitcast ptr %len_addr2 to ptr

ret void
}