Strength reduce intrinsics with overflow into regular arithmetic operations if possible.

Some overflow intrinsics, such as s/uadd.with.overflow and umul.with.overflow, are already strength-reduced.
This change extends the transformation to the remaining arithmetic intrinsics: s/usub.with.overflow and smul.with.overflow.
It completes the work on PR20194.

llvm-svn: 224417
eeckstein committed Dec 17, 2014
1 parent 92731d2 commit a451b9b
Showing 4 changed files with 152 additions and 12 deletions.
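To illustrate the rewrite this enables, here is a sketch that mirrors the ssubtest_nsw test added below (the function name @ssub_example is only illustrative, not part of the commit). Both operands of llvm.ssub.with.overflow.i32 are sign-extended from i8, so InstCombine can prove the subtraction never overflows and replaces the intrinsic call with plain arithmetic plus a constant-false overflow flag:

declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32)

define { i32, i1 } @ssub_example(i8 %a, i8 %b) {
  %A = sext i8 %a to i32
  %B = sext i8 %b to i32
  %x = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %A, i32 %B)
  ret { i32, i1 } %x
}

; After `opt -instcombine -S` the body becomes, roughly:
;   %x = sub nsw i32 %A, %B
;   %1 = insertvalue { i32, i1 } { i32 undef, i1 false }, i32 %x, 0
;   ret { i32, i1 } %1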
1 change: 1 addition & 0 deletions llvm/lib/Transforms/InstCombine/InstCombine.h
@@ -285,6 +285,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
bool WillNotOverflowUnsignedAdd(Value *LHS, Value *RHS, Instruction *CxtI);
bool WillNotOverflowSignedSub(Value *LHS, Value *RHS, Instruction *CxtI);
bool WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, Instruction *CxtI);
bool WillNotOverflowSignedMul(Value *LHS, Value *RHS, Instruction *CxtI);
Value *EmitGEPOffset(User *GEP);
Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask);
45 changes: 45 additions & 0 deletions llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1008,6 +1008,51 @@ bool InstCombiner::WillNotOverflowUnsignedSub(Value *LHS, Value *RHS,
return false;
}

/// \brief Return true if we can prove that:
/// (mul LHS, RHS) === (mul nsw LHS, RHS)
bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS,
Instruction *CxtI) {
if (IntegerType *IT = dyn_cast<IntegerType>(LHS->getType())) {

// Multiplying n * m significant bits yields a result of n + m significant
// bits. If the total number of significant bits does not exceed the
// result bit width (minus 1), there is no overflow.
// This means if we have enough leading sign bits in the operands
// we can guarantee that the result does not overflow.
// Ref: "Hacker's Delight" by Henry Warren
unsigned BitWidth = IT->getBitWidth();

// Note that underestimating the number of sign bits gives a more
// conservative answer.
unsigned SignBits = ComputeNumSignBits(LHS, 0, CxtI) +
ComputeNumSignBits(RHS, 0, CxtI);

// First handle the easy case: if we have enough sign bits there's
// definitely no overflow.
if (SignBits > BitWidth + 1)
return true;

// There are two ambiguous cases where there can be no overflow:
// SignBits == BitWidth + 1 and
// SignBits == BitWidth
// The second case is difficult to check, therefore we only handle the
// first case.
if (SignBits == BitWidth + 1) {
// It overflows only when both arguments are negative and the true
// product is exactly the minimum negative number.
// E.g. mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000
// For simplicity we just check if at least one side is not negative.
bool LHSNonNegative, LHSNegative;
bool RHSNonNegative, RHSNegative;
ComputeSignBit(LHS, LHSNonNegative, LHSNegative, DL, 0, AT, CxtI, DT);
ComputeSignBit(RHS, RHSNonNegative, RHSNegative, DL, 0, AT, CxtI, DT);
if (LHSNonNegative || RHSNonNegative)
return true;
}
}
return false;
}
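As a brief justification of the SignBits > BitWidth + 1 test above (this restates the Hacker's Delight argument cited in the comment and is not part of the commit): an n-bit signed value x with s(x) known sign bits satisfies |x| <= 2^(n - s(x)), hence

\[
|LHS \cdot RHS| \;\le\; 2^{\,n - s(LHS)} \cdot 2^{\,n - s(RHS)} \;=\; 2^{\,2n - \mathrm{SignBits}}.
\]

If SignBits >= n + 2, this bound is at most 2^(n-2), which lies strictly inside the signed range [-2^(n-1), 2^(n-1) - 1], so the multiplication cannot overflow. With SignBits = n + 1 the bound becomes 2^(n-1), and that value can be reached only when both operands are negative (the 0xff00 * 0xff80 = 0x8000 case in the comment), which is why the code additionally requires at least one operand to be known non-negative.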

// Checks if any operand is negative and we can convert add to sub.
// This function checks for following negative patterns
// ADD(XOR(OR(Z, NOT(C)), C)), 1) == NEG(AND(Z, C))
15 changes: 15 additions & 0 deletions llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -427,6 +427,15 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return CreateOverflowTuple(II, LHS, false, /*ReUseName*/false);
}
}
if (II->getIntrinsicID() == Intrinsic::ssub_with_overflow) {
if (WillNotOverflowSignedSub(LHS, RHS, II)) {
return CreateOverflowTuple(II, Builder->CreateNSWSub(LHS, RHS), false);
}
} else {
if (WillNotOverflowUnsignedSub(LHS, RHS, II)) {
return CreateOverflowTuple(II, Builder->CreateNUWSub(LHS, RHS), false);
}
}
break;
}
case Intrinsic::umul_with_overflow: {
@@ -477,6 +486,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
/*ReUseName*/false);
}
}
if (II->getIntrinsicID() == Intrinsic::smul_with_overflow) {
Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
if (WillNotOverflowSignedMul(LHS, RHS, II)) {
return CreateOverflowTuple(II, Builder->CreateNSWMul(LHS, RHS), false);
}
}
break;
case Intrinsic::minnum:
case Intrinsic::maxnum: {
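For the unsigned-subtract path the proof comes from known bits rather than sign bits. A sketch along the lines of the usubtest_nuw test below (the function name @usub_example is only illustrative, not part of the commit): forcing the high bit on the LHS and clearing it on the RHS guarantees LHS >= 2^31 > RHS, so the subtraction cannot wrap and is strength-reduced to sub nuw.

declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32)

define { i32, i1 } @usub_example(i32 %a, i32 %b) {
  %A = or i32 %a, 2147483648    ; high bit set: %A >= 2^31
  %B = and i32 %b, 2147483647   ; high bit clear: %B < 2^31
  %x = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %A, i32 %B)
  ret { i32, i1 } %x
}

; After `opt -instcombine -S`, roughly:
;   %x = sub nuw i32 %A, %B
;   %1 = insertvalue { i32, i1 } { i32 undef, i1 false }, i32 %x, 0
;   ret { i32, i1 } %1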
103 changes: 91 additions & 12 deletions llvm/test/Transforms/InstCombine/intrinsics.ll
@@ -1,10 +1,17 @@
; RUN: opt -instcombine -S < %s | FileCheck %s

%overflow.result = type {i8, i1}
%ov.result.32 = type { i32, i1 }


declare %overflow.result @llvm.uadd.with.overflow.i8(i8, i8)
declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)
declare %overflow.result @llvm.umul.with.overflow.i8(i8, i8)
declare %overflow.result @llvm.uadd.with.overflow.i8(i8, i8) nounwind readnone
declare %overflow.result @llvm.umul.with.overflow.i8(i8, i8) nounwind readnone
declare %ov.result.32 @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
declare %ov.result.32 @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
declare %ov.result.32 @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
declare %ov.result.32 @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
declare %ov.result.32 @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
declare %ov.result.32 @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
declare double @llvm.powi.f64(double, i32) nounwind readonly
declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
@@ -91,17 +98,92 @@ define i8 @uaddtest7(i8 %A, i8 %B) {
}

; PR20194
define { i32, i1 } @saddtest1(i8 %a, i8 %b) {
define %ov.result.32 @saddtest_nsw(i8 %a, i8 %b) {
%A = sext i8 %a to i32
%B = sext i8 %b to i32
%x = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %A, i32 %B)
ret { i32, i1 } %x
; CHECK-LABEL: @saddtest1
%x = call %ov.result.32 @llvm.sadd.with.overflow.i32(i32 %A, i32 %B)
ret %ov.result.32 %x
; CHECK-LABEL: @saddtest_nsw
; CHECK: %x = add nsw i32 %A, %B
; CHECK-NEXT: %1 = insertvalue { i32, i1 } { i32 undef, i1 false }, i32 %x, 0
; CHECK-NEXT: ret { i32, i1 } %1
; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
; CHECK-NEXT: ret %ov.result.32 %1
}

define %ov.result.32 @uaddtest_nuw(i32 %a, i32 %b) {
%A = and i32 %a, 2147483647
%B = and i32 %b, 2147483647
%x = call %ov.result.32 @llvm.uadd.with.overflow.i32(i32 %A, i32 %B)
ret %ov.result.32 %x
; CHECK-LABEL: @uaddtest_nuw
; CHECK: %x = add nuw i32 %A, %B
; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
; CHECK-NEXT: ret %ov.result.32 %1
}

define %ov.result.32 @ssubtest_nsw(i8 %a, i8 %b) {
%A = sext i8 %a to i32
%B = sext i8 %b to i32
%x = call %ov.result.32 @llvm.ssub.with.overflow.i32(i32 %A, i32 %B)
ret %ov.result.32 %x
; CHECK-LABEL: @ssubtest_nsw
; CHECK: %x = sub nsw i32 %A, %B
; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
; CHECK-NEXT: ret %ov.result.32 %1
}

define %ov.result.32 @usubtest_nuw(i32 %a, i32 %b) {
%A = or i32 %a, 2147483648
%B = and i32 %b, 2147483647
%x = call %ov.result.32 @llvm.usub.with.overflow.i32(i32 %A, i32 %B)
ret %ov.result.32 %x
; CHECK-LABEL: @usubtest_nuw
; CHECK: %x = sub nuw i32 %A, %B
; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
; CHECK-NEXT: ret %ov.result.32 %1
}

define %ov.result.32 @smultest1_nsw(i32 %a, i32 %b) {
%A = and i32 %a, 4095 ; 0xfff
%B = and i32 %b, 524287; 0x7ffff
%x = call %ov.result.32 @llvm.smul.with.overflow.i32(i32 %A, i32 %B)
ret %ov.result.32 %x
; CHECK-LABEL: @smultest1_nsw
; CHECK: %x = mul nsw i32 %A, %B
; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
; CHECK-NEXT: ret %ov.result.32 %1
}

define %ov.result.32 @smultest2_nsw(i32 %a, i32 %b) {
%A = ashr i32 %a, 16
%B = ashr i32 %b, 16
%x = call %ov.result.32 @llvm.smul.with.overflow.i32(i32 %A, i32 %B)
ret %ov.result.32 %x
; CHECK-LABEL: @smultest2_nsw
; CHECK: %x = mul nsw i32 %A, %B
; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
; CHECK-NEXT: ret %ov.result.32 %1
}

define %ov.result.32 @smultest3_sw(i32 %a, i32 %b) {
%A = ashr i32 %a, 16
%B = ashr i32 %b, 15
%x = call %ov.result.32 @llvm.smul.with.overflow.i32(i32 %A, i32 %B)
ret %ov.result.32 %x
; CHECK-LABEL: @smultest3_sw
; CHECK: %x = call %ov.result.32 @llvm.smul.with.overflow.i32(i32 %A, i32 %B)
; CHECK-NEXT: ret %ov.result.32 %x
}

define %ov.result.32 @umultest_nuw(i32 %a, i32 %b) {
%A = and i32 %a, 65535 ; 0xffff
%B = and i32 %b, 65535 ; 0xffff
%x = call %ov.result.32 @llvm.umul.with.overflow.i32(i32 %A, i32 %B)
ret %ov.result.32 %x
; CHECK-LABEL: @umultest_nuw
; CHECK: %x = mul nuw i32 %A, %B
; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
; CHECK-NEXT: ret %ov.result.32 %1
}

define i8 @umultest1(i8 %A, i1* %overflowPtr) {
%x = call %overflow.result @llvm.umul.with.overflow.i8(i8 0, i8 %A)
@@ -125,9 +207,6 @@ define i8 @umultest2(i8 %A, i1* %overflowPtr) {
; CHECK-NEXT: ret i8 %A
}

%ov.result.32 = type { i32, i1 }
declare %ov.result.32 @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone

define i32 @umultest3(i32 %n) nounwind {
%shr = lshr i32 %n, 2
%mul = call %ov.result.32 @llvm.umul.with.overflow.i32(i32 %shr, i32 3)
