From d64c57b2a57ab18afc450ce97f2ec623f519dfdf Mon Sep 17 00:00:00 2001
From: "Clare.T"
Date: Tue, 9 Sep 2025 17:43:16 +0800
Subject: [PATCH] Optimize nested arithmetic to direct shift and abs operations

---
Review notes:
- The fold requires nsw on the inner add and sub. Without it the rewrite
  miscompiles: for i8 a = b = 127 the original computes 2, the rewrite -2.
- The assertions in the .ll file are hand-written and must be regenerated with
  utils/update_test_checks.py. The index hashes below predate these changes.

 .../InstCombine/InstCombineAddSub.cpp         |  50 +++++
 .../Transforms/InstCombine/abs-add-abs-sub.ll | 205 ++++++++++++++++++
 2 files changed, 255 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/abs-add-abs-sub.ll

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 91ca44e0f11e..53c8fdd0b23d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1175,6 +1175,53 @@ static Instruction *foldAddToAshr(BinaryOperator &Add) {
       X, ConstantInt::get(Add.getType(), DivC->exactLogBase2()));
 }
 
+/// Fold the sum of the magnitudes of A + B and A - B into twice the larger
+/// operand magnitude:
+///
+///   abs(A +nsw B) + abs(A -nsw B) --> shl (smax (abs A), (abs B)), 1
+///
+/// The two abs calls may appear in either order and the subtraction may be
+/// A - B or B - A. Both the inner add and the inner sub must be nsw: with
+/// wrapping arithmetic the identity does not hold (for i8 A = B = 127,
+/// abs(A + B) + abs(A - B) is 2 while 2 * 127 wraps to -2).
+static Instruction *foldAbsAddAbsSub(BinaryOperator &I,
+                                     InstCombiner::BuilderTy &Builder) {
+  auto *Abs0 = dyn_cast<IntrinsicInst>(I.getOperand(0));
+  auto *Abs1 = dyn_cast<IntrinsicInst>(I.getOperand(1));
+  if (!Abs0 || !Abs1 || Abs0->getIntrinsicID() != Intrinsic::abs ||
+      Abs1->getIntrinsicID() != Intrinsic::abs)
+    return nullptr;
+
+  // Four instructions are created, so only fold when the original abs calls
+  // (here) and their add/sub operands (in the match below) become dead.
+  if (!Abs0->hasOneUse() || !Abs1->hasOneUse())
+    return nullptr;
+
+  Value *A, *B;
+  auto MatchAddSub = [&](Value *Add, Value *Sub) {
+    // The add binds A and B before m_Specific reads them for the sub.
+    return match(Add, m_OneUse(m_NSWAdd(m_Value(A), m_Value(B)))) &&
+           match(Sub, m_OneUse(m_CombineOr(
+                          m_NSWSub(m_Specific(A), m_Specific(B)),
+                          m_NSWSub(m_Specific(B), m_Specific(A)))));
+  };
+  Value *Op0 = Abs0->getArgOperand(0), *Op1 = Abs1->getArgOperand(0);
+  if (!MatchAddSub(Op0, Op1) && !MatchAddSub(Op1, Op0))
+    return nullptr;
+
+  // The int_min_poison flag of llvm.abs is an immarg and hence a ConstantInt.
+  // Given the nsw flags, A or B can only be INT_MIN when the other value is 0,
+  // and then both original abs operands are INT_MIN as well. The new calls may
+  // therefore be poison on INT_MIN whenever either original call was.
+  bool IntMinIsPoison = cast<ConstantInt>(Abs0->getArgOperand(1))->isOne() ||
+                        cast<ConstantInt>(Abs1->getArgOperand(1))->isOne();
+  Value *PoisonFlag = Builder.getInt1(IntMinIsPoison);
+  Value *AbsA = Builder.CreateBinaryIntrinsic(Intrinsic::abs, A, PoisonFlag);
+  Value *AbsB = Builder.CreateBinaryIntrinsic(Intrinsic::abs, B, PoisonFlag);
+  Value *Max = Builder.CreateBinaryIntrinsic(Intrinsic::smax, AbsA, AbsB);
+  return BinaryOperator::CreateShl(Max, ConstantInt::get(I.getType(), 1));
+}
+
 Instruction *InstCombinerImpl::
     canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(
         BinaryOperator &I) {
@@ -1621,6 +1668,9 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
   if (Instruction *Res = foldBinOpOfSelectAndCastOfSelectCondition(I))
     return Res;
 
+  if (Instruction *Res = foldAbsAddAbsSub(I, Builder))
+    return Res;
+
   return Changed ? &I : nullptr;
 }
 
diff --git a/llvm/test/Transforms/InstCombine/abs-add-abs-sub.ll b/llvm/test/Transforms/InstCombine/abs-add-abs-sub.ll
new file mode 100644
index 000000000000..adb1829fa163
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/abs-add-abs-sub.ll
@@ -0,0 +1,205 @@
+; NOTE: The assertions in this file are hand-written; regenerate them with
+; utils/update_test_checks.py against a build that contains the fold.
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+declare i32 @llvm.abs.i32(i32, i1)
+declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
+declare void @use32(i32)
+
+; abs(a +nsw b) + abs(a -nsw b) --> 2 * smax(abs(a), abs(b))
+define i32 @t0_basic(i32 %a, i32 %b) {
+; CHECK-LABEL: @t0_basic(
+; CHECK-NEXT:    [[ABS_A:%.*]] = call i32 @llvm.abs.i32(i32 [[A:%.*]], i1 false)
+; CHECK-NEXT:    [[ABS_B:%.*]] = call i32 @llvm.abs.i32(i32 [[B:%.*]], i1 false)
+; CHECK-NEXT:    [[MAX:%.*]] = call i32 @llvm.smax.i32(i32 [[ABS_A]], i32 [[ABS_B]])
+; CHECK-NEXT:    [[SUM:%.*]] = shl i32 [[MAX]], 1
+; CHECK-NEXT:    ret i32 [[SUM]]
+;
+  %add = add nsw i32 %a, %b
+  %abs_add = call i32 @llvm.abs.i32(i32 %add, i1 false)
+  %sub = sub nsw i32 %a, %b
+  %abs_sub = call i32 @llvm.abs.i32(i32 %sub, i1 false)
+  %sum = add i32 %abs_add, %abs_sub
+  ret i32 %sum
+}
+
+; Both abs calls are poison on INT_MIN, so the new calls may be as well.
+define i32 @t1_poison(i32 %a, i32 %b) {
+; CHECK-LABEL: @t1_poison(
+; CHECK-NEXT:    [[ABS_A:%.*]] = call i32 @llvm.abs.i32(i32 [[A:%.*]], i1 true)
+; CHECK-NEXT:    [[ABS_B:%.*]] = call i32 @llvm.abs.i32(i32 [[B:%.*]], i1 true)
+; CHECK-NEXT:    [[MAX:%.*]] = call i32 @llvm.{{[su]}}max.i32(i32 [[ABS_A]], i32 [[ABS_B]])
+; CHECK-NEXT:    [[SUM:%.*]] = shl{{.*}} i32 [[MAX]], 1
+; CHECK-NEXT:    ret i32 [[SUM]]
+;
+  %add = add nsw i32 %a, %b
+  %abs_add = call i32 @llvm.abs.i32(i32 %add, i1 true)
+  %sub = sub nsw i32 %a, %b
+  %abs_sub = call i32 @llvm.abs.i32(i32 %sub, i1 true)
+  %sum = add i32 %abs_add, %abs_sub
+  ret i32 %sum
+}
+
+; One poison-on-INT_MIN abs is enough for the new calls to carry the flag.
+define i32 @t2_mixed_poison(i32 %a, i32 %b) {
+; CHECK-LABEL: @t2_mixed_poison(
+; CHECK-NEXT:    [[ABS_A:%.*]] = call i32 @llvm.abs.i32(i32 [[A:%.*]], i1 true)
+; CHECK-NEXT:    [[ABS_B:%.*]] = call i32 @llvm.abs.i32(i32 [[B:%.*]], i1 true)
+; CHECK-NEXT:    [[MAX:%.*]] = call i32 @llvm.{{[su]}}max.i32(i32 [[ABS_A]], i32 [[ABS_B]])
+; CHECK-NEXT:    [[SUM:%.*]] = shl{{.*}} i32 [[MAX]], 1
+; CHECK-NEXT:    ret i32 [[SUM]]
+;
+  %add = add nsw i32 %a, %b
+  %abs_add = call i32 @llvm.abs.i32(i32 %add, i1 true)
+  %sub = sub nsw i32 %a, %b
+  %abs_sub = call i32 @llvm.abs.i32(i32 %sub, i1 false)
+  %sum = add i32 %abs_add, %abs_sub
+  ret i32 %sum
+}
+
+; The subtraction may have its operands in the opposite order (b - a).
+define i32 @t3_reverse_sub(i32 %a, i32 %b) {
+; CHECK-LABEL: @t3_reverse_sub(
+; CHECK-NEXT:    [[ABS_A:%.*]] = call i32 @llvm.abs.i32(i32 [[A:%.*]], i1 false)
+; CHECK-NEXT:    [[ABS_B:%.*]] = call i32 @llvm.abs.i32(i32 [[B:%.*]], i1 false)
+; CHECK-NEXT:    [[MAX:%.*]] = call i32 @llvm.smax.i32(i32 [[ABS_A]], i32 [[ABS_B]])
+; CHECK-NEXT:    [[SUM:%.*]] = shl i32 [[MAX]], 1
+; CHECK-NEXT:    ret i32 [[SUM]]
+;
+  %add = add nsw i32 %a, %b
+  %abs_add = call i32 @llvm.abs.i32(i32 %add, i1 false)
+  %sub = sub nsw i32 %b, %a
+  %abs_sub = call i32 @llvm.abs.i32(i32 %sub, i1 false)
+  %sum = add i32 %abs_add, %abs_sub
+  ret i32 %sum
+}
+
+; The fold is type-agnostic and also applies to vectors.
+define <4 x i32> @t4_vector(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @t4_vector(
+; CHECK-NEXT:    [[ABS_A:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[A:%.*]], i1 false)
+; CHECK-NEXT:    [[ABS_B:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[B:%.*]], i1 false)
+; CHECK-NEXT:    [[MAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[ABS_A]], <4 x i32> [[ABS_B]])
+; CHECK-NEXT:    [[SUM:%.*]] = shl <4 x i32> [[MAX]], {{<i32 1, i32 1, i32 1, i32 1>|splat \(i32 1\)}}
+; CHECK-NEXT:    ret <4 x i32> [[SUM]]
+;
+  %add = add nsw <4 x i32> %a, %b
+  %abs_add = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %add, i1 false)
+  %sub = sub nsw <4 x i32> %a, %b
+  %abs_sub = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 false)
+  %sum = add <4 x i32> %abs_add, %abs_sub
+  ret <4 x i32> %sum
+}
+
+; Negative test: the abs calls have other users, so folding adds instructions.
+define i32 @n5_multi_use(i32 %a, i32 %b) {
+; CHECK-LABEL: @n5_multi_use(
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[ABS_ADD:%.*]] = call i32 @llvm.abs.i32(i32 [[ADD]], i1 false)
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 [[A]], [[B]]
+; CHECK-NEXT:    [[ABS_SUB:%.*]] = call i32 @llvm.abs.i32(i32 [[SUB]], i1 false)
+; CHECK-NEXT:    call void @use32(i32 [[ABS_ADD]])
+; CHECK-NEXT:    call void @use32(i32 [[ABS_SUB]])
+; CHECK-NEXT:    [[SUM:%.*]] = add i32 [[ABS_ADD]], [[ABS_SUB]]
+; CHECK-NEXT:    ret i32 [[SUM]]
+;
+  %add = add nsw i32 %a, %b
+  %abs_add = call i32 @llvm.abs.i32(i32 %add, i1 false)
+  %sub = sub nsw i32 %a, %b
+  %abs_sub = call i32 @llvm.abs.i32(i32 %sub, i1 false)
+  call void @use32(i32 %abs_add)
+  call void @use32(i32 %abs_sub)
+  %sum = add i32 %abs_add, %abs_sub
+  ret i32 %sum
+}
+
+; Negative test: the add and the sub do not use the same pair of operands.
+define i32 @n6_different_operands(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @n6_different_operands(
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[A:%.*]], [[C:%.*]]
+; CHECK-NEXT:    [[ABS_ADD:%.*]] = call i32 @llvm.abs.i32(i32 [[ADD]], i1 false)
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 [[A]], [[B:%.*]]
+; CHECK-NEXT:    [[ABS_SUB:%.*]] = call i32 @llvm.abs.i32(i32 [[SUB]], i1 false)
+; CHECK-NEXT:    [[SUM:%.*]] = add i32 [[ABS_ADD]], [[ABS_SUB]]
+; CHECK-NEXT:    ret i32 [[SUM]]
+;
+  %add = add nsw i32 %a, %c
+  %abs_add = call i32 @llvm.abs.i32(i32 %add, i1 false)
+  %sub = sub nsw i32 %a, %b
+  %abs_sub = call i32 @llvm.abs.i32(i32 %sub, i1 false)
+  %sum = add i32 %abs_add, %abs_sub
+  ret i32 %sum
+}
+
+; Negative test: the inner add/sub are simplified before the fold is reached.
+define i32 @n7_nested(i32 %x, i32 %y) {
+; CHECK-LABEL: @n7_nested(
+; CHECK-NEXT:    [[ADD:%.*]] = shl i32 [[X:%.*]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.abs.i32(i32 [[ADD]], i1 false)
+; CHECK-NEXT:    [[SUB:%.*]] = shl i32 [[Y:%.*]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.abs.i32(i32 [[SUB]], i1 false)
+; CHECK-NEXT:    [[RES:%.*]] = add i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = add i32 %x, %y
+  %b = sub i32 %x, %y
+  %add = add i32 %a, %b
+  %abs_add = call i32 @llvm.abs.i32(i32 %add, i1 false)
+  %sub = sub i32 %a, %b
+  %abs_sub = call i32 @llvm.abs.i32(i32 %sub, i1 false)
+  %sum = add i32 %abs_add, %abs_sub
+  ret i32 %sum
+}
+
+; Negative test: the difference is not wrapped in an abs call.
+define i32 @n8_no_optimize_missing_abs(i32 %a, i32 %b) {
+; CHECK-LABEL: @n8_no_optimize_missing_abs(
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[ABS_ADD:%.*]] = call i32 @llvm.abs.i32(i32 [[ADD]], i1 false)
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 [[A]], [[B]]
+; CHECK-NEXT:    [[SUM:%.*]] = add i32 [[ABS_ADD]], [[SUB]]
+; CHECK-NEXT:    ret i32 [[SUM]]
+;
+  %add = add nsw i32 %a, %b
+  %abs_add = call i32 @llvm.abs.i32(i32 %add, i1 false)
+  %sub = sub nsw i32 %a, %b
+  %sum = add i32 %abs_add, %sub
+  ret i32 %sum
+}
+
+; Negative test: the second abs wraps a mul instead of a sub.
+define i32 @n9_no_optimize_wrong_operation(i32 %a, i32 %b) {
+; CHECK-LABEL: @n9_no_optimize_wrong_operation(
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[ABS_ADD:%.*]] = call i32 @llvm.abs.i32(i32 [[ADD]], i1 false)
+; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[A]], [[B]]
+; CHECK-NEXT:    [[ABS_MUL:%.*]] = call i32 @llvm.abs.i32(i32 [[MUL]], i1 false)
+; CHECK-NEXT:    [[SUM:%.*]] = add i32 [[ABS_ADD]], [[ABS_MUL]]
+; CHECK-NEXT:    ret i32 [[SUM]]
+;
+  %add = add nsw i32 %a, %b
+  %abs_add = call i32 @llvm.abs.i32(i32 %add, i1 false)
+  %mul = mul i32 %a, %b
+  %abs_mul = call i32 @llvm.abs.i32(i32 %mul, i1 false)
+  %sum = add i32 %abs_add, %abs_mul
+  ret i32 %sum
+}
+
+; Negative test: without nsw the inner add/sub may wrap and the identity is
+; false (e.g. a = b = INT_MAX gives 2 before the rewrite and -2 after it).
+define i32 @n10_no_optimize_missing_nsw(i32 %a, i32 %b) {
+; CHECK-LABEL: @n10_no_optimize_missing_nsw(
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[ABS_ADD:%.*]] = call i32 @llvm.abs.i32(i32 [[ADD]], i1 false)
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[A]], [[B]]
+; CHECK-NEXT:    [[ABS_SUB:%.*]] = call i32 @llvm.abs.i32(i32 [[SUB]], i1 false)
+; CHECK-NEXT:    [[SUM:%.*]] = add i32 [[ABS_ADD]], [[ABS_SUB]]
+; CHECK-NEXT:    ret i32 [[SUM]]
+;
+  %add = add i32 %a, %b
+  %abs_add = call i32 @llvm.abs.i32(i32 %add, i1 false)
+  %sub = sub i32 %a, %b
+  %abs_sub = call i32 @llvm.abs.i32(i32 %sub, i1 false)
+  %sum = add i32 %abs_add, %abs_sub
+  ret i32 %sum
+}
+-- 
+Gitee