From 7f3683848372f2c303f54e702af5505e05096fe6 Mon Sep 17 00:00:00 2001 From: Hassnaa Hamdi Date: Wed, 22 Oct 2025 12:16:11 +0100 Subject: [PATCH] [WPD]: Enable speculative devirtualizatoin. (#159048) This patch implements the speculative devirtualization feature in the LLVM backend. It handles the case of single implementation devirtualization where there is a single possible callee of a virtual function. - Add cl::opt 'devirtualize-speculatively' to enable it. - Flag is disabled by default. - It works regardless of the visibility of the object. - Not enabled for LTO for now. --- .../lib/Transforms/IPO/WholeProgramDevirt.cpp | 79 ++++++++--- .../speculative-devirt-single-impl.ll | 132 ++++++++++++++++++ .../virtual-const-prop-check.ll | 7 + 3 files changed, 200 insertions(+), 18 deletions(-) create mode 100644 llvm/test/Transforms/WholeProgramDevirt/speculative-devirt-single-impl.ll diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index 3406595950b5..e5244e91ce70 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -24,7 +24,8 @@ // returns 0, or a single vtable's function returns 1, replace each virtual // call with a comparison of the vptr against that vtable's address. // -// This pass is intended to be used during the regular and thin LTO pipelines: +// This pass is intended to be used during the regular/thin and non-LTO +// pipelines: // // During regular LTO, the pass determines the best optimization for each // virtual call and applies the resolutions directly to virtual calls that are @@ -48,6 +49,14 @@ // is supported. // - Import phase: (same as with hybrid case above). // +// During Speculative devirtualization mode -not restricted to LTO-: +// - The pass applies speculative devirtualization without requiring any type of +// visibility. +// - Skips other features like virtual constant propagation, uniform return +// value optimization, unique return value optimization and branch funnels as +// they need LTO. +// - This mode is enabled via 'devirtualize-speculatively' flag. +// //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/WholeProgramDevirt.h" @@ -61,7 +70,10 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TypeMetadataUtils.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/Bitcode/BitcodeWriter.h" @@ -139,6 +151,13 @@ static cl::opt ClWriteSummary( "bitcode, otherwise YAML"), cl::Hidden); +// TODO: This option eventually should support any public visibility vtables +// with/out LTO. +static cl::opt ClDevirtualizeSpeculatively( + "devirtualize-speculatively", + cl::desc("Enable speculative devirtualization optimization"), + cl::init(false)); + static cl::opt ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden, cl::init(10), @@ -869,6 +888,8 @@ void updatePublicTypeTestCalls(Module &M, CI->eraseFromParent(); } } else { + // TODO: Don't replace public type tests when speculative devirtualization + // gets enabled in LTO mode. auto *True = ConstantInt::getTrue(M.getContext()); for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) { auto *CI = cast(U.getUser()); @@ -1065,10 +1086,10 @@ bool DevirtModule::tryFindVirtualCallTargets( if (!TM.Bits->GV->isConstant()) return false; - // We cannot perform whole program devirtualization analysis on a vtable - // with public LTO visibility. - if (TM.Bits->GV->getVCallVisibility() == - GlobalObject::VCallVisibilityPublic) + // Without ClDevirtualizeSpeculatively, we cannot perform whole program + // devirtualization analysis on a vtable with public LTO visibility. + if (!ClDevirtualizeSpeculatively && TM.Bits->GV->getVCallVisibility() == + GlobalObject::VCallVisibilityPublic) return false; Constant *Ptr = getPointerAtOffset(TM.Bits->GV->getInitializer(), @@ -1094,6 +1115,12 @@ bool DevirtModule::tryFindVirtualCallTargets( if (Fn->getName() == "__cxa_pure_virtual") continue; + // In most cases empty functions will be overridden by the + // implementation of the derived class, so we can skip them. + if (ClDevirtualizeSpeculatively && Fn->getReturnType()->isVoidTy() && + Fn->getInstructionCount() <= 1) + continue; + // We can disregard unreachable functions as possible call targets, as // unreachable functions shouldn't be called. if (mustBeUnreachableFunction(Fn, ExportSummary)) @@ -1204,10 +1231,12 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo, CallTrap->setDebugLoc(CB.getDebugLoc()); } - // If fallback checking is enabled, add support to compare the virtual - // function pointer to the devirtualized target. In case of a mismatch, - // fall back to indirect call. - if (DevirtCheckMode == WPDCheckMode::Fallback) { + // If fallback checking or speculative devirtualization are enabled, + // add support to compare the virtual function pointer to the + // devirtualized target. In case of a mismatch, fall back to indirect + // call. + if (DevirtCheckMode == WPDCheckMode::Fallback || + ClDevirtualizeSpeculatively) { MDNode *Weights = MDBuilder(M.getContext()).createBranchWeights((1U << 20) - 1, 1); // Version the indirect call site. If the called value is equal to the @@ -1968,15 +1997,15 @@ void DevirtModule::scanTypeTestUsers( Function *TypeTestFunc, DenseMap> &TypeIdMap) { // Find all virtual calls via a virtual table pointer %p under an assumption - // of the form llvm.assume(llvm.type.test(%p, %md)). This indicates that %p - // points to a member of the type identifier %md. Group calls by (type ID, - // offset) pair (effectively the identity of the virtual function) and store - // to CallSlots. + // of the form llvm.assume(llvm.type.test(%p, %md)) or + // llvm.assume(llvm.public.type.test(%p, %md)). + // This indicates that %p points to a member of the type identifier %md. + // Group calls by (type ID, offset) pair (effectively the identity of the + // virtual function) and store to CallSlots. for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) { auto *CI = dyn_cast(U.getUser()); if (!CI) continue; - // Search for virtual calls based on %p and add them to DevirtCalls. SmallVector DevirtCalls; SmallVector Assumes; @@ -2261,6 +2290,12 @@ bool DevirtModule::run() { (ImportSummary && ImportSummary->partiallySplitLTOUnits())) return false; + Function *PublicTypeTestFunc = nullptr; + // If we are in speculative devirtualization mode, we can work on the public + // type test intrinsics. + if (ClDevirtualizeSpeculatively) + PublicTypeTestFunc = + M.getFunction(Intrinsic::getName(Intrinsic::public_type_test)); Function *TypeTestFunc = M.getFunction(Intrinsic::getName(Intrinsic::type_test)); Function *TypeCheckedLoadFunc = @@ -2273,8 +2308,9 @@ bool DevirtModule::run() { // module, this pass has nothing to do. But if we are exporting, we also need // to handle any users that appear only in the function summaries. if (!ExportSummary && - (!TypeTestFunc || TypeTestFunc->use_empty() || !AssumeFunc || - AssumeFunc->use_empty()) && + (((!PublicTypeTestFunc || PublicTypeTestFunc->use_empty()) && + (!TypeTestFunc || TypeTestFunc->use_empty())) || + !AssumeFunc || AssumeFunc->use_empty()) && (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()) && (!TypeCheckedLoadRelativeFunc || TypeCheckedLoadRelativeFunc->use_empty())) @@ -2285,6 +2321,9 @@ bool DevirtModule::run() { DenseMap> TypeIdMap; buildTypeIdentifierMap(Bits, TypeIdMap); + if (PublicTypeTestFunc && AssumeFunc) + scanTypeTestUsers(PublicTypeTestFunc, TypeIdMap); + if (TypeTestFunc && AssumeFunc) scanTypeTestUsers(TypeTestFunc, TypeIdMap); @@ -2383,8 +2422,12 @@ bool DevirtModule::run() { .WPDRes[S.first.ByteOffset]; if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos, S.first.ByteOffset, ExportSummary)) { - - if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) { + bool SingleImplDevirt = + trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res); + // Out of speculative devirtualization mode, Try to apply virtual constant + // propagation or branch funneling. + // TODO: This should eventually be enabled for non-public type tests. + if (!SingleImplDevirt && !ClDevirtualizeSpeculatively) { DidVirtualConstProp |= tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first); diff --git a/llvm/test/Transforms/WholeProgramDevirt/speculative-devirt-single-impl.ll b/llvm/test/Transforms/WholeProgramDevirt/speculative-devirt-single-impl.ll new file mode 100644 index 000000000000..10566ae0dee8 --- /dev/null +++ b/llvm/test/Transforms/WholeProgramDevirt/speculative-devirt-single-impl.ll @@ -0,0 +1,132 @@ +; -stats requires asserts +; REQUIRES: asserts + +; Check that we can still devirtualize outside LTO mode when speculative devirtualization is enabled. +; Check that we skip devirtualization for empty functions in speculative devirtualization mode + +; RUN: opt -S -passes=wholeprogramdevirt -devirtualize-speculatively \ +; RUN: -pass-remarks=wholeprogramdevirt -stats %s 2>&1 | FileCheck %s + +target datalayout = "e-p:64:64" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK: remark: devirt-single.cc:30:32: single-impl: devirtualized a call to vf +; CHECK: remark: devirt-single.cc:41:32: single-impl: devirtualized a call to vf +; CHECK: remark: devirt-single.cc:51:32: single-impl: devirtualized a call to vf +; CHECK: remark: devirt-single.cc:13:0: devirtualized vf +; CHECK-NOT: devirtualized + +@vt1 = constant [1 x ptr] [ptr @vf], !type !8 +@vt2 = constant [1 x ptr] [ptr @vf_empty], !type !12 + +define i1 @vf(ptr %this) #0 !dbg !7 { + ret i1 true +} + +; This should NOT be devirtualized because during non-lto empty functions +; are skipped. +define void @vf_empty(ptr %this) !dbg !11 { + ret void +} + +; CHECK: define void @call +define void @call(ptr %obj) #1 !dbg !5 { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.public.type.test(ptr %vtable, metadata !"typeid") + call void @llvm.assume(i1 %p) + %fptr = load ptr, ptr %vtable + ; CHECK: if.true.direct_targ: + ; CHECK: call i1 @vf( + ; CHECK: if.false.orig_indirect: + ; CHECK: call i1 %fptr( + call i1 %fptr(ptr %obj), !dbg !6 + ret void +} + + +; CHECK: define void @call1 +define void @call1(ptr %obj) #1 !dbg !9 { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid1") + call void @llvm.assume(i1 %p) + %fptr = load ptr, ptr %vtable, align 8 + ; CHECK: call i1 %fptr + %1 = call i1 %fptr(ptr %obj), !dbg !10 + ret void +} +declare ptr @llvm.load.relative.i32(ptr, i32) + +@vt3 = private unnamed_addr constant [1 x i32] [ + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf to i64), i64 ptrtoint (ptr @vt3 to i64)) to i32) +], align 4, !type !15 + +; CHECK: define void @call2 +define void @call2(ptr %obj) #1 !dbg !13 { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid2") + call void @llvm.assume(i1 %p) + %fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0) + ; CHECK: if.true.direct_targ: + ; CHECK: call i1 @vf( + ; CHECK: if.false.orig_indirect: + ; CHECK: call i1 %fptr( + call i1 %fptr(ptr %obj), !dbg !14 + ret void +} + +@_ZTV1A.local = private unnamed_addr constant { [3 x i32] } { [3 x i32] [ + i32 0, ; offset to top + i32 0, ; rtti + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf to i64), i64 ptrtoint (ptr getelementptr inbounds ({ [3 x i32] }, ptr @_ZTV1A.local, i32 0, i32 0, i32 2) to i64)) to i32) ; vf_emptyunc offset +] }, align 4, !type !18 + +; CHECK: define void @call3 +define void @call3(ptr %obj) #1 !dbg !16 { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid3") + call void @llvm.assume(i1 %p) + %fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 8) + ; CHECK: if.true.direct_targ: + ; CHECK: call i1 @vf( + ; CHECK: if.false.orig_indirect: + ; CHECK: call i1 %fptr( + call i1 %fptr(ptr %obj), !dbg !17 + ret void +} + + +declare i1 @llvm.type.test(ptr, metadata) +declare i1 @llvm.public.type.test(ptr, metadata) +declare void @llvm.assume(i1) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3} +!llvm.ident = !{!4} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 4.0.0 (trunk 278098)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug) +!1 = !DIFile(filename: "devirt-single.cc", directory: ".") +!2 = !{i32 2, !"Dwarf Version", i32 4} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{!"clang version 4.0.0 (trunk 278098)"} +!5 = distinct !DISubprogram(name: "call", linkageName: "_Z4callPv", scope: !1, file: !1, line: 29, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!6 = !DILocation(line: 30, column: 32, scope: !5) +!7 = distinct !DISubprogram(name: "vf", linkageName: "_ZN3vt12vfEv", scope: !1, file: !1, line: 13, isLocal: false, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!8 = !{i32 0, !"typeid"} + +!9 = distinct !DISubprogram(name: "call1", linkageName: "_Z5call1Pv", scope: !1, file: !1, line: 31, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!10 = !DILocation(line: 35, column: 32, scope: !9) +!11 = distinct !DISubprogram(name: "vf_empty", linkageName: "_ZN3vt18vf_emptyEv", scope: !1, file: !1, line: 23, isLocal: false, isDefinition: true, scopeLine: 23, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!12 = !{i32 0, !"typeid1"} + +!13 = distinct !DISubprogram(name: "call2", linkageName: "_Z5call2Pv", scope: !1, file: !1, line: 40, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!14 = !DILocation(line: 41, column: 32, scope: !13) +!15 = !{i32 0, !"typeid2"} + +!16 = distinct !DISubprogram(name: "call3", linkageName: "_Z5call3Pv", scope: !1, file: !1, line: 50, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!17 = !DILocation(line: 51, column: 32, scope: !16) +!18 = !{i32 0, !"typeid3"} + + + +; CHECK: 1 wholeprogramdevirt - Number of whole program devirtualization targets +; CHECK: 3 wholeprogramdevirt - Number of single implementation devirtualizations diff --git a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll index fb8b6de003fd..1dd92bf20437 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll @@ -11,6 +11,9 @@ ; Check wildcard ; RUN: opt -S -passes=wholeprogramdevirt -whole-program-visibility -pass-remarks=wholeprogramdevirt -wholeprogramdevirt-skip=vf?i1 %s 2>&1 | FileCheck %s --check-prefix=SKIP +; Check that no stats are reported in speculative devirtualization mode as the virtual const prop is disabled. +; RUN: opt -S -passes=wholeprogramdevirt -devirtualize-speculatively -stats %s 2>&1 | FileCheck %s --check-prefix=CHECK-SPECULATIVE-WPD + target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" @@ -156,3 +159,7 @@ declare void @__cxa_pure_virtual() ; CHECK: 6 wholeprogramdevirt - Number of whole program devirtualization targets ; CHECK: 1 wholeprogramdevirt - Number of virtual constant propagations ; CHECK: 2 wholeprogramdevirt - Number of 1 bit virtual constant propagations + +; CHECK-SPECULATIVE-WPD-NOT: 0 wholeprogramdevirt - Number of unique return value optimizations +; CHECK-SPECULATIVE-WPD-NOT: 0 wholeprogramdevirt - Number of virtual constant propagations +; CHECK-SPECULATIVE-WPD-NOT: 0 wholeprogramdevirt - Number of 1 bit virtual constant propagations -- Gitee