diff --git a/Documentation/arch/arm64/booting.rst b/Documentation/arch/arm64/booting.rst index 29126bf0eac9a35b32a1ce0c0caa076d2aef8b71..408d2e27b64182a8c84b8b3ec3629857b2b011f0 100644 --- a/Documentation/arch/arm64/booting.rst +++ b/Documentation/arch/arm64/booting.rst @@ -349,6 +349,27 @@ Before jumping into the kernel, the following conditions must be met: - HWFGWTR_EL2.nSMPRI_EL1 (bit 54) must be initialised to 0b01. + For CPUs with feature Branch Record Buffer Extension (FEAT_BRBE): + + - If EL3 is present: + + - MDCR_EL3.SBRBE (bits 33:32) must be initialised to 0b11. + + - If the kernel is entered at EL1 and EL2 is present: + + - BRBCR_EL2.CC (bit 3) must be initialised to 0b1. + - BRBCR_EL2.MPRED (bit 4) must be initialised to 0b1. + + - HDFGRTR_EL2.nBRBDATA (bit 61) must be initialised to 0b1. + - HDFGRTR_EL2.nBRBCTL (bit 60) must be initialised to 0b1. + - HDFGRTR_EL2.nBRBIDR (bit 59) must be initialised to 0b1. + + - HDFGWTR_EL2.nBRBDATA (bit 61) must be initialised to 0b1. + - HDFGWTR_EL2.nBRBCTL (bit 60) must be initialised to 0b1. + + - HFGITR_EL2.nBRBIALL (bit 56) must be initialised to 0b1. + - HFGITR_EL2.nBRBINJ (bit 55) must be initialised to 0b1. + For CPUs with the Scalable Matrix Extension FA64 feature (FEAT_SME_FA64): - If EL3 is present: diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 45d9443367e19136ef8cf59848cda7b3353e3ee2..967ab41d53ddba29b14bd68a38e572e078aa0d10 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -6836,6 +6836,7 @@ CONFIG_ARM_PMU=y CONFIG_ARM_PMU_ACPI=y CONFIG_ARM_SMMU_V3_PMU=m CONFIG_ARM_PMUV3=y +CONFIG_ARM64_BRBE=y # CONFIG_ARM_DSU_PMU is not set CONFIG_QCOM_L2_PMU=y CONFIG_QCOM_L3_PMU=y diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index e4546b29dd0cbfbb5b902e15caadab69458b1fac..e7cae1452098c165418995524963999181a7dcce 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -155,6 +155,40 @@ .Lskip_set_cptr_\@: .endm +/* + * Enable BRBE to record cycle counts and branch mispredicts. + * + * At any EL, to record cycle counts BRBE requires that both + * BRBCR_EL2.CC=1 and BRBCR_EL1.CC=1. + * + * At any EL, to record branch mispredicts BRBE requires that both + * BRBCR_EL2.MPRED=1 and BRBCR_EL1.MPRED=1. + * + * When HCR_EL2.E2H=1, the BRBCR_EL1 encoding is redirected to + * BRBCR_EL2, but the {CC,MPRED} bits in the real BRBCR_EL1 register + * still apply. + * + * Set {CC,MPRBED} in both BRBCR_EL2 and BRBCR_EL1 so that at runtime we + * only need to enable/disable thse in BRBCR_EL1 regardless of whether + * the kernel ends up executing in EL1 or EL2. + */ +.macro __init_el2_brbe + mrs x1, id_aa64dfr0_el1 + ubfx x1, x1, #ID_AA64DFR0_EL1_BRBE_SHIFT, #4 + cbz x1, .Lskip_brbe_\@ + + mov_q x0, BRBCR_ELx_CC | BRBCR_ELx_MPRED + msr_s SYS_BRBCR_EL2, x0 + + __check_hvhe .Lset_brbe_nvhe_\@, x1 + msr_s SYS_BRBCR_EL12, x0 // VHE + b .Lskip_brbe_\@ + +.Lset_brbe_nvhe_\@: + msr_s SYS_BRBCR_EL1, x0 // NVHE +.Lskip_brbe_\@: +.endm + /* Disable any fine grained traps */ .macro __init_el2_fgt mrs x1, id_aa64mmfr0_el1 @@ -162,16 +196,48 @@ cbz x1, .Lskip_fgt_\@ mov x0, xzr + mov x2, xzr mrs x1, id_aa64dfr0_el1 ubfx x1, x1, #ID_AA64DFR0_EL1_PMSVer_SHIFT, #4 cmp x1, #3 b.lt .Lset_debug_fgt_\@ + /* Disable PMSNEVFR_EL1 read and write traps */ - orr x0, x0, #(1 << 62) + orr x0, x0, #HDFGRTR_EL2_nPMSNEVFR_EL1_MASK + orr x2, x2, #HDFGWTR_EL2_nPMSNEVFR_EL1_MASK .Lset_debug_fgt_\@: +#ifdef CONFIG_ARM64_BRBE + mrs x1, id_aa64dfr0_el1 + ubfx x1, x1, #ID_AA64DFR0_EL1_BRBE_SHIFT, #4 + cbz x1, .Lskip_brbe_reg_fgt_\@ + + /* + * Disable read traps for the following registers + * + * [BRBSRC|BRBTGT|RBINF]_EL1 + * [BRBSRCINJ|BRBTGTINJ|BRBINFINJ|BRBTS]_EL1 + */ + orr x0, x0, #HDFGRTR_EL2_nBRBDATA_MASK + + /* + * Disable write traps for the following registers + * + * [BRBSRCINJ|BRBTGTINJ|BRBINFINJ|BRBTS]_EL1 + */ + orr x2, x2, #HDFGWTR_EL2_nBRBDATA_MASK + + /* Disable read and write traps for [BRBCR|BRBFCR]_EL1 */ + orr x0, x0, #HDFGRTR_EL2_nBRBCTL_MASK + orr x2, x2, #HDFGWTR_EL2_nBRBCTL_MASK + + /* Disable read traps for BRBIDR_EL1 */ + orr x0, x0, #HDFGRTR_EL2_nBRBIDR_MASK + +.Lskip_brbe_reg_fgt_\@: +#endif /* CONFIG_ARM64_BRBE */ msr_s SYS_HDFGRTR_EL2, x0 - msr_s SYS_HDFGWTR_EL2, x0 + msr_s SYS_HDFGWTR_EL2, x2 mov x0, xzr mrs x1, id_aa64pfr1_el1 @@ -194,7 +260,21 @@ .Lset_fgt_\@: msr_s SYS_HFGRTR_EL2, x0 msr_s SYS_HFGWTR_EL2, x0 - msr_s SYS_HFGITR_EL2, xzr + mov x0, xzr +#ifdef CONFIG_ARM64_BRBE + mrs x1, id_aa64dfr0_el1 + ubfx x1, x1, #ID_AA64DFR0_EL1_BRBE_SHIFT, #4 + cbz x1, .Lskip_brbe_insn_fgt_\@ + + /* Disable traps for BRBIALL instruction */ + orr x0, x0, #HFGITR_EL2_nBRBIALL_MASK + + /* Disable traps for BRBINJ instruction */ + orr x0, x0, #HFGITR_EL2_nBRBINJ_MASK + +.Lskip_brbe_insn_fgt_\@: +#endif /* CONFIG_ARM64_BRBE */ + msr_s SYS_HFGITR_EL2, x0 mrs x1, id_aa64pfr0_el1 // AMU traps UNDEF without AMU ubfx x1, x1, #ID_AA64PFR0_EL1_AMU_SHIFT, #4 @@ -229,6 +309,7 @@ __init_el2_nvhe_idregs __init_el2_cptr __init_el2_fgt + __init_el2_brbe .endm #ifndef __KVM_NVHE_HYPERVISOR__ diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 2293caab9f8f8e263b607f264082203f36a77cdf..cbe5187f1f1226a9bacd8fd45b7ca56177b4ec02 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -616,7 +616,7 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu) val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN); if (!vcpu_has_sve(vcpu) || - (vcpu->arch.fp_state != FP_STATE_GUEST_OWNED)) + (*host_data_ptr(fp_owner) != FP_STATE_GUEST_OWNED)) val |= CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN; if (cpus_have_final_cap(ARM64_SME)) val |= CPACR_EL1_SMEN_EL1EN | CPACR_EL1_SMEN_EL0EN; @@ -624,7 +624,7 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu) val = CPTR_NVHE_EL2_RES1; if (vcpu_has_sve(vcpu) && - (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED)) + (*host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED)) val |= CPTR_EL2_TZ; if (cpus_have_final_cap(ARM64_SME)) val &= ~CPTR_EL2_TSM; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index ac8115098ec9573b9930299e8fb4bed8913aa401..b69115c4fe346860f0f83ee0fb671160d3f7bb87 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -450,8 +450,43 @@ struct kvm_cpu_context { struct kvm_vcpu *__hyp_running_vcpu; }; +/* + * This structure is instantiated on a per-CPU basis, and contains + * data that is: + * + * - tied to a single physical CPU, and + * - either have a lifetime that does not extend past vcpu_put() + * - or is an invariant for the lifetime of the system + * + * Use host_data_ptr(field) as a way to access a pointer to such a + * field. + */ struct kvm_host_data { struct kvm_cpu_context host_ctxt; + struct user_fpsimd_state *fpsimd_state; /* hyp VA */ + + /* Ownership of the FP regs */ + enum { + FP_STATE_FREE, + FP_STATE_HOST_OWNED, + FP_STATE_GUEST_OWNED, + } fp_owner; + + /* + * host_debug_state contains the host registers which are + * saved and restored during world switches. + */ + struct { + /* {Break,watch}point registers */ + struct kvm_guest_debug_arch regs; + /* Statistical profiling extension */ + u64 pmscr_el1; + /* Self-hosted trace */ + u64 trfcr_el1; + /* Values of trap registers for the host before guest entry. */ + u64 mdcr_el2; + u64 brbcr_el1; + } host_debug_state; }; struct kvm_host_psci_config { @@ -510,19 +545,9 @@ struct kvm_vcpu_arch { u64 mdcr_el2; u64 cptr_el2; - /* Values of trap registers for the host before guest entry. */ - u64 mdcr_el2_host; - /* Exception Information */ struct kvm_vcpu_fault_info fault; - /* Ownership of the FP regs */ - enum { - FP_STATE_FREE, - FP_STATE_HOST_OWNED, - FP_STATE_GUEST_OWNED, - } fp_state; - /* Configuration flags, set once and for all before the vcpu can run */ u8 cflags; @@ -545,11 +570,10 @@ struct kvm_vcpu_arch { * We maintain more than a single set of debug registers to support * debugging the guest from the host and to maintain separate host and * guest state during world switches. vcpu_debug_state are the debug - * registers of the vcpu as the guest sees them. host_debug_state are - * the host registers which are saved and restored during - * world switches. external_debug_state contains the debug - * values we want to debug the guest. This is set via the - * KVM_SET_GUEST_DEBUG ioctl. + * registers of the vcpu as the guest sees them. + * + * external_debug_state contains the debug values we want to debug the + * guest. This is set via the KVM_SET_GUEST_DEBUG ioctl. * * debug_ptr points to the set of debug registers that should be loaded * onto the hardware when running the guest. @@ -558,18 +582,8 @@ struct kvm_vcpu_arch { struct kvm_guest_debug_arch vcpu_debug_state; struct kvm_guest_debug_arch external_debug_state; - struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */ struct task_struct *parent_task; - struct { - /* {Break,watch}point registers */ - struct kvm_guest_debug_arch regs; - /* Statistical profiling extension */ - u64 pmscr_el1; - /* Self-hosted trace */ - u64 trfcr_el1; - } host_debug_state; - /* VGIC state */ struct vgic_cpu vgic_cpu; struct arch_timer_cpu timer_cpu; @@ -755,8 +769,8 @@ struct kvm_vcpu_arch { #define DEBUG_STATE_SAVE_SPE __vcpu_single_flag(iflags, BIT(5)) /* Save TRBE context if active */ #define DEBUG_STATE_SAVE_TRBE __vcpu_single_flag(iflags, BIT(6)) -/* vcpu running in HYP context */ -#define VCPU_HYP_CONTEXT __vcpu_single_flag(iflags, BIT(7)) +/* Save BRBE context if active */ +#define DEBUG_STATE_SAVE_BRBE __vcpu_single_flag(iflags, BIT(7)) /* SVE enabled for host EL0 */ #define HOST_SVE_ENABLED __vcpu_single_flag(sflags, BIT(0)) @@ -1129,6 +1143,32 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); DECLARE_KVM_HYP_PER_CPU(struct kvm_host_data, kvm_host_data); +/* + * How we access per-CPU host data depends on the where we access it from, + * and the mode we're in: + * + * - VHE and nVHE hypervisor bits use their locally defined instance + * + * - the rest of the kernel use either the VHE or nVHE one, depending on + * the mode we're running in. + * + * Unless we're in protected mode, fully deprivileged, and the nVHE + * per-CPU stuff is exclusively accessible to the protected EL2 code. + * In this case, the EL1 code uses the *VHE* data as its private state + * (which makes sense in a way as there shouldn't be any shared state + * between the host and the hypervisor). + * + * Yes, this is all totally trivial. Shoot me now. + */ +#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__) +#define host_data_ptr(f) (&this_cpu_ptr(&kvm_host_data)->f) +#else +#define host_data_ptr(f) \ + (static_branch_unlikely(&kvm_protected_mode_initialized) ? \ + &this_cpu_ptr(&kvm_host_data)->f : \ + &this_cpu_ptr_hyp_sym(kvm_host_data)->f) +#endif + static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt) { /* The host's MPIDR is immutable, so let's set it up at boot time */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 42358b8d678e070a25b8b0a9b564fe2e1829a82b..faba73f948b17188176a52e82ec689ce3225dc44 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -197,16 +197,8 @@ #define SYS_DBGVCR32_EL2 sys_reg(2, 4, 0, 7, 0) #define SYS_BRBINF_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 0)) -#define SYS_BRBINFINJ_EL1 sys_reg(2, 1, 9, 1, 0) #define SYS_BRBSRC_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 1)) -#define SYS_BRBSRCINJ_EL1 sys_reg(2, 1, 9, 1, 1) #define SYS_BRBTGT_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 2)) -#define SYS_BRBTGTINJ_EL1 sys_reg(2, 1, 9, 1, 2) -#define SYS_BRBTS_EL1 sys_reg(2, 1, 9, 0, 2) - -#define SYS_BRBCR_EL1 sys_reg(2, 1, 9, 0, 0) -#define SYS_BRBFCR_EL1 sys_reg(2, 1, 9, 0, 1) -#define SYS_BRBIDR0_EL1 sys_reg(2, 1, 9, 2, 0) #define SYS_TRCITECR_EL1 sys_reg(3, 0, 1, 2, 3) #define SYS_TRCACATR(m) sys_reg(2, 1, 2, ((m & 7) << 1), (2 | (m >> 3))) @@ -272,6 +264,8 @@ /* ETM */ #define SYS_TRCOSLAR sys_reg(2, 1, 1, 0, 4) +#define SYS_BRBCR_EL2 sys_reg(2, 4, 9, 0, 0) + #define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0) #define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5) #define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6) @@ -488,6 +482,7 @@ #define SYS_SCTLR_EL2 sys_reg(3, 4, 1, 0, 0) #define SYS_ACTLR_EL2 sys_reg(3, 4, 1, 0, 1) +#define SYS_SCTLR2_EL2 sys_reg(3, 4, 1, 0, 3) #define SYS_HCR_EL2 sys_reg(3, 4, 1, 1, 0) #define SYS_MDCR_EL2 sys_reg(3, 4, 1, 1, 1) #define SYS_CPTR_EL2 sys_reg(3, 4, 1, 1, 2) @@ -501,6 +496,7 @@ #define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2) #define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1) +#define SYS_VNCR_EL2 sys_reg(3, 4, 2, 2, 0) #define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6) #define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0) #define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1) @@ -518,6 +514,18 @@ #define SYS_MAIR_EL2 sys_reg(3, 4, 10, 2, 0) #define SYS_AMAIR_EL2 sys_reg(3, 4, 10, 3, 0) +#define SYS_MPAMHCR_EL2 sys_reg(3, 4, 10, 4, 0) +#define SYS_MPAMVPMV_EL2 sys_reg(3, 4, 10, 4, 1) +#define SYS_MPAM2_EL2 sys_reg(3, 4, 10, 5, 0) +#define __SYS__MPAMVPMx_EL2(x) sys_reg(3, 4, 10, 6, x) +#define SYS_MPAMVPM0_EL2 __SYS__MPAMVPMx_EL2(0) +#define SYS_MPAMVPM1_EL2 __SYS__MPAMVPMx_EL2(1) +#define SYS_MPAMVPM2_EL2 __SYS__MPAMVPMx_EL2(2) +#define SYS_MPAMVPM3_EL2 __SYS__MPAMVPMx_EL2(3) +#define SYS_MPAMVPM4_EL2 __SYS__MPAMVPMx_EL2(4) +#define SYS_MPAMVPM5_EL2 __SYS__MPAMVPMx_EL2(5) +#define SYS_MPAMVPM6_EL2 __SYS__MPAMVPMx_EL2(6) +#define SYS_MPAMVPM7_EL2 __SYS__MPAMVPMx_EL2(7) #define SYS_MPAMHCR_EL2 sys_reg(3, 4, 10, 4, 0) #define SYS_MPAMVPMV_EL2 sys_reg(3, 4, 10, 4, 1) @@ -573,25 +581,50 @@ #define SYS_CONTEXTIDR_EL2 sys_reg(3, 4, 13, 0, 1) #define SYS_TPIDR_EL2 sys_reg(3, 4, 13, 0, 2) +#define SYS_SCXTNUM_EL2 sys_reg(3, 4, 13, 0, 7) + +#define __AMEV_op2(m) (m & 0x7) +#define __AMEV_CRm(n, m) (n | ((m & 0x8) >> 3)) +#define __SYS__AMEVCNTVOFF0n_EL2(m) sys_reg(3, 4, 13, __AMEV_CRm(0x8, m), __AMEV_op2(m)) +#define SYS_AMEVCNTVOFF0n_EL2(m) __SYS__AMEVCNTVOFF0n_EL2(m) +#define __SYS__AMEVCNTVOFF1n_EL2(m) sys_reg(3, 4, 13, __AMEV_CRm(0xA, m), __AMEV_op2(m)) +#define SYS_AMEVCNTVOFF1n_EL2(m) __SYS__AMEVCNTVOFF1n_EL2(m) #define SYS_CNTVOFF_EL2 sys_reg(3, 4, 14, 0, 3) #define SYS_CNTHCTL_EL2 sys_reg(3, 4, 14, 1, 0) +#define SYS_CNTHP_TVAL_EL2 sys_reg(3, 4, 14, 2, 0) +#define SYS_CNTHP_CTL_EL2 sys_reg(3, 4, 14, 2, 1) +#define SYS_CNTHP_CVAL_EL2 sys_reg(3, 4, 14, 2, 2) +#define SYS_CNTHV_TVAL_EL2 sys_reg(3, 4, 14, 3, 0) +#define SYS_CNTHV_CTL_EL2 sys_reg(3, 4, 14, 3, 1) +#define SYS_CNTHV_CVAL_EL2 sys_reg(3, 4, 14, 3, 2) /* VHE encodings for architectural EL0/1 system registers */ +#define SYS_BRBCR_EL12 sys_reg(2, 5, 9, 0, 0) #define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0) +#define SYS_CPACR_EL12 sys_reg(3, 5, 1, 0, 2) +#define SYS_SCTLR2_EL12 sys_reg(3, 5, 1, 0, 3) +#define SYS_ZCR_EL12 sys_reg(3, 5, 1, 2, 0) +#define SYS_TRFCR_EL12 sys_reg(3, 5, 1, 2, 1) +#define SYS_SMCR_EL12 sys_reg(3, 5, 1, 2, 6) #define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0) #define SYS_TTBR1_EL12 sys_reg(3, 5, 2, 0, 1) #define SYS_TCR_EL12 sys_reg(3, 5, 2, 0, 2) +#define SYS_TCR2_EL12 sys_reg(3, 5, 2, 0, 3) #define SYS_SPSR_EL12 sys_reg(3, 5, 4, 0, 0) #define SYS_ELR_EL12 sys_reg(3, 5, 4, 0, 1) #define SYS_AFSR0_EL12 sys_reg(3, 5, 5, 1, 0) #define SYS_AFSR1_EL12 sys_reg(3, 5, 5, 1, 1) #define SYS_ESR_EL12 sys_reg(3, 5, 5, 2, 0) #define SYS_TFSR_EL12 sys_reg(3, 5, 5, 6, 0) +#define SYS_FAR_EL12 sys_reg(3, 5, 6, 0, 0) +#define SYS_PMSCR_EL12 sys_reg(3, 5, 9, 9, 0) #define SYS_MAIR_EL12 sys_reg(3, 5, 10, 2, 0) #define SYS_AMAIR_EL12 sys_reg(3, 5, 10, 3, 0) #define SYS_MPAM1_EL12 sys_reg(3, 5, 10, 5, 0) #define SYS_VBAR_EL12 sys_reg(3, 5, 12, 0, 0) +#define SYS_CONTEXTIDR_EL12 sys_reg(3, 5, 13, 0, 1) +#define SYS_SCXTNUM_EL12 sys_reg(3, 5, 13, 0, 7) #define SYS_CNTKCTL_EL12 sys_reg(3, 5, 14, 1, 0) #define SYS_CNTP_TVAL_EL02 sys_reg(3, 5, 14, 2, 0) #define SYS_CNTP_CTL_EL02 sys_reg(3, 5, 14, 2, 1) @@ -754,6 +787,12 @@ #define OP_DVP_RCTX sys_insn(1, 3, 7, 3, 5) #define OP_CPP_RCTX sys_insn(1, 3, 7, 3, 7) +/* + * BRBE Instructions + */ +#define BRB_IALL_INSN __emit_inst(0xd5000000 | OP_BRB_IALL | (0x1f)) +#define BRB_INJ_INSN __emit_inst(0xd5000000 | OP_BRB_INJ | (0x1f)) + /* Common SCTLR_ELx flags. */ #define SCTLR_ELx_ENTP2 (BIT(60)) #define SCTLR_ELx_DSSBS (BIT(44)) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index beb92bdde3bc157fd42bcf314f14f9102954baa4..c003d804828bc32946de39633bfddbee97e0ff8b 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -449,12 +449,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO; - /* - * Default value for the FP state, will be overloaded at load - * time if we support FP (pretty likely) - */ - vcpu->arch.fp_state = FP_STATE_FREE; - /* Set up the timer */ kvm_timer_vcpu_init(vcpu); @@ -2075,7 +2069,7 @@ static void cpu_set_hyp_vector(void) static void cpu_hyp_init_context(void) { - kvm_init_host_cpu_context(&this_cpu_ptr_hyp_sym(kvm_host_data)->host_ctxt); + kvm_init_host_cpu_context(host_data_ptr(host_ctxt)); if (!is_kernel_in_hyp_mode()) cpu_init_hyp_mode(); diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index 8725291cb00a185780ed1d795ff4ef0dcef31d36..99f85d8acbf369078241182ffd704f82fed78c59 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -335,10 +335,15 @@ void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu) if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_TraceBuffer_SHIFT) && !(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_EL1_P)) vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_TRBE); + + /* Check if we have BRBE implemented and available at the host */ + if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_BRBE_SHIFT)) + vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_BRBE); } void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu) { vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_SPE); vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_TRBE); + vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_BRBE); } diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 8c1d0d4853df48abf4d089bbde153bcee8d0e6d0..4f51293db17327a6880a8920da105710590e223f 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -49,8 +49,6 @@ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu) if (ret) return ret; - vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd); - /* * We need to keep current's task_struct pinned until its data has been * unshared with the hypervisor to make sure it is not re-used by the @@ -86,7 +84,8 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) * guest in kvm_arch_vcpu_ctxflush_fp() and override this to * FP_STATE_FREE if the flag set. */ - vcpu->arch.fp_state = FP_STATE_HOST_OWNED; + *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED; + *host_data_ptr(fpsimd_state) = kern_hyp_va(¤t->thread.uw.fpsimd_state); vcpu_clear_flag(vcpu, HOST_SVE_ENABLED); if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN) @@ -110,7 +109,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) * been saved, this is very unlikely to happen. */ if (read_sysreg_s(SYS_SVCR) & (SVCR_SM_MASK | SVCR_ZA_MASK)) { - vcpu->arch.fp_state = FP_STATE_FREE; + *host_data_ptr(fp_owner) = FP_STATE_FREE; fpsimd_save_and_flush_cpu_state(); } } @@ -126,7 +125,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu) { if (test_thread_flag(TIF_FOREIGN_FPSTATE)) - vcpu->arch.fp_state = FP_STATE_FREE; + *host_data_ptr(fp_owner) = FP_STATE_FREE; } /* @@ -142,7 +141,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) WARN_ON_ONCE(!irqs_disabled()); - if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) { + if (*host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED) { /* * Currently we do not support SME guests so SVCR is @@ -195,7 +194,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) isb(); } - if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) { + if (*host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED) { if (vcpu_has_sve(vcpu)) { __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR); diff --git a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h index 961bbef104a63e08cf25638efa105cb03eb50bad..d00093699aaf1fb8c886b14687867e49f85b0da3 100644 --- a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h @@ -135,9 +135,9 @@ static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu) if (!vcpu_get_flag(vcpu, DEBUG_DIRTY)) return; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); guest_ctxt = &vcpu->arch.ctxt; - host_dbg = &vcpu->arch.host_debug_state.regs; + host_dbg = host_data_ptr(host_debug_state.regs); guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); __debug_save_state(host_dbg, host_ctxt); @@ -154,9 +154,9 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu) if (!vcpu_get_flag(vcpu, DEBUG_DIRTY)) return; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); guest_ctxt = &vcpu->arch.ctxt; - host_dbg = &vcpu->arch.host_debug_state.regs; + host_dbg = host_data_ptr(host_debug_state.regs); guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); __debug_save_state(guest_dbg, guest_ctxt); diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 821d803b395d192279ebdafde658bc4e9409257e..e16fa1211e10bba2f0cb1e55e2b94ec020cc6149 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -42,7 +42,7 @@ extern struct kvm_exception_table_entry __stop___kvm_ex_table; /* Check whether the FP regs are owned by the guest */ static inline bool guest_owns_fp_regs(struct kvm_vcpu *vcpu) { - return vcpu->arch.fp_state == FP_STATE_GUEST_OWNED; + return *host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED; } /* Save the 32-bit only FPSIMD system register state */ @@ -82,7 +82,7 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu) { - struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp; u64 r_val, w_val; @@ -157,7 +157,7 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu) static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu) { - struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); if (!cpus_have_final_cap(ARM64_HAS_FGT)) return; @@ -218,7 +218,7 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) write_sysreg(0, pmselr_el0); - hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + hctxt = host_data_ptr(host_ctxt); ctxt_sys_reg(hctxt, PMUSERENR_EL0) = read_sysreg(pmuserenr_el0); write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); vcpu_set_flag(vcpu, PMUSERENR_ON_CPU); @@ -228,7 +228,7 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) !kern_hyp_va(vcpu->kvm)->arch.pfr1_nmi) sysreg_clear_set_s(SYS_HCRX_EL2, 0, HCRX_EL2_TALLINT); - vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2); + *host_data_ptr(host_debug_state.mdcr_el2) = read_sysreg(mdcr_el2); write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); if (cpus_have_final_cap(ARM64_HAS_HCX)) { @@ -251,7 +251,7 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) { - write_sysreg(vcpu->arch.mdcr_el2_host, mdcr_el2); + write_sysreg(*host_data_ptr(host_debug_state.mdcr_el2), mdcr_el2); if (cpus_have_final_cap(ARM64_HAS_NMI) && !kern_hyp_va(vcpu->kvm)->arch.pfr1_nmi) @@ -261,7 +261,7 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) if (kvm_arm_support_pmu_v3()) { struct kvm_cpu_context *hctxt; - hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + hctxt = host_data_ptr(host_ctxt); write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0); vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU); } @@ -364,8 +364,8 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) isb(); /* Write out the host state if it's in the registers */ - if (vcpu->arch.fp_state == FP_STATE_HOST_OWNED) - __fpsimd_save_state(vcpu->arch.host_fpsimd_state); + if (*host_data_ptr(fp_owner) == FP_STATE_HOST_OWNED) + __fpsimd_save_state(*host_data_ptr(fpsimd_state)); /* Restore the guest state */ if (sve_guest) @@ -377,7 +377,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) if (!(read_sysreg(hcr_el2) & HCR_RW)) write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2); - vcpu->arch.fp_state = FP_STATE_GUEST_OWNED; + *host_data_ptr(fp_owner) = FP_STATE_GUEST_OWNED; return true; } diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c index 4558c02eb352d06be1c5861ea22a956c2c05954f..97e861df1b45188959513e61e03c57f6d7cd9263 100644 --- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c @@ -31,8 +31,8 @@ static void __debug_save_spe(u64 *pmscr_el1) return; /* Yes; save the control register and disable data generation */ - *pmscr_el1 = read_sysreg_s(SYS_PMSCR_EL1); - write_sysreg_s(0, SYS_PMSCR_EL1); + *pmscr_el1 = read_sysreg_el1(SYS_PMSCR); + write_sysreg_el1(0, SYS_PMSCR); isb(); /* Now drain all buffered data to memory */ @@ -48,7 +48,7 @@ static void __debug_restore_spe(u64 pmscr_el1) isb(); /* Re-enable data generation */ - write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1); + write_sysreg_el1(pmscr_el1, SYS_PMSCR); } static void __debug_save_trace(u64 *trfcr_el1) @@ -63,8 +63,8 @@ static void __debug_save_trace(u64 *trfcr_el1) * Since access to TRFCR_EL1 is trapped, the guest can't * modify the filtering set by the host. */ - *trfcr_el1 = read_sysreg_s(SYS_TRFCR_EL1); - write_sysreg_s(0, SYS_TRFCR_EL1); + *trfcr_el1 = read_sysreg_el1(SYS_TRFCR); + write_sysreg_el1(0, SYS_TRFCR); isb(); /* Drain the trace buffer to memory */ tsb_csync(); @@ -76,17 +76,46 @@ static void __debug_restore_trace(u64 trfcr_el1) return; /* Restore trace filter controls */ - write_sysreg_s(trfcr_el1, SYS_TRFCR_EL1); + write_sysreg_el1(trfcr_el1, SYS_TRFCR); +} + +static void __debug_save_brbe(u64 *brbcr_el1) +{ + *brbcr_el1 = 0; + + /* Check if the BRBE is enabled */ + if (!(read_sysreg_el1(SYS_BRBCR) & (BRBCR_ELx_E0BRE | BRBCR_ELx_ExBRE))) + return; + + /* + * Prohibit branch record generation while we are in guest. + * Since access to BRBCR_EL1 is trapped, the guest can't + * modify the filtering set by the host. + */ + *brbcr_el1 = read_sysreg_el1(SYS_BRBCR); + write_sysreg_el1(0, SYS_BRBCR); +} + +static void __debug_restore_brbe(u64 brbcr_el1) +{ + if (!brbcr_el1) + return; + + /* Restore BRBE controls */ + write_sysreg_el1(brbcr_el1, SYS_BRBCR); } void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu) { /* Disable and flush SPE data generation */ if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE)) - __debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1); + __debug_save_spe(host_data_ptr(host_debug_state.pmscr_el1)); /* Disable and flush Self-Hosted Trace generation */ if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE)) - __debug_save_trace(&vcpu->arch.host_debug_state.trfcr_el1); + __debug_save_trace(host_data_ptr(host_debug_state.trfcr_el1)); + /* Disable BRBE branch records */ + if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_BRBE)) + __debug_save_brbe(host_data_ptr(host_debug_state.brbcr_el1)); } void __debug_switch_to_guest(struct kvm_vcpu *vcpu) @@ -97,9 +126,11 @@ void __debug_switch_to_guest(struct kvm_vcpu *vcpu) void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu) { if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE)) - __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1); + __debug_restore_spe(*host_data_ptr(host_debug_state.pmscr_el1)); if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE)) - __debug_restore_trace(vcpu->arch.host_debug_state.trfcr_el1); + __debug_restore_trace(*host_data_ptr(host_debug_state.trfcr_el1)); + if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_BRBE)) + __debug_restore_brbe(*host_data_ptr(host_debug_state.brbcr_el1)); } void __debug_switch_to_host(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 2385fd03ed87c6c0450a1c2dbe830e95ce235ecc..26561c562f7a2333ea755bbd270ebb7c755bd0ec 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -39,10 +39,8 @@ static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) hyp_vcpu->vcpu.arch.cptr_el2 = host_vcpu->arch.cptr_el2; hyp_vcpu->vcpu.arch.iflags = host_vcpu->arch.iflags; - hyp_vcpu->vcpu.arch.fp_state = host_vcpu->arch.fp_state; hyp_vcpu->vcpu.arch.debug_ptr = kern_hyp_va(host_vcpu->arch.debug_ptr); - hyp_vcpu->vcpu.arch.host_fpsimd_state = host_vcpu->arch.host_fpsimd_state; hyp_vcpu->vcpu.arch.vsesr_el2 = host_vcpu->arch.vsesr_el2; @@ -64,7 +62,6 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) host_vcpu->arch.fault = hyp_vcpu->vcpu.arch.fault; host_vcpu->arch.iflags = hyp_vcpu->vcpu.arch.iflags; - host_vcpu->arch.fp_state = hyp_vcpu->vcpu.arch.fp_state; host_cpu_if->vgic_hcr = hyp_cpu_if->vgic_hcr; for (i = 0; i < hyp_cpu_if->used_lrs; ++i) diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index d57bcb6ab94d25b9998bbabd5cd0cdb2fd27e794..dfe8fe0f7eaff098348091b67b87f58b756517cb 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -205,7 +205,7 @@ asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on) struct psci_boot_args *boot_args; struct kvm_cpu_context *host_ctxt; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); if (is_cpu_on) boot_args = this_cpu_ptr(&cpu_on_args); diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 0d5e0a89ddce5df353fb5cccd6269686eaa674db..c6edba9c66b68fa5566e72f8e491e9bd41e8012b 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -257,8 +257,7 @@ static int fix_hyp_pgtable_refcnt(void) void __noreturn __pkvm_init_finalise(void) { - struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data); - struct kvm_cpu_context *host_ctxt = &host_data->host_ctxt; + struct kvm_cpu_context *host_ctxt = host_data_ptr(host_ctxt); unsigned long nr_pages, reserved_pages, pfn; int ret; diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index c2118f658e228f2a98dc2bd2d3a6b76d46d49d8d..ff502b5e148048cb3753e3818130cc83d1ef85b6 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -271,7 +271,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) pmr_sync(); } - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); host_ctxt->__hyp_running_vcpu = vcpu; guest_ctxt = &vcpu->arch.ctxt; @@ -346,7 +346,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) __sysreg_restore_state_nvhe(host_ctxt); - if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) + if (*host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED) __fpsimd_save_fpexc32(vcpu); __debug_switch_to_host(vcpu); @@ -376,7 +376,7 @@ asmlinkage void __noreturn hyp_panic(void) struct kvm_cpu_context *host_ctxt; struct kvm_vcpu *vcpu; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); vcpu = host_ctxt->__hyp_running_vcpu; if (vcpu) { diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 448b17080d3617cbf5d2fceec7e7fa62866760d4..821e366b54f2e42e9d64903d0c37c5a68692e094 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -183,7 +183,7 @@ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code) * If we were in HYP context on entry, adjust the PSTATE view * so that the usual helpers work correctly. */ - if (unlikely(vcpu_get_flag(vcpu, VCPU_HYP_CONTEXT))) { + if (vcpu_has_nv(vcpu) && (read_sysreg(hcr_el2) & HCR_NV)) { u64 mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT); switch (mode) { @@ -207,7 +207,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) struct kvm_cpu_context *guest_ctxt; u64 exit_code; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); host_ctxt->__hyp_running_vcpu = vcpu; guest_ctxt = &vcpu->arch.ctxt; @@ -232,11 +232,6 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) sysreg_restore_guest_state_vhe(guest_ctxt); __debug_switch_to_guest(vcpu); - if (is_hyp_ctxt(vcpu)) - vcpu_set_flag(vcpu, VCPU_HYP_CONTEXT); - else - vcpu_clear_flag(vcpu, VCPU_HYP_CONTEXT); - do { /* Jump in the fire! */ exit_code = __guest_enter(vcpu); @@ -250,7 +245,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) sysreg_restore_host_state_vhe(host_ctxt); - if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) + if (*host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED) __fpsimd_save_fpexc32(vcpu); __debug_switch_to_host(vcpu); @@ -298,7 +293,7 @@ static void __hyp_call_panic(u64 spsr, u64 elr, u64 par) struct kvm_cpu_context *host_ctxt; struct kvm_vcpu *vcpu; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); vcpu = host_ctxt->__hyp_running_vcpu; __deactivate_traps(vcpu); diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c index 6b407cd3230d4dca06dece7a9347d2f1587025ab..fe98cd1eff4d2d4d50c4979c258a92aca2bbe965 100644 --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -67,7 +67,7 @@ void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu) struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; struct kvm_cpu_context *host_ctxt; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); __sysreg_save_user_state(host_ctxt); /* @@ -114,6 +114,7 @@ void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu) struct kvm_cpu_context *host_ctxt; host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); deactivate_traps_vhe_put(vcpu); __sysreg_save_el1_state(guest_ctxt); diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c index a243934c5568bb0bffbc5e3b1828707a56aadad8..329819806096b854e551e997b1b0be8cad710cf8 100644 --- a/arch/arm64/kvm/pmu.c +++ b/arch/arm64/kvm/pmu.c @@ -232,7 +232,7 @@ bool kvm_set_pmuserenr(u64 val) if (!vcpu || !vcpu_get_flag(vcpu, PMUSERENR_ON_CPU)) return false; - hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + hctxt = host_data_ptr(host_ctxt); ctxt_sys_reg(hctxt, PMUSERENR_EL0) = val; return true; } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index e4952fe3c5a08e6d9bb4d95d9b1852f280dd82ae..2c9ad2bb6aee13f869432150f8a88a26180cf47b 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1123,6 +1123,11 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return true; } +#define BRB_INF_SRC_TGT_EL1(n) \ + { SYS_DESC(SYS_BRBINF_EL1(n)), undef_access }, \ + { SYS_DESC(SYS_BRBSRC_EL1(n)), undef_access }, \ + { SYS_DESC(SYS_BRBTGT_EL1(n)), undef_access } \ + /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ #define DBG_BCR_BVR_WCR_WVR_EL1(n) \ { SYS_DESC(SYS_DBGBVRn_EL1(n)), \ @@ -1511,6 +1516,9 @@ static u64 read_sanitised_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, /* Hide SPE from guests */ val &= ~ID_AA64DFR0_EL1_PMSVer_MASK; + /* Hide BRBE from guests */ + val &= ~ID_AA64DFR0_EL1_BRBE_MASK; + return val; } @@ -1973,6 +1981,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_DC_CISW), access_dcsw }, { SYS_DESC(SYS_DC_CIGSW), access_dcgsw }, { SYS_DESC(SYS_DC_CIGDSW), access_dcgsw }, + { SYS_DESC(OP_BRB_IALL), undef_access }, + { SYS_DESC(OP_BRB_INJ), undef_access }, DBG_BCR_BVR_WCR_WVR_EL1(0), DBG_BCR_BVR_WCR_WVR_EL1(1), @@ -2003,6 +2013,52 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_DBGCLAIMCLR_EL1), trap_raz_wi }, { SYS_DESC(SYS_DBGAUTHSTATUS_EL1), trap_dbgauthstatus_el1 }, + /* + * BRBE branch record sysreg address space is interleaved between + * corresponding BRBINF_EL1, BRBSRC_EL1, and BRBTGT_EL1. + */ + BRB_INF_SRC_TGT_EL1(0), + BRB_INF_SRC_TGT_EL1(16), + BRB_INF_SRC_TGT_EL1(1), + BRB_INF_SRC_TGT_EL1(17), + BRB_INF_SRC_TGT_EL1(2), + BRB_INF_SRC_TGT_EL1(18), + BRB_INF_SRC_TGT_EL1(3), + BRB_INF_SRC_TGT_EL1(19), + BRB_INF_SRC_TGT_EL1(4), + BRB_INF_SRC_TGT_EL1(20), + BRB_INF_SRC_TGT_EL1(5), + BRB_INF_SRC_TGT_EL1(21), + BRB_INF_SRC_TGT_EL1(6), + BRB_INF_SRC_TGT_EL1(22), + BRB_INF_SRC_TGT_EL1(7), + BRB_INF_SRC_TGT_EL1(23), + BRB_INF_SRC_TGT_EL1(8), + BRB_INF_SRC_TGT_EL1(24), + BRB_INF_SRC_TGT_EL1(9), + BRB_INF_SRC_TGT_EL1(25), + BRB_INF_SRC_TGT_EL1(10), + BRB_INF_SRC_TGT_EL1(26), + BRB_INF_SRC_TGT_EL1(11), + BRB_INF_SRC_TGT_EL1(27), + BRB_INF_SRC_TGT_EL1(12), + BRB_INF_SRC_TGT_EL1(28), + BRB_INF_SRC_TGT_EL1(13), + BRB_INF_SRC_TGT_EL1(29), + BRB_INF_SRC_TGT_EL1(14), + BRB_INF_SRC_TGT_EL1(30), + BRB_INF_SRC_TGT_EL1(15), + BRB_INF_SRC_TGT_EL1(31), + + /* Remaining BRBE sysreg addresses space */ + { SYS_DESC(SYS_BRBCR_EL1), undef_access }, + { SYS_DESC(SYS_BRBFCR_EL1), undef_access }, + { SYS_DESC(SYS_BRBTS_EL1), undef_access }, + { SYS_DESC(SYS_BRBINFINJ_EL1), undef_access }, + { SYS_DESC(SYS_BRBSRCINJ_EL1), undef_access }, + { SYS_DESC(SYS_BRBTGTINJ_EL1), undef_access }, + { SYS_DESC(SYS_BRBIDR0_EL1), undef_access }, + { SYS_DESC(SYS_MDCCSR_EL0), trap_raz_wi }, { SYS_DESC(SYS_DBGDTR_EL0), trap_raz_wi }, // DBGDTR[TR]X_EL0 share the same encoding diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 0e7d7f327410ab33cbdaf090dbae92a2bf149eca..17286210625b4fd190ec633e6ca444e96888016f 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -1002,6 +1002,137 @@ UnsignedEnum 3:0 BT EndEnum EndSysreg + +SysregFields BRBINFx_EL1 +Res0 63:47 +Field 46 CCU +Field 45:32 CC +Res0 31:18 +Field 17 LASTFAILED +Field 16 T +Res0 15:14 +Enum 13:8 TYPE + 0b000000 DIRECT_UNCOND + 0b000001 INDIRECT + 0b000010 DIRECT_LINK + 0b000011 INDIRECT_LINK + 0b000101 RET + 0b000111 ERET + 0b001000 DIRECT_COND + 0b100001 DEBUG_HALT + 0b100010 CALL + 0b100011 TRAP + 0b100100 SERROR + 0b100110 INSN_DEBUG + 0b100111 DATA_DEBUG + 0b101010 ALIGN_FAULT + 0b101011 INSN_FAULT + 0b101100 DATA_FAULT + 0b101110 IRQ + 0b101111 FIQ + 0b110000 IMPDEF_TRAP_EL3 + 0b111001 DEBUG_EXIT +EndEnum +Enum 7:6 EL + 0b00 EL0 + 0b01 EL1 + 0b10 EL2 + 0b11 EL3 +EndEnum +Field 5 MPRED +Res0 4:2 +Enum 1:0 VALID + 0b00 NONE + 0b01 TARGET + 0b10 SOURCE + 0b11 FULL +EndEnum +EndSysregFields + +SysregFields BRBCR_ELx +Res0 63:24 +Field 23 EXCEPTION +Field 22 ERTN +Res0 21:10 +Field 9 FZPSS +Field 8 FZP +Res0 7 +Enum 6:5 TS + 0b01 VIRTUAL + 0b10 GUEST_PHYSICAL + 0b11 PHYSICAL +EndEnum +Field 4 MPRED +Field 3 CC +Res0 2 +Field 1 ExBRE +Field 0 E0BRE +EndSysregFields + +Sysreg BRBCR_EL1 2 1 9 0 0 +Fields BRBCR_ELx +EndSysreg + +Sysreg BRBFCR_EL1 2 1 9 0 1 +Res0 63:30 +Enum 29:28 BANK + 0b00 BANK_0 + 0b01 BANK_1 +EndEnum +Res0 27:23 +Field 22 CONDDIR +Field 21 DIRCALL +Field 20 INDCALL +Field 19 RTN +Field 18 INDIRECT +Field 17 DIRECT +Field 16 EnI +Res0 15:8 +Field 7 PAUSED +Field 6 LASTFAILED +Res0 5:0 +EndSysreg + +Sysreg BRBTS_EL1 2 1 9 0 2 +Field 63:0 TS +EndSysreg + +Sysreg BRBINFINJ_EL1 2 1 9 1 0 +Fields BRBINFx_EL1 +EndSysreg + +Sysreg BRBSRCINJ_EL1 2 1 9 1 1 +Field 63:0 ADDRESS +EndSysreg + +Sysreg BRBTGTINJ_EL1 2 1 9 1 2 +Field 63:0 ADDRESS +EndSysreg + +Sysreg BRBIDR0_EL1 2 1 9 2 0 +Res0 63:16 +Enum 15:12 CC + 0b0101 20_BIT +EndEnum +Enum 11:8 FORMAT + 0b0000 FORMAT_0 +EndEnum +Enum 7:0 NUMREC + 0b00001000 8 + 0b00010000 16 + 0b00100000 32 + 0b01000000 64 +EndEnum +EndSysreg + +Sysreg BRBCR_EL2 2 4 9 0 0 +Fields BRBCR_ELx +EndSysreg + +Sysreg BRBCR_EL12 2 5 9 0 0 +Fields BRBCR_ELx +EndSysreg + Sysreg ID_AA64ZFR0_EL1 3 0 0 4 4 Res0 63:60 UnsignedEnum 59:56 F64MM diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index 273d67ecf6d2530f0f31c8dd393b31b1fde560ee..4cfafe375b1770a0157059415284cc243ee667cc 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -180,6 +180,17 @@ config ARM_SPE_PMU Extension, which provides periodic sampling of operations in the CPU pipeline and reports this via the perf AUX interface. +config ARM64_BRBE + bool "Enable support for branch stack sampling using FEAT_BRBE" + depends on PERF_EVENTS && ARM64 && ARM_PMU + default y + help + Enable perf support for Branch Record Buffer Extension (BRBE) which + records all branches taken in an execution path. This supports some + branch types and privilege based filtering. It captures additional + relevant information such as cycle count, misprediction and branch + type, branch privilege level etc. + config ARM_DMC620_PMU tristate "Enable PMU support for the ARM DMC-620 memory controller" depends on (ARM64 && ACPI) || COMPILE_TEST diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index 16b3ec4db916d0bcb5f3895a82c7d9b1a77e18a2..a8b7bc22e3d6b28e3782d6cdea9d9f74b907bf7e 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -18,6 +18,7 @@ obj-$(CONFIG_RISCV_PMU_SBI) += riscv_pmu_sbi.o obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o +obj-$(CONFIG_ARM64_BRBE) += arm_brbe.o obj-$(CONFIG_ARM_DMC620_PMU) += arm_dmc620_pmu.o obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o diff --git a/drivers/perf/arm_brbe.c b/drivers/perf/arm_brbe.c new file mode 100644 index 0000000000000000000000000000000000000000..d795e8fd646fc84bd722b36f71e8771ac853d3ae --- /dev/null +++ b/drivers/perf/arm_brbe.c @@ -0,0 +1,1198 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Branch Record Buffer Extension Driver. + * + * Copyright (C) 2022-2023 ARM Limited + * + * Author: Anshuman Khandual + */ +#include "arm_pmuv3_branch.h" + +#define BRBFCR_EL1_BRANCH_FILTERS (BRBFCR_EL1_DIRECT | \ + BRBFCR_EL1_INDIRECT | \ + BRBFCR_EL1_RTN | \ + BRBFCR_EL1_INDCALL | \ + BRBFCR_EL1_DIRCALL | \ + BRBFCR_EL1_CONDDIR) + +#define BRBFCR_EL1_CONFIG_MASK (BRBFCR_EL1_BANK_MASK | \ + BRBFCR_EL1_PAUSED | \ + BRBFCR_EL1_EnI | \ + BRBFCR_EL1_BRANCH_FILTERS) + +/* + * BRBTS_EL1 is currently not used for branch stack implementation + * purpose but BRBCR_ELx.TS needs to have a valid value from all + * available options. BRBCR_ELx_TS_VIRTUAL is selected for this. + */ +#define BRBCR_ELx_DEFAULT_TS FIELD_PREP(BRBCR_ELx_TS_MASK, BRBCR_ELx_TS_VIRTUAL) + +#define BRBCR_ELx_CONFIG_MASK (BRBCR_ELx_EXCEPTION | \ + BRBCR_ELx_ERTN | \ + BRBCR_ELx_CC | \ + BRBCR_ELx_MPRED | \ + BRBCR_ELx_ExBRE | \ + BRBCR_ELx_E0BRE | \ + BRBCR_ELx_FZP | \ + BRBCR_ELx_TS_MASK) + +/* + * BRBE Buffer Organization + * + * BRBE buffer is arranged as multiple banks of 32 branch record + * entries each. An individual branch record in a given bank could + * be accessed, after selecting the bank in BRBFCR_EL1.BANK and + * accessing the registers i.e [BRBSRC, BRBTGT, BRBINF] set with + * indices [0..31]. + * + * Bank 0 + * + * --------------------------------- ------ + * | 00 | BRBSRC | BRBTGT | BRBINF | | 00 | + * --------------------------------- ------ + * | 01 | BRBSRC | BRBTGT | BRBINF | | 01 | + * --------------------------------- ------ + * | .. | BRBSRC | BRBTGT | BRBINF | | .. | + * --------------------------------- ------ + * | 31 | BRBSRC | BRBTGT | BRBINF | | 31 | + * --------------------------------- ------ + * + * Bank 1 + * + * --------------------------------- ------ + * | 32 | BRBSRC | BRBTGT | BRBINF | | 00 | + * --------------------------------- ------ + * | 33 | BRBSRC | BRBTGT | BRBINF | | 01 | + * --------------------------------- ------ + * | .. | BRBSRC | BRBTGT | BRBINF | | .. | + * --------------------------------- ------ + * | 63 | BRBSRC | BRBTGT | BRBINF | | 31 | + * --------------------------------- ------ + */ +#define BRBE_BANK_MAX_ENTRIES 32 +#define BRBE_MAX_BANK 2 +#define BRBE_MAX_ENTRIES (BRBE_BANK_MAX_ENTRIES * BRBE_MAX_BANK) + +#define BRBE_BANK0_IDX_MIN 0 +#define BRBE_BANK0_IDX_MAX 31 +#define BRBE_BANK1_IDX_MIN 32 +#define BRBE_BANK1_IDX_MAX 63 + +struct brbe_regset { + unsigned long brbsrc; + unsigned long brbtgt; + unsigned long brbinf; +}; + +#define PERF_BR_ARM64_MAX (PERF_BR_MAX + PERF_BR_NEW_MAX) + +struct arm64_perf_task_context { + struct brbe_regset store[BRBE_MAX_ENTRIES]; + int nr_brbe_records; + + /* + * Branch Filter Mask + * + * This mask represents all branch record types i.e PERF_BR_XXX + * (as defined in core perf ABI) that can be generated with the + * event's branch_sample_type request. The mask layout could be + * found here. Although the bit 15 i.e PERF_BR_EXTEND_ABI never + * gets set in the mask. + * + * 23 (PERF_BR_MAX + PERF_BR_NEW_MAX) 0 + * | | + * --------------------------------------------------------- + * | Extended ABI section | X | ABI section | + * --------------------------------------------------------- + */ + DECLARE_BITMAP(br_type_mask, PERF_BR_ARM64_MAX); +}; + +static void branch_mask_set_all(unsigned long *event_type_mask) +{ + int idx; + + for (idx = PERF_BR_UNKNOWN; idx < PERF_BR_EXTEND_ABI; idx++) + set_bit(idx, event_type_mask); + + for (idx = PERF_BR_NEW_FAULT_ALGN; idx < PERF_BR_NEW_MAX; idx++) + set_bit(PERF_BR_MAX + idx, event_type_mask); +} + +static void branch_mask_set_arch(unsigned long *event_type_mask) +{ + set_bit(PERF_BR_MAX + PERF_BR_NEW_FAULT_ALGN, event_type_mask); + set_bit(PERF_BR_MAX + PERF_BR_NEW_FAULT_DATA, event_type_mask); + set_bit(PERF_BR_MAX + PERF_BR_NEW_FAULT_INST, event_type_mask); + + set_bit(PERF_BR_MAX + PERF_BR_ARM64_FIQ, event_type_mask); + set_bit(PERF_BR_MAX + PERF_BR_ARM64_DEBUG_HALT, event_type_mask); + set_bit(PERF_BR_MAX + PERF_BR_ARM64_DEBUG_EXIT, event_type_mask); + set_bit(PERF_BR_MAX + PERF_BR_ARM64_DEBUG_INST, event_type_mask); + set_bit(PERF_BR_MAX + PERF_BR_ARM64_DEBUG_DATA, event_type_mask); +} + +static void branch_entry_mask(struct perf_branch_entry *entry, + unsigned long *event_type_mask) +{ + u64 idx; + + bitmap_zero(event_type_mask, PERF_BR_ARM64_MAX); + for (idx = PERF_BR_UNKNOWN; idx < PERF_BR_EXTEND_ABI; idx++) { + if (entry->type == idx) + set_bit(idx, event_type_mask); + } + + if (entry->type == PERF_BR_EXTEND_ABI) { + for (idx = PERF_BR_NEW_FAULT_ALGN; idx < PERF_BR_NEW_MAX; idx++) { + if (entry->new_type == idx) + set_bit(PERF_BR_MAX + idx, event_type_mask); + } + } +} + +static void prepare_event_branch_type_mask(struct perf_event *event, + unsigned long *event_type_mask) +{ + u64 branch_sample = event->attr.branch_sample_type; + + bitmap_zero(event_type_mask, PERF_BR_ARM64_MAX); + + /* + * The platform specific branch types might not follow event's + * branch filter requests accurately. Let's add all of them as + * acceptible branch types during the filtering process. + */ + branch_mask_set_arch(event_type_mask); + + if (branch_sample & PERF_SAMPLE_BRANCH_ANY) { + branch_mask_set_all(event_type_mask); + return; + } + + if (branch_sample & PERF_SAMPLE_BRANCH_IND_JUMP) + set_bit(PERF_BR_IND, event_type_mask); + + set_bit(PERF_BR_UNCOND, event_type_mask); + if (branch_sample & PERF_SAMPLE_BRANCH_COND) { + clear_bit(PERF_BR_UNCOND, event_type_mask); + set_bit(PERF_BR_COND, event_type_mask); + } + + if (branch_sample & PERF_SAMPLE_BRANCH_CALL) + set_bit(PERF_BR_CALL, event_type_mask); + + if (branch_sample & PERF_SAMPLE_BRANCH_IND_CALL) + set_bit(PERF_BR_IND_CALL, event_type_mask); + + if (branch_sample & PERF_SAMPLE_BRANCH_ANY_CALL) { + set_bit(PERF_BR_CALL, event_type_mask); + set_bit(PERF_BR_IRQ, event_type_mask); + set_bit(PERF_BR_SYSCALL, event_type_mask); + set_bit(PERF_BR_SERROR, event_type_mask); + + if (branch_sample & PERF_SAMPLE_BRANCH_COND) + set_bit(PERF_BR_COND_CALL, event_type_mask); + } + + if (branch_sample & PERF_SAMPLE_BRANCH_ANY_RETURN) { + set_bit(PERF_BR_RET, event_type_mask); + set_bit(PERF_BR_ERET, event_type_mask); + set_bit(PERF_BR_SYSRET, event_type_mask); + + if (branch_sample & PERF_SAMPLE_BRANCH_COND) + set_bit(PERF_BR_COND_RET, event_type_mask); + } +} + +struct brbe_hw_attr { + int brbe_version; + int brbe_cc; + int brbe_nr; + int brbe_format; +}; + +#define RETURN_READ_BRBSRCN(n) \ + read_sysreg_s(SYS_BRBSRC_EL1(n)) + +#define RETURN_READ_BRBTGTN(n) \ + read_sysreg_s(SYS_BRBTGT_EL1(n)) + +#define RETURN_READ_BRBINFN(n) \ + read_sysreg_s(SYS_BRBINF_EL1(n)) + +#define BRBE_REGN_CASE(n, case_macro) \ + case n: return case_macro(n); break + +#define BRBE_REGN_SWITCH(x, case_macro) \ + do { \ + switch (x) { \ + BRBE_REGN_CASE(0, case_macro); \ + BRBE_REGN_CASE(1, case_macro); \ + BRBE_REGN_CASE(2, case_macro); \ + BRBE_REGN_CASE(3, case_macro); \ + BRBE_REGN_CASE(4, case_macro); \ + BRBE_REGN_CASE(5, case_macro); \ + BRBE_REGN_CASE(6, case_macro); \ + BRBE_REGN_CASE(7, case_macro); \ + BRBE_REGN_CASE(8, case_macro); \ + BRBE_REGN_CASE(9, case_macro); \ + BRBE_REGN_CASE(10, case_macro); \ + BRBE_REGN_CASE(11, case_macro); \ + BRBE_REGN_CASE(12, case_macro); \ + BRBE_REGN_CASE(13, case_macro); \ + BRBE_REGN_CASE(14, case_macro); \ + BRBE_REGN_CASE(15, case_macro); \ + BRBE_REGN_CASE(16, case_macro); \ + BRBE_REGN_CASE(17, case_macro); \ + BRBE_REGN_CASE(18, case_macro); \ + BRBE_REGN_CASE(19, case_macro); \ + BRBE_REGN_CASE(20, case_macro); \ + BRBE_REGN_CASE(21, case_macro); \ + BRBE_REGN_CASE(22, case_macro); \ + BRBE_REGN_CASE(23, case_macro); \ + BRBE_REGN_CASE(24, case_macro); \ + BRBE_REGN_CASE(25, case_macro); \ + BRBE_REGN_CASE(26, case_macro); \ + BRBE_REGN_CASE(27, case_macro); \ + BRBE_REGN_CASE(28, case_macro); \ + BRBE_REGN_CASE(29, case_macro); \ + BRBE_REGN_CASE(30, case_macro); \ + BRBE_REGN_CASE(31, case_macro); \ + default: \ + pr_warn("unknown register index\n"); \ + return -1; \ + } \ + } while (0) + +static inline int buffer_to_brbe_idx(int buffer_idx) +{ + return buffer_idx % BRBE_BANK_MAX_ENTRIES; +} + +static inline u64 get_brbsrc_reg(int buffer_idx) +{ + int brbe_idx = buffer_to_brbe_idx(buffer_idx); + + BRBE_REGN_SWITCH(brbe_idx, RETURN_READ_BRBSRCN); +} + +static inline u64 get_brbtgt_reg(int buffer_idx) +{ + int brbe_idx = buffer_to_brbe_idx(buffer_idx); + + BRBE_REGN_SWITCH(brbe_idx, RETURN_READ_BRBTGTN); +} + +static inline u64 get_brbinf_reg(int buffer_idx) +{ + int brbe_idx = buffer_to_brbe_idx(buffer_idx); + + BRBE_REGN_SWITCH(brbe_idx, RETURN_READ_BRBINFN); +} + +static inline u64 brbe_record_valid(u64 brbinf) +{ + return FIELD_GET(BRBINFx_EL1_VALID_MASK, brbinf); +} + +static inline bool brbe_invalid(u64 brbinf) +{ + return brbe_record_valid(brbinf) == BRBINFx_EL1_VALID_NONE; +} + +static inline bool brbe_record_is_complete(u64 brbinf) +{ + return brbe_record_valid(brbinf) == BRBINFx_EL1_VALID_FULL; +} + +static inline bool brbe_record_is_source_only(u64 brbinf) +{ + return brbe_record_valid(brbinf) == BRBINFx_EL1_VALID_SOURCE; +} + +static inline bool brbe_record_is_target_only(u64 brbinf) +{ + return brbe_record_valid(brbinf) == BRBINFx_EL1_VALID_TARGET; +} + +static inline int brbe_get_in_tx(u64 brbinf) +{ + return FIELD_GET(BRBINFx_EL1_T_MASK, brbinf); +} + +static inline int brbe_get_mispredict(u64 brbinf) +{ + return FIELD_GET(BRBINFx_EL1_MPRED_MASK, brbinf); +} + +static inline int brbe_get_lastfailed(u64 brbinf) +{ + return FIELD_GET(BRBINFx_EL1_LASTFAILED_MASK, brbinf); +} + +static inline int brbe_get_cycles(u64 brbinf) +{ + /* + * Captured cycle count is unknown and hence + * should not be passed on to the user space. + */ + if (brbinf & BRBINFx_EL1_CCU) + return 0; + + return FIELD_GET(BRBINFx_EL1_CC_MASK, brbinf); +} + +static inline int brbe_get_type(u64 brbinf) +{ + return FIELD_GET(BRBINFx_EL1_TYPE_MASK, brbinf); +} + +static inline int brbe_get_el(u64 brbinf) +{ + return FIELD_GET(BRBINFx_EL1_EL_MASK, brbinf); +} + +static inline int brbe_get_numrec(u64 brbidr) +{ + return FIELD_GET(BRBIDR0_EL1_NUMREC_MASK, brbidr); +} + +static inline int brbe_get_format(u64 brbidr) +{ + return FIELD_GET(BRBIDR0_EL1_FORMAT_MASK, brbidr); +} + +static inline int brbe_get_cc_bits(u64 brbidr) +{ + return FIELD_GET(BRBIDR0_EL1_CC_MASK, brbidr); +} + +void armv8pmu_branch_stack_reset(void) +{ + asm volatile(BRB_IALL_INSN); + isb(); +} + +void armv8pmu_branch_stack_add(struct perf_event *event, struct pmu_hw_events *hw_events) +{ + struct arm64_perf_task_context *task_ctx = event->pmu_ctx->task_ctx_data; + + if (event->ctx->task) + prepare_event_branch_type_mask(event, task_ctx->br_type_mask); + + /* + * Reset branch records buffer if a new CPU bound event + * gets scheduled on a PMU. Otherwise existing branch + * records present in the buffer might just leak into + * such events. + * + * Also reset current 'hw_events->branch_context' because + * any previous task bound event now would have lost an + * opportunity for continuous branch records. + */ + if (!event->ctx->task) { + hw_events->branch_context = NULL; + armv8pmu_branch_stack_reset(); + } + + /* + * Reset branch records buffer if a new task event gets + * scheduled on a PMU which might have existing records. + * Otherwise older branch records present in the buffer + * might leak into the new task event. + */ + if (event->ctx->task && hw_events->branch_context != event->ctx) { + hw_events->branch_context = event->ctx; + armv8pmu_branch_stack_reset(); + } + hw_events->branch_users++; +} + +void armv8pmu_branch_stack_del(struct perf_event *event, struct pmu_hw_events *hw_events) +{ + WARN_ON_ONCE(!hw_events->branch_users); + hw_events->branch_users--; + if (!hw_events->branch_users) { + hw_events->branch_context = NULL; + hw_events->branch_sample_type = 0; + } +} + +static bool valid_brbe_nr(int brbe_nr) +{ + return brbe_nr == BRBIDR0_EL1_NUMREC_8 || + brbe_nr == BRBIDR0_EL1_NUMREC_16 || + brbe_nr == BRBIDR0_EL1_NUMREC_32 || + brbe_nr == BRBIDR0_EL1_NUMREC_64; +} + +static bool valid_brbe_cc(int brbe_cc) +{ + return brbe_cc == BRBIDR0_EL1_CC_20_BIT; +} + +static bool valid_brbe_format(int brbe_format) +{ + return brbe_format == BRBIDR0_EL1_FORMAT_FORMAT_0; +} + +static bool valid_brbe_version(int brbe_version) +{ + return brbe_version == ID_AA64DFR0_EL1_BRBE_IMP || + brbe_version == ID_AA64DFR0_EL1_BRBE_BRBE_V1P1; +} + +static void select_brbe_bank(int bank) +{ + u64 brbfcr; + + WARN_ON(bank > 1); + brbfcr = read_sysreg_s(SYS_BRBFCR_EL1); + brbfcr &= ~BRBFCR_EL1_BANK_MASK; + brbfcr |= SYS_FIELD_PREP(BRBFCR_EL1, BANK, bank); + write_sysreg_s(brbfcr, SYS_BRBFCR_EL1); + isb(); +} + +static bool __read_brbe_regset(struct brbe_regset *entry, int idx) +{ + entry->brbinf = get_brbinf_reg(idx); + + if (brbe_invalid(entry->brbinf)) + return false; + + entry->brbsrc = get_brbsrc_reg(idx); + entry->brbtgt = get_brbtgt_reg(idx); + return true; +} + +/* + * Read all BRBE entries in HW until the first invalid entry. + * + * The caller must ensure that the BRBE is not concurrently modifying these + * branch entries. + */ +static int capture_brbe_regset(struct brbe_regset *buf, int nr_hw_entries) +{ + int idx = 0; + + select_brbe_bank(0); + while (idx < nr_hw_entries && idx <= BRBE_BANK0_IDX_MAX) { + if (!__read_brbe_regset(&buf[idx], idx)) + return idx; + idx++; + } + + select_brbe_bank(1); + while (idx < nr_hw_entries && idx <= BRBE_BANK1_IDX_MAX) { + if (!__read_brbe_regset(&buf[idx], idx)) + return idx; + idx++; + } + return idx; +} + +/* + * This function concatenates branch records from stored and live buffer + * up to maximum nr_max records and the stored buffer holds the resultant + * buffer. The concatenated buffer contains all the branch records from + * the live buffer but might contain some from stored buffer considering + * the maximum combined length does not exceed 'nr_max'. + * + * Stored records Live records + * ------------------------------------------------^ + * | S0 | L0 | Newest | + * --------------------------------- | + * | S1 | L1 | | + * --------------------------------- | + * | S2 | L2 | | + * --------------------------------- | + * | S3 | L3 | | + * --------------------------------- | + * | S4 | L4 | nr_max + * --------------------------------- | + * | | L5 | | + * --------------------------------- | + * | | L6 | | + * --------------------------------- | + * | | L7 | | + * --------------------------------- | + * | | | | + * --------------------------------- | + * | | | Oldest | + * ------------------------------------------------V + * + * + * S0 is the newest in the stored records, where as L7 is the oldest in + * the live records. Unless the live buffer is detected as being full + * thus potentially dropping off some older records, L7 and S0 records + * are contiguous in time for a user task context. The stitched buffer + * here represents maximum possible branch records, contiguous in time. + * + * Stored records Live records + * ------------------------------------------------^ + * | L0 | L0 | Newest | + * --------------------------------- | + * | L1 | L1 | | + * --------------------------------- | + * | L2 | L2 | | + * --------------------------------- | + * | L3 | L3 | | + * --------------------------------- | + * | L4 | L4 | nr_max + * --------------------------------- | + * | L5 | L5 | | + * --------------------------------- | + * | L6 | L6 | | + * --------------------------------- | + * | L7 | L7 | | + * --------------------------------- | + * | S0 | | | + * --------------------------------- | + * | S1 | | Oldest | + * ------------------------------------------------V + * | S2 | <----| + * ----------------- | + * | S3 | <----| Dropped off after nr_max + * ----------------- | + * | S4 | <----| + * ----------------- + */ +static int stitch_stored_live_entries(struct brbe_regset *stored, + struct brbe_regset *live, + int nr_stored, int nr_live, + int nr_max) +{ + int nr_move = min(nr_stored, nr_max - nr_live); + + /* Move the tail of the buffer to make room for the new entries */ + memmove(&stored[nr_live], &stored[0], nr_move * sizeof(*stored)); + + /* Copy the new entries into the head of the buffer */ + memcpy(&stored[0], &live[0], nr_live * sizeof(*stored)); + + /* Return the number of entries in the stitched buffer */ + return min(nr_live + nr_stored, nr_max); +} + +static int brbe_branch_save(struct brbe_regset *live, int nr_hw_entries) +{ + u64 brbfcr = read_sysreg_s(SYS_BRBFCR_EL1); + int nr_live; + + write_sysreg_s(brbfcr | BRBFCR_EL1_PAUSED, SYS_BRBFCR_EL1); + isb(); + + nr_live = capture_brbe_regset(live, nr_hw_entries); + + write_sysreg_s(brbfcr & ~BRBFCR_EL1_PAUSED, SYS_BRBFCR_EL1); + isb(); + + return nr_live; +} + +void armv8pmu_branch_save(struct arm_pmu *arm_pmu, void *ctx) +{ + struct arm64_perf_task_context *task_ctx = ctx; + struct brbe_regset live[BRBE_MAX_ENTRIES]; + int nr_live, nr_store, nr_hw_entries; + + nr_hw_entries = brbe_get_numrec(arm_pmu->reg_brbidr); + nr_live = brbe_branch_save(live, nr_hw_entries); + nr_store = task_ctx->nr_brbe_records; + nr_store = stitch_stored_live_entries(task_ctx->store, live, nr_store, + nr_live, nr_hw_entries); + task_ctx->nr_brbe_records = nr_store; +} + +/* + * Generic perf branch filters supported on BRBE + * + * New branch filters need to be evaluated whether they could be supported on + * BRBE. This ensures that such branch filters would not just be accepted, to + * fail silently. PERF_SAMPLE_BRANCH_HV is a special case that is selectively + * supported only on platforms where kernel is in hyp mode. + */ +#define BRBE_EXCLUDE_BRANCH_FILTERS (PERF_SAMPLE_BRANCH_ABORT_TX | \ + PERF_SAMPLE_BRANCH_IN_TX | \ + PERF_SAMPLE_BRANCH_NO_TX | \ + PERF_SAMPLE_BRANCH_CALL_STACK | \ + PERF_SAMPLE_BRANCH_COUNTERS) + +#define BRBE_ALLOWED_BRANCH_FILTERS (PERF_SAMPLE_BRANCH_USER | \ + PERF_SAMPLE_BRANCH_KERNEL | \ + PERF_SAMPLE_BRANCH_HV | \ + PERF_SAMPLE_BRANCH_ANY | \ + PERF_SAMPLE_BRANCH_ANY_CALL | \ + PERF_SAMPLE_BRANCH_ANY_RETURN | \ + PERF_SAMPLE_BRANCH_IND_CALL | \ + PERF_SAMPLE_BRANCH_COND | \ + PERF_SAMPLE_BRANCH_IND_JUMP | \ + PERF_SAMPLE_BRANCH_CALL | \ + PERF_SAMPLE_BRANCH_NO_FLAGS | \ + PERF_SAMPLE_BRANCH_NO_CYCLES | \ + PERF_SAMPLE_BRANCH_TYPE_SAVE | \ + PERF_SAMPLE_BRANCH_HW_INDEX | \ + PERF_SAMPLE_BRANCH_PRIV_SAVE) + +#define BRBE_PERF_BRANCH_FILTERS (BRBE_ALLOWED_BRANCH_FILTERS | \ + BRBE_EXCLUDE_BRANCH_FILTERS) + +bool armv8pmu_branch_attr_valid(struct perf_event *event) +{ + u64 branch_type = event->attr.branch_sample_type; + + /* + * Ensure both perf branch filter allowed and exclude + * masks are always in sync with the generic perf ABI. + */ + BUILD_BUG_ON(BRBE_PERF_BRANCH_FILTERS != (PERF_SAMPLE_BRANCH_MAX - 1)); + + if (branch_type & ~BRBE_ALLOWED_BRANCH_FILTERS) { + pr_debug_once("requested branch filter not supported 0x%llx\n", branch_type); + return false; + } + + /* + * If the event does not have at least one of the privilege + * branch filters as in PERF_SAMPLE_BRANCH_PLM_ALL, the core + * perf will adjust its value based on perf event's existing + * privilege level via attr.exclude_[user|kernel|hv]. + * + * As event->attr.branch_sample_type might have been changed + * when the event reaches here, it is not possible to figure + * out whether the event originally had HV privilege request + * or got added via the core perf. Just report this situation + * once and continue ignoring if there are other instances. + */ + if ((branch_type & PERF_SAMPLE_BRANCH_HV) && !is_kernel_in_hyp_mode()) + pr_debug_once("hypervisor privilege filter not supported 0x%llx\n", branch_type); + + return true; +} + +int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *arm_pmu) +{ + size_t size = sizeof(struct arm64_perf_task_context); + + arm_pmu->pmu.task_ctx_cache = kmem_cache_create("arm64_brbe_task_ctx", size, 0, 0, NULL); + if (!arm_pmu->pmu.task_ctx_cache) + return -ENOMEM; + return 0; +} + +void armv8pmu_task_ctx_cache_free(struct arm_pmu *arm_pmu) +{ + kmem_cache_destroy(arm_pmu->pmu.task_ctx_cache); +} + +static int brbe_attributes_probe(struct arm_pmu *armpmu, u32 brbe) +{ + u64 brbidr = read_sysreg_s(SYS_BRBIDR0_EL1); + int brbe_version, brbe_format, brbe_cc, brbe_nr; + + brbe_version = brbe; + brbe_format = brbe_get_format(brbidr); + brbe_cc = brbe_get_cc_bits(brbidr); + brbe_nr = brbe_get_numrec(brbidr); + armpmu->reg_brbidr = brbidr; + + if (!valid_brbe_version(brbe_version) || + !valid_brbe_format(brbe_format) || + !valid_brbe_cc(brbe_cc) || + !valid_brbe_nr(brbe_nr)) + return -EOPNOTSUPP; + return 0; +} + +void armv8pmu_branch_probe(struct arm_pmu *armpmu) +{ + u64 aa64dfr0 = read_sysreg_s(SYS_ID_AA64DFR0_EL1); + u32 brbe; + + /* + * BRBE implementation's branch entries cannot exceed maximum + * branch records supported at the ARM PMU level abstraction. + * Otherwise there is always a possibility of array overflow, + * while processing BRBE branch records. + */ + BUILD_BUG_ON(BRBE_BANK_MAX_ENTRIES > MAX_BRANCH_RECORDS); + + brbe = cpuid_feature_extract_unsigned_field(aa64dfr0, ID_AA64DFR0_EL1_BRBE_SHIFT); + if (!brbe) + return; + + if (brbe_attributes_probe(armpmu, brbe)) + return; + + armpmu->has_branch_stack = 1; +} + +/* + * BRBE supports the following functional branch type filters while + * generating branch records. These branch filters can be enabled, + * either individually or as a group i.e ORing multiple filters + * with each other. + * + * BRBFCR_EL1_CONDDIR - Conditional direct branch + * BRBFCR_EL1_DIRCALL - Direct call + * BRBFCR_EL1_INDCALL - Indirect call + * BRBFCR_EL1_INDIRECT - Indirect branch + * BRBFCR_EL1_DIRECT - Direct branch + * BRBFCR_EL1_RTN - Subroutine return + */ +static u64 branch_type_to_brbfcr(int branch_type) +{ + u64 brbfcr = 0; + + if (branch_type & PERF_SAMPLE_BRANCH_ANY) { + brbfcr |= BRBFCR_EL1_BRANCH_FILTERS; + return brbfcr; + } + + if (branch_type & PERF_SAMPLE_BRANCH_ANY_CALL) { + brbfcr |= BRBFCR_EL1_INDCALL; + brbfcr |= BRBFCR_EL1_DIRCALL; + } + + if (branch_type & PERF_SAMPLE_BRANCH_ANY_RETURN) + brbfcr |= BRBFCR_EL1_RTN; + + if (branch_type & PERF_SAMPLE_BRANCH_IND_CALL) + brbfcr |= BRBFCR_EL1_INDCALL; + + if (branch_type & PERF_SAMPLE_BRANCH_COND) + brbfcr |= BRBFCR_EL1_CONDDIR; + + if (branch_type & PERF_SAMPLE_BRANCH_IND_JUMP) + brbfcr |= BRBFCR_EL1_INDIRECT; + + if (branch_type & PERF_SAMPLE_BRANCH_CALL) + brbfcr |= BRBFCR_EL1_DIRCALL; + + return brbfcr & BRBFCR_EL1_CONFIG_MASK; +} + +/* + * BRBE supports the following privilege mode filters while generating + * branch records. + * + * BRBCR_ELx_E0BRE - EL0 branch records + * BRBCR_ELx_ExBRE - EL1/EL2 branch records + * + * BRBE also supports the following additional functional branch type + * filters while generating branch records. + * + * BRBCR_ELx_EXCEPTION - Exception + * BRBCR_ELx_ERTN - Exception return + */ +static u64 branch_type_to_brbcr(int branch_type) +{ + u64 brbcr = BRBCR_ELx_DEFAULT_TS; + + /* + * BRBE should be paused on PMU interrupt while tracing kernel + * space to stop capturing further branch records. Otherwise + * interrupt handler branch records might get into the samples + * which is not desired. + * + * BRBE need not be paused on PMU interrupt while tracing only + * the user space, because it will automatically be inside the + * prohibited region. But even after PMU overflow occurs, the + * interrupt could still take much more cycles, before it can + * be taken and by that time BRBE will have been overwritten. + * Hence enable pause on PMU interrupt mechanism even for user + * only traces as well. + */ + brbcr |= BRBCR_ELx_FZP; + + if (branch_type & PERF_SAMPLE_BRANCH_USER) + brbcr |= BRBCR_ELx_E0BRE; + + /* + * When running in the hyp mode, writing into BRBCR_EL1 + * actually writes into BRBCR_EL2 instead. Field E2BRE + * is also at the same position as E1BRE. + */ + if (branch_type & PERF_SAMPLE_BRANCH_KERNEL) + brbcr |= BRBCR_ELx_ExBRE; + + if (branch_type & PERF_SAMPLE_BRANCH_HV) { + if (is_kernel_in_hyp_mode()) + brbcr |= BRBCR_ELx_ExBRE; + } + + if (!(branch_type & PERF_SAMPLE_BRANCH_NO_CYCLES)) + brbcr |= BRBCR_ELx_CC; + + if (!(branch_type & PERF_SAMPLE_BRANCH_NO_FLAGS)) + brbcr |= BRBCR_ELx_MPRED; + + /* + * The exception and exception return branches could be + * captured, irrespective of the perf event's privilege. + * If the perf event does not have enough privilege for + * a given exception level, then addresses which falls + * under that exception level will be reported as zero + * for the captured branch record, creating source only + * or target only records. + */ + if (branch_type & PERF_SAMPLE_BRANCH_ANY) { + brbcr |= BRBCR_ELx_EXCEPTION; + brbcr |= BRBCR_ELx_ERTN; + } + + if (branch_type & PERF_SAMPLE_BRANCH_ANY_CALL) + brbcr |= BRBCR_ELx_EXCEPTION; + + if (branch_type & PERF_SAMPLE_BRANCH_ANY_RETURN) + brbcr |= BRBCR_ELx_ERTN; + + return brbcr & BRBCR_ELx_CONFIG_MASK; +} + +void armv8pmu_branch_enable(struct arm_pmu *arm_pmu) +{ + struct pmu_hw_events *cpuc = this_cpu_ptr(arm_pmu->hw_events); + u64 brbfcr, brbcr; + + if (!(cpuc->branch_sample_type && cpuc->branch_users)) + return; + + /* + * BRBE gets configured with a new mismatched branch sample + * type request, overriding any previous branch filters. + */ + brbfcr = read_sysreg_s(SYS_BRBFCR_EL1); + brbfcr &= ~BRBFCR_EL1_CONFIG_MASK; + brbfcr |= branch_type_to_brbfcr(cpuc->branch_sample_type); + write_sysreg_s(brbfcr, SYS_BRBFCR_EL1); + isb(); + + brbcr = read_sysreg_s(SYS_BRBCR_EL1); + brbcr &= ~BRBCR_ELx_CONFIG_MASK; + brbcr |= branch_type_to_brbcr(cpuc->branch_sample_type); + write_sysreg_s(brbcr, SYS_BRBCR_EL1); + isb(); +} + +void armv8pmu_branch_disable(void) +{ + u64 brbfcr, brbcr; + + brbcr = read_sysreg_s(SYS_BRBCR_EL1); + brbfcr = read_sysreg_s(SYS_BRBFCR_EL1); + brbcr &= ~(BRBCR_ELx_E0BRE | BRBCR_ELx_ExBRE); + brbfcr |= BRBFCR_EL1_PAUSED; + write_sysreg_s(brbcr, SYS_BRBCR_EL1); + write_sysreg_s(brbfcr, SYS_BRBFCR_EL1); + isb(); +} + +static void brbe_set_perf_entry_type(struct perf_branch_entry *entry, u64 brbinf) +{ + int brbe_type = brbe_get_type(brbinf); + + switch (brbe_type) { + case BRBINFx_EL1_TYPE_DIRECT_UNCOND: + entry->type = PERF_BR_UNCOND; + break; + case BRBINFx_EL1_TYPE_INDIRECT: + entry->type = PERF_BR_IND; + break; + case BRBINFx_EL1_TYPE_DIRECT_LINK: + entry->type = PERF_BR_CALL; + break; + case BRBINFx_EL1_TYPE_INDIRECT_LINK: + entry->type = PERF_BR_IND_CALL; + break; + case BRBINFx_EL1_TYPE_RET: + entry->type = PERF_BR_RET; + break; + case BRBINFx_EL1_TYPE_DIRECT_COND: + entry->type = PERF_BR_COND; + break; + case BRBINFx_EL1_TYPE_CALL: + entry->type = PERF_BR_CALL; + break; + case BRBINFx_EL1_TYPE_TRAP: + entry->type = PERF_BR_SYSCALL; + break; + case BRBINFx_EL1_TYPE_ERET: + entry->type = PERF_BR_ERET; + break; + case BRBINFx_EL1_TYPE_IRQ: + entry->type = PERF_BR_IRQ; + break; + case BRBINFx_EL1_TYPE_DEBUG_HALT: + entry->type = PERF_BR_EXTEND_ABI; + entry->new_type = PERF_BR_ARM64_DEBUG_HALT; + break; + case BRBINFx_EL1_TYPE_SERROR: + entry->type = PERF_BR_SERROR; + break; + case BRBINFx_EL1_TYPE_INSN_DEBUG: + entry->type = PERF_BR_EXTEND_ABI; + entry->new_type = PERF_BR_ARM64_DEBUG_INST; + break; + case BRBINFx_EL1_TYPE_DATA_DEBUG: + entry->type = PERF_BR_EXTEND_ABI; + entry->new_type = PERF_BR_ARM64_DEBUG_DATA; + break; + case BRBINFx_EL1_TYPE_ALIGN_FAULT: + entry->type = PERF_BR_EXTEND_ABI; + entry->new_type = PERF_BR_NEW_FAULT_ALGN; + break; + case BRBINFx_EL1_TYPE_INSN_FAULT: + entry->type = PERF_BR_EXTEND_ABI; + entry->new_type = PERF_BR_NEW_FAULT_INST; + break; + case BRBINFx_EL1_TYPE_DATA_FAULT: + entry->type = PERF_BR_EXTEND_ABI; + entry->new_type = PERF_BR_NEW_FAULT_DATA; + break; + case BRBINFx_EL1_TYPE_FIQ: + entry->type = PERF_BR_EXTEND_ABI; + entry->new_type = PERF_BR_ARM64_FIQ; + break; + case BRBINFx_EL1_TYPE_DEBUG_EXIT: + entry->type = PERF_BR_EXTEND_ABI; + entry->new_type = PERF_BR_ARM64_DEBUG_EXIT; + break; + default: + pr_warn_once("%d - unknown branch type captured\n", brbe_type); + entry->type = PERF_BR_UNKNOWN; + break; + } +} + +static int brbe_get_perf_priv(u64 brbinf) +{ + int brbe_el = brbe_get_el(brbinf); + + switch (brbe_el) { + case BRBINFx_EL1_EL_EL0: + return PERF_BR_PRIV_USER; + case BRBINFx_EL1_EL_EL1: + return PERF_BR_PRIV_KERNEL; + case BRBINFx_EL1_EL_EL2: + if (is_kernel_in_hyp_mode()) + return PERF_BR_PRIV_KERNEL; + return PERF_BR_PRIV_HV; + default: + pr_warn_once("%d - unknown branch privilege captured\n", brbe_el); + return PERF_BR_PRIV_UNKNOWN; + } +} + +static void capture_brbe_flags(struct perf_branch_entry *entry, struct perf_event *event, + u64 brbinf) +{ + brbe_set_perf_entry_type(entry, brbinf); + + if (!branch_sample_no_cycles(event)) + entry->cycles = brbe_get_cycles(brbinf); + + if (!branch_sample_no_flags(event)) { + /* + * BRBINFx_EL1.LASTFAILED indicates that a TME transaction failed (or + * was cancelled) prior to this record, and some number of records + * prior to this one, may have been generated during an attempt to + * execute the transaction. + */ + entry->abort = brbe_get_lastfailed(brbinf); + + /* + * All these information (i.e transaction state and mispredicts) + * are available for source only and complete branch records. + */ + if (brbe_record_is_complete(brbinf) || + brbe_record_is_source_only(brbinf)) { + entry->mispred = brbe_get_mispredict(brbinf); + entry->predicted = !entry->mispred; + entry->in_tx = brbe_get_in_tx(brbinf); + } + + /* + * Currently TME feature is neither implemented in any hardware + * nor it is being supported in the kernel. Just warn here once + * if TME related information shows up rather unexpectedly. + */ + if (entry->abort || entry->in_tx) + pr_warn_once("Unknown transaction states %d %d\n", + entry->abort, entry->in_tx); + } + + /* + * All these information (i.e branch privilege level) are + * available for target only and complete branch records. + */ + if (brbe_record_is_complete(brbinf) || + brbe_record_is_target_only(brbinf)) + entry->priv = brbe_get_perf_priv(brbinf); +} + +static void brbe_regset_branch_entries(struct pmu_hw_events *cpuc, struct perf_event *event, + struct brbe_regset *regset, int idx) +{ + struct perf_branch_entry *entry = &cpuc->branches->branch_entries[idx]; + u64 brbinf = regset[idx].brbinf; + + perf_clear_branch_entry_bitfields(entry); + if (brbe_record_is_complete(brbinf)) { + entry->from = regset[idx].brbsrc; + entry->to = regset[idx].brbtgt; + } else if (brbe_record_is_source_only(brbinf)) { + entry->from = regset[idx].brbsrc; + entry->to = 0; + } else if (brbe_record_is_target_only(brbinf)) { + entry->from = 0; + entry->to = regset[idx].brbtgt; + } + capture_brbe_flags(entry, event, brbinf); +} + +static void process_branch_entries(struct pmu_hw_events *cpuc, struct perf_event *event, + struct brbe_regset *regset, int nr_regset) +{ + int idx; + + for (idx = 0; idx < nr_regset; idx++) + brbe_regset_branch_entries(cpuc, event, regset, idx); + + cpuc->branches->branch_stack.nr = nr_regset; + cpuc->branches->branch_stack.hw_idx = -1ULL; +} + +void armv8pmu_branch_read(struct pmu_hw_events *cpuc, struct perf_event *event) +{ + struct arm64_perf_task_context *task_ctx = event->pmu_ctx->task_ctx_data; + struct brbe_regset live[BRBE_MAX_ENTRIES]; + int nr_live, nr_store, nr_hw_entries; + + nr_hw_entries = brbe_get_numrec(cpuc->percpu_pmu->reg_brbidr); + nr_live = capture_brbe_regset(live, nr_hw_entries); + if (event->ctx->task) { + nr_store = task_ctx->nr_brbe_records; + nr_store = stitch_stored_live_entries(task_ctx->store, live, nr_store, + nr_live, nr_hw_entries); + process_branch_entries(cpuc, event, task_ctx->store, nr_store); + task_ctx->nr_brbe_records = 0; + } else { + process_branch_entries(cpuc, event, live, nr_live); + } +} + +static bool filter_branch_privilege(struct perf_branch_entry *entry, u64 branch_sample_type) +{ + /* + * Retrieve the privilege level branch filter requests + * from the overall branch sample type. + */ + branch_sample_type &= PERF_SAMPLE_BRANCH_PLM_ALL; + + /* + * The privilege information do not always get captured + * successfully for given BRBE branch record. Hence the + * entry->priv could be analyzed for filtering when the + * information has really been captured. + */ + if (entry->priv) { + if (entry->priv == PERF_BR_PRIV_USER) { + if (!(branch_sample_type & PERF_SAMPLE_BRANCH_USER)) + return false; + } + + if (entry->priv == PERF_BR_PRIV_KERNEL) { + if (!(branch_sample_type & PERF_SAMPLE_BRANCH_KERNEL)) { + if (!is_kernel_in_hyp_mode()) + return false; + + if (!(branch_sample_type & PERF_SAMPLE_BRANCH_HV)) + return false; + } + } + + if (entry->priv == PERF_BR_PRIV_HV) { + /* + * PERF_SAMPLE_BRANCH_HV request actually gets configured + * similar to PERF_SAMPLE_BRANCH_KERNEL when kernel is in + * hyp mode. In that case PERF_BR_PRIV_KERNEL should have + * been reported for corresponding branch records. + */ + pr_warn_once("PERF_BR_PRIV_HV should not have been captured\n"); + } + return true; + } + + if (is_ttbr0_addr(entry->from) || is_ttbr0_addr(entry->to)) { + if (!(branch_sample_type & PERF_SAMPLE_BRANCH_USER)) + return false; + } + + if (is_ttbr1_addr(entry->from) || is_ttbr1_addr(entry->to)) { + if (!(branch_sample_type & PERF_SAMPLE_BRANCH_KERNEL)) { + if (!is_kernel_in_hyp_mode()) + return false; + + if (!(branch_sample_type & PERF_SAMPLE_BRANCH_HV)) + return false; + } + } + return true; +} + +static bool filter_branch_record(struct pmu_hw_events *cpuc, + struct perf_event *event, + struct perf_branch_entry *entry) +{ + struct arm64_perf_task_context *task_ctx = event->pmu_ctx->task_ctx_data; + u64 branch_sample = event->attr.branch_sample_type; + DECLARE_BITMAP(entry_type_mask, PERF_BR_ARM64_MAX); + DECLARE_BITMAP(event_type_mask, PERF_BR_ARM64_MAX); + + if (!filter_branch_privilege(entry, branch_sample)) + return false; + + if (entry->type == PERF_BR_UNKNOWN) + return true; + + if (branch_sample & PERF_SAMPLE_BRANCH_ANY) + return true; + + /* + * Both PMU and event branch filters match here except the privilege + * filters - which have already been tested earlier. Skip functional + * branch type test and just return success. + */ + if ((cpuc->branch_sample_type & ~PERF_SAMPLE_BRANCH_PLM_ALL) == + event->attr.branch_sample_type) + return true; + + branch_entry_mask(entry, entry_type_mask); + if (task_ctx) + return bitmap_subset(entry_type_mask, task_ctx->br_type_mask, PERF_BR_ARM64_MAX); + + prepare_event_branch_type_mask(event, event_type_mask); + return bitmap_subset(entry_type_mask, event_type_mask, PERF_BR_ARM64_MAX); +} + +void arm64_filter_branch_records(struct pmu_hw_events *cpuc, + struct perf_event *event, + struct branch_records *event_records) +{ + struct perf_branch_entry *entry; + int idx, count = 0; + + memset(event_records, 0, sizeof(*event_records)); + for (idx = 0; idx < cpuc->branches->branch_stack.nr; idx++) { + entry = &cpuc->branches->branch_entries[idx]; + if (!filter_branch_record(cpuc, event, entry)) + continue; + + memcpy(&event_records->branch_entries[count], entry, sizeof(*entry)); + count++; + } + event_records->branch_stack.nr = count; +} diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index f180148ea7cdca11cb4475a8d89f6aefd556f660..57cd2d1a9a18ecce88321db27e82d9b382efbff9 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -289,6 +289,23 @@ static void armpmu_start(struct perf_event *event, int flags) { struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; + struct pmu_hw_events *cpuc = this_cpu_ptr(armpmu->hw_events); + int idx; + + /* + * Merge all branch filter requests from different perf + * events being added into this PMU. This includes both + * privilege and branch type filters. + */ + if (armpmu->has_branch_stack) { + cpuc->branch_sample_type = 0; + for (idx = 0; idx < ARMPMU_MAX_HWEVENTS; idx++) { + struct perf_event *event_idx = cpuc->events[idx]; + + if (event_idx && has_branch_stack(event_idx)) + cpuc->branch_sample_type |= event_idx->attr.branch_sample_type; + } + } /* * ARM pmu always has to reprogram the period, so ignore @@ -317,6 +334,9 @@ armpmu_del(struct perf_event *event, int flags) struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; + if (has_branch_stack(event)) + armpmu->branch_stack_del(event, hw_events); + armpmu_stop(event, PERF_EF_UPDATE); hw_events->events[idx] = NULL; armpmu->clear_event_idx(hw_events, event); @@ -342,6 +362,9 @@ armpmu_add(struct perf_event *event, int flags) if (idx < 0) return idx; + if (has_branch_stack(event)) + armpmu->branch_stack_add(event, hw_events); + /* * If there is an event in the counter we are going to use then make * sure it is disabled. @@ -511,13 +534,25 @@ static int armpmu_event_init(struct perf_event *event) !cpumask_test_cpu(event->cpu, &armpmu->supported_cpus)) return -ENOENT; - /* does not support taken branch sampling */ - if (has_branch_stack(event)) - return -EOPNOTSUPP; + if (has_branch_stack(event)) { + if (!armpmu->has_branch_stack) + return -EOPNOTSUPP; + + if (!armpmu->branch_stack_init(event)) + return -EOPNOTSUPP; + } return __hw_perf_event_init(event); } +static void armpmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) +{ + struct arm_pmu *armpmu = to_arm_pmu(pmu_ctx->pmu); + + if (armpmu->sched_task) + armpmu->sched_task(pmu_ctx, sched_in); +} + static void armpmu_enable(struct pmu *pmu) { struct arm_pmu *armpmu = to_arm_pmu(pmu); @@ -881,6 +916,7 @@ struct arm_pmu *armpmu_alloc(void) } pmu->pmu = (struct pmu) { + .sched_task = armpmu_sched_task, .pmu_enable = armpmu_enable, .pmu_disable = armpmu_disable, .event_init = armpmu_event_init, diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index 21ea93740f65617154d21f517ee327ee375b28da..baa700ab5e034cba2dbecde0088612520a32f94f 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -26,6 +26,7 @@ #include #include +#include "arm_pmuv3_branch.h" /* ARMv8 Cortex-A53 specific event types. */ #define ARMV8_A53_PERFCTR_PREF_LINEFILL 0xC2 @@ -834,14 +835,70 @@ static void armv8pmu_start(struct arm_pmu *cpu_pmu) armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E); kvm_vcpu_pmu_resync_el0(); + if (cpu_pmu->has_branch_stack) + armv8pmu_branch_enable(cpu_pmu); } static void armv8pmu_stop(struct arm_pmu *cpu_pmu) { + if (cpu_pmu->has_branch_stack) + armv8pmu_branch_disable(); + /* Disable all counters */ armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E); } +static void read_branch_records(struct pmu_hw_events *cpuc, + struct perf_event *event, + struct perf_sample_data *data, + bool *branch_captured) +{ + struct branch_records event_records; + + /* + * CPU specific branch records buffer must have been allocated already + * for the hardware records to be captured and processed further. + */ + if (WARN_ON(!cpuc->branches)) + return; + + /* + * When the current task context does not match with the PMU overflown + * event, the captured branch records here cannot be co-related to the + * overflowed event. Report to the user - as if no branch records have + * been captured, and flush branch records. + */ + if (event->ctx->task && (cpuc->branch_context != event->ctx)) + return; + + /* + * Read the branch records from the hardware once after the PMU IRQ + * has been triggered but subsequently same records can be used for + * other events that might have been overflowed simultaneously thus + * saving much CPU cycles. + */ + if (!*branch_captured) { + armv8pmu_branch_read(cpuc, event); + *branch_captured = true; + } + + /* + * Filter captured branch records + * + * PMU captured branch records would contain samples applicable for + * the aggregated branch filters, for all events that got scheduled + * on this PMU simultaneously. Hence these branch records need to + * be filtered first so that each individual event get samples they + * had requested originally. + */ + if (cpuc->branch_sample_type != event->attr.branch_sample_type) { + arm64_filter_branch_records(cpuc, event, &event_records); + perf_sample_save_brstack(data, event, &event_records.branch_stack, NULL); + return; + } + perf_sample_save_brstack(data, event, &cpuc->branches->branch_stack, NULL); +} + static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) { u32 pmovsr; @@ -849,6 +906,7 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); struct pt_regs *regs; int idx; + bool branch_captured = false; /* * Get and reset the IRQ flags @@ -892,6 +950,13 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) if (!armpmu_event_set_period(event)) continue; + /* + * PMU IRQ should remain asserted until all branch records + * are captured and processed into struct perf_sample_data. + */ + if (has_branch_stack(event) && cpu_pmu->has_branch_stack) + read_branch_records(cpuc, event, &data, &branch_captured); + /* * Perf event overflow will queue the processing of the event as * an irq_work which will be taken care of in the handling of @@ -901,6 +966,8 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) cpu_pmu->disable(event); } armv8pmu_start(cpu_pmu); + if (cpu_pmu->has_branch_stack) + armv8pmu_branch_stack_reset(); return IRQ_HANDLED; } @@ -991,6 +1058,40 @@ static int armv8pmu_user_event_idx(struct perf_event *event) return event->hw.idx; } +static bool armv8pmu_branch_stack_init(struct perf_event *event) +{ + if (armv8pmu_branch_attr_valid(event)) { + /* + * If a task gets scheduled out, the current branch records + * get saved in the task's context data, which can be later + * used to fill in the records upon an event overflow. Let's + * enable PERF_ATTACH_TASK_DATA in 'event->attach_state' for + * all branch stack sampling perf events. + */ + event->attach_state |= PERF_ATTACH_TASK_DATA; + return true; + } + return false; +} + +static void armv8pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) +{ + struct arm_pmu *armpmu = to_arm_pmu(pmu_ctx->pmu); + void *task_ctx = pmu_ctx->task_ctx_data; + + if (armpmu->has_branch_stack) { + /* Save branch records in task_ctx on sched out */ + if (task_ctx && !sched_in) { + armv8pmu_branch_save(armpmu, task_ctx); + return; + } + + /* Reset branch records on sched in */ + if (sched_in) + armv8pmu_branch_stack_reset(); + } +} + /* * Add an event filter to a given event. */ @@ -1083,6 +1184,9 @@ static void armv8pmu_reset(void *info) pmcr |= ARMV8_PMU_PMCR_LP; armv8pmu_pmcr_write(pmcr); + + if (cpu_pmu->has_branch_stack) + armv8pmu_branch_stack_reset(); } static int __armv8_pmuv3_map_event_id(struct arm_pmu *armpmu, @@ -1235,6 +1339,41 @@ static void __armv8pmu_probe_pmu(void *info) cpu_pmu->reg_pmmir = read_pmmir(); else cpu_pmu->reg_pmmir = 0; + + /* + * BRBE is being probed on a single cpu for a + * given PMU. The remaining cpus, are assumed + * to have the exact same BRBE implementation. + */ + armv8pmu_branch_probe(cpu_pmu); +} + +static int branch_records_alloc(struct arm_pmu *armpmu) +{ + struct branch_records __percpu *records; + int cpu; + + records = alloc_percpu_gfp(struct branch_records, GFP_KERNEL); + if (!records) + return -ENOMEM; + + /* + * percpu memory allocated for 'records' gets completely consumed + * here, and never required to be freed up later. So permanently + * losing access to this anchor i.e 'records' is acceptable. + * + * Otherwise this allocation handle would have to be saved up for + * free_percpu() release later if required. + */ + for_each_possible_cpu(cpu) { + struct pmu_hw_events *events_cpu; + struct branch_records *records_cpu; + + events_cpu = per_cpu_ptr(armpmu->hw_events, cpu); + records_cpu = per_cpu_ptr(records, cpu); + events_cpu->branches = records_cpu; + } + return 0; } static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) @@ -1251,7 +1390,21 @@ static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) if (ret) return ret; - return probe.present ? 0 : -ENODEV; + if (!probe.present) + return -ENODEV; + + if (cpu_pmu->has_branch_stack) { + ret = armv8pmu_task_ctx_cache_alloc(cpu_pmu); + if (ret) + return ret; + + ret = branch_records_alloc(cpu_pmu); + if (ret) { + armv8pmu_task_ctx_cache_free(cpu_pmu); + return ret; + } + } + return 0; } static void armv8pmu_disable_user_access_ipi(void *unused) @@ -1311,6 +1464,11 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name, cpu_pmu->set_event_filter = armv8pmu_set_event_filter; cpu_pmu->pmu.event_idx = armv8pmu_user_event_idx; + cpu_pmu->sched_task = armv8pmu_sched_task; + cpu_pmu->branch_stack_init = armv8pmu_branch_stack_init; + cpu_pmu->branch_stack_add = armv8pmu_branch_stack_add; + cpu_pmu->branch_stack_del = armv8pmu_branch_stack_del; + cpu_pmu->branch_stack_reset = armv8pmu_branch_stack_reset; cpu_pmu->name = name; cpu_pmu->map_event = map_event; diff --git a/drivers/perf/arm_pmuv3_branch.h b/drivers/perf/arm_pmuv3_branch.h new file mode 100644 index 0000000000000000000000000000000000000000..4142da38cfb54f63994337a999a8e595f654b432 --- /dev/null +++ b/drivers/perf/arm_pmuv3_branch.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Branch Record Buffer Extension Helpers. + * + * Copyright (C) 2022-2023 ARM Limited + * + * Author: Anshuman Khandual + */ +#include + +#ifdef CONFIG_ARM64_BRBE +void armv8pmu_branch_stack_add(struct perf_event *event, struct pmu_hw_events *cpuc); +void armv8pmu_branch_stack_del(struct perf_event *event, struct pmu_hw_events *cpuc); +void armv8pmu_branch_stack_reset(void); +void armv8pmu_branch_probe(struct arm_pmu *arm_pmu); +bool armv8pmu_branch_attr_valid(struct perf_event *event); +void armv8pmu_branch_enable(struct arm_pmu *arm_pmu); +void armv8pmu_branch_disable(void); +void armv8pmu_branch_read(struct pmu_hw_events *cpuc, + struct perf_event *event); +void arm64_filter_branch_records(struct pmu_hw_events *cpuc, + struct perf_event *event, + struct branch_records *event_records); +void armv8pmu_branch_save(struct arm_pmu *arm_pmu, void *ctx); +int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *arm_pmu); +void armv8pmu_task_ctx_cache_free(struct arm_pmu *arm_pmu); +#else +static inline void armv8pmu_branch_stack_add(struct perf_event *event, struct pmu_hw_events *cpuc) +{ +} + +static inline void armv8pmu_branch_stack_del(struct perf_event *event, struct pmu_hw_events *cpuc) +{ +} + +static inline void armv8pmu_branch_stack_reset(void) +{ +} + +static inline void armv8pmu_branch_probe(struct arm_pmu *arm_pmu) +{ +} + +static inline bool armv8pmu_branch_attr_valid(struct perf_event *event) +{ + WARN_ON_ONCE(!has_branch_stack(event)); + return false; +} + +static inline void armv8pmu_branch_enable(struct arm_pmu *arm_pmu) +{ +} + +static inline void armv8pmu_branch_disable(void) +{ +} + +static inline void armv8pmu_branch_read(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + WARN_ON_ONCE(!has_branch_stack(event)); +} + +static inline void arm64_filter_branch_records(struct pmu_hw_events *cpuc, + struct perf_event *event, + struct branch_records *event_records) +{ + +} + +static inline void armv8pmu_branch_save(struct arm_pmu *arm_pmu, void *ctx) +{ +} + +static inline int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *arm_pmu) +{ + return 0; +} + +static inline void armv8pmu_task_ctx_cache_free(struct arm_pmu *arm_pmu) +{ +} +#endif diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 6fab6e84da4378f2abeb52b2d93db5fa5619dad1..47e07e3db6a4901df610c35ce72806b2a07b0f3c 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -46,6 +46,18 @@ static_assert((PERF_EVENT_FLAG_ARCH & ARMPMU_EVT_63BIT) == ARMPMU_EVT_63BIT); }, \ } +/* + * Maximum branch record entries which could be processed + * for core perf branch stack sampling support, regardless + * of the hardware support available on a given ARM PMU. + */ +#define MAX_BRANCH_RECORDS 64 + +struct branch_records { + struct perf_branch_stack branch_stack; + struct perf_branch_entry branch_entries[MAX_BRANCH_RECORDS]; +}; + /* The events for a given PMU register set. */ struct pmu_hw_events { /* @@ -72,6 +84,17 @@ struct pmu_hw_events { struct arm_pmu *percpu_pmu; int irq; + + struct branch_records *branches; + + /* Active context for task events */ + void *branch_context; + + /* Active events requesting branch records */ + unsigned int branch_users; + + /* Active branch sample type filters */ + unsigned long branch_sample_type; }; enum armpmu_attr_groups { @@ -102,8 +125,15 @@ struct arm_pmu { void (*stop)(struct arm_pmu *); void (*reset)(void *); int (*map_event)(struct perf_event *event); + void (*sched_task)(struct perf_event_pmu_context *pmu_ctx, bool sched_in); + bool (*branch_stack_init)(struct perf_event *event); + void (*branch_stack_add)(struct perf_event *event, struct pmu_hw_events *cpuc); + void (*branch_stack_del)(struct perf_event *event, struct pmu_hw_events *cpuc); + void (*branch_stack_reset)(void); int num_events; - bool secure_access; /* 32-bit ARM only */ + unsigned int secure_access : 1, /* 32-bit ARM only */ + has_branch_stack: 1, /* 64-bit ARM only */ + reserved : 30; #define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS); #define ARMV8_PMUV3_EXT_COMMON_EVENT_BASE 0x4000 @@ -117,6 +147,11 @@ struct arm_pmu { /* store the PMMIR_EL1 to expose slots */ u64 reg_pmmir; +#ifdef CONFIG_ARM64_BRBE + /* store the BRBIDR0_EL1 capturing attributes */ + u64 reg_brbidr; +#endif + /* Only to be used by ACPI probing code */ unsigned long acpi_cpuid; }; diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index cb6f1dd00dc483a495fdb465067aa33ae6a6be1e..7d9e0a311ef9171f135eaa97120b14981d13980a 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -139,6 +139,7 @@ static struct test_workload *workloads[] = { &workload__sqrtloop, &workload__brstack, &workload__datasym, + &workload__traploop }; static int num_subtests(const struct test_suite *t) diff --git a/tools/perf/tests/shell/test_brstack.sh b/tools/perf/tests/shell/test_brstack.sh index 09908d71c9941d3c4e3cb1d81cc08617581e13d8..b0c96bfae30433895235dbcfbc0cf061503094cd 100755 --- a/tools/perf/tests/shell/test_brstack.sh +++ b/tools/perf/tests/shell/test_brstack.sh @@ -12,7 +12,6 @@ if ! perf record -o- --no-buildid --branch-filter any,save_type,u -- true > /dev fi TMPDIR=$(mktemp -d /tmp/__perf_test.program.XXXXX) -TESTPROG="perf test -w brstack" cleanup() { rm -rf $TMPDIR @@ -20,11 +19,21 @@ cleanup() { trap cleanup EXIT TERM INT +is_arm64() { + uname -m | grep -q aarch64 +} + +if is_arm64; then + TESTPROG="perf test -w brstack 5000" +else + TESTPROG="perf test -w brstack" +fi + test_user_branches() { echo "Testing user branch stack sampling" perf record -o $TMPDIR/perf.data --branch-filter any,save_type,u -- ${TESTPROG} > /dev/null 2>&1 - perf script -i $TMPDIR/perf.data --fields brstacksym | xargs -n1 > $TMPDIR/perf.script + perf script -i $TMPDIR/perf.data --fields brstacksym | tr ' ' '\n' > $TMPDIR/perf.script # example of branch entries: # brstack_foo+0x14/brstack_bar+0x40/P/-/-/0/CALL @@ -38,12 +47,43 @@ test_user_branches() { grep -E -m1 "^brstack_foo\+[^ ]*/brstack_bench\+[^ ]*/RET/.*$" $TMPDIR/perf.script grep -E -m1 "^brstack_bench\+[^ ]*/brstack_bench\+[^ ]*/COND/.*$" $TMPDIR/perf.script grep -E -m1 "^brstack\+[^ ]*/brstack\+[^ ]*/UNCOND/.*$" $TMPDIR/perf.script + + if is_arm64; then + # in arm64 with BRBE, we get IRQ entries that correspond + # to any point in the process + grep -m1 "/IRQ/" $TMPDIR/perf.script + fi set +x # some branch types are still not being tested: # IND COND_CALL COND_RET SYSCALL SYSRET IRQ SERROR NO_TX } +test_arm64_trap_eret_branches() { + echo "Testing trap & eret branches (arm64 brbe)" + perf record -o $TMPDIR/perf.data --branch-filter any,save_type,u -- \ + perf test -w traploop 250 + perf script -i $TMPDIR/perf.data --fields brstacksym | tr ' ' '\n' > $TMPDIR/perf.script + set -x + # BRBINF.TYPE == TRAP are mapped to PERF_BR_SYSCALL by the BRBE driver + grep -E -m1 "^trap_bench\+[^ ]*/\[unknown\][^ ]*/SYSCALL/" $TMPDIR/perf.script + grep -E -m1 "^\[unknown\][^ ]*/trap_bench\+[^ ]*/ERET/" $TMPDIR/perf.script + set +x +} + +test_arm64_kernel_branches() { + echo "Testing kernel branches (arm64 brbe)" + # skip if perf doesn't have enough privileges + if ! perf record --branch-filter any,k -o- -- true > /dev/null; then + echo "[skipped: not enough privileges]" + return 0 + fi + perf record -o $TMPDIR/perf.data --branch-filter any,k -- uname -a + perf script -i $TMPDIR/perf.data --fields brstack | tr ' ' '\n' > $TMPDIR/perf.script + grep -E -m1 "0xffff[0-9a-f]{12}" $TMPDIR/perf.script + ! egrep -E -m1 "0x0000[0-9a-f]{12}" $TMPDIR/perf.script +} + # first argument is the argument passed to "--branch-stack ,save_type,u" # second argument are the expected branch types for the given filter test_filter() { @@ -53,7 +93,7 @@ test_filter() { echo "Testing branch stack filtering permutation ($test_filter_filter,$test_filter_expect)" perf record -o $TMPDIR/perf.data --branch-filter $test_filter_filter,save_type,u -- ${TESTPROG} > /dev/null 2>&1 - perf script -i $TMPDIR/perf.data --fields brstack | xargs -n1 > $TMPDIR/perf.script + perf script -i $TMPDIR/perf.data --fields brstack | tr ' ' '\n' | sed '/^[[:space:]]*$/d' > $TMPDIR/perf.script # fail if we find any branch type that doesn't match any of the expected ones # also consider UNKNOWN branch types (-) @@ -66,11 +106,16 @@ set -e test_user_branches -test_filter "any_call" "CALL|IND_CALL|COND_CALL|SYSCALL|IRQ" +if is_arm64; then + test_arm64_trap_eret_branches + test_arm64_kernel_branches +fi + +test_filter "any_call" "CALL|IND_CALL|COND_CALL|SYSCALL|IRQ|FAULT_DATA|FAULT_INST" test_filter "call" "CALL|SYSCALL" test_filter "cond" "COND" test_filter "any_ret" "RET|COND_RET|SYSRET|ERET" test_filter "call,cond" "CALL|SYSCALL|COND" -test_filter "any_call,cond" "CALL|IND_CALL|COND_CALL|IRQ|SYSCALL|COND" -test_filter "cond,any_call,any_ret" "COND|CALL|IND_CALL|COND_CALL|SYSCALL|IRQ|RET|COND_RET|SYSRET|ERET" +test_filter "any_call,cond" "CALL|IND_CALL|COND_CALL|IRQ|SYSCALL|COND|FAULT_DATA|FAULT_INST" +test_filter "cond,any_call,any_ret" "COND|CALL|IND_CALL|COND_CALL|SYSCALL|IRQ|RET|COND_RET|SYSRET|ERET|FAULT_DATA|FAULT_INST" diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index b394f3ac2d667bacb2d9e16aca78715c65ab5ed3..c65455da4eafce82460b7058f2a15fd3a842e441 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -205,6 +205,7 @@ DECLARE_WORKLOAD(leafloop); DECLARE_WORKLOAD(sqrtloop); DECLARE_WORKLOAD(brstack); DECLARE_WORKLOAD(datasym); +DECLARE_WORKLOAD(traploop); extern const char *dso_to_test; diff --git a/tools/perf/tests/workloads/Build b/tools/perf/tests/workloads/Build index a1f34d5861e365e3ffcd57f0dd9e5f7a6dd2df19..a9dc93d8468bea5f1e6273eb3080787c0f0553b9 100644 --- a/tools/perf/tests/workloads/Build +++ b/tools/perf/tests/workloads/Build @@ -6,8 +6,10 @@ perf-y += leafloop.o perf-y += sqrtloop.o perf-y += brstack.o perf-y += datasym.o +perf-y += traploop.o CFLAGS_sqrtloop.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE CFLAGS_leafloop.o = -g -O0 -fno-inline -fno-omit-frame-pointer -U_FORTIFY_SOURCE CFLAGS_brstack.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE CFLAGS_datasym.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE +CFLAGS_traploop.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE diff --git a/tools/perf/tests/workloads/traploop.c b/tools/perf/tests/workloads/traploop.c new file mode 100644 index 0000000000000000000000000000000000000000..7dac94897e49cdde154f20c5c6c1337aeaeb888a --- /dev/null +++ b/tools/perf/tests/workloads/traploop.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include "../tests.h" + +#define BENCH_RUNS 999999 + +static volatile int cnt; + +#ifdef __aarch64__ +static void trap_bench(void) +{ + unsigned long val; + + asm("mrs %0, ID_AA64ISAR0_EL1" : "=r" (val)); /* TRAP + ERET */ +} +#else +static void trap_bench(void) +{ + +} +#endif + +static int traploop(int argc, const char **argv) +{ + int num_loops = BENCH_RUNS; + + if (argc > 0) + num_loops = atoi(argv[0]); + + while (1) { + if ((cnt++) > num_loops) + break; + + trap_bench(); + } + return 0; +} + +DEFINE_WORKLOAD(traploop);