diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
index 60731f602d3ef712a380c55f3e6adf1c75dc3466..7053b977c0a2b50406e51ac5c793355b9a681ae5 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h
@@ -239,12 +239,68 @@
 /* PMMIR_EL1.SLOTS mask */
 #define ARMV8_PMU_SLOTS_MASK	0xff
 
+struct pmu_hw_events;
+struct arm_pmu;
+struct perf_event;
+
 #ifdef CONFIG_PERF_EVENTS
 struct pt_regs;
 extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 extern unsigned long perf_misc_flags(struct pt_regs *regs);
 #define perf_misc_flags(regs)	perf_misc_flags(regs)
 #define perf_arch_bpf_user_pt_regs(regs) &regs->user_regs
+
+#ifdef CONFIG_ARM64_BRBE
+void armv8pmu_branch_reset(void);
+void armv8pmu_branch_probe(struct arm_pmu *arm_pmu);
+bool armv8pmu_branch_attr_valid(struct perf_event *event);
+void armv8pmu_branch_enable(struct perf_event *event);
+void armv8pmu_branch_disable(struct perf_event *event);
+void armv8pmu_branch_read(struct pmu_hw_events *cpuc,
+			  struct perf_event *event);
+void armv8pmu_branch_save(struct arm_pmu *arm_pmu, void *ctx);
+int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *arm_pmu);
+void armv8pmu_task_ctx_cache_free(struct arm_pmu *arm_pmu);
+#else /* !CONFIG_ARM64_BRBE */
+static inline void armv8pmu_branch_reset(void)
+{
+}
+
+static inline void armv8pmu_branch_probe(struct arm_pmu *arm_pmu)
+{
+}
+
+static inline bool armv8pmu_branch_attr_valid(struct perf_event *event)
+{
+	return false;
+}
+
+static inline void armv8pmu_branch_enable(struct perf_event *event)
+{
+}
+
+static inline void armv8pmu_branch_disable(struct perf_event *event)
+{
+}
+
+static inline void armv8pmu_branch_read(struct pmu_hw_events *cpuc,
+					struct perf_event *event)
+{
+}
+
+static inline void armv8pmu_branch_save(struct arm_pmu *arm_pmu, void *ctx)
+{
+}
+
+static inline int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *arm_pmu)
+{
+	return 0;
+}
+
+static inline void armv8pmu_task_ctx_cache_free(struct arm_pmu *arm_pmu)
+{
+}
+#endif /* CONFIG_ARM64_BRBE */
 #endif
 
 #define perf_arch_fetch_caller_regs(regs, __ip) { \
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 7aded7acdd065bb85b3ae26742828fb7f9e787ce..4431c92c23bdf7f0714da3935cf7311b0b9e1c5b 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -110,6 +110,14 @@
 #define SYS_DC_ISW			sys_insn(1, 0, 7, 6, 2)
 #define SYS_DC_CSW			sys_insn(1, 0, 7, 10, 2)
 #define SYS_DC_CISW			sys_insn(1, 0, 7, 14, 2)
+#define SYS_BRB_IALL			sys_insn(1, 1, 7, 2, 4)
+#define SYS_BRB_INJ			sys_insn(1, 1, 7, 2, 5)
+
+/*
+ * BRBE Instructions
+ */
+#define BRB_IALL_INSN	__emit_inst(0xd5000000 | SYS_BRB_IALL | (0x1f))
+#define BRB_INJ_INSN	__emit_inst(0xd5000000 | SYS_BRB_INJ | (0x1f))
 
 /*
  * System registers, organised loosely by encoding but grouped together
@@ -138,6 +146,109 @@
 #define SYS_DBGDTRTX_EL0		sys_reg(2, 3, 0, 5, 0)
 #define SYS_DBGVCR32_EL2		sys_reg(2, 4, 0, 7, 0)
 
+#define __SYS_BRBINF(n)			sys_reg(2, 1, 8, ((n) & 0xf), ((((n) & 0x10) >> 2) + 0))
+#define __SYS_BRBSRC(n)			sys_reg(2, 1, 8, ((n) & 0xf), ((((n) & 0x10) >> 2) + 1))
+#define __SYS_BRBTGT(n)			sys_reg(2, 1, 8, ((n) & 0xf), ((((n) & 0x10) >> 2) + 2))
+
+#define SYS_BRBINF0_EL1			__SYS_BRBINF(0)
+#define SYS_BRBINF1_EL1			__SYS_BRBINF(1)
+#define SYS_BRBINF2_EL1			__SYS_BRBINF(2)
+#define SYS_BRBINF3_EL1			__SYS_BRBINF(3)
+#define SYS_BRBINF4_EL1			__SYS_BRBINF(4)
+#define SYS_BRBINF5_EL1			__SYS_BRBINF(5)
+#define SYS_BRBINF6_EL1			__SYS_BRBINF(6)
+#define SYS_BRBINF7_EL1
__SYS_BRBINF(7) +#define SYS_BRBINF8_EL1 __SYS_BRBINF(8) +#define SYS_BRBINF9_EL1 __SYS_BRBINF(9) +#define SYS_BRBINF10_EL1 __SYS_BRBINF(10) +#define SYS_BRBINF11_EL1 __SYS_BRBINF(11) +#define SYS_BRBINF12_EL1 __SYS_BRBINF(12) +#define SYS_BRBINF13_EL1 __SYS_BRBINF(13) +#define SYS_BRBINF14_EL1 __SYS_BRBINF(14) +#define SYS_BRBINF15_EL1 __SYS_BRBINF(15) +#define SYS_BRBINF16_EL1 __SYS_BRBINF(16) +#define SYS_BRBINF17_EL1 __SYS_BRBINF(17) +#define SYS_BRBINF18_EL1 __SYS_BRBINF(18) +#define SYS_BRBINF19_EL1 __SYS_BRBINF(19) +#define SYS_BRBINF20_EL1 __SYS_BRBINF(20) +#define SYS_BRBINF21_EL1 __SYS_BRBINF(21) +#define SYS_BRBINF22_EL1 __SYS_BRBINF(22) +#define SYS_BRBINF23_EL1 __SYS_BRBINF(23) +#define SYS_BRBINF24_EL1 __SYS_BRBINF(24) +#define SYS_BRBINF25_EL1 __SYS_BRBINF(25) +#define SYS_BRBINF26_EL1 __SYS_BRBINF(26) +#define SYS_BRBINF27_EL1 __SYS_BRBINF(27) +#define SYS_BRBINF28_EL1 __SYS_BRBINF(28) +#define SYS_BRBINF29_EL1 __SYS_BRBINF(29) +#define SYS_BRBINF30_EL1 __SYS_BRBINF(30) +#define SYS_BRBINF31_EL1 __SYS_BRBINF(31) + +#define SYS_BRBSRC0_EL1 __SYS_BRBSRC(0) +#define SYS_BRBSRC1_EL1 __SYS_BRBSRC(1) +#define SYS_BRBSRC2_EL1 __SYS_BRBSRC(2) +#define SYS_BRBSRC3_EL1 __SYS_BRBSRC(3) +#define SYS_BRBSRC4_EL1 __SYS_BRBSRC(4) +#define SYS_BRBSRC5_EL1 __SYS_BRBSRC(5) +#define SYS_BRBSRC6_EL1 __SYS_BRBSRC(6) +#define SYS_BRBSRC7_EL1 __SYS_BRBSRC(7) +#define SYS_BRBSRC8_EL1 __SYS_BRBSRC(8) +#define SYS_BRBSRC9_EL1 __SYS_BRBSRC(9) +#define SYS_BRBSRC10_EL1 __SYS_BRBSRC(10) +#define SYS_BRBSRC11_EL1 __SYS_BRBSRC(11) +#define SYS_BRBSRC12_EL1 __SYS_BRBSRC(12) +#define SYS_BRBSRC13_EL1 __SYS_BRBSRC(13) +#define SYS_BRBSRC14_EL1 __SYS_BRBSRC(14) +#define SYS_BRBSRC15_EL1 __SYS_BRBSRC(15) +#define SYS_BRBSRC16_EL1 __SYS_BRBSRC(16) +#define SYS_BRBSRC17_EL1 __SYS_BRBSRC(17) +#define SYS_BRBSRC18_EL1 __SYS_BRBSRC(18) +#define SYS_BRBSRC19_EL1 __SYS_BRBSRC(19) +#define SYS_BRBSRC20_EL1 __SYS_BRBSRC(20) +#define SYS_BRBSRC21_EL1 __SYS_BRBSRC(21) +#define SYS_BRBSRC22_EL1 __SYS_BRBSRC(22) +#define SYS_BRBSRC23_EL1 __SYS_BRBSRC(23) +#define SYS_BRBSRC24_EL1 __SYS_BRBSRC(24) +#define SYS_BRBSRC25_EL1 __SYS_BRBSRC(25) +#define SYS_BRBSRC26_EL1 __SYS_BRBSRC(26) +#define SYS_BRBSRC27_EL1 __SYS_BRBSRC(27) +#define SYS_BRBSRC28_EL1 __SYS_BRBSRC(28) +#define SYS_BRBSRC29_EL1 __SYS_BRBSRC(29) +#define SYS_BRBSRC30_EL1 __SYS_BRBSRC(30) +#define SYS_BRBSRC31_EL1 __SYS_BRBSRC(31) + +#define SYS_BRBTGT0_EL1 __SYS_BRBTGT(0) +#define SYS_BRBTGT1_EL1 __SYS_BRBTGT(1) +#define SYS_BRBTGT2_EL1 __SYS_BRBTGT(2) +#define SYS_BRBTGT3_EL1 __SYS_BRBTGT(3) +#define SYS_BRBTGT4_EL1 __SYS_BRBTGT(4) +#define SYS_BRBTGT5_EL1 __SYS_BRBTGT(5) +#define SYS_BRBTGT6_EL1 __SYS_BRBTGT(6) +#define SYS_BRBTGT7_EL1 __SYS_BRBTGT(7) +#define SYS_BRBTGT8_EL1 __SYS_BRBTGT(8) +#define SYS_BRBTGT9_EL1 __SYS_BRBTGT(9) +#define SYS_BRBTGT10_EL1 __SYS_BRBTGT(10) +#define SYS_BRBTGT11_EL1 __SYS_BRBTGT(11) +#define SYS_BRBTGT12_EL1 __SYS_BRBTGT(12) +#define SYS_BRBTGT13_EL1 __SYS_BRBTGT(13) +#define SYS_BRBTGT14_EL1 __SYS_BRBTGT(14) +#define SYS_BRBTGT15_EL1 __SYS_BRBTGT(15) +#define SYS_BRBTGT16_EL1 __SYS_BRBTGT(16) +#define SYS_BRBTGT17_EL1 __SYS_BRBTGT(17) +#define SYS_BRBTGT18_EL1 __SYS_BRBTGT(18) +#define SYS_BRBTGT19_EL1 __SYS_BRBTGT(19) +#define SYS_BRBTGT20_EL1 __SYS_BRBTGT(20) +#define SYS_BRBTGT21_EL1 __SYS_BRBTGT(21) +#define SYS_BRBTGT22_EL1 __SYS_BRBTGT(22) +#define SYS_BRBTGT23_EL1 __SYS_BRBTGT(23) +#define SYS_BRBTGT24_EL1 __SYS_BRBTGT(24) +#define SYS_BRBTGT25_EL1 __SYS_BRBTGT(25) +#define SYS_BRBTGT26_EL1 __SYS_BRBTGT(26) +#define 
SYS_BRBTGT27_EL1 __SYS_BRBTGT(27) +#define SYS_BRBTGT28_EL1 __SYS_BRBTGT(28) +#define SYS_BRBTGT29_EL1 __SYS_BRBTGT(29) +#define SYS_BRBTGT30_EL1 __SYS_BRBTGT(30) +#define SYS_BRBTGT31_EL1 __SYS_BRBTGT(31) + #define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0) #define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5) #define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6) @@ -467,6 +578,396 @@ #define SYS_SCXTNUM_EL0 sys_reg(3, 3, 13, 0, 7) +/*** Branch Record Buffer Extension ***/ +/* ID registers */ +#define BRBINFx_EL1_CCU GENMASK(46, 46) +#define BRBINFx_EL1_CCU_MASK GENMASK(46, 46) +#define BRBINFx_EL1_CCU_SHIFT 46 +#define BRBINFx_EL1_CCU_WIDTH 1 + +#define BRBINFx_EL1_CC GENMASK(45, 32) +#define BRBINFx_EL1_CC_MASK GENMASK(45, 32) +#define BRBINFx_EL1_CC_SHIFT 32 +#define BRBINFx_EL1_CC_WIDTH 14 + +#define BRBINFx_EL1_LASTFAILED GENMASK(17, 17) +#define BRBINFx_EL1_LASTFAILED_MASK GENMASK(17, 17) +#define BRBINFx_EL1_LASTFAILED_SHIFT 17 +#define BRBINFx_EL1_LASTFAILED_WIDTH 1 + +#define BRBINFx_EL1_T GENMASK(16, 16) +#define BRBINFx_EL1_T_MASK GENMASK(16, 16) +#define BRBINFx_EL1_T_SHIFT 16 +#define BRBINFx_EL1_T_WIDTH 1 + +#define BRBINFx_EL1_TYPE GENMASK(13, 8) +#define BRBINFx_EL1_TYPE_MASK GENMASK(13, 8) +#define BRBINFx_EL1_TYPE_SHIFT 8 +#define BRBINFx_EL1_TYPE_WIDTH 6 +#define BRBINFx_EL1_TYPE_UNCOND_DIRECT UL(0b000000) +#define BRBINFx_EL1_TYPE_INDIRECT UL(0b000001) +#define BRBINFx_EL1_TYPE_DIRECT_LINK UL(0b000010) +#define BRBINFx_EL1_TYPE_INDIRECT_LINK UL(0b000011) +#define BRBINFx_EL1_TYPE_RET UL(0b000101) +#define BRBINFx_EL1_TYPE_ERET UL(0b000111) +#define BRBINFx_EL1_TYPE_COND_DIRECT UL(0b001000) +#define BRBINFx_EL1_TYPE_DEBUG_HALT UL(0b100001) +#define BRBINFx_EL1_TYPE_CALL UL(0b100010) +#define BRBINFx_EL1_TYPE_TRAP UL(0b100011) +#define BRBINFx_EL1_TYPE_SERROR UL(0b100100) +#define BRBINFx_EL1_TYPE_INSN_DEBUG UL(0b100110) +#define BRBINFx_EL1_TYPE_DATA_DEBUG UL(0b100111) +#define BRBINFx_EL1_TYPE_ALIGN_FAULT UL(0b101010) +#define BRBINFx_EL1_TYPE_INSN_FAULT UL(0b101011) +#define BRBINFx_EL1_TYPE_DATA_FAULT UL(0b101100) +#define BRBINFx_EL1_TYPE_IRQ UL(0b101110) +#define BRBINFx_EL1_TYPE_FIQ UL(0b101111) +#define BRBINFx_EL1_TYPE_DEBUG_EXIT UL(0b111001) + +#define BRBINFx_EL1_EL GENMASK(7, 6) +#define BRBINFx_EL1_EL_MASK GENMASK(7, 6) +#define BRBINFx_EL1_EL_SHIFT 6 +#define BRBINFx_EL1_EL_WIDTH 2 +#define BRBINFx_EL1_EL_EL0 UL(0b00) +#define BRBINFx_EL1_EL_EL1 UL(0b01) +#define BRBINFx_EL1_EL_EL2 UL(0b10) +#define BRBINFx_EL1_EL_EL3 UL(0b11) + +#define BRBINFx_EL1_MPRED GENMASK(5, 5) +#define BRBINFx_EL1_MPRED_MASK GENMASK(5, 5) +#define BRBINFx_EL1_MPRED_SHIFT 5 +#define BRBINFx_EL1_MPRED_WIDTH 1 + +#define BRBINFx_EL1_VALID GENMASK(1, 0) +#define BRBINFx_EL1_VALID_MASK GENMASK(1, 0) +#define BRBINFx_EL1_VALID_SHIFT 0 +#define BRBINFx_EL1_VALID_WIDTH 2 +#define BRBINFx_EL1_VALID_NONE UL(0b00) +#define BRBINFx_EL1_VALID_TARGET UL(0b01) +#define BRBINFx_EL1_VALID_SOURCE UL(0b10) +#define BRBINFx_EL1_VALID_FULL UL(0b11) + +#define BRBINFx_EL1_RES0 (UL(0) | GENMASK_ULL(63, 47) | \ + GENMASK_ULL(31, 18) | \ + GENMASK_ULL(15, 14) | \ + GENMASK_ULL(4, 2)) +#define BRBINFx_EL1_RES1 (UL(0)) +#define BRBINFx_EL1_UNKN (UL(0)) + +#define BRBCR_ELx_EXCEPTION GENMASK(23, 23) +#define BRBCR_ELx_EXCEPTION_MASK GENMASK(23, 23) +#define BRBCR_ELx_EXCEPTION_SHIFT 23 +#define BRBCR_ELx_EXCEPTION_WIDTH 1 + +#define BRBCR_ELx_ERTN GENMASK(22, 22) +#define BRBCR_ELx_ERTN_MASK GENMASK(22, 22) +#define BRBCR_ELx_ERTN_SHIFT 22 +#define BRBCR_ELx_ERTN_WIDTH 1 + +#define BRBCR_ELx_FZP GENMASK(8, 8) +#define 
BRBCR_ELx_FZP_MASK GENMASK(8, 8) +#define BRBCR_ELx_FZP_SHIFT 8 +#define BRBCR_ELx_FZP_WIDTH 1 + +#define BRBCR_ELx_TS GENMASK(6, 5) +#define BRBCR_ELx_TS_MASK GENMASK(6, 5) +#define BRBCR_ELx_TS_SHIFT 5 +#define BRBCR_ELx_TS_WIDTH 2 +#define BRBCR_ELx_TS_VIRTUAL UL(0b01) +#define BRBCR_ELx_TS_GUEST_PHYSICAL UL(0b10) +#define BRBCR_ELx_TS_PHYSICAL UL(0b11) + +#define BRBCR_ELx_MPRED GENMASK(4, 4) +#define BRBCR_ELx_MPRED_MASK GENMASK(4, 4) +#define BRBCR_ELx_MPRED_SHIFT 4 +#define BRBCR_ELx_MPRED_WIDTH 1 + +#define BRBCR_ELx_CC GENMASK(3, 3) +#define BRBCR_ELx_CC_MASK GENMASK(3, 3) +#define BRBCR_ELx_CC_SHIFT 3 +#define BRBCR_ELx_CC_WIDTH 1 + +#define BRBCR_ELx_ExBRE GENMASK(1, 1) +#define BRBCR_ELx_ExBRE_MASK GENMASK(1, 1) +#define BRBCR_ELx_ExBRE_SHIFT 1 +#define BRBCR_ELx_ExBRE_WIDTH 1 + +#define BRBCR_ELx_E0BRE GENMASK(0, 0) +#define BRBCR_ELx_E0BRE_MASK GENMASK(0, 0) +#define BRBCR_ELx_E0BRE_SHIFT 0 +#define BRBCR_ELx_E0BRE_WIDTH 1 + +#define BRBCR_ELx_RES0 (UL(0) | GENMASK_ULL(63, 24) | \ + GENMASK_ULL(21, 9) | \ + GENMASK_ULL(7, 7) | \ + GENMASK_ULL(2, 2)) +#define BRBCR_ELx_RES1 (UL(0)) +#define BRBCR_ELx_UNKN (UL(0)) + +#define REG_BRBCR_EL2 S2_4_C9_C0_0 +#define SYS_BRBCR_EL2 sys_reg(2, 4, 9, 0, 0) +#define SYS_BRBCR_EL2_Op0 2 +#define SYS_BRBCR_EL2_Op1 4 +#define SYS_BRBCR_EL2_CRn 9 +#define SYS_BRBCR_EL2_CRm 0 +#define SYS_BRBCR_EL2_Op2 0 + +/* For BRBCR_EL2 fields see BRBCR_ELx */ + +#define REG_BRBCR_EL1 S2_1_C9_C0_0 +#define SYS_BRBCR_EL1 sys_reg(2, 1, 9, 0, 0) +#define SYS_BRBCR_EL1_Op0 2 +#define SYS_BRBCR_EL1_Op1 1 +#define SYS_BRBCR_EL1_CRn 9 +#define SYS_BRBCR_EL1_CRm 0 +#define SYS_BRBCR_EL1_Op2 0 + +/* For BRBCR_EL1 fields see BRBCR_ELx */ + +#define REG_BRBCR_EL12 S2_5_C9_C0_0 +#define SYS_BRBCR_EL12 sys_reg(2, 5, 9, 0, 0) +#define SYS_BRBCR_EL12_Op0 2 +#define SYS_BRBCR_EL12_Op1 5 +#define SYS_BRBCR_EL12_CRn 9 +#define SYS_BRBCR_EL12_CRm 0 +#define SYS_BRBCR_EL12_Op2 0 + +/* For BRBCR_EL12 fields see BRBCR_ELx */ + +#define REG_BRBFCR_EL1 S2_1_C9_C0_1 +#define SYS_BRBFCR_EL1 sys_reg(2, 1, 9, 0, 1) +#define SYS_BRBFCR_EL1_Op0 2 +#define SYS_BRBFCR_EL1_Op1 1 +#define SYS_BRBFCR_EL1_CRn 9 +#define SYS_BRBFCR_EL1_CRm 0 +#define SYS_BRBFCR_EL1_Op2 1 + +#define BRBFCR_EL1_BANK GENMASK(29, 28) +#define BRBFCR_EL1_BANK_MASK GENMASK(29, 28) +#define BRBFCR_EL1_BANK_SHIFT 28 +#define BRBFCR_EL1_BANK_WIDTH 2 +#define BRBFCR_EL1_BANK_FIRST UL(0b0) +#define BRBFCR_EL1_BANK_SECOND UL(0b1) + +#define BRBFCR_EL1_CONDDIR GENMASK(22, 22) +#define BRBFCR_EL1_CONDDIR_MASK GENMASK(22, 22) +#define BRBFCR_EL1_CONDDIR_SHIFT 22 +#define BRBFCR_EL1_CONDDIR_WIDTH 1 + +#define BRBFCR_EL1_DIRCALL GENMASK(21, 21) +#define BRBFCR_EL1_DIRCALL_MASK GENMASK(21, 21) +#define BRBFCR_EL1_DIRCALL_SHIFT 21 +#define BRBFCR_EL1_DIRCALL_WIDTH 1 + +#define BRBFCR_EL1_INDCALL GENMASK(20, 20) +#define BRBFCR_EL1_INDCALL_MASK GENMASK(20, 20) +#define BRBFCR_EL1_INDCALL_SHIFT 20 +#define BRBFCR_EL1_INDCALL_WIDTH 1 + +#define BRBFCR_EL1_RTN GENMASK(19, 19) +#define BRBFCR_EL1_RTN_MASK GENMASK(19, 19) +#define BRBFCR_EL1_RTN_SHIFT 19 +#define BRBFCR_EL1_RTN_WIDTH 1 + +#define BRBFCR_EL1_INDIRECT GENMASK(18, 18) +#define BRBFCR_EL1_INDIRECT_MASK GENMASK(18, 18) +#define BRBFCR_EL1_INDIRECT_SHIFT 18 +#define BRBFCR_EL1_INDIRECT_WIDTH 1 + +#define BRBFCR_EL1_DIRECT GENMASK(17, 17) +#define BRBFCR_EL1_DIRECT_MASK GENMASK(17, 17) +#define BRBFCR_EL1_DIRECT_SHIFT 17 +#define BRBFCR_EL1_DIRECT_WIDTH 1 + +#define BRBFCR_EL1_EnI GENMASK(16, 16) +#define BRBFCR_EL1_EnI_MASK GENMASK(16, 16) +#define BRBFCR_EL1_EnI_SHIFT 16 
+#define BRBFCR_EL1_EnI_WIDTH 1 + +#define BRBFCR_EL1_PAUSED GENMASK(7, 7) +#define BRBFCR_EL1_PAUSED_MASK GENMASK(7, 7) +#define BRBFCR_EL1_PAUSED_SHIFT 7 +#define BRBFCR_EL1_PAUSED_WIDTH 1 + +#define BRBFCR_EL1_LASTFAILED GENMASK(6, 6) +#define BRBFCR_EL1_LASTFAILED_MASK GENMASK(6, 6) +#define BRBFCR_EL1_LASTFAILED_SHIFT 6 +#define BRBFCR_EL1_LASTFAILED_WIDTH 1 + +#define BRBFCR_EL1_RES0 (UL(0) | GENMASK_ULL(63, 30) | \ + GENMASK_ULL(27, 23) | \ + GENMASK_ULL(15, 8) | \ + GENMASK_ULL(5, 0)) +#define BRBFCR_EL1_RES1 (UL(0)) +#define BRBFCR_EL1_UNKN (UL(0)) + +#define REG_BRBTS_EL1 S2_1_C9_C0_2 +#define SYS_BRBTS_EL1 sys_reg(2, 1, 9, 0, 2) +#define SYS_BRBTS_EL1_Op0 2 +#define SYS_BRBTS_EL1_Op1 1 +#define SYS_BRBTS_EL1_CRn 9 +#define SYS_BRBTS_EL1_CRm 0 +#define SYS_BRBTS_EL1_Op2 2 + +#define BRBTS_EL1_TS GENMASK(63, 0) +#define BRBTS_EL1_TS_MASK GENMASK(63, 0) +#define BRBTS_EL1_TS_SHIFT 0 +#define BRBTS_EL1_TS_WIDTH 64 + +#define BRBTS_EL1_RES0 (UL(0)) +#define BRBTS_EL1_RES1 (UL(0)) +#define BRBTS_EL1_UNKN (UL(0)) + +#define REG_BRBINFINJ_EL1 S2_1_C9_C1_0 +#define SYS_BRBINFINJ_EL1 sys_reg(2, 1, 9, 1, 0) +#define SYS_BRBINFINJ_EL1_Op0 2 +#define SYS_BRBINFINJ_EL1_Op1 1 +#define SYS_BRBINFINJ_EL1_CRn 9 +#define SYS_BRBINFINJ_EL1_CRm 1 +#define SYS_BRBINFINJ_EL1_Op2 0 + +#define BRBINFINJ_EL1_CCU GENMASK(46, 46) +#define BRBINFINJ_EL1_CCU_MASK GENMASK(46, 46) +#define BRBINFINJ_EL1_CCU_SHIFT 46 +#define BRBINFINJ_EL1_CCU_WIDTH 1 + +#define BRBINFINJ_EL1_CC GENMASK(45, 32) +#define BRBINFINJ_EL1_CC_MASK GENMASK(45, 32) +#define BRBINFINJ_EL1_CC_SHIFT 32 +#define BRBINFINJ_EL1_CC_WIDTH 14 + +#define BRBINFINJ_EL1_LASTFAILED GENMASK(17, 17) +#define BRBINFINJ_EL1_LASTFAILED_MASK GENMASK(17, 17) +#define BRBINFINJ_EL1_LASTFAILED_SHIFT 17 +#define BRBINFINJ_EL1_LASTFAILED_WIDTH 1 + +#define BRBINFINJ_EL1_T GENMASK(16, 16) +#define BRBINFINJ_EL1_T_MASK GENMASK(16, 16) +#define BRBINFINJ_EL1_T_SHIFT 16 +#define BRBINFINJ_EL1_T_WIDTH 1 + +#define BRBINFINJ_EL1_TYPE GENMASK(13, 8) +#define BRBINFINJ_EL1_TYPE_MASK GENMASK(13, 8) +#define BRBINFINJ_EL1_TYPE_SHIFT 8 +#define BRBINFINJ_EL1_TYPE_WIDTH 6 +#define BRBINFINJ_EL1_TYPE_UNCOND_DIRECT UL(0b000000) +#define BRBINFINJ_EL1_TYPE_INDIRECT UL(0b000001) +#define BRBINFINJ_EL1_TYPE_DIRECT_LINK UL(0b000010) +#define BRBINFINJ_EL1_TYPE_INDIRECT_LINK UL(0b000011) +#define BRBINFINJ_EL1_TYPE_RET UL(0b000101) +#define BRBINFINJ_EL1_TYPE_ERET UL(0b000111) +#define BRBINFINJ_EL1_TYPE_COND_DIRECT UL(0b001000) +#define BRBINFINJ_EL1_TYPE_DEBUG_HALT UL(0b100001) +#define BRBINFINJ_EL1_TYPE_CALL UL(0b100010) +#define BRBINFINJ_EL1_TYPE_TRAP UL(0b100011) +#define BRBINFINJ_EL1_TYPE_SERROR UL(0b100100) +#define BRBINFINJ_EL1_TYPE_INSN_DEBUG UL(0b100110) +#define BRBINFINJ_EL1_TYPE_DATA_DEBUG UL(0b100111) +#define BRBINFINJ_EL1_TYPE_ALIGN_FAULT UL(0b101010) +#define BRBINFINJ_EL1_TYPE_INSN_FAULT UL(0b101011) +#define BRBINFINJ_EL1_TYPE_DATA_FAULT UL(0b101100) +#define BRBINFINJ_EL1_TYPE_IRQ UL(0b101110) +#define BRBINFINJ_EL1_TYPE_FIQ UL(0b101111) +#define BRBINFINJ_EL1_TYPE_DEBUG_EXIT UL(0b111001) + +#define BRBINFINJ_EL1_EL GENMASK(7, 6) +#define BRBINFINJ_EL1_EL_MASK GENMASK(7, 6) +#define BRBINFINJ_EL1_EL_SHIFT 6 +#define BRBINFINJ_EL1_EL_WIDTH 2 +#define BRBINFINJ_EL1_EL_EL0 UL(0b00) +#define BRBINFINJ_EL1_EL_EL1 UL(0b01) +#define BRBINFINJ_EL1_EL_EL2 UL(0b10) +#define BRBINFINJ_EL1_EL_EL3 UL(0b11) + +#define BRBINFINJ_EL1_MPRED GENMASK(5, 5) +#define BRBINFINJ_EL1_MPRED_MASK GENMASK(5, 5) +#define BRBINFINJ_EL1_MPRED_SHIFT 5 +#define 
BRBINFINJ_EL1_MPRED_WIDTH 1 + +#define BRBINFINJ_EL1_VALID GENMASK(1, 0) +#define BRBINFINJ_EL1_VALID_MASK GENMASK(1, 0) +#define BRBINFINJ_EL1_VALID_SHIFT 0 +#define BRBINFINJ_EL1_VALID_WIDTH 2 +#define BRBINFINJ_EL1_VALID_NONE UL(0b00) +#define BRBINFINJ_EL1_VALID_TARGET UL(0b01) +#define BRBINFINJ_EL1_VALID_SOURCE UL(0b10) +#define BRBINFINJ_EL1_VALID_FULL UL(0b11) + +#define BRBINFINJ_EL1_RES0 (UL(0) | GENMASK_ULL(63, 47) | \ + GENMASK_ULL(31, 18) | \ + GENMASK_ULL(15, 14) | \ + GENMASK_ULL(4, 2)) +#define BRBINFINJ_EL1_RES1 (UL(0)) +#define BRBINFINJ_EL1_UNKN (UL(0)) + +#define REG_BRBSRCINJ_EL1 S2_1_C9_C1_1 +#define SYS_BRBSRCINJ_EL1 sys_reg(2, 1, 9, 1, 1) +#define SYS_BRBSRCINJ_EL1_Op0 2 +#define SYS_BRBSRCINJ_EL1_Op1 1 +#define SYS_BRBSRCINJ_EL1_CRn 9 +#define SYS_BRBSRCINJ_EL1_CRm 1 +#define SYS_BRBSRCINJ_EL1_Op2 1 + +#define BRBSRCINJ_EL1_ADDRESS GENMASK(63, 0) +#define BRBSRCINJ_EL1_ADDRESS_MASK GENMASK(63, 0) +#define BRBSRCINJ_EL1_ADDRESS_SHIFT 0 +#define BRBSRCINJ_EL1_ADDRESS_WIDTH 64 + +#define BRBSRCINJ_EL1_RES0 (UL(0)) +#define BRBSRCINJ_EL1_RES1 (UL(0)) +#define BRBSRCINJ_EL1_UNKN (UL(0)) + +#define REG_BRBTGTINJ_EL1 S2_1_C9_C1_2 +#define SYS_BRBTGTINJ_EL1 sys_reg(2, 1, 9, 1, 2) +#define SYS_BRBTGTINJ_EL1_Op0 2 +#define SYS_BRBTGTINJ_EL1_Op1 1 +#define SYS_BRBTGTINJ_EL1_CRn 9 +#define SYS_BRBTGTINJ_EL1_CRm 1 +#define SYS_BRBTGTINJ_EL1_Op2 2 + +#define BRBTGTINJ_EL1_ADDRESS GENMASK(63, 0) +#define BRBTGTINJ_EL1_ADDRESS_MASK GENMASK(63, 0) +#define BRBTGTINJ_EL1_ADDRESS_SHIFT 0 +#define BRBTGTINJ_EL1_ADDRESS_WIDTH 64 + +#define BRBTGTINJ_EL1_RES0 (UL(0)) +#define BRBTGTINJ_EL1_RES1 (UL(0)) +#define BRBTGTINJ_EL1_UNKN (UL(0)) + +#define REG_BRBIDR0_EL1 S2_1_C9_C2_0 +#define SYS_BRBIDR0_EL1 sys_reg(2, 1, 9, 2, 0) +#define SYS_BRBIDR0_EL1_Op0 2 +#define SYS_BRBIDR0_EL1_Op1 1 +#define SYS_BRBIDR0_EL1_CRn 9 +#define SYS_BRBIDR0_EL1_CRm 2 +#define SYS_BRBIDR0_EL1_Op2 0 + +#define BRBIDR0_EL1_CC GENMASK(15, 12) +#define BRBIDR0_EL1_CC_MASK GENMASK(15, 12) +#define BRBIDR0_EL1_CC_SHIFT 12 +#define BRBIDR0_EL1_CC_WIDTH 4 +#define BRBIDR0_EL1_CC_20_BIT UL(0b101) + +#define BRBIDR0_EL1_FORMAT GENMASK(11, 8) +#define BRBIDR0_EL1_FORMAT_MASK GENMASK(11, 8) +#define BRBIDR0_EL1_FORMAT_SHIFT 8 +#define BRBIDR0_EL1_FORMAT_WIDTH 4 +#define BRBIDR0_EL1_FORMAT_0 UL(0b0) + +#define BRBIDR0_EL1_NUMREC GENMASK(7, 0) +#define BRBIDR0_EL1_NUMREC_MASK GENMASK(7, 0) +#define BRBIDR0_EL1_NUMREC_SHIFT 0 +#define BRBIDR0_EL1_NUMREC_WIDTH 8 +#define BRBIDR0_EL1_NUMREC_8 UL(0b0001000) +#define BRBIDR0_EL1_NUMREC_16 UL(0b0010000) +#define BRBIDR0_EL1_NUMREC_32 UL(0b0100000) +#define BRBIDR0_EL1_NUMREC_64 UL(0b1000000) + +#define BRBIDR0_EL1_RES0 (UL(0) | GENMASK_ULL(63, 16)) +#define BRBIDR0_EL1_RES1 (UL(0)) +#define BRBIDR0_EL1_UNKN (UL(0)) +/*** End of Branch Record Buffer Extension ***/ + /* Definitions for system register interface to AMU for ARMv8.4 onwards */ #define SYS_AM_EL0(crm, op2) sys_reg(3, 3, 13, (crm), (op2)) #define SYS_AMCR_EL0 SYS_AM_EL0(2, 0) @@ -915,6 +1416,7 @@ #define ID_AA64MMFR2_CNP_SHIFT 0 /* id_aa64dfr0 */ +#define ID_AA64DFR0_EL1_BRBE_SHIFT 52 #define ID_AA64DFR0_TRBE_SHIFT 44 #define ID_AA64DFR0_TRACE_FILT_SHIFT 40 #define ID_AA64DFR0_DOUBLELOCK_SHIFT 36 @@ -926,6 +1428,9 @@ #define ID_AA64DFR0_TRACEVER_SHIFT 4 #define ID_AA64DFR0_DEBUGVER_SHIFT 0 +#define ID_AA64DFR0_EL1_BRBE_NI 0x0 +#define ID_AA64DFR0_EL1_BRBE_IMP 0x1 +#define ID_AA64DFR0_EL1_BRBE_BRBE_V1P1 0x2 #define ID_AA64DFR0_PMUVER_8_0 0x1 #define ID_AA64DFR0_PMUVER_8_1 0x4 #define ID_AA64DFR0_PMUVER_8_4 0x5 @@ 
-1224,4 +1729,10 @@
 
 #endif
 
+#define SYS_FIELD_PREP(reg, field, val)		\
+	FIELD_PREP(reg##_##field##_MASK, val)
+
+#define SYS_FIELD_PREP_ENUM(reg, field, val)	\
+	FIELD_PREP(reg##_##field##_MASK, reg##_##field##_##val)
+
 #endif	/* __ASM_SYSREG_H */
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 5bc343fc2a91b0572acb4e6d32b654153729a3c0..1dc0e60248a34a6c4f3fff0cf9e52db2de519f61 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -486,6 +486,51 @@ EXPORT_SYMBOL(kimage_vaddr)
  */
 	.section ".idmap.text","awx"
 
+#ifdef CONFIG_ARM64_BRBE
+/*
+ * Enable BRBE cycle count
+ *
+ * BRBE requires both the BRBCR_EL1.CC and BRBCR_EL2.CC fields to be
+ * set for cycle counts to be available in BRBINF_EL1.CC during
+ * branch record processing after a PMU interrupt. This macro enables
+ * the CC field in both registers while still executing at EL2.
+ *
+ * The BRBE driver can still toggle cycle count support via the
+ * BRBCR_EL1.CC field, regardless of whether the kernel ends up
+ * executing at EL1 or EL2.
+ */
+.macro __init_el2_brbe
+	mrs	x1, id_aa64dfr0_el1
+	ubfx	x1, x1, #ID_AA64DFR0_EL1_BRBE_SHIFT, #4
+	cbz	x1, .Lskip_brbe_cc_\@
+
+	mrs_s	x0, SYS_BRBCR_EL2
+	orr	x0, x0, BRBCR_ELx_CC
+	msr_s	SYS_BRBCR_EL2, x0
+
+	/*
+	 * Accessing the BRBCR_EL1 register here does not require the
+	 * BRBCR_EL12 accessor, as HCR_EL2.E2H should still be clear.
+	 * Check HCR_EL2.E2H anyway, to be on the safe side.
+	 */
+	mrs	x1, hcr_el2
+	and	x1, x1, #HCR_E2H
+	cbz	x1, .Lset_brbe_el1_direct_\@
+
+	mrs_s	x0, SYS_BRBCR_EL12
+	orr	x0, x0, BRBCR_ELx_CC
+	msr_s	SYS_BRBCR_EL12, x0
+	b	.Lskip_brbe_cc_\@
+
+.Lset_brbe_el1_direct_\@:
+	mrs_s	x0, SYS_BRBCR_EL1
+	orr	x0, x0, BRBCR_ELx_CC
+	msr_s	SYS_BRBCR_EL1, x0
+.Lskip_brbe_cc_\@:
+.endm
+
+#endif
 /*
  * If we're fortunate enough to boot at EL2, ensure that the world is
  * sane before dropping to EL1.
@@ -601,6 +646,9 @@ set_hcr:
 
 7:	msr	mdcr_el2, x3			// Configure debug traps
 
+#ifdef CONFIG_ARM64_BRBE
+	__init_el2_brbe
+#endif
 	/* LORegions */
 	mrs	x1, id_aa64mmfr1_el1
 	ubfx	x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4
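[Note] For readers less familiar with the EL2 boot path, the following is a C
rendering of the decision flow implemented by __init_el2_brbe above. It is a
minimal illustrative sketch, not part of the patch; the read_sysreg*() and
write_sysreg_s() accessors stand in for the MRS/MSR instructions used in the
assembly:

static void init_el2_brbe_model(void)
{
	u64 dfr0 = read_sysreg(id_aa64dfr0_el1);

	/* Nothing to do if BRBE is not implemented */
	if (!((dfr0 >> ID_AA64DFR0_EL1_BRBE_SHIFT) & 0xf))
		return;

	/* Enable cycle counting in branch records at EL2 */
	write_sysreg_s(read_sysreg_s(SYS_BRBCR_EL2) | BRBCR_ELx_CC, SYS_BRBCR_EL2);

	/* With HCR_EL2.E2H set, BRBCR_EL1 must be reached via BRBCR_EL12 */
	if (read_sysreg(hcr_el2) & HCR_E2H)
		write_sysreg_s(read_sysreg_s(SYS_BRBCR_EL12) | BRBCR_ELx_CC,
			       SYS_BRBCR_EL12);
	else
		write_sysreg_s(read_sysreg_s(SYS_BRBCR_EL1) | BRBCR_ELx_CC,
			       SYS_BRBCR_EL1);
}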
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index cdb3d4549b3a97c45bcb8de79a5f864b09e3d40f..abf31b78208e384caabc83d83b85aec419ba81a2 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -715,10 +715,16 @@ static void armv8pmu_enable_event(struct perf_event *event)
 	 * Enable counter
 	 */
 	armv8pmu_enable_event_counter(event);
+
+	if (has_branch_stack(event))
+		armv8pmu_branch_enable(event);
 }
 
 static void armv8pmu_disable_event(struct perf_event *event)
 {
+	if (has_branch_stack(event))
+		armv8pmu_branch_disable(event);
+
 	/*
 	 * Disable counter
 	 */
@@ -792,6 +798,16 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
 		if (!armpmu_event_set_period(event))
 			continue;
 
+		/*
+		 * The PMU IRQ should remain asserted until all branch
+		 * records are captured and processed into struct
+		 * perf_sample_data.
+		 */
+		if (has_branch_stack(event) && !WARN_ON(!cpuc->branches)) {
+			armv8pmu_branch_read(cpuc, event);
+			data.br_stack = &cpuc->branches->branch_stack;
+			data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
+		}
+
 		/*
 		 * Perf event overflow will queue the processing of the event as
 		 * an irq_work which will be taken care of in the handling of
@@ -871,6 +887,24 @@ static void armv8pmu_clear_event_idx(struct pmu_hw_events *cpuc,
 		clear_bit(idx - 1, cpuc->used_mask);
 }
 
+static void armv8pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(ctx->pmu);
+	void *task_ctx = ctx->task_ctx_data;
+
+	if (armpmu->has_branch_stack) {
+		/* Save branch records in task_ctx on sched out */
+		if (task_ctx && !sched_in) {
+			armv8pmu_branch_save(armpmu, task_ctx);
+			return;
+		}
+
+		/* Reset branch records on sched in */
+		if (sched_in)
+			armv8pmu_branch_reset();
+	}
+}
+
 /*
  * Add an event filter to a given event.
  */
@@ -947,6 +981,9 @@ static void armv8pmu_reset(void *info)
 		pmcr |= ARMV8_PMU_PMCR_LP;
 
 	armv8pmu_pmcr_write(pmcr);
+
+	if (cpu_pmu->has_branch_stack)
+		armv8pmu_branch_reset();
 }
 
 static int __armv8_pmuv3_map_event(struct perf_event *event,
@@ -964,6 +1001,12 @@ static int __armv8_pmuv3_map_event(struct perf_event *event,
 				       &armv8_pmuv3_perf_cache_map,
 				       ARMV8_PMU_EVTYPE_EVENT);
 
+	if (has_branch_stack(event)) {
+		event->attach_state |= PERF_ATTACH_TASK_DATA;
+		if (!armv8pmu_branch_attr_valid(event))
+			return -EOPNOTSUPP;
+	}
+
 	if (armv8pmu_event_is_64bit(event))
 		event->hw.flags |= ARMPMU_EVT_64BIT;
 
@@ -1056,6 +1099,35 @@ static void __armv8pmu_probe_pmu(void *info)
 		cpu_pmu->reg_pmmir = read_cpuid(PMMIR_EL1);
 	else
 		cpu_pmu->reg_pmmir = 0;
+	armv8pmu_branch_probe(cpu_pmu);
+}
+
+static int branch_records_alloc(struct arm_pmu *armpmu)
+{
+	struct branch_records __percpu *records;
+	int cpu;
+
+	records = alloc_percpu_gfp(struct branch_records, GFP_KERNEL);
+	if (!records)
+		return -ENOMEM;
+
+	/*
+	 * The percpu memory allocated for 'records' is handed out to the
+	 * per-CPU pmu_hw_events here in its entirety and never needs to
+	 * be freed later, so permanently losing the allocation handle
+	 * (i.e. 'records') is acceptable.
+	 *
+	 * Otherwise, this handle would have to be saved for a later
+	 * free_percpu() release.
+	 */
+	for_each_possible_cpu(cpu) {
+		struct pmu_hw_events *events_cpu;
+		struct branch_records *records_cpu;
+
+		events_cpu = per_cpu_ptr(armpmu->hw_events, cpu);
+		records_cpu = per_cpu_ptr(records, cpu);
+		events_cpu->branches = records_cpu;
+	}
+	return 0;
 }
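[Note] The per-CPU ownership handoff done by branch_records_alloc() above can
be modelled in isolation. The following standalone C sketch is illustrative
only (names and types are simplified stand-ins for the kernel's percpu
machinery); it shows why the allocation handle never needs to be kept:

#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS 4

struct branch_records_model { int nr; };
struct hw_events_model { struct branch_records_model *branches; };

int main(void)
{
	/* One allocation, parcelled out per CPU and never freed */
	struct branch_records_model *records =
		calloc(NR_CPUS, sizeof(*records));
	struct hw_events_model hw_events[NR_CPUS];

	if (!records)
		return 1;

	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		hw_events[cpu].branches = &records[cpu];

	/* Each CPU now reaches its records via its own hw_events */
	printf("cpu2 branches at %p\n", (void *)hw_events[2].branches);
	return 0;
}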
@@ -1072,7 +1144,21 @@ static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
 	if (ret)
 		return ret;
 
-	return probe.present ? 0 : -ENODEV;
+	if (!probe.present)
+		return -ENODEV;
+
+	if (cpu_pmu->has_branch_stack) {
+		ret = armv8pmu_task_ctx_cache_alloc(cpu_pmu);
+		if (ret)
+			return ret;
+
+		ret = branch_records_alloc(cpu_pmu);
+		if (ret) {
+			armv8pmu_task_ctx_cache_free(cpu_pmu);
+			return ret;
+		}
+	}
+	return 0;
 }
 
 static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name,
@@ -1097,6 +1183,8 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name,
 	cpu_pmu->reset			= armv8pmu_reset;
 	cpu_pmu->set_event_filter	= armv8pmu_set_event_filter;
 	cpu_pmu->filter_match		= armv8pmu_filter_match;
+	cpu_pmu->sched_task		= armv8pmu_sched_task;
+	cpu_pmu->branch_reset		= armv8pmu_branch_reset;
 	cpu_pmu->name			= name;
 	cpu_pmu->map_event		= map_event;
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 8334ba9bcc39dce9e72ae8b0c958114f57634621..24f9a86b922289c4900619818ad13a6bd219a7e3 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -1350,10 +1350,10 @@ static int branch_map[X86_BR_TYPE_MAP_MAX] = {
 	PERF_BR_SYSCALL,	/* X86_BR_SYSCALL */
 	PERF_BR_SYSRET,		/* X86_BR_SYSRET */
 	PERF_BR_UNKNOWN,	/* X86_BR_INT */
-	PERF_BR_UNKNOWN,	/* X86_BR_IRET */
+	PERF_BR_ERET,		/* X86_BR_IRET */
 	PERF_BR_COND,		/* X86_BR_JCC */
 	PERF_BR_UNCOND,		/* X86_BR_JMP */
-	PERF_BR_UNKNOWN,	/* X86_BR_IRQ */
+	PERF_BR_IRQ,		/* X86_BR_IRQ */
 	PERF_BR_IND_CALL,	/* X86_BR_IND_CALL */
 	PERF_BR_UNKNOWN,	/* X86_BR_ABORT */
 	PERF_BR_UNKNOWN,	/* X86_BR_IN_TX */
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 1e82ab01e75f5d6737b11455ed8c2e6e291874ad..25a577530da476b773a5b835beb96a222f507dc7 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -130,6 +130,17 @@ config ARM_SPE_PMU
 	  Extension, which provides periodic sampling of operations in
 	  the CPU pipeline and reports this via the perf AUX interface.
 
+config ARM64_BRBE
+	bool "Enable support for Branch Record Buffer Extension (BRBE)"
+	depends on PERF_EVENTS && ARM64 && ARM_PMU
+	default y
+	help
+	  Enable perf support for the Branch Record Buffer Extension (BRBE),
+	  which records all branches taken in an execution path. Records can
+	  be filtered by branch type and privilege level, and each record
+	  also captures additional relevant information such as the cycle
+	  count, misprediction status and branch type.
+
 source "drivers/perf/hisilicon/Kconfig"
 
 endmenu
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 5365fd56f88f35096a60deade85b7cbe4e0292ad..a89359aeac6d6ec98d129aafe4b9b1e5153ea15c 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -13,3 +13,4 @@ obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
 obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o
 obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
 obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
+obj-$(CONFIG_ARM64_BRBE) += arm_brbe.o
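[Note] Feature discovery hinges on the ID_AA64DFR0_EL1.BRBE field (bits
[55:52]) introduced earlier in this patch. The following standalone sketch
models the field extraction the driver performs; the register value below is
made up purely for illustration:

#include <stdio.h>
#include <stdint.h>

#define ID_AA64DFR0_EL1_BRBE_SHIFT	52

int main(void)
{
	/* Fake register value: BRBE field = 0x1 (FEAT_BRBE implemented) */
	uint64_t dfr0 = (uint64_t)0x1 << ID_AA64DFR0_EL1_BRBE_SHIFT;
	unsigned int brbe = (dfr0 >> ID_AA64DFR0_EL1_BRBE_SHIFT) & 0xf;

	/* 0x0: not implemented, 0x1: FEAT_BRBE, 0x2: FEAT_BRBEv1p1 */
	printf("BRBE field: %u\n", brbe);
	return 0;
}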
diff --git a/drivers/perf/arm_brbe.c b/drivers/perf/arm_brbe.c
new file mode 100644
index 0000000000000000000000000000000000000000..1ffa41e355cf3da07c526742a912cdbe019dc8db
--- /dev/null
+++ b/drivers/perf/arm_brbe.c
@@ -0,0 +1,779 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Branch Record Buffer Extension Driver.
+ *
+ * Copyright (C) 2022-2023 ARM Limited
+ *
+ * Author: Anshuman Khandual
+ */
+#include "arm_brbe.h"
+
+void armv8pmu_branch_reset(void)
+{
+	asm volatile(BRB_IALL_INSN);
+	isb();
+}
+
+static bool valid_brbe_nr(int brbe_nr)
+{
+	return brbe_nr == BRBIDR0_EL1_NUMREC_8 ||
+	       brbe_nr == BRBIDR0_EL1_NUMREC_16 ||
+	       brbe_nr == BRBIDR0_EL1_NUMREC_32 ||
+	       brbe_nr == BRBIDR0_EL1_NUMREC_64;
+}
+
+static bool valid_brbe_cc(int brbe_cc)
+{
+	return brbe_cc == BRBIDR0_EL1_CC_20_BIT;
+}
+
+static bool valid_brbe_format(int brbe_format)
+{
+	return brbe_format == BRBIDR0_EL1_FORMAT_0;
+}
+
+static bool valid_brbe_version(int brbe_version)
+{
+	return brbe_version == ID_AA64DFR0_EL1_BRBE_IMP ||
+	       brbe_version == ID_AA64DFR0_EL1_BRBE_BRBE_V1P1;
+}
+
+static void select_brbe_bank(int bank)
+{
+	u64 brbfcr;
+
+	WARN_ON(bank > BRBE_BANK_IDX_1);
+	brbfcr = read_sysreg_s(SYS_BRBFCR_EL1);
+	brbfcr &= ~BRBFCR_EL1_BANK_MASK;
+	brbfcr |= SYS_FIELD_PREP(BRBFCR_EL1, BANK, bank);
+	write_sysreg_s(brbfcr, SYS_BRBFCR_EL1);
+	isb();
+}
+
+static bool __read_brbe_regset(struct brbe_regset *entry, int idx)
+{
+	entry->brbinf = get_brbinf_reg(idx);
+
+	if (brbe_invalid(entry->brbinf))
+		return false;
+
+	entry->brbsrc = get_brbsrc_reg(idx);
+	entry->brbtgt = get_brbtgt_reg(idx);
+	return true;
+}
+
+/*
+ * Read all BRBE entries in HW until the first invalid entry.
+ *
+ * The caller must ensure that the BRBE is not concurrently modifying these
+ * branch entries.
+ */
+static int capture_brbe_regset(struct brbe_regset *buf, int nr_hw_entries)
+{
+	int idx = 0;
+
+	select_brbe_bank(BRBE_BANK_IDX_0);
+	while (idx < nr_hw_entries && idx <= BRBE_BANK0_IDX_MAX) {
+		if (!__read_brbe_regset(&buf[idx], idx))
+			return idx;
+		idx++;
+	}
+
+	select_brbe_bank(BRBE_BANK_IDX_1);
+	while (idx < nr_hw_entries && idx <= BRBE_BANK1_IDX_MAX) {
+		if (!__read_brbe_regset(&buf[idx], idx))
+			return idx;
+		idx++;
+	}
+	return idx;
+}
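[Note] The capture loop above stops at the first invalid record, since
records are generated oldest-out, newest-first. A minimal standalone model of
that behaviour, with synthetic data (everything here is a made-up stand-in):

#include <stdio.h>

#define NR_HW_ENTRIES 8

struct regset_model { int valid; };

static int capture_model(const struct regset_model *hw, struct regset_model *buf)
{
	int idx = 0;

	/* Copy records in order, stopping at the first invalid one */
	while (idx < NR_HW_ENTRIES && hw[idx].valid) {
		buf[idx] = hw[idx];
		idx++;
	}
	return idx;
}

int main(void)
{
	struct regset_model hw[NR_HW_ENTRIES] = {
		{1}, {1}, {1}, {0}, {1}, {1}, {1}, {1}	/* invalid at 3 */
	};
	struct regset_model buf[NR_HW_ENTRIES];

	printf("captured %d records\n", capture_model(hw, buf)); /* 3 */
	return 0;
}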
+/*
+ * This function concatenates branch records from the stored and live
+ * buffers, up to a maximum of nr_max records, with the result placed
+ * back in the stored buffer. The concatenated buffer always contains
+ * all the branch records from the live buffer, plus as many records
+ * from the stored buffer as fit while the combined length does not
+ * exceed 'nr_max'.
+ *
+ *	Stored records				Live records
+ *	------------------------------------------------^
+ *	|	S0	|	L0	|	Newest	|
+ *	---------------------------------		|
+ *	|	S1	|	L1	|		|
+ *	---------------------------------		|
+ *	|	S2	|	L2	|		|
+ *	---------------------------------		|
+ *	|	S3	|	L3	|		|
+ *	---------------------------------		|
+ *	|	S4	|	L4	|	nr_max
+ *	---------------------------------		|
+ *	|		|	L5	|		|
+ *	---------------------------------		|
+ *	|		|	L6	|		|
+ *	---------------------------------		|
+ *	|		|	L7	|		|
+ *	---------------------------------		|
+ *	|		|		|		|
+ *	---------------------------------		|
+ *	|		|		|	Oldest	|
+ *	------------------------------------------------V
+ *
+ *
+ * S0 is the newest of the stored records, whereas L7 is the oldest of
+ * the live records. Unless the live buffer is detected as being full
+ * (thus potentially dropping off some older records), L7 and S0 are
+ * contiguous in time for a user task context. The stitched buffer here
+ * represents the maximum possible branch records, contiguous in time.
+ *
+ *	Stored records				Live records
+ *	------------------------------------------------^
+ *	|	L0	|	L0	|	Newest	|
+ *	---------------------------------		|
+ *	|	L1	|	L1	|		|
+ *	---------------------------------		|
+ *	|	L2	|	L2	|		|
+ *	---------------------------------		|
+ *	|	L3	|	L3	|		|
+ *	---------------------------------		|
+ *	|	L4	|	L4	|	nr_max
+ *	---------------------------------		|
+ *	|	L5	|	L5	|		|
+ *	---------------------------------		|
+ *	|	L6	|	L6	|		|
+ *	---------------------------------		|
+ *	|	L7	|	L7	|		|
+ *	---------------------------------		|
+ *	|	S0	|		|		|
+ *	---------------------------------		|
+ *	|	S1	|		|	Oldest	|
+ *	------------------------------------------------V
+ *	|	S2	| <----|
+ *	-----------------      |
+ *	|	S3	| <----| Dropped off after nr_max
+ *	-----------------      |
+ *	|	S4	| <----|
+ *	-----------------
+ */
+static int stitch_stored_live_entries(struct brbe_regset *stored,
+				      struct brbe_regset *live,
+				      int nr_stored, int nr_live,
+				      int nr_max)
+{
+	int nr_move = min(nr_stored, nr_max - nr_live);
+
+	/* Move the tail of the buffer to make room for the new entries */
+	memmove(&stored[nr_live], &stored[0], nr_move * sizeof(*stored));
+
+	/* Copy the new entries into the head of the buffer */
+	memcpy(&stored[0], &live[0], nr_live * sizeof(*stored));
+
+	/* Return the number of entries in the stitched buffer */
+	return min(nr_live + nr_stored, nr_max);
+}
+
+static int brbe_branch_save(struct brbe_regset *live, int nr_hw_entries)
+{
+	u64 brbfcr = read_sysreg_s(SYS_BRBFCR_EL1);
+	int nr_live;
+
+	write_sysreg_s(brbfcr | BRBFCR_EL1_PAUSED, SYS_BRBFCR_EL1);
+	isb();
+
+	nr_live = capture_brbe_regset(live, nr_hw_entries);
+
+	write_sysreg_s(brbfcr & ~BRBFCR_EL1_PAUSED, SYS_BRBFCR_EL1);
+	isb();
+
+	return nr_live;
+}
+
+void armv8pmu_branch_save(struct arm_pmu *arm_pmu, void *ctx)
+{
+	struct arm64_perf_task_context *task_ctx = ctx;
+	struct brbe_regset live[BRBE_MAX_ENTRIES];
+	int nr_live, nr_store, nr_hw_entries;
+
+	nr_hw_entries = brbe_get_numrec(arm_pmu->reg_brbidr);
+	nr_live = brbe_branch_save(live, nr_hw_entries);
+	nr_store = task_ctx->nr_brbe_records;
+	nr_store = stitch_stored_live_entries(task_ctx->store, live, nr_store,
+					      nr_live, nr_hw_entries);
+	task_ctx->nr_brbe_records = nr_store;
+}
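[Note] A worked example of stitch_stored_live_entries() with small numbers:
nr_stored = 5, nr_live = 8, nr_max = 10. This standalone sketch mirrors the
memmove()/memcpy() above on plain ints (synthetic data, illustrative only):

#include <stdio.h>
#include <string.h>

int main(void)
{
	int stored[10] = { 100, 101, 102, 103, 104 };	/* S0..S4 */
	int live[8]    = { 0, 1, 2, 3, 4, 5, 6, 7 };	/* L0..L7 */
	int nr_stored = 5, nr_live = 8, nr_max = 10;
	int nr_move = nr_stored < nr_max - nr_live ?
		      nr_stored : nr_max - nr_live;	/* min() -> 2 */

	/* Make room for the live entries, keeping the newest stored ones */
	memmove(&stored[nr_live], &stored[0], nr_move * sizeof(stored[0]));
	/* Live entries become the head of the stitched buffer */
	memcpy(&stored[0], &live[0], nr_live * sizeof(stored[0]));

	/* Prints: 0 1 2 3 4 5 6 7 100 101 -- S2..S4 dropped past nr_max */
	for (int i = 0; i < nr_max; i++)
		printf("%d ", stored[i]);
	printf("\n");
	return 0;
}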
+/*
+ * Generic perf branch filters supported on BRBE
+ *
+ * New branch filters need to be evaluated for BRBE support before being
+ * accepted, so that they are not silently accepted only to fail later.
+ * PERF_SAMPLE_BRANCH_HV is a special case that is selectively supported
+ * only on platforms where the kernel runs in hyp mode.
+ */
+#define BRBE_EXCLUDE_BRANCH_FILTERS (PERF_SAMPLE_BRANCH_ABORT_TX	| \
+				     PERF_SAMPLE_BRANCH_IN_TX		| \
+				     PERF_SAMPLE_BRANCH_NO_TX		| \
+				     PERF_SAMPLE_BRANCH_CALL_STACK)
+
+#define BRBE_ALLOWED_BRANCH_FILTERS (PERF_SAMPLE_BRANCH_USER		| \
+				     PERF_SAMPLE_BRANCH_KERNEL		| \
+				     PERF_SAMPLE_BRANCH_HV		| \
+				     PERF_SAMPLE_BRANCH_ANY		| \
+				     PERF_SAMPLE_BRANCH_ANY_CALL	| \
+				     PERF_SAMPLE_BRANCH_ANY_RETURN	| \
+				     PERF_SAMPLE_BRANCH_IND_CALL	| \
+				     PERF_SAMPLE_BRANCH_COND		| \
+				     PERF_SAMPLE_BRANCH_IND_JUMP	| \
+				     PERF_SAMPLE_BRANCH_CALL		| \
+				     PERF_SAMPLE_BRANCH_NO_FLAGS	| \
+				     PERF_SAMPLE_BRANCH_NO_CYCLES	| \
+				     PERF_SAMPLE_BRANCH_TYPE_SAVE	| \
+				     PERF_SAMPLE_BRANCH_HW_INDEX	| \
+				     PERF_SAMPLE_BRANCH_PRIV_SAVE)
+
+#define BRBE_PERF_BRANCH_FILTERS    (BRBE_ALLOWED_BRANCH_FILTERS	| \
+				     BRBE_EXCLUDE_BRANCH_FILTERS)
+
+bool armv8pmu_branch_attr_valid(struct perf_event *event)
+{
+	u64 branch_type = event->attr.branch_sample_type;
+
+	/*
+	 * Ensure that the combined allowed and excluded branch filter
+	 * masks always stay in sync with the generic perf ABI.
+	 */
+	BUILD_BUG_ON(BRBE_PERF_BRANCH_FILTERS != (PERF_SAMPLE_BRANCH_MAX - 1));
+
+	if (branch_type & ~BRBE_ALLOWED_BRANCH_FILTERS) {
+		pr_debug_once("requested branch filter not supported 0x%llx\n", branch_type);
+		return false;
+	}
+
+	/*
+	 * If the event does not request at least one of the privilege
+	 * filters in PERF_SAMPLE_BRANCH_PLM_ALL, core perf fills them
+	 * in from the event's existing privilege settings via
+	 * attr.exclude_[user|kernel|hv].
+	 *
+	 * As event->attr.branch_sample_type might already have been
+	 * modified by the time the event reaches here, it is impossible
+	 * to tell whether the HV privilege was requested originally or
+	 * was added by core perf. Report this situation once and then
+	 * silently ignore any further instances.
+	 */
+	if ((branch_type & PERF_SAMPLE_BRANCH_HV) && !is_kernel_in_hyp_mode())
+		pr_debug_once("hypervisor privilege filter not supported 0x%llx\n", branch_type);
+
+	return true;
+}
+
+int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *arm_pmu)
+{
+	size_t size = sizeof(struct arm64_perf_task_context);
+
+	arm_pmu->pmu.task_ctx_cache = kmem_cache_create("arm64_brbe_task_ctx", size, 0, 0, NULL);
+	if (!arm_pmu->pmu.task_ctx_cache)
+		return -ENOMEM;
+	return 0;
+}
+
+void armv8pmu_task_ctx_cache_free(struct arm_pmu *arm_pmu)
+{
+	kmem_cache_destroy(arm_pmu->pmu.task_ctx_cache);
+}
+
+static int brbe_attributes_probe(struct arm_pmu *armpmu, u32 brbe)
+{
+	u64 brbidr = read_sysreg_s(SYS_BRBIDR0_EL1);
+	int brbe_version, brbe_format, brbe_cc, brbe_nr;
+
+	brbe_version = brbe;
+	brbe_format = brbe_get_format(brbidr);
+	brbe_cc = brbe_get_cc_bits(brbidr);
+	brbe_nr = brbe_get_numrec(brbidr);
+	armpmu->reg_brbidr = brbidr;
+
+	if (!valid_brbe_version(brbe_version) ||
+	    !valid_brbe_format(brbe_format) ||
+	    !valid_brbe_cc(brbe_cc) ||
+	    !valid_brbe_nr(brbe_nr))
+		return -EOPNOTSUPP;
+	return 0;
+}
+
+void armv8pmu_branch_probe(struct arm_pmu *armpmu)
+{
+	u64 aa64dfr0 = read_sysreg_s(SYS_ID_AA64DFR0_EL1);
+	u32 brbe;
+
+	/*
+	 * The number of branch entries in a BRBE implementation cannot
+	 * exceed the maximum number of branch records supported at the
+	 * ARM PMU abstraction level; otherwise the arrays could overflow
+	 * while processing BRBE branch records.
+	 */
+	BUILD_BUG_ON(BRBE_BANK_MAX_ENTRIES > MAX_BRANCH_RECORDS);
+
+	brbe = cpuid_feature_extract_unsigned_field(aa64dfr0, ID_AA64DFR0_EL1_BRBE_SHIFT);
+	if (!brbe)
+		return;
+
+	if (brbe_attributes_probe(armpmu, brbe))
+		return;
+
+	armpmu->has_branch_stack = 1;
+}
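[Note] From userspace, an event configuration that would pass
armv8pmu_branch_attr_valid() above looks like the following sketch. It is
illustrative only; the event type/config are generic choices and error
handling is omitted:

#include <linux/perf_event.h>
#include <string.h>

static void setup_branch_attr(struct perf_event_attr *attr)
{
	memset(attr, 0, sizeof(*attr));
	attr->size = sizeof(*attr);
	attr->type = PERF_TYPE_HARDWARE;
	attr->config = PERF_COUNT_HW_CPU_CYCLES;
	attr->sample_period = 100000;
	attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
	/* User-space-only branch sampling, all branch types recorded */
	attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
				   PERF_SAMPLE_BRANCH_ANY;
	attr->exclude_kernel = 1;
	attr->exclude_hv = 1;
}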
+/*
+ * BRBE supports the following functional branch type filters while
+ * generating branch records. These branch filters can be enabled
+ * either individually or as a group, i.e. by ORing multiple filters
+ * with each other.
+ *
+ * BRBFCR_EL1_CONDDIR  - Conditional direct branch
+ * BRBFCR_EL1_DIRCALL  - Direct call
+ * BRBFCR_EL1_INDCALL  - Indirect call
+ * BRBFCR_EL1_INDIRECT - Indirect branch
+ * BRBFCR_EL1_DIRECT   - Direct branch
+ * BRBFCR_EL1_RTN      - Subroutine return
+ */
+static u64 branch_type_to_brbfcr(int branch_type)
+{
+	u64 brbfcr = 0;
+
+	if (branch_type & PERF_SAMPLE_BRANCH_ANY) {
+		brbfcr |= BRBFCR_EL1_BRANCH_FILTERS;
+		return brbfcr;
+	}
+
+	if (branch_type & PERF_SAMPLE_BRANCH_ANY_CALL) {
+		brbfcr |= BRBFCR_EL1_INDCALL;
+		brbfcr |= BRBFCR_EL1_DIRCALL;
+	}
+
+	if (branch_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
+		brbfcr |= BRBFCR_EL1_RTN;
+
+	if (branch_type & PERF_SAMPLE_BRANCH_IND_CALL)
+		brbfcr |= BRBFCR_EL1_INDCALL;
+
+	if (branch_type & PERF_SAMPLE_BRANCH_COND)
+		brbfcr |= BRBFCR_EL1_CONDDIR;
+
+	if (branch_type & PERF_SAMPLE_BRANCH_IND_JUMP)
+		brbfcr |= BRBFCR_EL1_INDIRECT;
+
+	if (branch_type & PERF_SAMPLE_BRANCH_CALL)
+		brbfcr |= BRBFCR_EL1_DIRCALL;
+
+	return brbfcr;
+}
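[Note] A worked example of the mapping above, runnable standalone. The F_*
constants are local stand-ins placed at the architectural bit positions of
the corresponding BRBFCR_EL1 fields (illustrative only):

#include <stdio.h>
#include <stdint.h>

#define F_DIRECT	(1ull << 17)
#define F_INDIRECT	(1ull << 18)
#define F_RTN		(1ull << 19)
#define F_INDCALL	(1ull << 20)
#define F_DIRCALL	(1ull << 21)
#define F_CONDDIR	(1ull << 22)

int main(void)
{
	/* PERF_SAMPLE_BRANCH_ANY_CALL | PERF_SAMPLE_BRANCH_ANY_RETURN */
	uint64_t brbfcr = F_INDCALL | F_DIRCALL | F_RTN;

	/* PERF_SAMPLE_BRANCH_ANY short-circuits to every functional filter */
	uint64_t brbfcr_any = F_DIRECT | F_INDIRECT | F_RTN |
			      F_INDCALL | F_DIRCALL | F_CONDDIR;

	printf("calls+returns: %#llx, any: %#llx\n",
	       (unsigned long long)brbfcr, (unsigned long long)brbfcr_any);
	return 0;
}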
+/*
+ * BRBE supports the following privilege mode filters while generating
+ * branch records.
+ *
+ * BRBCR_ELx_E0BRE - EL0 branch records
+ * BRBCR_ELx_ExBRE - EL1/EL2 branch records
+ *
+ * BRBE also supports the following additional functional branch type
+ * filters while generating branch records.
+ *
+ * BRBCR_ELx_EXCEPTION - Exception
+ * BRBCR_ELx_ERTN      - Exception return
+ */
+static u64 branch_type_to_brbcr(int branch_type)
+{
+	u64 brbcr = BRBCR_ELx_DEFAULT_TS;
+
+	/*
+	 * BRBE should be paused on a PMU interrupt while tracing kernel
+	 * space, to stop capturing further branch records. Otherwise
+	 * the interrupt handler's own branch records might end up in
+	 * the samples, which is not desired.
+	 *
+	 * BRBE need not be paused on a PMU interrupt while tracing only
+	 * user space, because the handler automatically runs inside a
+	 * prohibited region. However, many more cycles may elapse after
+	 * the PMU overflow before the interrupt is actually taken, by
+	 * which time the relevant records may have been overwritten.
+	 * Hence, enable the pause-on-PMU-interrupt mechanism for
+	 * user-only traces as well.
+	 */
+	brbcr |= BRBCR_ELx_FZP;
+
+	if (branch_type & PERF_SAMPLE_BRANCH_USER)
+		brbcr |= BRBCR_ELx_E0BRE;
+
+	/*
+	 * When running in hyp mode, writes to BRBCR_EL1 actually land
+	 * in BRBCR_EL2 instead. The E2BRE field is also at the same
+	 * bit position as E1BRE.
+	 */
+	if (branch_type & PERF_SAMPLE_BRANCH_KERNEL)
+		brbcr |= BRBCR_ELx_ExBRE;
+
+	if (branch_type & PERF_SAMPLE_BRANCH_HV) {
+		if (is_kernel_in_hyp_mode())
+			brbcr |= BRBCR_ELx_ExBRE;
+	}
+
+	if (!(branch_type & PERF_SAMPLE_BRANCH_NO_CYCLES))
+		brbcr |= BRBCR_ELx_CC;
+
+	if (!(branch_type & PERF_SAMPLE_BRANCH_NO_FLAGS))
+		brbcr |= BRBCR_ELx_MPRED;
+
+	/*
+	 * Exception and exception return branches may be captured
+	 * irrespective of the perf event's privilege. If the perf event
+	 * does not have sufficient privilege for a given exception
+	 * level, addresses belonging to that exception level will be
+	 * reported as zero in the captured branch record, creating
+	 * source-only or target-only records.
+	 */
+	if (branch_type & PERF_SAMPLE_BRANCH_ANY) {
+		brbcr |= BRBCR_ELx_EXCEPTION;
+		brbcr |= BRBCR_ELx_ERTN;
+	}
+
+	if (branch_type & PERF_SAMPLE_BRANCH_ANY_CALL)
+		brbcr |= BRBCR_ELx_EXCEPTION;
+
+	if (branch_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
+		brbcr |= BRBCR_ELx_ERTN;
+
+	return brbcr & BRBCR_ELx_CONFIG_MASK;
+}
+
+void armv8pmu_branch_enable(struct perf_event *event)
+{
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
+	u64 brbfcr, brbcr;
+
+	if (!cpuc->brbe_users)
+		return;
+
+	/*
+	 * Skip reconfiguring BRBE when the filters and configuration
+	 * are unchanged.
+	 */
+	if (cpuc->brbe_sample_type == event->attr.branch_sample_type)
+		return;
+
+	/*
+	 * A new, mismatched branch sample type request reconfigures
+	 * BRBE, overriding any previous branch filters.
+	 */
+	cpuc->brbe_sample_type = event->attr.branch_sample_type;
+	brbfcr = read_sysreg_s(SYS_BRBFCR_EL1);
+	brbfcr &= ~BRBFCR_EL1_DEFAULT_CONFIG;
+	brbfcr |= branch_type_to_brbfcr(cpuc->brbe_sample_type);
+	write_sysreg_s(brbfcr, SYS_BRBFCR_EL1);
+	isb();
+
+	brbcr = read_sysreg_s(SYS_BRBCR_EL1);
+	brbcr &= ~BRBCR_ELx_CONFIG_MASK;
+	brbcr |= branch_type_to_brbcr(cpuc->brbe_sample_type);
+	write_sysreg_s(brbcr, SYS_BRBCR_EL1);
+	isb();
+}
+
+void armv8pmu_branch_disable(struct perf_event *event)
+{
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
+	u64 brbfcr, brbcr;
+
+	if (cpuc->brbe_users)
+		return;
+
+	cpuc->brbe_sample_type = 0;
+	brbcr = read_sysreg_s(SYS_BRBCR_EL1);
+	brbfcr = read_sysreg_s(SYS_BRBFCR_EL1);
+	brbcr &= ~(BRBCR_ELx_E0BRE | BRBCR_ELx_ExBRE);
+	brbfcr |= BRBFCR_EL1_PAUSED;
+	write_sysreg_s(brbcr, SYS_BRBCR_EL1);
+	write_sysreg_s(brbfcr, SYS_BRBFCR_EL1);
+	isb();
+}
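[Note] A worked derivation of branch_type_to_brbcr() for the common
user-only, all-branches case (a sketch of what the code above computes, not
an additional definition):

/*
 * For PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_ANY, with neither
 * NO_CYCLES nor NO_FLAGS requested, branch_type_to_brbcr() yields:
 *
 *   BRBCR_ELx_DEFAULT_TS			(TS = VIRTUAL)
 * | BRBCR_ELx_FZP				(pause on PMU interrupt)
 * | BRBCR_ELx_E0BRE				(EL0 branch records)
 * | BRBCR_ELx_CC				(cycle counts)
 * | BRBCR_ELx_MPRED				(misprediction flags)
 * | BRBCR_ELx_EXCEPTION | BRBCR_ELx_ERTN	(from BRANCH_ANY)
 *
 * ExBRE stays clear, so no EL1/EL2 branch records are generated.
 */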
+static void brbe_set_perf_entry_type(struct perf_branch_entry *entry, u64 brbinf)
+{
+	int brbe_type = brbe_get_type(brbinf);
+
+	switch (brbe_type) {
+	case BRBINFx_EL1_TYPE_UNCOND_DIRECT:
+		entry->type = PERF_BR_UNCOND;
+		break;
+	case BRBINFx_EL1_TYPE_INDIRECT:
+		entry->type = PERF_BR_IND;
+		break;
+	case BRBINFx_EL1_TYPE_DIRECT_LINK:
+		entry->type = PERF_BR_CALL;
+		break;
+	case BRBINFx_EL1_TYPE_INDIRECT_LINK:
+		entry->type = PERF_BR_IND_CALL;
+		break;
+	case BRBINFx_EL1_TYPE_RET:
+		entry->type = PERF_BR_RET;
+		break;
+	case BRBINFx_EL1_TYPE_COND_DIRECT:
+		entry->type = PERF_BR_COND;
+		break;
+	case BRBINFx_EL1_TYPE_CALL:
+		entry->type = PERF_BR_CALL;
+		break;
+	case BRBINFx_EL1_TYPE_TRAP:
+		entry->type = PERF_BR_SYSCALL;
+		break;
+	case BRBINFx_EL1_TYPE_ERET:
+		entry->type = PERF_BR_ERET;
+		break;
+	case BRBINFx_EL1_TYPE_IRQ:
+		entry->type = PERF_BR_IRQ;
+		break;
+	case BRBINFx_EL1_TYPE_DEBUG_HALT:
+		entry->type = PERF_BR_EXTEND_ABI;
+		entry->new_type = PERF_BR_ARM64_DEBUG_HALT;
+		break;
+	case BRBINFx_EL1_TYPE_SERROR:
+		entry->type = PERF_BR_SERROR;
+		break;
+	case BRBINFx_EL1_TYPE_INSN_DEBUG:
+		entry->type = PERF_BR_EXTEND_ABI;
+		entry->new_type = PERF_BR_ARM64_DEBUG_INST;
+		break;
+	case BRBINFx_EL1_TYPE_DATA_DEBUG:
+		entry->type = PERF_BR_EXTEND_ABI;
+		entry->new_type = PERF_BR_ARM64_DEBUG_DATA;
+		break;
+	case BRBINFx_EL1_TYPE_ALIGN_FAULT:
+		entry->type = PERF_BR_EXTEND_ABI;
+		entry->new_type = PERF_BR_NEW_FAULT_ALGN;
+		break;
+	case BRBINFx_EL1_TYPE_INSN_FAULT:
+		entry->type = PERF_BR_EXTEND_ABI;
+		entry->new_type = PERF_BR_NEW_FAULT_INST;
+		break;
+	case BRBINFx_EL1_TYPE_DATA_FAULT:
+		entry->type = PERF_BR_EXTEND_ABI;
+		entry->new_type = PERF_BR_NEW_FAULT_DATA;
+		break;
+	case BRBINFx_EL1_TYPE_FIQ:
+		entry->type = PERF_BR_EXTEND_ABI;
+		entry->new_type = PERF_BR_ARM64_FIQ;
+		break;
+	case BRBINFx_EL1_TYPE_DEBUG_EXIT:
+		entry->type = PERF_BR_EXTEND_ABI;
+		entry->new_type = PERF_BR_ARM64_DEBUG_EXIT;
+		break;
+	default:
+		pr_warn_once("%d - unknown branch type captured\n", brbe_type);
+		entry->type = PERF_BR_UNKNOWN;
+		break;
+	}
+}
+
+static int brbe_get_perf_priv(u64 brbinf)
+{
+	int brbe_el = brbe_get_el(brbinf);
+
+	switch (brbe_el) {
+	case BRBINFx_EL1_EL_EL0:
+		return PERF_BR_PRIV_USER;
+	case BRBINFx_EL1_EL_EL1:
+		return PERF_BR_PRIV_KERNEL;
+	case BRBINFx_EL1_EL_EL2:
+		if (is_kernel_in_hyp_mode())
+			return PERF_BR_PRIV_KERNEL;
+		return PERF_BR_PRIV_HV;
+	default:
+		pr_warn_once("%d - unknown branch privilege captured\n", brbe_el);
+		return PERF_BR_PRIV_UNKNOWN;
+	}
+}
+
+static void capture_brbe_flags(struct perf_branch_entry *entry, struct perf_event *event,
+			       u64 brbinf)
+{
+	if (branch_sample_type(event))
+		brbe_set_perf_entry_type(entry, brbinf);
+
+	if (!branch_sample_no_cycles(event))
+		entry->cycles = brbe_get_cycles(brbinf);
+
+	if (!branch_sample_no_flags(event)) {
+		/*
+		 * BRBINFx_EL1.LASTFAILED indicates that a TME transaction
+		 * failed (or was cancelled) prior to this record, and that
+		 * some number of records prior to this one may have been
+		 * generated during an attempt to execute the transaction.
+		 *
+		 * We will remove such entries later in process_branch_aborts().
+		 */
+		entry->abort = brbe_get_lastfailed(brbinf);
+
+		/*
+		 * The transaction state and misprediction information is
+		 * available only for complete and source-only branch
+		 * records.
+		 */
+		if (brbe_record_is_complete(brbinf) ||
+		    brbe_record_is_source_only(brbinf)) {
+			entry->mispred = brbe_get_mispredict(brbinf);
+			entry->predicted = !entry->mispred;
+			entry->in_tx = brbe_get_in_tx(brbinf);
+		}
+	}
+
+	if (branch_sample_priv(event)) {
+		/*
+		 * The branch privilege level is available only for
+		 * complete and target-only branch records.
+		 */
+		if (brbe_record_is_complete(brbinf) ||
+		    brbe_record_is_target_only(brbinf))
+			entry->priv = brbe_get_perf_priv(brbinf);
+	}
+}
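[Note] To make the BRBINF field layout concrete, here is a standalone
decoding example for a synthetic BRBINF value, with the FIELD_GET()-style
shifts written out by hand (illustrative only; the value is made up):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* VALID = 0b11 (full), MPRED = 1, EL = 0b00 (EL0), TYPE = 0b000101 (RET) */
	uint64_t brbinf = (0x3ull << 0) | (1ull << 5) | (0x0ull << 6) |
			  (0x05ull << 8);

	printf("valid=%llu type=%llu el=%llu mpred=%llu\n",
	       (unsigned long long)((brbinf >> 0) & 0x3),	/* VALID */
	       (unsigned long long)((brbinf >> 8) & 0x3f),	/* TYPE  */
	       (unsigned long long)((brbinf >> 6) & 0x3),	/* EL    */
	       (unsigned long long)((brbinf >> 5) & 0x1));	/* MPRED */
	return 0;
}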
+/*
+ * A branch record with BRBINFx_EL1.LASTFAILED set implies that all
+ * preceding consecutive branch records that were in a transaction
+ * (i.e. had their BRBINFx_EL1.TX set) have been aborted.
+ *
+ * Similarly, BRBFCR_EL1.LASTFAILED being set indicates that all
+ * preceding consecutive branch records, up to the last record, that
+ * were in a transaction (i.e. had their BRBINFx_EL1.TX set) have been
+ * aborted.
+ *
+ * ---------------------------------	-------------------
+ * | 00 | BRBSRC | BRBTGT | BRBINF |	| TX = 1 | LF = 0 | [TX success]
+ * ---------------------------------	-------------------
+ * | 01 | BRBSRC | BRBTGT | BRBINF |	| TX = 1 | LF = 0 | [TX success]
+ * ---------------------------------	-------------------
+ * | 02 | BRBSRC | BRBTGT | BRBINF |	| TX = 0 | LF = 0 |
+ * ---------------------------------	-------------------
+ * | 03 | BRBSRC | BRBTGT | BRBINF |	| TX = 1 | LF = 0 | [TX failed]
+ * ---------------------------------	-------------------
+ * | 04 | BRBSRC | BRBTGT | BRBINF |	| TX = 1 | LF = 0 | [TX failed]
+ * ---------------------------------	-------------------
+ * | 05 | BRBSRC | BRBTGT | BRBINF |	| TX = 0 | LF = 1 |
+ * ---------------------------------	-------------------
+ * | .. | BRBSRC | BRBTGT | BRBINF |	| TX = 0 | LF = 0 |
+ * ---------------------------------	-------------------
+ * | 61 | BRBSRC | BRBTGT | BRBINF |	| TX = 1 | LF = 0 | [TX failed]
+ * ---------------------------------	-------------------
+ * | 62 | BRBSRC | BRBTGT | BRBINF |	| TX = 1 | LF = 0 | [TX failed]
+ * ---------------------------------	-------------------
+ * | 63 | BRBSRC | BRBTGT | BRBINF |	| TX = 1 | LF = 0 | [TX failed]
+ * ---------------------------------	-------------------
+ *
+ * BRBFCR_EL1.LASTFAILED == 1
+ *
+ * BRBFCR_EL1.LASTFAILED aborts all of the consecutive in-transaction
+ * branch records near the end of the BRBE buffer.
+ *
+ * The architecture does not guarantee a non-transaction (TX = 0)
+ * branch record between two different transactions, so a subsequent
+ * lastfailed record (TX = 0, LF = 1) might erroneously mark more
+ * transactions as aborted than required.
+ */
+static void process_branch_aborts(struct pmu_hw_events *cpuc)
+{
+	u64 brbfcr = read_sysreg_s(SYS_BRBFCR_EL1);
+	bool lastfailed = !!(brbfcr & BRBFCR_EL1_LASTFAILED);
+	int idx = brbe_get_numrec(cpuc->percpu_pmu->reg_brbidr) - 1;
+	struct perf_branch_entry *entry;
+
+	do {
+		entry = &cpuc->branches->branch_entries[idx];
+		if (entry->in_tx) {
+			entry->abort = lastfailed;
+		} else {
+			lastfailed = entry->abort;
+			entry->abort = false;
+		}
+	} while (idx--, idx >= 0);
+}
+
+static void brbe_regset_branch_entries(struct pmu_hw_events *cpuc, struct perf_event *event,
+				       struct brbe_regset *regset, int idx)
+{
+	struct perf_branch_entry *entry = &cpuc->branches->branch_entries[idx];
+	u64 brbinf = regset[idx].brbinf;
+
+	perf_clear_branch_entry_bitfields(entry);
+	if (brbe_record_is_complete(brbinf)) {
+		entry->from = regset[idx].brbsrc;
+		entry->to = regset[idx].brbtgt;
+	} else if (brbe_record_is_source_only(brbinf)) {
+		entry->from = regset[idx].brbsrc;
+		entry->to = 0;
+	} else if (brbe_record_is_target_only(brbinf)) {
+		entry->from = 0;
+		entry->to = regset[idx].brbtgt;
+	}
+	capture_brbe_flags(entry, event, brbinf);
+}
+
+static void process_branch_entries(struct pmu_hw_events *cpuc, struct perf_event *event,
+				   struct brbe_regset *regset, int nr_regset)
+{
+	int idx;
+
+	for (idx = 0; idx < nr_regset; idx++)
+		brbe_regset_branch_entries(cpuc, event, regset, idx);
+
+	cpuc->branches->branch_stack.nr = nr_regset;
+	cpuc->branches->branch_stack.hw_idx = -1ULL;
+}
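[Note] The LASTFAILED back-propagation in process_branch_aborts() is subtle,
so here is a standalone model of the same oldest-to-newest walk on the TX/LF
pattern from the comment above (records 3..4 in a failed transaction, record
5 carrying LASTFAILED; synthetic data, illustrative only):

#include <stdio.h>
#include <stdbool.h>

struct rec { bool in_tx; bool abort; };

int main(void)
{
	/* Index 0 is newest; 'abort' initially holds the record's LF bit */
	struct rec recs[6] = {
		{ true,  false },	/* 0: TX=1 (succeeded) */
		{ true,  false },	/* 1: TX=1 (succeeded) */
		{ false, false },	/* 2: TX=0             */
		{ true,  false },	/* 3: TX=1 (failed)    */
		{ true,  false },	/* 4: TX=1 (failed)    */
		{ false, true  },	/* 5: TX=0, LF=1       */
	};
	bool lastfailed = false;	/* BRBFCR_EL1.LASTFAILED */
	int idx = 5;

	do {
		if (recs[idx].in_tx) {
			recs[idx].abort = lastfailed;
		} else {
			lastfailed = recs[idx].abort;
			recs[idx].abort = false;
		}
	} while (idx--, idx >= 0);

	/* Prints abort=1 only for records 3 and 4 */
	for (idx = 0; idx < 6; idx++)
		printf("rec%d abort=%d\n", idx, recs[idx].abort);
	return 0;
}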
+void armv8pmu_branch_read(struct pmu_hw_events *cpuc, struct perf_event *event)
+{
+	struct arm64_perf_task_context *task_ctx = event->ctx->task_ctx_data;
+	struct brbe_regset live[BRBE_MAX_ENTRIES];
+	int nr_live, nr_store, nr_hw_entries;
+	u64 brbfcr, brbcr;
+
+	brbcr = read_sysreg_s(SYS_BRBCR_EL1);
+	brbfcr = read_sysreg_s(SYS_BRBFCR_EL1);
+
+	/* Ensure pause on PMU interrupt is enabled */
+	WARN_ON_ONCE(!(brbcr & BRBCR_ELx_FZP));
+
+	/* Pause the buffer */
+	write_sysreg_s(brbfcr | BRBFCR_EL1_PAUSED, SYS_BRBFCR_EL1);
+	isb();
+
+	/*
+	 * If the overflowed event's branch_sample_type does not match
+	 * the branch filters configured in the BRBE hardware, the
+	 * captured branch records cannot be correlated with the
+	 * overflowed event. Report to the user as if no branch records
+	 * were captured, and flush the branch records.
+	 */
+	if ((cpuc->brbe_sample_type != event->attr.branch_sample_type) ||
+	    (event->ctx->task && cpuc->brbe_context != event->ctx)) {
+		cpuc->branches->branch_stack.nr = 0;
+		cpuc->branches->branch_stack.hw_idx = -1ULL;
+		goto unpause_reset;
+	}
+
+	nr_hw_entries = brbe_get_numrec(cpuc->percpu_pmu->reg_brbidr);
+	nr_live = capture_brbe_regset(live, nr_hw_entries);
+	if (event->ctx->task) {
+		nr_store = task_ctx->nr_brbe_records;
+		nr_store = stitch_stored_live_entries(task_ctx->store, live, nr_store,
+						      nr_live, nr_hw_entries);
+		process_branch_entries(cpuc, event, task_ctx->store, nr_store);
+		task_ctx->nr_brbe_records = 0;
+	} else {
+		process_branch_entries(cpuc, event, live, nr_live);
+	}
+	process_branch_aborts(cpuc);
+
+unpause_reset:
+	/* Unpause the buffer */
+	write_sysreg_s(brbfcr & ~BRBFCR_EL1_PAUSED, SYS_BRBFCR_EL1);
+	isb();
+	armv8pmu_branch_reset();
+}
diff --git a/drivers/perf/arm_brbe.h b/drivers/perf/arm_brbe.h
new file mode 100644
index 0000000000000000000000000000000000000000..5ba6cf1b0099d915c37daceb564e799c55c89e9e
--- /dev/null
+++ b/drivers/perf/arm_brbe.h
@@ -0,0 +1,258 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Branch Record Buffer Extension Helpers.
+ *
+ * Copyright (C) 2022-2023 ARM Limited
+ *
+ * Author: Anshuman Khandual
+ */
+#define pr_fmt(fmt) "brbe: " fmt
+
+#include <linux/perf/arm_pmu.h>
+
+#define BRBFCR_EL1_BRANCH_FILTERS (BRBFCR_EL1_DIRECT   | \
+				   BRBFCR_EL1_INDIRECT | \
+				   BRBFCR_EL1_RTN      | \
+				   BRBFCR_EL1_INDCALL  | \
+				   BRBFCR_EL1_DIRCALL  | \
+				   BRBFCR_EL1_CONDDIR)
+
+#define BRBFCR_EL1_DEFAULT_CONFIG (BRBFCR_EL1_BANK_MASK | \
+				   BRBFCR_EL1_PAUSED    | \
+				   BRBFCR_EL1_EnI       | \
+				   BRBFCR_EL1_BRANCH_FILTERS)
+
+/*
+ * BRBTS_EL1 is not currently used by the branch stack implementation,
+ * but BRBCR_ELx.TS still needs a valid value from all the available
+ * options. BRBCR_ELx_TS_VIRTUAL is selected for this.
+ */
+#define BRBCR_ELx_DEFAULT_TS      FIELD_PREP(BRBCR_ELx_TS_MASK, BRBCR_ELx_TS_VIRTUAL)
+
+#define BRBCR_ELx_CONFIG_MASK     (BRBCR_ELx_EXCEPTION | \
+				   BRBCR_ELx_ERTN      | \
+				   BRBCR_ELx_CC        | \
+				   BRBCR_ELx_MPRED     | \
+				   BRBCR_ELx_ExBRE     | \
+				   BRBCR_ELx_E0BRE     | \
+				   BRBCR_ELx_FZP       | \
+				   BRBCR_ELx_TS_MASK)
+/*
+ * BRBE Buffer Organization
+ *
+ * The BRBE buffer is arranged as multiple banks of 32 branch record
+ * entries each. An individual branch record in a given bank is
+ * accessed by selecting the bank in BRBFCR_EL1.BANK and then reading
+ * the register triplet [BRBSRC, BRBTGT, BRBINF] at an index in
+ * [0..31].
+ *
+ * Bank 0
+ *
+ * ---------------------------------	------
+ * | 00 | BRBSRC | BRBTGT | BRBINF |	| 00 |
+ * ---------------------------------	------
+ * | 01 | BRBSRC | BRBTGT | BRBINF |	| 01 |
+ * ---------------------------------	------
+ * | .. | BRBSRC | BRBTGT | BRBINF |	| ..
| + * --------------------------------- ------ + * | 31 | BRBSRC | BRBTGT | BRBINF | | 31 | + * --------------------------------- ------ + * + * Bank 1 + * + * --------------------------------- ------ + * | 32 | BRBSRC | BRBTGT | BRBINF | | 00 | + * --------------------------------- ------ + * | 33 | BRBSRC | BRBTGT | BRBINF | | 01 | + * --------------------------------- ------ + * | .. | BRBSRC | BRBTGT | BRBINF | | .. | + * --------------------------------- ------ + * | 63 | BRBSRC | BRBTGT | BRBINF | | 31 | + * --------------------------------- ------ + */ +#define BRBE_BANK_MAX_ENTRIES 32 +#define BRBE_MAX_BANK 2 +#define BRBE_MAX_ENTRIES (BRBE_BANK_MAX_ENTRIES * BRBE_MAX_BANK) + +#define BRBE_BANK0_IDX_MIN 0 +#define BRBE_BANK0_IDX_MAX 31 +#define BRBE_BANK1_IDX_MIN 32 +#define BRBE_BANK1_IDX_MAX 63 + +struct brbe_regset { + unsigned long brbsrc; + unsigned long brbtgt; + unsigned long brbinf; +}; + +struct arm64_perf_task_context { + struct brbe_regset store[BRBE_MAX_ENTRIES]; + int nr_brbe_records; +}; + +struct brbe_hw_attr { + int brbe_version; + int brbe_cc; + int brbe_nr; + int brbe_format; +}; + +enum brbe_bank_idx { + BRBE_BANK_IDX_INVALID = -1, + BRBE_BANK_IDX_0, + BRBE_BANK_IDX_1, + BRBE_BANK_IDX_MAX +}; + +#define RETURN_READ_BRBSRCN(n) \ + read_sysreg_s(SYS_BRBSRC##n##_EL1) + +#define RETURN_READ_BRBTGTN(n) \ + read_sysreg_s(SYS_BRBTGT##n##_EL1) + +#define RETURN_READ_BRBINFN(n) \ + read_sysreg_s(SYS_BRBINF##n##_EL1) + +#define BRBE_REGN_SWITCH(x, case_macro) \ + do { \ + switch (x) { \ + case 0: return case_macro(0); break; \ + case 1: return case_macro(1); break; \ + case 2: return case_macro(2); break; \ + case 3: return case_macro(3); break; \ + case 4: return case_macro(4); break; \ + case 5: return case_macro(5); break; \ + case 6: return case_macro(6); break; \ + case 7: return case_macro(7); break; \ + case 8: return case_macro(8); break; \ + case 9: return case_macro(9); break; \ + case 10: return case_macro(10); break; \ + case 11: return case_macro(11); break; \ + case 12: return case_macro(12); break; \ + case 13: return case_macro(13); break; \ + case 14: return case_macro(14); break; \ + case 15: return case_macro(15); break; \ + case 16: return case_macro(16); break; \ + case 17: return case_macro(17); break; \ + case 18: return case_macro(18); break; \ + case 19: return case_macro(19); break; \ + case 20: return case_macro(20); break; \ + case 21: return case_macro(21); break; \ + case 22: return case_macro(22); break; \ + case 23: return case_macro(23); break; \ + case 24: return case_macro(24); break; \ + case 25: return case_macro(25); break; \ + case 26: return case_macro(26); break; \ + case 27: return case_macro(27); break; \ + case 28: return case_macro(28); break; \ + case 29: return case_macro(29); break; \ + case 30: return case_macro(30); break; \ + case 31: return case_macro(31); break; \ + default: \ + pr_warn("unknown register index\n"); \ + return -1; \ + } \ + } while (0) + +static inline int buffer_to_brbe_idx(int buffer_idx) +{ + return buffer_idx % BRBE_BANK_MAX_ENTRIES; +} + +static inline u64 get_brbsrc_reg(int buffer_idx) +{ + int brbe_idx = buffer_to_brbe_idx(buffer_idx); + + BRBE_REGN_SWITCH(brbe_idx, RETURN_READ_BRBSRCN); +} + +static inline u64 get_brbtgt_reg(int buffer_idx) +{ + int brbe_idx = buffer_to_brbe_idx(buffer_idx); + + BRBE_REGN_SWITCH(brbe_idx, RETURN_READ_BRBTGTN); +} + +static inline u64 get_brbinf_reg(int buffer_idx) +{ + int brbe_idx = buffer_to_brbe_idx(buffer_idx); + + BRBE_REGN_SWITCH(brbe_idx, 
+
+static inline u64 brbe_record_valid(u64 brbinf)
+{
+	return FIELD_GET(BRBINFx_EL1_VALID_MASK, brbinf);
+}
+
+static inline bool brbe_invalid(u64 brbinf)
+{
+	return brbe_record_valid(brbinf) == BRBINFx_EL1_VALID_NONE;
+}
+
+static inline bool brbe_record_is_complete(u64 brbinf)
+{
+	return brbe_record_valid(brbinf) == BRBINFx_EL1_VALID_FULL;
+}
+
+static inline bool brbe_record_is_source_only(u64 brbinf)
+{
+	return brbe_record_valid(brbinf) == BRBINFx_EL1_VALID_SOURCE;
+}
+
+static inline bool brbe_record_is_target_only(u64 brbinf)
+{
+	return brbe_record_valid(brbinf) == BRBINFx_EL1_VALID_TARGET;
+}
+
+static inline int brbe_get_in_tx(u64 brbinf)
+{
+	return FIELD_GET(BRBINFx_EL1_T_MASK, brbinf);
+}
+
+static inline int brbe_get_mispredict(u64 brbinf)
+{
+	return FIELD_GET(BRBINFx_EL1_MPRED_MASK, brbinf);
+}
+
+static inline int brbe_get_lastfailed(u64 brbinf)
+{
+	return FIELD_GET(BRBINFx_EL1_LASTFAILED_MASK, brbinf);
+}
+
+static inline int brbe_get_cycles(u64 brbinf)
+{
+	/*
+	 * The captured cycle count is unknown when BRBINFx_EL1.CCU is
+	 * set and hence must not be passed on to user space.
+	 */
+	if (brbinf & BRBINFx_EL1_CCU)
+		return 0;
+
+	return FIELD_GET(BRBINFx_EL1_CC_MASK, brbinf);
+}
+
+static inline int brbe_get_type(u64 brbinf)
+{
+	return FIELD_GET(BRBINFx_EL1_TYPE_MASK, brbinf);
+}
+
+static inline int brbe_get_el(u64 brbinf)
+{
+	return FIELD_GET(BRBINFx_EL1_EL_MASK, brbinf);
+}
+
+static inline int brbe_get_numrec(u64 brbidr)
+{
+	return FIELD_GET(BRBIDR0_EL1_NUMREC_MASK, brbidr);
+}
+
+static inline int brbe_get_format(u64 brbidr)
+{
+	return FIELD_GET(BRBIDR0_EL1_FORMAT_MASK, brbidr);
+}
+
+static inline int brbe_get_cc_bits(u64 brbidr)
+{
+	return FIELD_GET(BRBIDR0_EL1_CC_MASK, brbidr);
+}
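Again for illustration, this is approximately how the field accessors above would populate one perf_branch_entry, along the lines of what process_branch_entries() must do per record. A sketch under assumptions: the mapping of brbe_get_type() onto the PERF_BR_* space and the handling of source-only/target-only records are deliberately omitted, and perf_clear_branch_entry_bitfields() comes from <linux/perf_event.h> (extended later in this patch to also clear the new 'spec' field).

/* Illustrative sketch, not part of the patch */
static void brbe_regset_to_perf_entry(struct perf_branch_entry *entry,
				      const struct brbe_regset *regset)
{
	u64 brbinf = regset->brbinf;

	perf_clear_branch_entry_bitfields(entry);
	entry->from = regset->brbsrc;
	entry->to = regset->brbtgt;
	entry->mispred = brbe_get_mispredict(brbinf);
	entry->predicted = !entry->mispred;
	entry->in_tx = brbe_get_in_tx(brbinf);
	entry->cycles = brbe_get_cycles(brbinf);
	/*
	 * brbe_get_type() returns a raw BRBINFx_EL1.TYPE value which still
	 * needs translating into the PERF_BR_* space; that mapping lives
	 * in the driver proper and is not reproduced here.
	 */
}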
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index c87faafbdba246ff76e44ecfbbf93a78ac027f05..d02a85c0bedca1cc363b18fca9cb8955ab1bdee8 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -313,6 +313,11 @@ armpmu_del(struct perf_event *event, int flags)
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
 
+	WARN_ON_ONCE(!hw_events->brbe_users);
+	hw_events->brbe_users--;
+	if (!hw_events->brbe_users)
+		hw_events->brbe_context = NULL;
+
 	armpmu_stop(event, PERF_EF_UPDATE);
 	hw_events->events[idx] = NULL;
 	armpmu->clear_event_idx(hw_events, event);
@@ -329,6 +334,19 @@ armpmu_add(struct perf_event *event, int flags)
 	struct hw_perf_event *hwc = &event->hw;
 	int idx;
 
+	/*
+	 * Reset the branch records buffer if a new task event gets
+	 * scheduled on a PMU which might have existing records.
+	 * Otherwise older branch records present in the buffer might
+	 * leak into the new task event.
+	 */
+	if (event->ctx->task && hw_events->brbe_context != event->ctx) {
+		hw_events->brbe_context = event->ctx;
+		if (armpmu->branch_reset)
+			armpmu->branch_reset();
+	}
+	hw_events->brbe_users++;
+
 	/* An event following a process won't be stopped earlier */
 	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
 		return -ENOENT;
@@ -508,8 +526,11 @@ static int armpmu_event_init(struct perf_event *event)
 	    !cpumask_test_cpu(event->cpu, &armpmu->supported_cpus))
 		return -ENOENT;
 
-	/* does not support taken branch sampling */
-	if (has_branch_stack(event))
+	/*
+	 * Branch stack sampling events are allowed only on
+	 * PMUs with the required support.
+	 */
+	if (has_branch_stack(event) && !armpmu->has_branch_stack)
 		return -EOPNOTSUPP;
 
 	if (armpmu->map_event(event) == -ENOENT)
@@ -518,6 +539,14 @@ static int armpmu_event_init(struct perf_event *event)
 	return __hw_perf_event_init(event);
 }
 
+static void armpmu_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(ctx->pmu);
+
+	if (armpmu->sched_task)
+		armpmu->sched_task(ctx, sched_in);
+}
+
 static void armpmu_enable(struct pmu *pmu)
 {
 	struct arm_pmu *armpmu = to_arm_pmu(pmu);
@@ -904,6 +933,7 @@ static struct arm_pmu *__armpmu_alloc(gfp_t flags)
 	}
 
 	pmu->pmu = (struct pmu) {
+		.sched_task	= armpmu_sched_task,
 		.pmu_enable	= armpmu_enable,
 		.pmu_disable	= armpmu_disable,
 		.event_init	= armpmu_event_init,
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 76042208eb0b54e482d139b2ad9e164a2bfc395f..6fd58c8f93f921729cf3cf954a98bc9f7237be89 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -41,6 +41,18 @@
 	},						\
 }
 
+/*
+ * Maximum number of branch record entries that can be processed
+ * for core perf branch stack sampling, regardless of the hardware
+ * support available on a given ARM PMU.
+ */
+#define MAX_BRANCH_RECORDS 64
+
+struct branch_records {
+	struct perf_branch_stack	branch_stack;
+	struct perf_branch_entry	branch_entries[MAX_BRANCH_RECORDS];
+};
+
 /* The events for a given PMU register set. */
 struct pmu_hw_events {
 	/*
@@ -67,6 +79,11 @@ struct pmu_hw_events {
 	struct arm_pmu		*percpu_pmu;
 
 	int irq;
+
+	struct branch_records	*branches;
+	void			*brbe_context;
+	unsigned int		brbe_users;
+	unsigned long		brbe_sample_type;
 };
 
 enum armpmu_attr_groups {
@@ -97,9 +114,13 @@ struct arm_pmu {
 	void		(*stop)(struct arm_pmu *);
 	void		(*reset)(void *);
 	int		(*map_event)(struct perf_event *event);
+	void		(*sched_task)(struct perf_event_context *ctx, bool sched_in);
+	void		(*branch_reset)(void);
 	int		(*filter_match)(struct perf_event *event);
 	int		num_events;
-	bool		secure_access; /* 32-bit ARM only */
+	unsigned int	secure_access	 : 1, /* 32-bit ARM only */
+			has_branch_stack : 1, /* 64-bit ARM only */
+			reserved	 : 30;
 #define ARMV8_PMUV3_MAX_COMMON_EVENTS		0x40
 	DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
 #define ARMV8_PMUV3_EXT_COMMON_EVENT_BASE	0x4000
@@ -113,6 +134,11 @@ struct arm_pmu {
 	/* store the PMMIR_EL1 to expose slots */
 	u64		reg_pmmir;
 
+#ifdef CONFIG_ARM64_BRBE
+	/* store the BRBIDR0_EL1 capturing attributes */
+	u64		reg_brbidr;
+#endif
+
 	/* Only to be used by ACPI probing code */
 	unsigned long acpi_cpuid;
 };
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 8e0e8cd6d4bf4ad849789600ee6b7a71b0c6bb56..7e0438d1119a178749470ace8a951b5cc113b7ea 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1079,6 +1079,7 @@ static inline void perf_clear_branch_entry_bitfields(struct perf_branch_entry *b
 	br->abort = 0;
 	br->cycles = 0;
 	br->type = 0;
+	br->spec = PERF_BR_SPEC_NA;
 	br->reserved = 0;
 }
 
@@ -1628,4 +1629,30 @@ extern void __weak arch_perf_update_userpage(struct perf_event *event,
 					     struct perf_event_mmap_page *userpg,
 					     u64 now);
 
+#ifdef CONFIG_PERF_EVENTS
+static inline bool branch_sample_no_flags(const struct perf_event *event)
+{
+	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_FLAGS;
+}
+
+static inline bool branch_sample_no_cycles(const struct perf_event *event)
+{
+	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_CYCLES;
+}
+
+static inline bool branch_sample_type(const struct perf_event *event)
+{
+	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_TYPE_SAVE;
+}
+
+static inline bool branch_sample_hw_index(const struct perf_event *event)
+{
+	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
+}
+
+static inline bool branch_sample_priv(const struct perf_event *event)
+{
+	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE;
+}
+#endif /* CONFIG_PERF_EVENTS */
 #endif /* _LINUX_PERF_EVENT_H */
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 4b0893023877c1b9683b533fd33e66e50e0ba11c..aa7c556fbd8a45b03ef1ca77340044593127a1c1 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -188,6 +188,8 @@ enum perf_branch_sample_type_shift {
 	PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT	= 17, /* save low level index of raw branch records */
 
+	PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT	= 18, /* save privilege mode */
+
 	PERF_SAMPLE_BRANCH_MAX_SHIFT		/* non-ABI */
 };
 
@@ -217,6 +219,8 @@ enum perf_branch_sample_type {
 	PERF_SAMPLE_BRANCH_HW_INDEX	= 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT,
 
+	PERF_SAMPLE_BRANCH_PRIV_SAVE	= 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT,
+
 	PERF_SAMPLE_BRANCH_MAX		= 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
 };
 
@@ -235,9 +239,50 @@ enum {
 	PERF_BR_SYSRET		= 8,	/* syscall return */
 	PERF_BR_COND_CALL	= 9,	/* conditional function call */
 	PERF_BR_COND_RET	= 10,	/* conditional function return */
+	PERF_BR_ERET		= 11,	/* exception return */
+	PERF_BR_IRQ		= 12,	/* irq */
+	PERF_BR_SERROR		= 13,	/* system error */
+	PERF_BR_NO_TX		= 14,	/* not in transaction */
+	PERF_BR_EXTEND_ABI	= 15,	/* extend ABI */
 	PERF_BR_MAX,
 };
 
+/*
+ * Common branch speculation outcome classification
+ */
+enum {
+	PERF_BR_SPEC_NA			= 0,	/* Not available */
+	PERF_BR_SPEC_WRONG_PATH		= 1,	/* Speculative but on wrong path */
+	PERF_BR_NON_SPEC_CORRECT_PATH	= 2,	/* Non-speculative but on correct path */
+	PERF_BR_SPEC_CORRECT_PATH	= 3,	/* Speculative and on correct path */
+	PERF_BR_SPEC_MAX,
+};
+
+enum {
+	PERF_BR_NEW_FAULT_ALGN	= 0,	/* Alignment fault */
+	PERF_BR_NEW_FAULT_DATA	= 1,	/* Data fault */
+	PERF_BR_NEW_FAULT_INST	= 2,	/* Inst fault */
+	PERF_BR_NEW_ARCH_1	= 3,	/* Architecture specific */
+	PERF_BR_NEW_ARCH_2	= 4,	/* Architecture specific */
+	PERF_BR_NEW_ARCH_3	= 5,	/* Architecture specific */
+	PERF_BR_NEW_ARCH_4	= 6,	/* Architecture specific */
+	PERF_BR_NEW_ARCH_5	= 7,	/* Architecture specific */
+	PERF_BR_NEW_MAX,
+};
+
+enum {
+	PERF_BR_PRIV_UNKNOWN	= 0,
+	PERF_BR_PRIV_USER	= 1,
+	PERF_BR_PRIV_KERNEL	= 2,
+	PERF_BR_PRIV_HV		= 3,
+};
+
+#define PERF_BR_ARM64_FIQ		PERF_BR_NEW_ARCH_1
+#define PERF_BR_ARM64_DEBUG_HALT	PERF_BR_NEW_ARCH_2
+#define PERF_BR_ARM64_DEBUG_EXIT	PERF_BR_NEW_ARCH_3
+#define PERF_BR_ARM64_DEBUG_INST	PERF_BR_NEW_ARCH_4
+#define PERF_BR_ARM64_DEBUG_DATA	PERF_BR_NEW_ARCH_5
+
 #define PERF_SAMPLE_BRANCH_PLM_ALL \
 	(PERF_SAMPLE_BRANCH_USER|\
 	 PERF_SAMPLE_BRANCH_KERNEL|\
@@ -1285,6 +1330,7 @@ union perf_mem_data_src {
  *     abort: aborting a hardware transaction
  *     cycles: cycles from last branch (or 0 if not supported)
  *     type: branch type
+ *     spec: branch speculation info (or 0 if not supported)
  */
 struct perf_branch_entry {
 	__u64	from;
@@ -1295,7 +1341,14 @@ struct perf_branch_entry {
 		abort:1,    /* transaction abort */
 		cycles:16,  /* cycle count to last branch */
 		type:4,     /* branch type */
+#ifdef __GENKSYMS__
 		reserved:40;
+#else
+		spec:2,     /* branch speculation info */
+		new_type:4, /* additional branch type */
+		priv:3,     /* privilege level */
+		reserved:31;
+#endif
 	};
 
 union perf_sample_weight {
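To see how these new ABI bits are meant to be consumed, here is a minimal user-space sketch. It is illustrative only: the perf_event_open() setup, the mmap ring-buffer handling and all error paths are elided, and a PMU with branch stack support (such as BRBE) is assumed.

#include <linux/perf_event.h>

/* Request branch records carrying the extended type and privilege info */
static void setup_branch_sampling(struct perf_event_attr *attr)
{
	attr->sample_type |= PERF_SAMPLE_BRANCH_STACK;
	attr->branch_sample_type = PERF_SAMPLE_BRANCH_ANY |
				   PERF_SAMPLE_BRANCH_USER |
				   PERF_SAMPLE_BRANCH_TYPE_SAVE |	/* fills type/new_type */
				   PERF_SAMPLE_BRANCH_PRIV_SAVE;	/* fills priv */
}

/* Each decoded perf_branch_entry then carries the new fields */
static void inspect_entry(const struct perf_branch_entry *br)
{
	if (br->priv == PERF_BR_PRIV_KERNEL) {
		/* this branch executed at kernel privilege */
	}

	if (br->type == PERF_BR_EXTEND_ABI && br->new_type == PERF_BR_ARM64_FIQ) {
		/* an FIQ, classified through the arm64 extension of the type space */
	}
}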
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d9b1916c9adec7cfdc46334fd0f63e34fb2af84f..9eed32231bf7d67856a13963aa27a46dc4d87466 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6867,11 +6867,6 @@ static void perf_output_read(struct perf_output_handle *handle,
 		perf_output_read_one(handle, event, enabled, running);
 }
 
-static inline bool perf_sample_save_hw_index(struct perf_event *event)
-{
-	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
-}
-
 void perf_output_sample(struct perf_output_handle *handle,
 			struct perf_event_header *header,
 			struct perf_sample_data *data,
@@ -6960,7 +6955,7 @@ void perf_output_sample(struct perf_output_handle *handle,
 				     * sizeof(struct perf_branch_entry);
 
 			perf_output_put(handle, data->br_stack->nr);
-			if (perf_sample_save_hw_index(event))
+			if (branch_sample_hw_index(event))
 				perf_output_put(handle, data->br_stack->hw_idx);
 			perf_output_copy(handle, data->br_stack->entries, size);
 		} else {
@@ -7167,7 +7162,7 @@ void perf_prepare_sample(struct perf_event_header *header,
 	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
 		int size = sizeof(u64); /* nr */
 		if (data->br_stack) {
-			if (perf_sample_save_hw_index(event))
+			if (branch_sample_hw_index(event))
 				size += sizeof(u64);
 
 			size += data->br_stack->nr
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index e0b41a42c524ac7ca477c1b3bfc4b4854dcf7d55..c2c9df6180d844ed1f3148f9d4d8f6f9e6c92590 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -188,6 +188,8 @@ enum perf_branch_sample_type_shift {
 	PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT	= 17, /* save low level index of raw branch records */
 
+	PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT	= 18, /* save privilege mode */
+
 	PERF_SAMPLE_BRANCH_MAX_SHIFT		/* non-ABI */
 };
 
@@ -217,6 +219,8 @@ enum perf_branch_sample_type {
 	PERF_SAMPLE_BRANCH_HW_INDEX	= 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT,
 
+	PERF_SAMPLE_BRANCH_PRIV_SAVE	= 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT,
+
 	PERF_SAMPLE_BRANCH_MAX		= 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
 };
 
@@ -235,9 +239,39 @@ enum {
 	PERF_BR_SYSRET		= 8,	/* syscall return */
 	PERF_BR_COND_CALL	= 9,	/* conditional function call */
 	PERF_BR_COND_RET	= 10,	/* conditional function return */
+	PERF_BR_ERET		= 11,	/* exception return */
+	PERF_BR_IRQ		= 12,	/* irq */
+	PERF_BR_SERROR		= 13,	/* system error */
+	PERF_BR_NO_TX		= 14,	/* not in transaction */
+	PERF_BR_EXTEND_ABI	= 15,	/* extend ABI */
 	PERF_BR_MAX,
 };
 
+enum {
+	PERF_BR_NEW_FAULT_ALGN	= 0,	/* Alignment fault */
+	PERF_BR_NEW_FAULT_DATA	= 1,	/* Data fault */
+	PERF_BR_NEW_FAULT_INST	= 2,	/* Inst fault */
+	PERF_BR_NEW_ARCH_1	= 3,	/* Architecture specific */
+	PERF_BR_NEW_ARCH_2	= 4,	/* Architecture specific */
+	PERF_BR_NEW_ARCH_3	= 5,	/* Architecture specific */
+	PERF_BR_NEW_ARCH_4	= 6,	/* Architecture specific */
+	PERF_BR_NEW_ARCH_5	= 7,	/* Architecture specific */
+	PERF_BR_NEW_MAX,
+};
+
+enum {
+	PERF_BR_PRIV_UNKNOWN	= 0,
+	PERF_BR_PRIV_USER	= 1,
+	PERF_BR_PRIV_KERNEL	= 2,
+	PERF_BR_PRIV_HV		= 3,
+};
+
+#define PERF_BR_ARM64_FIQ		PERF_BR_NEW_ARCH_1
+#define PERF_BR_ARM64_DEBUG_HALT	PERF_BR_NEW_ARCH_2
+#define PERF_BR_ARM64_DEBUG_EXIT	PERF_BR_NEW_ARCH_3
+#define PERF_BR_ARM64_DEBUG_INST	PERF_BR_NEW_ARCH_4
+#define PERF_BR_ARM64_DEBUG_DATA	PERF_BR_NEW_ARCH_5
+
 #define PERF_SAMPLE_BRANCH_PLM_ALL \
 	(PERF_SAMPLE_BRANCH_USER|\
 	 PERF_SAMPLE_BRANCH_KERNEL|\
@@ -1284,7 +1318,10 @@ struct perf_branch_entry {
 		abort:1,    /* transaction abort */
 		cycles:16,  /* cycle count to last branch */
 		type:4,     /* branch type */
-		reserved:40;
+		spec:2,     /* branch speculation info */
+		new_type:4, /* additional branch type */
+		priv:3,     /* privilege level */
+		reserved:31;
 };
 
 union perf_sample_weight {
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 7e06df4715edf3f5e5bb2b7d13389fad9b3a9b69..6afbe7a3a146b0a6407c600a68f48813881ed225 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -814,6 +814,16 @@ mispred_str(struct branch_entry *br)
 	return br->flags.predicted ? 'P' : 'M';
 }
 
+static int print_bstack_flags(FILE *fp, struct branch_entry *br)
+{
+	return fprintf(fp, "/%c/%c/%c/%d/%s ",
+		       mispred_str(br),
+		       br->flags.in_tx ? 'X' : '-',
+		       br->flags.abort ? 'A' : '-',
+		       br->flags.cycles,
+		       get_branch_type(br));
+}
+
 static int perf_sample__fprintf_brstack(struct perf_sample *sample,
 					struct thread *thread,
 					struct perf_event_attr *attr, FILE *fp)
@@ -852,11 +862,7 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample,
 			printed += fprintf(fp, ")");
 		}
 
-		printed += fprintf(fp, "/%c/%c/%c/%d ",
-			mispred_str(entries + i),
-			entries[i].flags.in_tx ? 'X' : '-',
-			entries[i].flags.abort ? 'A' : '-',
-			entries[i].flags.cycles);
+		printed += print_bstack_flags(fp, entries + i);
 	}
 
 	return printed;
@@ -898,11 +904,7 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample,
 			printed += map__fprintf_dsoname(alt.map, fp);
 			printed += fprintf(fp, ")");
 		}
-		printed += fprintf(fp, "/%c/%c/%c/%d ",
-			mispred_str(entries + i),
-			entries[i].flags.in_tx ? 'X' : '-',
-			entries[i].flags.abort ? 'A' : '-',
-			entries[i].flags.cycles);
+		printed += print_bstack_flags(fp, entries + i);
 	}
 
 	return printed;
@@ -948,11 +950,7 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample,
 			printed += map__fprintf_dsoname(alt.map, fp);
 			printed += fprintf(fp, ")");
 		}
-		printed += fprintf(fp, "/%c/%c/%c/%d ",
-			mispred_str(entries + i),
-			entries[i].flags.in_tx ? 'X' : '-',
-			entries[i].flags.abort ? 'A' : '-',
-			entries[i].flags.cycles);
+		printed += print_bstack_flags(fp, entries + i);
 	}
 
 	return printed;
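With print_bstack_flags() factored out above, every brstack flags field printed by perf script gains a trailing branch type name from get_branch_type(), i.e. "/M|P/X|-/A|-/<cycles>/<TYPE>" such as "/P/-/-/7/COND", where <TYPE> may also be an extended name like "ARM64_FIQ". An illustrative recording session exercising the options this patch adds to parse-branch-options.c further below (assuming a BRBE-capable PMU):

	perf record -j any,u,save_type,priv -- ./workload
	perf script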
diff --git a/tools/perf/util/branch.c b/tools/perf/util/branch.c
index 2285b1eb3128d7eb2d9d8b710992cde074fe035e..6d38238481d327d00b5e13fa6171f387e4d50d16 100644
--- a/tools/perf/util/branch.c
+++ b/tools/perf/util/branch.c
@@ -21,7 +21,10 @@ void branch_type_count(struct branch_type_stat *st, struct branch_flags *flags,
 	if (flags->type == PERF_BR_UNKNOWN || from == 0)
 		return;
 
-	st->counts[flags->type]++;
+	if (flags->type == PERF_BR_EXTEND_ABI)
+		st->new_counts[flags->new_type]++;
+	else
+		st->counts[flags->type]++;
 
 	if (flags->type == PERF_BR_COND) {
 		if (to > from)
@@ -36,6 +39,38 @@ void branch_type_count(struct branch_type_stat *st, struct branch_flags *flags,
 		st->cross_4k++;
 }
 
+const char *branch_new_type_name(int new_type)
+{
+	const char *branch_new_names[PERF_BR_NEW_MAX] = {
+		"FAULT_ALGN",
+		"FAULT_DATA",
+		"FAULT_INST",
+/*
+ * TODO: This switch should happen on 'session->header.env.arch'
+ * instead, because an arm64 perf recording could be opened for
+ * analysis on other platforms as well; see the sketch below.
+ */
+#ifdef __aarch64__
+		"ARM64_FIQ",
+		"ARM64_DEBUG_HALT",
+		"ARM64_DEBUG_EXIT",
+		"ARM64_DEBUG_INST",
+		"ARM64_DEBUG_DATA"
+#else
+		"ARCH_1",
+		"ARCH_2",
+		"ARCH_3",
+		"ARCH_4",
+		"ARCH_5"
+#endif
+	};
+
+	if (new_type >= 0 && new_type < PERF_BR_NEW_MAX)
+		return branch_new_names[new_type];
+
+	return NULL;
+}
+
 const char *branch_type_name(int type)
 {
 	const char *branch_names[PERF_BR_MAX] = {
@@ -49,7 +84,12 @@ const char *branch_type_name(int type)
 		"SYSCALL",
 		"SYSRET",
 		"COND_CALL",
-		"COND_RET"
+		"COND_RET",
+		"ERET",
+		"IRQ",
+		"SERROR",
+		"NO_TX",
+		"", // Placeholder for PERF_BR_EXTEND_ABI; a NULL entry here triggers compiler warnings about NULL deref
 	};
 
 	if (type >= 0 && type < PERF_BR_MAX)
@@ -58,6 +98,17 @@ const char *branch_type_name(int type)
 	return NULL;
 }
 
+const char *get_branch_type(struct branch_entry *e)
+{
+	if (e->flags.type == PERF_BR_UNKNOWN)
+		return "";
+
+	if (e->flags.type == PERF_BR_EXTEND_ABI)
+		return branch_new_type_name(e->flags.new_type);
+
+	return branch_type_name(e->flags.type);
+}
+
 void branch_type_stat_display(FILE *fp, struct branch_type_stat *st)
 {
 	u64 total = 0;
@@ -104,6 +155,15 @@ void branch_type_stat_display(FILE *fp, struct branch_type_stat *st)
 				100.0 *
 				(double)st->counts[i] / (double)total);
 	}
+
+	for (i = 0; i < PERF_BR_NEW_MAX; i++) {
+		if (st->new_counts[i] > 0)
+			fprintf(fp, "\n%8s: %5.1f%%",
+				branch_new_type_name(i),
+				100.0 *
+				(double)st->new_counts[i] / (double)total);
+	}
+
 }
 
 static int count_str_scnprintf(int idx, const char *str, char *bf, int size)
@@ -119,6 +179,9 @@ int branch_type_str(struct branch_type_stat *st, char *bf, int size)
 	for (i = 0; i < PERF_BR_MAX; i++)
 		total += st->counts[i];
 
+	for (i = 0; i < PERF_BR_NEW_MAX; i++)
+		total += st->new_counts[i];
+
 	if (total == 0)
 		return 0;
 
@@ -136,6 +199,11 @@ int branch_type_str(struct branch_type_stat *st, char *bf, int size)
 			printed += count_str_scnprintf(j++, branch_type_name(i), bf + printed, size - printed);
 	}
 
+	for (i = 0; i < PERF_BR_NEW_MAX; i++) {
+		if (st->new_counts[i] > 0)
+			printed += count_str_scnprintf(j++, branch_new_type_name(i), bf + printed, size - printed);
+	}
+
 	if (st->cross_4k > 0)
 		printed += count_str_scnprintf(j++, "CROSS_4K", bf + printed, size - printed);
diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h
index 17b2ccc61094bf264bf7e663784ad2b78598f7ca..dca75cad96f68714c41e0e41e29b7cc4e7c723b6 100644
--- a/tools/perf/util/branch.h
+++ b/tools/perf/util/branch.h
@@ -24,7 +24,10 @@ struct branch_flags {
 			u64 abort:1;
 			u64 cycles:16;
 			u64 type:4;
-			u64 reserved:40;
+			u64 spec:2;
+			u64 new_type:4;
+			u64 priv:3;
+			u64 reserved:31;
 		};
 	};
 };
@@ -72,6 +75,7 @@ static inline struct branch_entry *perf_sample__branch_entries(struct perf_sampl
 struct branch_type_stat {
 	bool	branch_to;
 	u64	counts[PERF_BR_MAX];
+	u64	new_counts[PERF_BR_NEW_MAX];
 	u64	cond_fwd;
 	u64	cond_bwd;
 	u64	cross_4k;
@@ -82,6 +86,8 @@ void branch_type_count(struct branch_type_stat *st, struct branch_flags *flags,
 		       u64 from, u64 to);
 
 const char *branch_type_name(int type);
+const char *branch_new_type_name(int new_type);
+const char *get_branch_type(struct branch_entry *e);
 void branch_type_stat_display(FILE *fp, struct branch_type_stat *st);
 int branch_type_str(struct branch_type_stat *st, char *bf, int bfsize);
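The TODO in branch_new_type_name() above could be resolved along the following lines. This is only a sketch, not part of the patch: it assumes perf_env plumbing gets threaded through the callers, and uses perf_env__arch() from util/env.h to normalise the recorded architecture name ("aarch64" is reported as "arm64").

#include <string.h>
#include "util/env.h"

static const char *arm64_new_names[PERF_BR_NEW_MAX] = {
	"FAULT_ALGN", "FAULT_DATA", "FAULT_INST",
	"ARM64_FIQ", "ARM64_DEBUG_HALT", "ARM64_DEBUG_EXIT",
	"ARM64_DEBUG_INST", "ARM64_DEBUG_DATA"
};

/* Pick the names based on the recorded session's arch, not the build host */
static const char *branch_new_type_name_env(struct perf_env *env, int new_type)
{
	if (new_type < 0 || new_type >= PERF_BR_NEW_MAX)
		return NULL;

	if (env && !strcmp(perf_env__arch(env), "arm64"))
		return arm64_new_names[new_type];

	return branch_new_type_name(new_type);	/* fall back to host behaviour */
}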
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 1a1cbd16d76d475b3ef06c2455faab6a13a09fb3..9fd9c3cb233066847e1be55f4d8c37b9788b6bea 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -2659,6 +2659,10 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
 	"No such device - did you specify an out-of-range profile CPU?");
 		break;
 	case EOPNOTSUPP:
+		if (evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK)
+			return scnprintf(msg, size,
+	"%s: PMU Hardware or event type doesn't support branch stack sampling.",
+					 evsel__name(evsel));
 		if (evsel->core.attr.aux_output)
 			return scnprintf(msg, size,
 	"%s: PMU Hardware doesn't support 'aux_output' feature",
diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c
index bb4aa88c50a8276fd091a126873187775ce9f3d0..31a29f824b1e1fcb22feb5f3e3e5f71df144019e 100644
--- a/tools/perf/util/parse-branch-options.c
+++ b/tools/perf/util/parse-branch-options.c
@@ -30,8 +30,12 @@ static const struct branch_mode branch_modes[] = {
 	BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND),
 	BRANCH_OPT("ind_jmp", PERF_SAMPLE_BRANCH_IND_JUMP),
 	BRANCH_OPT("call", PERF_SAMPLE_BRANCH_CALL),
+	BRANCH_OPT("no_flags", PERF_SAMPLE_BRANCH_NO_FLAGS),
+	BRANCH_OPT("no_cycles", PERF_SAMPLE_BRANCH_NO_CYCLES),
 	BRANCH_OPT("save_type", PERF_SAMPLE_BRANCH_TYPE_SAVE),
 	BRANCH_OPT("stack", PERF_SAMPLE_BRANCH_CALL_STACK),
+	BRANCH_OPT("hw_index", PERF_SAMPLE_BRANCH_HW_INDEX),
+	BRANCH_OPT("priv", PERF_SAMPLE_BRANCH_PRIV_SAVE),
 	BRANCH_END
 };
 
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index e67a227c0ce7e6b20312972eb2f1b78a63a3bb2b..e67c785f0336d0156c70ba677d91009727dd127b 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -51,7 +51,7 @@ static void __p_branch_sample_type(char *buf, size_t size, u64 value)
 		bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX),
 		bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP),
 		bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES),
-		bit_name(HW_INDEX),
+		bit_name(TYPE_SAVE), bit_name(HW_INDEX), bit_name(PRIV_SAVE),
 		{ .name = NULL, }
 	};
 #undef bit_name
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 354e1e04a26627e9b831ddb24f3a92306e003165..95cf2e09d44377620db081d63c28c839c0bcf615 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1129,14 +1129,15 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack)
 		struct branch_entry *e = &entries[i];
 
 		if (!callstack) {
-			printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n",
+			printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x %s\n",
 				i, e->from, e->to,
 				(unsigned short)e->flags.cycles,
 				e->flags.mispred ? "M" : " ",
 				e->flags.predicted ? "P" : " ",
 				e->flags.abort ? "A" : " ",
 				e->flags.in_tx ? "T" : " ",
-				(unsigned)e->flags.reserved);
+				(unsigned)e->flags.reserved,
+				get_branch_type(e));
 		} else {
 			printf("..... %2"PRIu64": %016" PRIx64 "\n",
 				i, i > 0 ? e->from : e->to);