From bb76f2eaeac7bc375edf26a8f9ea02ad41401a73 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Mon, 21 Jun 2021 11:47:10 +0800 Subject: [PATCH 1/4] perf/x86/intel: Fix PEBS-via-PT reload base value for Extended PEBS mainline inclusion from mainline-v5.14-rc1 commit 4c58d922c0877e23cc7d3d7c6bff49b85faaca89 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I8WXIM CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=4c58d922c0877e23cc7d3d7c6bff49b85faaca89 ------------------------------------- If we use the "PEBS-via-PT" feature on a platform that supports extended PEBS, like this: perf record -c 10000 \ -e '{intel_pt/branch=0/,branch-instructions/aux-output/p}' uname we will encounter the following call trace: [ 250.906542] unchecked MSR access error: WRMSR to 0x14e1 (tried to write 0x0000000000000000) at rIP: 0xffffffff88073624 (native_write_msr+0x4/0x20) [ 250.920779] Call Trace: [ 250.923508] intel_pmu_pebs_enable+0x12c/0x190 [ 250.928359] intel_pmu_enable_event+0x346/0x390 [ 250.933300] x86_pmu_start+0x64/0x80 [ 250.937231] x86_pmu_enable+0x16a/0x2f0 [ 250.941434] perf_event_exec+0x144/0x4c0 [ 250.945731] begin_new_exec+0x650/0xbf0 [ 250.949933] load_elf_binary+0x13e/0x1700 [ 250.954321] ? lock_acquire+0xc2/0x390 [ 250.958430] ? bprm_execve+0x34f/0x8a0 [ 250.962544] ? lock_is_held_type+0xa7/0x120 [ 250.967118] ? find_held_lock+0x32/0x90 [ 250.971321] ? sched_clock_cpu+0xc/0xb0 [ 250.975527] bprm_execve+0x33d/0x8a0 [ 250.979452] do_execveat_common.isra.0+0x161/0x1d0 [ 250.984673] __x64_sys_execve+0x33/0x40 [ 250.988877] do_syscall_64+0x3d/0x80 [ 250.992806] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 250.998302] RIP: 0033:0x7fbc971d82fb [ 251.002235] Code: Unable to access opcode bytes at RIP 0x7fbc971d82d1. 
[ 251.009303] RSP: 002b:00007fffb8aed808 EFLAGS: 00000202 ORIG_RAX: 000000000000003b [ 251.017478] RAX: ffffffffffffffda RBX: 00007fffb8af2f00 RCX: 00007fbc971d82fb [ 251.025187] RDX: 00005574792aac50 RSI: 00007fffb8af2f00 RDI: 00007fffb8aed810 [ 251.032901] RBP: 00007fffb8aed970 R08: 0000000000000020 R09: 00007fbc9725c8b0 [ 251.040613] R10: 6d6c61632f6d6f63 R11: 0000000000000202 R12: 00005574792aac50 [ 251.048327] R13: 00007fffb8af35f0 R14: 00005574792aafdf R15: 00005574792aafe7 This is because the target reload msr address is calculated based on the wrong base msr and the target reload msr value is accessed from ds->pebs_event_reset[] with the wrong offset. According to Intel SDM Table 2-14, for extended PEBS feature, the reload msr for MSR_IA32_FIXED_CTRx should be based on MSR_RELOAD_FIXED_CTRx. For fixed counters, let's fix it by overriding the reload msr address and its value, thus avoiding out-of-bounds access. Intel-SIG: commit 4c58d922c087 perf/x86/intel: Fix PEBS-via-PT reload base value for Extended PEBS Backport as a dependency to support PEBS format 5 on kernel v5.10. 
Fixes: 42880f726c66("perf/x86/intel: Support PEBS output to PT") Signed-off-by: Like Xu Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210621034710.31107-1-likexu@tencent.com Signed-off-by: Yunying Sun --- arch/x86/events/intel/ds.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 6f1fffd113f9..c465e45d103d 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1186,6 +1186,9 @@ static void intel_pmu_pebs_via_pt_enable(struct perf_event *event) struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; struct debug_store *ds = cpuc->ds; + u64 value = ds->pebs_event_reset[hwc->idx]; + u32 base = MSR_RELOAD_PMC0; + unsigned int idx = hwc->idx; if (!is_pebs_pt(event)) return; @@ -1195,7 +1198,12 @@ static void intel_pmu_pebs_via_pt_enable(struct perf_event *event) cpuc->pebs_enabled |= PEBS_OUTPUT_PT; - wrmsrl(MSR_RELOAD_PMC0 + hwc->idx, ds->pebs_event_reset[hwc->idx]); + if (hwc->idx >= INTEL_PMC_IDX_FIXED) { + base = MSR_RELOAD_FIXED_CTR0; + idx = hwc->idx - INTEL_PMC_IDX_FIXED; + value = ds->pebs_event_reset[MAX_PEBS_EVENTS + idx]; + } + wrmsrl(base + idx, value); } void intel_pmu_pebs_enable(struct perf_event *event) @@ -1203,6 +1211,7 @@ void intel_pmu_pebs_enable(struct perf_event *event) struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; struct debug_store *ds = cpuc->ds; + unsigned int idx = hwc->idx; hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; @@ -1221,19 +1230,18 @@ void intel_pmu_pebs_enable(struct perf_event *event) } } + if (idx >= INTEL_PMC_IDX_FIXED) + idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED); + /* * Use auto-reload if possible to save a MSR write in the PMI. * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD. 
*/ if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) { - unsigned int idx = hwc->idx; - - if (idx >= INTEL_PMC_IDX_FIXED) - idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED); ds->pebs_event_reset[idx] = (u64)(-hwc->sample_period) & x86_pmu.cntval_mask; } else { - ds->pebs_event_reset[hwc->idx] = 0; + ds->pebs_event_reset[idx] = 0; } intel_pmu_pebs_via_pt_enable(event); -- Gitee From 36f313d32e8111e6cec680f1df2fc94323cb0a2c Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 1 Feb 2022 13:23:21 -0800 Subject: [PATCH 2/4] perf/x86/intel: Enable PEBS format 5 mainline inclusion from mainline-v5.18-rc1 commit 2145e77fecfb3965b1dc299bac203b167238bd0b category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I8WXIM CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=2145e77fecfb3965b1dc299bac203b167238bd0b ------------------------------------- The new PEBS Record Format 5 is similar to the PEBS Record Format 4. The only difference is the layout of the Counter Reset fields of the PEBS Config Buffer in the DS area. For the PEBS format 4, the Counter Reset fields allocation is for 8 general-purpose counters followed by 4 fixed-function counters. For the PEBS format 5, the Counter Reset fields allocation is for 32 general-purpose counters followed by 16 fixed-function counters. Extend the MAX_PEBS_EVENTS to 32. Add MAX_PEBS_EVENTS_FMT4 for the previous platform. Except for the DS auto-reload code, other places already assume 32 counters. Only check the PEBS_FMT in the DS auto-reload code. Extend the MAX_FIXED_PEBS_EVENTS to 16, which only impacts the size of struct debug_store and some local temporary variables. The size of struct debug_store increases 288B, which is small and should be acceptable. Intel-SIG: commit 2145e77fecfb perf/x86/intel: Enable PEBS format 5 Backport PEBS format 5 support to kernel v5.10. 
Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/1643750603-100733-1-git-send-email-kan.liang@linux.intel.com Signed-off-by: Yunying Sun --- arch/x86/events/intel/ds.c | 14 +++++++++++--- arch/x86/include/asm/intel_ds.h | 5 +++-- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index c465e45d103d..a9ba3f5d0f62 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1201,7 +1201,10 @@ static void intel_pmu_pebs_via_pt_enable(struct perf_event *event) if (hwc->idx >= INTEL_PMC_IDX_FIXED) { base = MSR_RELOAD_FIXED_CTR0; idx = hwc->idx - INTEL_PMC_IDX_FIXED; - value = ds->pebs_event_reset[MAX_PEBS_EVENTS + idx]; + if (x86_pmu.intel_cap.pebs_format < 5) + value = ds->pebs_event_reset[MAX_PEBS_EVENTS_FMT4 + idx]; + else + value = ds->pebs_event_reset[MAX_PEBS_EVENTS + idx]; } wrmsrl(base + idx, value); } @@ -1230,8 +1233,12 @@ void intel_pmu_pebs_enable(struct perf_event *event) } } - if (idx >= INTEL_PMC_IDX_FIXED) - idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED); + if (idx >= INTEL_PMC_IDX_FIXED) { + if (x86_pmu.intel_cap.pebs_format < 5) + idx = MAX_PEBS_EVENTS_FMT4 + (idx - INTEL_PMC_IDX_FIXED); + else + idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED); + } /* * Use auto-reload if possible to save a MSR write in the PMI. 
@@ -2197,6 +2204,7 @@ void __init intel_ds_init(void) break; case 4: + case 5: x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl; x86_pmu.pebs_record_size = sizeof(struct pebs_basic); if (x86_pmu.intel_cap.pebs_baseline) { diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h index 8380c3ddd4b2..2f9eeb5c3069 100644 --- a/arch/x86/include/asm/intel_ds.h +++ b/arch/x86/include/asm/intel_ds.h @@ -7,8 +7,9 @@ #define PEBS_BUFFER_SIZE (PAGE_SIZE << 4) /* The maximal number of PEBS events: */ -#define MAX_PEBS_EVENTS 8 -#define MAX_FIXED_PEBS_EVENTS 4 +#define MAX_PEBS_EVENTS_FMT4 8 +#define MAX_PEBS_EVENTS 32 +#define MAX_FIXED_PEBS_EVENTS 16 /* * A debug store configuration. -- Gitee From 625627e3b77d3a3d9b7a6a316336ba477ad62b16 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 1 Feb 2022 13:23:22 -0800 Subject: [PATCH 3/4] KVM: x86: use the KVM side max supported fixed counter mainline inclusion from mainline-v5.18-rc1 commit 0144ba0c5bd3176647bb4d49a697d231610c78b7 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I8WXIM CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=0144ba0c5bd3176647bb4d49a697d231610c78b7 ------------------------------------- KVM vPMU doesn't support to emulate all the fixed counters that the host PMU driver has supported, e.g. the fixed counter 3 used by Topdown metrics hasn't been supported by KVM so far. Rename MAX_FIXED_COUNTERS to KVM_PMC_MAX_FIXED to have a more straightforward naming convention as INTEL_PMC_MAX_FIXED used by the host PMU driver, and fix vPMU to use the KVM side KVM_PMC_MAX_FIXED for the virtual fixed counter emulation, instead of the host side INTEL_PMC_MAX_FIXED. Intel-SIG: commit 0144ba0c5bd3 KVM: x86: use the KVM side max supported fixed counter Backport PEBS format 5 support to kernel v5.10. 
Signed-off-by: Wei Wang Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/1643750603-100733-2-git-send-email-kan.liang@linux.intel.com Signed-off-by: Yunying Sun --- arch/x86/include/asm/kvm_host.h | 3 ++- arch/x86/kvm/cpuid.c | 3 ++- arch/x86/kvm/pmu.h | 2 -- arch/x86/kvm/vmx/pmu_intel.c | 4 ++-- arch/x86/kvm/x86.c | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 678955cd2175..7b83f15f2df7 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -446,6 +446,7 @@ struct kvm_pmc { bool is_paused; }; +#define KVM_PMC_MAX_FIXED 3 struct kvm_pmu { unsigned nr_arch_gp_counters; unsigned nr_arch_fixed_counters; @@ -462,7 +463,7 @@ struct kvm_pmu { u64 raw_event_mask; u8 version; struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC]; - struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED]; + struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED]; struct irq_work irq_work; DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX); DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX); diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 2f67d137ff2f..bcfc4476adfb 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -793,7 +793,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) eax.split.bit_width = cap.bit_width_gp; eax.split.mask_length = cap.events_mask_len; - edx.split.num_counters_fixed = min(cap.num_counters_fixed, MAX_FIXED_COUNTERS); + edx.split.num_counters_fixed = + min(cap.num_counters_fixed, KVM_PMC_MAX_FIXED); edx.split.bit_width_fixed = cap.bit_width_fixed; if (cap.version) edx.split.anythread_deprecated = 1; diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 005f580ca5e7..148ded170b17 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -15,8 +15,6 @@ #define VMWARE_BACKDOOR_PMC_REAL_TIME 0x10001 #define VMWARE_BACKDOOR_PMC_APPARENT_TIME 0x10002 -#define 
MAX_FIXED_COUNTERS 3 - struct kvm_event_hw_type_mapping { u8 eventsel; u8 unit_mask; diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index df891691cb0d..e4891e377488 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -562,7 +562,7 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu) pmu->gp_counters[i].current_config = 0; } - for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) { + for (i = 0; i < KVM_PMC_MAX_FIXED; i++) { pmu->fixed_counters[i].type = KVM_PMC_FIXED; pmu->fixed_counters[i].vcpu = vcpu; pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED; @@ -588,7 +588,7 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu) pmc->counter = pmc->eventsel = 0; } - for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) { + for (i = 0; i < KVM_PMC_MAX_FIXED; i++) { pmc = &pmu->fixed_counters[i]; pmc_stop_counter(pmc); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index dc098fe48399..94abe5a70c38 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6165,7 +6165,7 @@ static void kvm_init_msr_list(void) u32 dummy[2]; unsigned i; - BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4, + BUILD_BUG_ON_MSG(KVM_PMC_MAX_FIXED != 3, "Please update the fixed PMCs in msrs_to_saved_all[]"); perf_get_x86_pmu_capability(&x86_pmu); -- Gitee From 80498dfe50e296eec785df386ea3db814e5bc747 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 1 Feb 2022 13:23:23 -0800 Subject: [PATCH 4/4] perf/x86/intel: Increase max number of the fixed counters mainline inclusion from mainline-v5.18-rc1 commit ee28855a54493ce83bc2a3fbe30210be61b57bc7 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I8WXIM CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=ee28855a54493ce83bc2a3fbe30210be61b57bc7 ------------------------------------- The new PEBS format 5 implies that the number of the fixed counters can be up to 16. The current INTEL_PMC_MAX_FIXED is still 4. 
If the current kernel runs on a future platform which has more than 4 fixed counters, a warning will be triggered. The number of the fixed counters will be clipped to 4. Users have to upgrade the kernel to access the new fixed counters. Add a new default constraint for PerfMon v5 and up, which can support up to 16 fixed counters. The pseudo-encoding is applied for the fixed counters 4 and later. The user can have generic support for the new fixed counters on the future platforms without updating the kernel. Increase the INTEL_PMC_MAX_FIXED to 16. Intel-SIG: commit ee28855a5449 perf/x86/intel: Increase max number of the fixed counters Backport PEBS format 5 support to kernel v5.10. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andi Kleen Link: https://lkml.kernel.org/r/1643750603-100733-3-git-send-email-kan.liang@linux.intel.com Signed-off-by: Yunying Sun --- arch/x86/events/intel/core.c | 40 ++++++++++++++++++++++++++++++- arch/x86/include/asm/perf_event.h | 2 +- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 8de383e42730..977e171b7e2b 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -181,6 +181,27 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly = EVENT_CONSTRAINT_END }; +static struct event_constraint intel_v5_gen_event_constraints[] __read_mostly = +{ + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ + FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */ + FIXED_EVENT_CONSTRAINT(0x0500, 4), + FIXED_EVENT_CONSTRAINT(0x0600, 5), + FIXED_EVENT_CONSTRAINT(0x0700, 6), + FIXED_EVENT_CONSTRAINT(0x0800, 7), + FIXED_EVENT_CONSTRAINT(0x0900, 8), + FIXED_EVENT_CONSTRAINT(0x0a00, 9), + FIXED_EVENT_CONSTRAINT(0x0b00, 10), + FIXED_EVENT_CONSTRAINT(0x0c00, 11), + 
FIXED_EVENT_CONSTRAINT(0x0d00, 12), + FIXED_EVENT_CONSTRAINT(0x0e00, 13), + FIXED_EVENT_CONSTRAINT(0x0f00, 14), + FIXED_EVENT_CONSTRAINT(0x1000, 15), + EVENT_CONSTRAINT_END +}; + static struct event_constraint intel_slm_event_constraints[] __read_mostly = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ @@ -5835,7 +5856,9 @@ __init int intel_pmu_init(void) pr_cont("generic architected perfmon v1, "); name = "generic_arch_v1"; break; - default: + case 2: + case 3: + case 4: /* * default constraints for v2 and up */ @@ -5843,6 +5866,21 @@ __init int intel_pmu_init(void) pr_cont("generic architected perfmon, "); name = "generic_arch_v2+"; break; + default: + /* + * The default constraints for v5 and up can support up to + * 16 fixed counters. For the fixed counters 4 and later, + * the pseudo-encoding is applied. + * The constraints may be cut according to the CPUID enumeration + * by inserting the EVENT_CONSTRAINT_END. + */ + if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) + x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED; + intel_v5_gen_event_constraints[x86_pmu.num_counters_fixed].weight = -1; + x86_pmu.event_constraints = intel_v5_gen_event_constraints; + pr_cont("generic architected perfmon, "); + name = "generic_arch_v5+"; + break; } } diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 17b131670973..b8dddb034c11 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -8,7 +8,7 @@ */ #define INTEL_PMC_MAX_GENERIC 32 -#define INTEL_PMC_MAX_FIXED 4 +#define INTEL_PMC_MAX_FIXED 16 #define INTEL_PMC_IDX_FIXED 32 #define X86_PMC_IDX_MAX 64 -- Gitee