From 52bc438de22fb63135d89648c347fdeff24e4ffa Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 29 Aug 2023 05:58:01 -0700 Subject: [PATCH 01/10] perf/x86/intel: Use the common uarch name for the shared functions mainline inclusion from mainline-v6.10-rc1 commit d4b5694c75d4eba8238d541a55da0c67e876213e category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/IAGLFT CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d4b5694c75d4eba8238d541a55da0c67e876213e ------------------------------------- From PMU's perspective, the SPR/GNR server has a similar uarch to the ADL/MTL client p-core. Many functions are shared. However, the shared function name uses the abbreviation of the server product code name, rather than the common uarch code name. Rename these internal shared functions by the common uarch name. Intel-SIG: commit d4b5694c75d4 perf/x86/intel: Use the common uarch name for the shared functions Backport as a dependency needed by the GNR distinct pmu name fix Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230829125806.3016082-2-kan.liang@linux.intel.com Signed-off-by: Yunying Sun --- arch/x86/events/intel/core.c | 64 ++++++++++++++++++------------------ arch/x86/events/intel/ds.c | 2 +- arch/x86/events/perf_event.h | 2 +- 3 files changed, 34 insertions(+), 34 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 8814a757e612..0c0dcf9254ce 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -299,7 +299,7 @@ static struct extra_reg intel_icl_extra_regs[] __read_mostly = { EVENT_EXTRA_END }; -static struct extra_reg intel_spr_extra_regs[] __read_mostly = { +static struct extra_reg intel_glc_extra_regs[] __read_mostly = { INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), @@ -309,7 +309,7 @@ static struct extra_reg intel_spr_extra_regs[] __read_mostly = { EVENT_EXTRA_END }; -static struct event_constraint intel_spr_event_constraints[] = { +static struct event_constraint intel_glc_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x0100, 0), /* INST_RETIRED.PREC_DIST */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ @@ -349,7 +349,7 @@ static struct event_constraint intel_spr_event_constraints[] = { EVENT_CONSTRAINT_END }; -static struct extra_reg intel_gnr_extra_regs[] __read_mostly = { +static struct extra_reg intel_rwc_extra_regs[] __read_mostly = { INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), @@ -473,7 +473,7 @@ static u64 intel_pmu_event_map(int hw_event) return intel_perfmon_event_map[hw_event]; } -static __initconst const u64 spr_hw_cache_event_ids +static __initconst const u64 glc_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -552,7 +552,7 @@ static __initconst const u64 spr_hw_cache_event_ids }, }; -static __initconst const u64 spr_hw_cache_extra_regs +static __initconst const u64 glc_hw_cache_extra_regs [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -4348,7 +4348,7 @@ icl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, } static struct event_constraint * -spr_get_event_constraints(struct cpu_hw_events *cpuc, int idx, +glc_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) { struct event_constraint *c; @@ -4437,7 +4437,7 @@ adl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); if (pmu->cpu_type == hybrid_big) - return spr_get_event_constraints(cpuc, idx, event); + return glc_get_event_constraints(cpuc, idx, event); else if (pmu->cpu_type == hybrid_small) return tnt_get_event_constraints(cpuc, idx, event); @@ -4489,7 +4489,7 @@ rwc_get_event_constraints(struct cpu_hw_events *cpuc, int idx, { struct event_constraint *c; - c = spr_get_event_constraints(cpuc, idx, event); + c = glc_get_event_constraints(cpuc, idx, event); /* The Retire Latency is not supported by the fixed counter 0. */ if (event->attr.precise_ip && @@ -4570,7 +4570,7 @@ static void nhm_limit_period(struct perf_event *event, s64 *left) *left = max(*left, 32LL); } -static void spr_limit_period(struct perf_event *event, s64 *left) +static void glc_limit_period(struct perf_event *event, s64 *left) { if (event->attr.precise_ip == 3) *left = max(*left, 128LL); @@ -5417,14 +5417,14 @@ static struct attribute *icl_tsx_events_attrs[] = { EVENT_ATTR_STR(mem-stores, mem_st_spr, "event=0xcd,umask=0x2"); EVENT_ATTR_STR(mem-loads-aux, mem_ld_aux, "event=0x03,umask=0x82"); -static struct attribute *spr_events_attrs[] = { +static struct attribute *glc_events_attrs[] = { EVENT_PTR(mem_ld_hsw), EVENT_PTR(mem_st_spr), EVENT_PTR(mem_ld_aux), NULL, }; -static struct attribute *spr_td_events_attrs[] = { +static struct attribute *glc_td_events_attrs[] = { EVENT_PTR(slots), EVENT_PTR(td_retiring), EVENT_PTR(td_bad_spec), @@ -5437,7 +5437,7 @@ static struct attribute *spr_td_events_attrs[] = { NULL, }; -static struct attribute *spr_tsx_events_attrs[] = { +static struct attribute *glc_tsx_events_attrs[] = { EVENT_PTR(tx_start), EVENT_PTR(tx_abort), EVENT_PTR(tx_commit), @@ -6319,7 +6319,7 @@ __init int intel_pmu_init(void) intel_pmu_pebs_data_source_grt(); x86_pmu.pebs_latency_data = adl_latency_data_small; x86_pmu.get_event_constraints = tnt_get_event_constraints; - x86_pmu.limit_period = spr_limit_period; + x86_pmu.limit_period = glc_limit_period; td_attr = tnt_events_attrs; mem_attr = grt_mem_attrs; extra_attr = nhm_format_attr; @@ -6350,7 +6350,7 @@ __init int intel_pmu_init(void) intel_pmu_pebs_data_source_cmt(); x86_pmu.pebs_latency_data = mtl_latency_data_small; x86_pmu.get_event_constraints = cmt_get_event_constraints; - x86_pmu.limit_period = spr_limit_period; + x86_pmu.limit_period = glc_limit_period; td_attr = cmt_events_attrs; mem_attr = grt_mem_attrs; extra_attr = cmt_format_attr; @@ -6667,20 +6667,20 @@ __init int intel_pmu_init(void) case INTEL_FAM6_SAPPHIRERAPIDS_X: case INTEL_FAM6_EMERALDRAPIDS_X: x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; - x86_pmu.extra_regs = intel_spr_extra_regs; + x86_pmu.extra_regs = intel_glc_extra_regs; fallthrough; case INTEL_FAM6_GRANITERAPIDS_X: case INTEL_FAM6_GRANITERAPIDS_D: pmem = true; x86_pmu.late_ack = true; - memcpy(hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(hw_cache_event_ids)); - memcpy(hw_cache_extra_regs, spr_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + memcpy(hw_cache_event_ids, glc_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, glc_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); - x86_pmu.event_constraints = intel_spr_event_constraints; - x86_pmu.pebs_constraints = intel_spr_pebs_event_constraints; + x86_pmu.event_constraints = intel_glc_event_constraints; + x86_pmu.pebs_constraints = intel_glc_pebs_event_constraints; if (!x86_pmu.extra_regs) - x86_pmu.extra_regs = intel_gnr_extra_regs; - x86_pmu.limit_period = spr_limit_period; + x86_pmu.extra_regs = intel_rwc_extra_regs; + x86_pmu.limit_period = glc_limit_period; x86_pmu.pebs_ept = 1; x86_pmu.pebs_aliases = NULL; x86_pmu.pebs_prec_dist = true; @@ -6690,13 +6690,13 @@ __init int intel_pmu_init(void) x86_pmu.flags |= PMU_FL_INSTR_LATENCY; x86_pmu.hw_config = hsw_hw_config; - x86_pmu.get_event_constraints = spr_get_event_constraints; + x86_pmu.get_event_constraints = glc_get_event_constraints; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? hsw_format_attr : nhm_format_attr; extra_skl_attr = skl_format_attr; - mem_attr = spr_events_attrs; - td_attr = spr_td_events_attrs; - tsx_attr = spr_tsx_events_attrs; + mem_attr = glc_events_attrs; + td_attr = glc_td_events_attrs; + tsx_attr = glc_tsx_events_attrs; x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04); x86_pmu.lbr_pt_coexist = true; intel_pmu_pebs_data_source_skl(pmem); @@ -6746,7 +6746,7 @@ __init int intel_pmu_init(void) x86_pmu.filter = intel_pmu_filter; x86_pmu.get_event_constraints = adl_get_event_constraints; x86_pmu.hw_config = adl_hw_config; - x86_pmu.limit_period = spr_limit_period; + x86_pmu.limit_period = glc_limit_period; x86_pmu.get_hybrid_cpu_type = adl_get_hybrid_cpu_type; /* * The rtm_abort_event is used to check whether to enable GPRs @@ -6795,11 +6795,11 @@ __init int intel_pmu_init(void) pmu->intel_cap.perf_metrics = 1; pmu->intel_cap.pebs_output_pt_available = 0; - memcpy(pmu->hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(pmu->hw_cache_event_ids)); - memcpy(pmu->hw_cache_extra_regs, spr_hw_cache_extra_regs, sizeof(pmu->hw_cache_extra_regs)); - pmu->event_constraints = intel_spr_event_constraints; - pmu->pebs_constraints = intel_spr_pebs_event_constraints; - pmu->extra_regs = intel_spr_extra_regs; + memcpy(pmu->hw_cache_event_ids, glc_hw_cache_event_ids, sizeof(pmu->hw_cache_event_ids)); + memcpy(pmu->hw_cache_extra_regs, glc_hw_cache_extra_regs, sizeof(pmu->hw_cache_extra_regs)); + pmu->event_constraints = intel_glc_event_constraints; + pmu->pebs_constraints = intel_glc_pebs_event_constraints; + pmu->extra_regs = intel_glc_extra_regs; /* Initialize Atom core specific PerfMon capabilities.*/ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; @@ -6823,7 +6823,7 @@ __init int intel_pmu_init(void) pmu->pebs_constraints = intel_grt_pebs_event_constraints; pmu->extra_regs = intel_grt_extra_regs; if (is_mtl(boot_cpu_data.x86_model)) { - x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].extra_regs = intel_gnr_extra_regs; + x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].extra_regs = intel_rwc_extra_regs; x86_pmu.pebs_latency_data = mtl_latency_data_small; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 290f3d062543..e77c32a8f932 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1058,7 +1058,7 @@ struct event_constraint intel_icl_pebs_event_constraints[] = { EVENT_CONSTRAINT_END }; -struct event_constraint intel_spr_pebs_event_constraints[] = { +struct event_constraint intel_glc_pebs_event_constraints[] = { INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */ INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index b8a2d3ba4ccd..38342d1614f5 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -1529,7 +1529,7 @@ extern struct event_constraint intel_skl_pebs_event_constraints[]; extern struct event_constraint intel_icl_pebs_event_constraints[]; -extern struct event_constraint intel_spr_pebs_event_constraints[]; +extern struct event_constraint intel_glc_pebs_event_constraints[]; struct event_constraint *intel_pebs_constraints(struct perf_event *event); -- Gitee From 7f8a6f987028be99067e8a6783f6927b28ebd780 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 29 Aug 2023 05:58:02 -0700 Subject: [PATCH 02/10] perf/x86/intel: Factor out the initialization code for SPR mainline inclusion from mainline-v6.7-rc1 commit 0ba0c03528e918a8f6b5aa63d502fdc6a9d80fc7 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/IAGLFT CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=0ba0c03528e918a8f6b5aa63d502fdc6a9d80fc7 ------------------------------------- The SPR and ADL p-core have a similar uarch. Most of the initialization code can be shared. Factor out intel_pmu_init_glc() for the common initialization code. The common part of the ADL p-core will be replaced by the later patch. Intel-SIG: commit 0ba0c03528e9 perf/x86/intel: Factor out the initialization code for SPR Backport as a dependency needed by the GNR distinct pmu name fix Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230829125806.3016082-3-kan.liang@linux.intel.com Signed-off-by: Yunying Sun --- arch/x86/events/intel/core.c | 49 +++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 0c0dcf9254ce..303b9bf9547b 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -6020,6 +6020,30 @@ static __always_inline bool is_mtl(u8 x86_model) (x86_model == INTEL_FAM6_METEORLAKE_L); } +static __always_inline void intel_pmu_init_glc(struct pmu *pmu) +{ + x86_pmu.late_ack = true; + x86_pmu.limit_period = glc_limit_period; + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.pebs_block = true; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_NO_HT_SHARING; + x86_pmu.flags |= PMU_FL_INSTR_LATENCY; + x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04); + x86_pmu.lbr_pt_coexist = true; + x86_pmu.num_topdown_events = 8; + static_call_update(intel_pmu_update_topdown_event, + &icl_update_topdown_event); + static_call_update(intel_pmu_set_topdown_event_period, + &icl_set_topdown_event_period); + + memcpy(hybrid_var(pmu, hw_cache_event_ids), glc_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hybrid_var(pmu, hw_cache_extra_regs), glc_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + hybrid(pmu, event_constraints) = intel_glc_event_constraints; + hybrid(pmu, pebs_constraints) = intel_glc_pebs_event_constraints; +} + __init int intel_pmu_init(void) { struct attribute **extra_skl_attr = &empty_attrs; @@ -6671,24 +6695,10 @@ __init int intel_pmu_init(void) fallthrough; case INTEL_FAM6_GRANITERAPIDS_X: case INTEL_FAM6_GRANITERAPIDS_D: - pmem = true; - x86_pmu.late_ack = true; - memcpy(hw_cache_event_ids, glc_hw_cache_event_ids, sizeof(hw_cache_event_ids)); - memcpy(hw_cache_extra_regs, glc_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); - - x86_pmu.event_constraints = intel_glc_event_constraints; - x86_pmu.pebs_constraints = intel_glc_pebs_event_constraints; + intel_pmu_init_glc(NULL); if (!x86_pmu.extra_regs) x86_pmu.extra_regs = intel_rwc_extra_regs; - x86_pmu.limit_period = glc_limit_period; x86_pmu.pebs_ept = 1; - x86_pmu.pebs_aliases = NULL; - x86_pmu.pebs_prec_dist = true; - x86_pmu.pebs_block = true; - x86_pmu.flags |= PMU_FL_HAS_RSP_1; - x86_pmu.flags |= PMU_FL_NO_HT_SHARING; - x86_pmu.flags |= PMU_FL_INSTR_LATENCY; - x86_pmu.hw_config = hsw_hw_config; x86_pmu.get_event_constraints = glc_get_event_constraints; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? @@ -6697,14 +6707,7 @@ __init int intel_pmu_init(void) mem_attr = glc_events_attrs; td_attr = glc_td_events_attrs; tsx_attr = glc_tsx_events_attrs; - x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04); - x86_pmu.lbr_pt_coexist = true; - intel_pmu_pebs_data_source_skl(pmem); - x86_pmu.num_topdown_events = 8; - static_call_update(intel_pmu_update_topdown_event, - &icl_update_topdown_event); - static_call_update(intel_pmu_set_topdown_event_period, - &icl_set_topdown_event_period); + intel_pmu_pebs_data_source_skl(true); pr_cont("Sapphire Rapids events, "); name = "sapphire_rapids"; break; -- Gitee From c8eefc4302031ccd062839868c4116bcdb50eda0 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 29 Aug 2023 05:58:03 -0700 Subject: [PATCH 03/10] perf/x86/intel: Factor out the initialization code for ADL e-core mainline inclusion from mainline-v6.10-rc1 commit d87d221f854b62f5e8026505497d33404ef6050c category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/IAGLFT CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d87d221f854b62f5e8026505497d33404ef6050c ------------------------------------- From PMU's perspective, the ADL e-core and newer SRF/GRR have a similar uarch. Most of the initialization code can be shared. Factor out intel_pmu_init_grt() for the common initialization code. The common part of the ADL e-core will be replaced by the later patch. Intel-SIG: commit d87d221f854b perf/x86/intel: Factor out the initialization code for ADL e-core Backport as a dependency needed by the GNR distinct pmu name fix Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230829125806.3016082-4-kan.liang@linux.intel.com Signed-off-by: Yunying Sun --- arch/x86/events/intel/core.c | 58 +++++++++++++----------------------- 1 file changed, 21 insertions(+), 37 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 303b9bf9547b..98b6fd726e67 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -6044,6 +6044,25 @@ static __always_inline void intel_pmu_init_glc(struct pmu *pmu) hybrid(pmu, pebs_constraints) = intel_glc_pebs_event_constraints; } +static __always_inline void intel_pmu_init_grt(struct pmu *pmu) +{ + x86_pmu.mid_ack = true; + x86_pmu.limit_period = glc_limit_period; + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.pebs_block = true; + x86_pmu.lbr_pt_coexist = true; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_INSTR_LATENCY; + + memcpy(hybrid_var(pmu, hw_cache_event_ids), glp_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hybrid_var(pmu, hw_cache_extra_regs), tnt_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + hybrid_var(pmu, hw_cache_event_ids)[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; + hybrid(pmu, event_constraints) = intel_slm_event_constraints; + hybrid(pmu, pebs_constraints) = intel_grt_pebs_event_constraints; + hybrid(pmu, extra_regs) = intel_grt_extra_regs; +} + __init int intel_pmu_init(void) { struct attribute **extra_skl_attr = &empty_attrs; @@ -6322,28 +6341,10 @@ __init int intel_pmu_init(void) break; case INTEL_FAM6_ATOM_GRACEMONT: - x86_pmu.mid_ack = true; - memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs, - sizeof(hw_cache_extra_regs)); - hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; - - x86_pmu.event_constraints = intel_slm_event_constraints; - x86_pmu.pebs_constraints = intel_grt_pebs_event_constraints; - x86_pmu.extra_regs = intel_grt_extra_regs; - - x86_pmu.pebs_aliases = NULL; - x86_pmu.pebs_prec_dist = true; - x86_pmu.pebs_block = true; - x86_pmu.lbr_pt_coexist = true; - x86_pmu.flags |= PMU_FL_HAS_RSP_1; - x86_pmu.flags |= PMU_FL_INSTR_LATENCY; - + intel_pmu_init_grt(NULL); intel_pmu_pebs_data_source_grt(); x86_pmu.pebs_latency_data = adl_latency_data_small; x86_pmu.get_event_constraints = tnt_get_event_constraints; - x86_pmu.limit_period = glc_limit_period; td_attr = tnt_events_attrs; mem_attr = grt_mem_attrs; extra_attr = nhm_format_attr; @@ -6353,28 +6354,11 @@ __init int intel_pmu_init(void) case INTEL_FAM6_ATOM_CRESTMONT: case INTEL_FAM6_ATOM_CRESTMONT_X: - x86_pmu.mid_ack = true; - memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs, - sizeof(hw_cache_extra_regs)); - hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; - - x86_pmu.event_constraints = intel_slm_event_constraints; - x86_pmu.pebs_constraints = intel_grt_pebs_event_constraints; + intel_pmu_init_grt(NULL); x86_pmu.extra_regs = intel_cmt_extra_regs; - - x86_pmu.pebs_aliases = NULL; - x86_pmu.pebs_prec_dist = true; - x86_pmu.lbr_pt_coexist = true; - x86_pmu.pebs_block = true; - x86_pmu.flags |= PMU_FL_HAS_RSP_1; - x86_pmu.flags |= PMU_FL_INSTR_LATENCY; - intel_pmu_pebs_data_source_cmt(); x86_pmu.pebs_latency_data = mtl_latency_data_small; x86_pmu.get_event_constraints = cmt_get_event_constraints; - x86_pmu.limit_period = glc_limit_period; td_attr = cmt_events_attrs; mem_attr = grt_mem_attrs; extra_attr = cmt_format_attr; -- Gitee From 7bf2dd59838d909f20059c6d8b9ebb550cdf1642 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 29 Aug 2023 05:58:04 -0700 Subject: [PATCH 04/10] perf/x86/intel: Apply the common initialization code for ADL mainline inclusion from mainline-v6.10-rc1 commit 299a5fc8e783eed705015e83e381912dbbf3eabc category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/IAGLFT CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=299a5fc8e783eed705015e83e381912dbbf3eabc ------------------------------------- Use the intel_pmu_init_glc() and intel_pmu_init_grt() to replace the duplicate code for ADL. The current code already checks the PERF_X86_EVENT_TOPDOWN flag before invoking the Topdown metrics functions. (The PERF_X86_EVENT_TOPDOWN flag is to indicate the Topdown metric feature, which is only available for the p-core.) Drop the unnecessary adl_set_topdown_event_period() and adl_update_topdown_event(). Intel-SIG: commit 299a5fc8e783 perf/x86/intel: Apply the common initialization code for ADL Backport as a dependency needed by the GNR distinct pmu name fix Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230829125806.3016082-5-kan.liang@linux.intel.com Signed-off-by: Yunying Sun --- arch/x86/events/intel/core.c | 53 ++---------------------------------- 1 file changed, 2 insertions(+), 51 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 98b6fd726e67..754aec2513ff 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2561,16 +2561,6 @@ static int icl_set_topdown_event_period(struct perf_event *event) return 0; } -static int adl_set_topdown_event_period(struct perf_event *event) -{ - struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); - - if (pmu->cpu_type != hybrid_big) - return 0; - - return icl_set_topdown_event_period(event); -} - DEFINE_STATIC_CALL(intel_pmu_set_topdown_event_period, x86_perf_event_set_period); static inline u64 icl_get_metrics_event_value(u64 metric, u64 slots, int idx) @@ -2713,16 +2703,6 @@ static u64 icl_update_topdown_event(struct perf_event *event) x86_pmu.num_topdown_events - 1); } -static u64 adl_update_topdown_event(struct perf_event *event) -{ - struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); - - if (pmu->cpu_type != hybrid_big) - return 0; - - return icl_update_topdown_event(event); -} - DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, x86_perf_event_update); static void intel_pmu_read_topdown_event(struct perf_event *event) @@ -6716,32 +6696,11 @@ __init int intel_pmu_init(void) static_branch_enable(&perf_is_hybrid); x86_pmu.num_hybrid_pmus = X86_HYBRID_NUM_PMUS; - x86_pmu.pebs_aliases = NULL; - x86_pmu.pebs_prec_dist = true; - x86_pmu.pebs_block = true; - x86_pmu.flags |= PMU_FL_HAS_RSP_1; - x86_pmu.flags |= PMU_FL_NO_HT_SHARING; - x86_pmu.flags |= PMU_FL_INSTR_LATENCY; - x86_pmu.lbr_pt_coexist = true; x86_pmu.pebs_latency_data = adl_latency_data_small; - x86_pmu.num_topdown_events = 8; - static_call_update(intel_pmu_update_topdown_event, - &adl_update_topdown_event); - static_call_update(intel_pmu_set_topdown_event_period, - &adl_set_topdown_event_period); - x86_pmu.filter = intel_pmu_filter; x86_pmu.get_event_constraints = adl_get_event_constraints; x86_pmu.hw_config = adl_hw_config; - x86_pmu.limit_period = glc_limit_period; x86_pmu.get_hybrid_cpu_type = adl_get_hybrid_cpu_type; - /* - * The rtm_abort_event is used to check whether to enable GPRs - * for the RTM abort event. Atom doesn't have the RTM abort - * event. There is no harmful to set it in the common - * x86_pmu.rtm_abort_event. - */ - x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04); td_attr = adl_hybrid_events_attrs; mem_attr = adl_hybrid_mem_attrs; @@ -6753,6 +6712,7 @@ __init int intel_pmu_init(void) pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; pmu->name = "cpu_core"; pmu->cpu_type = hybrid_big; + intel_pmu_init_glc(&pmu->pmu); pmu->late_ack = true; if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { pmu->num_counters = x86_pmu.num_counters + 2; @@ -6782,16 +6742,13 @@ __init int intel_pmu_init(void) pmu->intel_cap.perf_metrics = 1; pmu->intel_cap.pebs_output_pt_available = 0; - memcpy(pmu->hw_cache_event_ids, glc_hw_cache_event_ids, sizeof(pmu->hw_cache_event_ids)); - memcpy(pmu->hw_cache_extra_regs, glc_hw_cache_extra_regs, sizeof(pmu->hw_cache_extra_regs)); - pmu->event_constraints = intel_glc_event_constraints; - pmu->pebs_constraints = intel_glc_pebs_event_constraints; pmu->extra_regs = intel_glc_extra_regs; /* Initialize Atom core specific PerfMon capabilities.*/ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; pmu->name = "cpu_atom"; pmu->cpu_type = hybrid_small; + intel_pmu_init_grt(&pmu->pmu); pmu->mid_ack = true; pmu->num_counters = x86_pmu.num_counters; pmu->num_counters_fixed = x86_pmu.num_counters_fixed; @@ -6803,12 +6760,6 @@ __init int intel_pmu_init(void) pmu->intel_cap.perf_metrics = 0; pmu->intel_cap.pebs_output_pt_available = 1; - memcpy(pmu->hw_cache_event_ids, glp_hw_cache_event_ids, sizeof(pmu->hw_cache_event_ids)); - memcpy(pmu->hw_cache_extra_regs, tnt_hw_cache_extra_regs, sizeof(pmu->hw_cache_extra_regs)); - pmu->hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; - pmu->event_constraints = intel_slm_event_constraints; - pmu->pebs_constraints = intel_grt_pebs_event_constraints; - pmu->extra_regs = intel_grt_extra_regs; if (is_mtl(boot_cpu_data.x86_model)) { x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].extra_regs = intel_rwc_extra_regs; x86_pmu.pebs_latency_data = mtl_latency_data_small; -- Gitee From d6caf70ed4095227b5e03486ba89e64aeb729555 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 29 Aug 2023 05:58:05 -0700 Subject: [PATCH 05/10] perf/x86/intel: Clean up the hybrid CPU type handling code mainline inclusion from mainline-v6.10-rc1 commit b0560bfd4b70277a4936c82e50e940aa253c95bf category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/IAGLFT CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=b0560bfd4b70277a4936c82e50e940aa253c95bf ------------------------------------- There is a fairly long list of grievances about the current code. The main beefs: 1. hybrid_big_small assumes that the *HARDWARE* (CPUID) provided core types are a bitmap. They are not. If Intel happened to make a core type of 0xff, hilarity would ensue. 2. adl_get_hybrid_cpu_type() utterly inscrutable. There are precisely zero comments and zero changelog about what it is attempting to do. According to Kan, the adl_get_hybrid_cpu_type() is there because some Alder Lake (ADL) CPUs can do some silly things. Some ADL models are *supposed* to be hybrid CPUs with big and little cores, but there are some SKUs that only have big cores. CPUID(0x1a) on those CPUs does not say that the CPUs are big cores. It apparently just returns 0x0. It confuses perf because it expects to see either 0x40 (Core) or 0x20 (Atom). The perf workaround for this is to watch for a CPU core saying it is type 0x0. If that happens on an Alder Lake, it calls x86_pmu.get_hybrid_cpu_type() and just assumes that the core is a Core (0x40) CPU. To fix up the mess, separate out the CPU types and the 'pmu' types. This allows 'hybrid_pmu_type' bitmaps without worrying that some future CPU type will set multiple bits. Since the types are now separate, add a function to glue them back together again. Actual comment on the situation in the glue function (find_hybrid_pmu_for_cpu()). Also, give ->get_hybrid_cpu_type() a real return type and make it clear that it is overriding the *CPU* type, not the PMU type. Rename cpu_type to pmu_type in the struct x86_hybrid_pmu to reflect the change. Intel-SIG: commit b0560bfd4b70 perf/x86/intel: Clean up the hybrid CPU type handling code Backport as a dependency needed by the GNR distinct pmu name fix Originally-by: Dave Hansen Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230829125806.3016082-6-kan.liang@linux.intel.com Signed-off-by: Yunying Sun --- arch/x86/events/core.c | 6 ++-- arch/x86/events/intel/core.c | 69 ++++++++++++++++++++++++------------ arch/x86/events/intel/ds.c | 2 +- arch/x86/events/perf_event.h | 35 ++++++++++-------- 4 files changed, 72 insertions(+), 40 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index fc436fbb053f..acd367c45334 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1888,9 +1888,9 @@ ssize_t events_hybrid_sysfs_show(struct device *dev, str = pmu_attr->event_str; for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { - if (!(x86_pmu.hybrid_pmu[i].cpu_type & pmu_attr->pmu_type)) + if (!(x86_pmu.hybrid_pmu[i].pmu_type & pmu_attr->pmu_type)) continue; - if (x86_pmu.hybrid_pmu[i].cpu_type & pmu->cpu_type) { + if (x86_pmu.hybrid_pmu[i].pmu_type & pmu->pmu_type) { next_str = strchr(str, ';'); if (next_str) return snprintf(page, next_str - str + 1, "%s", str); @@ -2170,7 +2170,7 @@ static int __init init_hw_perf_events(void) hybrid_pmu->pmu.capabilities |= PERF_PMU_CAP_EXTENDED_HW_TYPE; err = perf_pmu_register(&hybrid_pmu->pmu, hybrid_pmu->name, - (hybrid_pmu->cpu_type == hybrid_big) ? PERF_TYPE_RAW : -1); + (hybrid_pmu->pmu_type == hybrid_big) ? PERF_TYPE_RAW : -1); if (err) break; } diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 754aec2513ff..4bc83a594915 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3864,7 +3864,7 @@ static inline bool require_mem_loads_aux_event(struct perf_event *event) return false; if (is_hybrid()) - return hybrid_pmu(event->pmu)->cpu_type == hybrid_big; + return hybrid_pmu(event->pmu)->pmu_type == hybrid_big; return true; } @@ -4416,9 +4416,9 @@ adl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, { struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); - if (pmu->cpu_type == hybrid_big) + if (pmu->pmu_type == hybrid_big) return glc_get_event_constraints(cpuc, idx, event); - else if (pmu->cpu_type == hybrid_small) + else if (pmu->pmu_type == hybrid_small) return tnt_get_event_constraints(cpuc, idx, event); WARN_ON(1); @@ -4493,9 +4493,9 @@ mtl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, { struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); - if (pmu->cpu_type == hybrid_big) + if (pmu->pmu_type == hybrid_big) return rwc_get_event_constraints(cpuc, idx, event); - if (pmu->cpu_type == hybrid_small) + if (pmu->pmu_type == hybrid_small) return cmt_get_event_constraints(cpuc, idx, event); WARN_ON(1); @@ -4506,18 +4506,18 @@ static int adl_hw_config(struct perf_event *event) { struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); - if (pmu->cpu_type == hybrid_big) + if (pmu->pmu_type == hybrid_big) return hsw_hw_config(event); - else if (pmu->cpu_type == hybrid_small) + else if (pmu->pmu_type == hybrid_small) return intel_pmu_hw_config(event); WARN_ON(1); return -EOPNOTSUPP; } -static u8 adl_get_hybrid_cpu_type(void) +static enum hybrid_cpu_type adl_get_hybrid_cpu_type(void) { - return hybrid_big; + return HYBRID_INTEL_CORE; } /* @@ -4693,22 +4693,47 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu) } } -static bool init_hybrid_pmu(int cpu) +static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void) { - struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); u8 cpu_type = get_this_hybrid_cpu_type(); - struct x86_hybrid_pmu *pmu = NULL; int i; - if (!cpu_type && x86_pmu.get_hybrid_cpu_type) - cpu_type = x86_pmu.get_hybrid_cpu_type(); + /* + * This is running on a CPU model that is known to have hybrid + * configurations. But the CPU told us it is not hybrid, shame + * on it. There should be a fixup function provided for these + * troublesome CPUs (->get_hybrid_cpu_type). + */ + if (cpu_type == HYBRID_INTEL_NONE) { + if (x86_pmu.get_hybrid_cpu_type) + cpu_type = x86_pmu.get_hybrid_cpu_type(); + else + return NULL; + } + /* + * This essentially just maps between the 'hybrid_cpu_type' + * and 'hybrid_pmu_type' enums: + */ for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { - if (x86_pmu.hybrid_pmu[i].cpu_type == cpu_type) { - pmu = &x86_pmu.hybrid_pmu[i]; - break; - } + enum hybrid_pmu_type pmu_type = x86_pmu.hybrid_pmu[i].pmu_type; + + if (cpu_type == HYBRID_INTEL_CORE && + pmu_type == hybrid_big) + return &x86_pmu.hybrid_pmu[i]; + if (cpu_type == HYBRID_INTEL_ATOM && + pmu_type == hybrid_small) + return &x86_pmu.hybrid_pmu[i]; } + + return NULL; +} + +static bool init_hybrid_pmu(int cpu) +{ + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + struct x86_hybrid_pmu *pmu = find_hybrid_pmu_for_cpu(); + if (WARN_ON_ONCE(!pmu || (pmu->pmu.type == -1))) { cpuc->pmu = NULL; return false; @@ -5783,7 +5808,7 @@ static bool is_attr_for_this_pmu(struct kobject *kobj, struct attribute *attr) struct perf_pmu_events_hybrid_attr *pmu_attr = container_of(attr, struct perf_pmu_events_hybrid_attr, attr.attr); - return pmu->cpu_type & pmu_attr->pmu_type; + return pmu->pmu_type & pmu_attr->pmu_type; } static umode_t hybrid_events_is_visible(struct kobject *kobj, @@ -5820,7 +5845,7 @@ static umode_t hybrid_format_is_visible(struct kobject *kobj, container_of(attr, struct perf_pmu_format_hybrid_attr, attr.attr); int cpu = hybrid_find_supported_cpu(pmu); - return (cpu >= 0) && (pmu->cpu_type & pmu_attr->pmu_type) ? attr->mode : 0; + return (cpu >= 0) && (pmu->pmu_type & pmu_attr->pmu_type) ? attr->mode : 0; } static struct attribute_group hybrid_group_events_td = { @@ -6711,7 +6736,7 @@ __init int intel_pmu_init(void) /* Initialize big core specific PerfMon capabilities.*/ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; pmu->name = "cpu_core"; - pmu->cpu_type = hybrid_big; + pmu->pmu_type = hybrid_big; intel_pmu_init_glc(&pmu->pmu); pmu->late_ack = true; if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { @@ -6747,7 +6772,7 @@ __init int intel_pmu_init(void) /* Initialize Atom core specific PerfMon capabilities.*/ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; pmu->name = "cpu_atom"; - pmu->cpu_type = hybrid_small; + pmu->pmu_type = hybrid_small; intel_pmu_init_grt(&pmu->pmu); pmu->mid_ack = true; pmu->num_counters = x86_pmu.num_counters; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index e77c32a8f932..14b11d607f47 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -261,7 +261,7 @@ static u64 __adl_latency_data_small(struct perf_event *event, u64 status, { u64 val; - WARN_ON_ONCE(hybrid_pmu(event->pmu)->cpu_type == hybrid_big); + WARN_ON_ONCE(hybrid_pmu(event->pmu)->pmu_type == hybrid_big); dse &= PERF_PEBS_DATA_SOURCE_MASK; val = hybrid_var(event->pmu, pebs_data_source)[dse]; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 38342d1614f5..fb56518356ec 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -658,10 +658,29 @@ enum { #define PERF_PEBS_DATA_SOURCE_MAX 0x10 #define PERF_PEBS_DATA_SOURCE_MASK (PERF_PEBS_DATA_SOURCE_MAX - 1) +enum hybrid_cpu_type { + HYBRID_INTEL_NONE, + HYBRID_INTEL_ATOM = 0x20, + HYBRID_INTEL_CORE = 0x40, +}; + +enum hybrid_pmu_type { + not_hybrid, + hybrid_small = BIT(0), + hybrid_big = BIT(1), + + hybrid_big_small = hybrid_big | hybrid_small, /* only used for matching */ +}; + +#define X86_HYBRID_PMU_ATOM_IDX 0 +#define X86_HYBRID_PMU_CORE_IDX 1 + +#define X86_HYBRID_NUM_PMUS 2 + struct x86_hybrid_pmu { struct pmu pmu; const char *name; - u8 cpu_type; + enum hybrid_pmu_type pmu_type; cpumask_t supported_cpus; union perf_capabilities intel_cap; u64 intel_ctrl; @@ -727,18 +746,6 @@ extern struct static_key_false perf_is_hybrid; __Fp; \ }) -enum hybrid_pmu_type { - hybrid_big = 0x40, - hybrid_small = 0x20, - - hybrid_big_small = hybrid_big | hybrid_small, -}; - -#define X86_HYBRID_PMU_ATOM_IDX 0 -#define X86_HYBRID_PMU_CORE_IDX 1 - -#define X86_HYBRID_NUM_PMUS 2 - /* * struct x86_pmu - generic x86 pmu */ @@ -947,7 +954,7 @@ struct x86_pmu { */ int num_hybrid_pmus; struct x86_hybrid_pmu *hybrid_pmu; - u8 (*get_hybrid_cpu_type) (void); + enum hybrid_cpu_type (*get_hybrid_cpu_type) (void); }; struct x86_perf_task_context_opt { -- Gitee From 312cd1f560ad579ee9a0b1ecefef8fac1c86532a Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 29 Aug 2023 05:58:06 -0700 Subject: [PATCH 06/10] perf/x86/intel: Add common intel_pmu_init_hybrid() mainline inclusion from mainline-v6.10-rc1 commit 97588df87b56e27fd2b5d928d61c7a53e38afbb0 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/IAGLFT CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=97588df87b56e27fd2b5d928d61c7a53e38afbb0 ------------------------------------- The current hybrid initialization codes aren't well organized and are hard to read. Factor out intel_pmu_init_hybrid() to do a common setup for each hybrid PMU. The PMU-specific capability will be updated later via either hard code (ADL) or CPUID hybrid enumeration (MTL). Splitting the ADL and MTL initialization codes, since they have different uarches. The hard code PMU capabilities are not required for MTL either. They can be enumerated by the new leaf 0x23 and IA32_PERF_CAPABILITIES MSR. The hybrid enumeration of the IA32_PERF_CAPABILITIES MSR is broken on MTL. Using the default value. Intel-SIG: commit 97588df87b56 perf/x86/intel: Add common intel_pmu_init_hybrid() Backport as a dependency needed by the GNR distinct pmu name fix Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230829125806.3016082-7-kan.liang@linux.intel.com Signed-off-by: Yunying Sun --- arch/x86/events/intel/core.c | 162 ++++++++++++++++++++++++----------- 1 file changed, 111 insertions(+), 51 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 4bc83a594915..dd56bbff47fb 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4678,6 +4678,16 @@ static void intel_pmu_check_num_counters(int *num_counters, int *num_counters_fixed, u64 *intel_ctrl, u64 fixed_mask); +static inline bool intel_pmu_broken_perf_cap(void) +{ + /* The Perf Metric (Bit 15) is always cleared */ + if ((boot_cpu_data.x86_model == INTEL_FAM6_METEORLAKE) || + (boot_cpu_data.x86_model == INTEL_FAM6_METEORLAKE_L)) + return true; + + return false; +} + static void update_pmu_cap(struct x86_hybrid_pmu *pmu) { unsigned int sub_bitmaps = cpuid_eax(ARCH_PERFMON_EXT_LEAF); @@ -4690,7 +4700,27 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu) pmu->num_counters_fixed = fls(ebx); intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed, &pmu->intel_ctrl, ebx); + pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters); + pmu->unconstrained = (struct event_constraint) + __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, + 0, pmu->num_counters, 0, 0); } + + + if (!intel_pmu_broken_perf_cap()) { + /* Perf Metric (Bit 15) and PEBS via PT (Bit 16) are hybrid enumeration */ + rdmsrl(MSR_IA32_PERF_CAPABILITIES, pmu->intel_cap.capabilities); + } + + if (pmu->intel_cap.perf_metrics) + pmu->intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; + else + pmu->intel_ctrl &= ~(1ULL << GLOBAL_CTRL_EN_PERF_METRICS); + + if (pmu->intel_cap.pebs_output_pt_available) + pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT; + else + pmu->pmu.capabilities |= ~PERF_PMU_CAP_AUX_OUTPUT; } static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void) @@ -6019,10 +6049,52 @@ static void intel_pmu_check_hybrid_pmus(u64 fixed_mask) } } -static __always_inline bool is_mtl(u8 x86_model) +static const struct { enum hybrid_pmu_type id; char *name; } intel_hybrid_pmu_type_map[] __initconst = { + { hybrid_small, "cpu_atom" }, + { hybrid_big, "cpu_core" }, +}; + +static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus) { - return (x86_model == INTEL_FAM6_METEORLAKE) || - (x86_model == INTEL_FAM6_METEORLAKE_L); + unsigned long pmus_mask = pmus; + struct x86_hybrid_pmu *pmu; + int idx = 0, bit; + + x86_pmu.num_hybrid_pmus = hweight_long(pmus_mask); + x86_pmu.hybrid_pmu = kcalloc(x86_pmu.num_hybrid_pmus, + sizeof(struct x86_hybrid_pmu), + GFP_KERNEL); + if (!x86_pmu.hybrid_pmu) + return -ENOMEM; + + static_branch_enable(&perf_is_hybrid); + x86_pmu.filter = intel_pmu_filter; + + for_each_set_bit(bit, &pmus_mask, ARRAY_SIZE(intel_hybrid_pmu_type_map)) { + pmu = &x86_pmu.hybrid_pmu[idx++]; + pmu->pmu_type = intel_hybrid_pmu_type_map[bit].id; + pmu->name = intel_hybrid_pmu_type_map[bit].name; + + pmu->num_counters = x86_pmu.num_counters; + pmu->num_counters_fixed = x86_pmu.num_counters_fixed; + pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters); + pmu->unconstrained = (struct event_constraint) + __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, + 0, pmu->num_counters, 0, 0); + + pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities; + if (pmu->pmu_type & hybrid_small) { + pmu->intel_cap.perf_metrics = 0; + pmu->intel_cap.pebs_output_pt_available = 1; + pmu->mid_ack = true; + } else if (pmu->pmu_type & hybrid_big) { + pmu->intel_cap.perf_metrics = 1; + pmu->intel_cap.pebs_output_pt_available = 0; + pmu->late_ack = true; + } + } + + return 0; } static __always_inline void intel_pmu_init_glc(struct pmu *pmu) @@ -6706,23 +6778,14 @@ __init int intel_pmu_init(void) case INTEL_FAM6_RAPTORLAKE: case INTEL_FAM6_RAPTORLAKE_P: case INTEL_FAM6_RAPTORLAKE_S: - case INTEL_FAM6_METEORLAKE: - case INTEL_FAM6_METEORLAKE_L: /* * Alder Lake has 2 types of CPU, core and atom. * * Initialize the common PerfMon capabilities here. */ - x86_pmu.hybrid_pmu = kcalloc(X86_HYBRID_NUM_PMUS, - sizeof(struct x86_hybrid_pmu), - GFP_KERNEL); - if (!x86_pmu.hybrid_pmu) - return -ENOMEM; - static_branch_enable(&perf_is_hybrid); - x86_pmu.num_hybrid_pmus = X86_HYBRID_NUM_PMUS; + intel_pmu_init_hybrid(hybrid_big_small); x86_pmu.pebs_latency_data = adl_latency_data_small; - x86_pmu.filter = intel_pmu_filter; x86_pmu.get_event_constraints = adl_get_event_constraints; x86_pmu.hw_config = adl_hw_config; x86_pmu.get_hybrid_cpu_type = adl_get_hybrid_cpu_type; @@ -6735,10 +6798,7 @@ __init int intel_pmu_init(void) /* Initialize big core specific PerfMon capabilities.*/ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; - pmu->name = "cpu_core"; - pmu->pmu_type = hybrid_big; intel_pmu_init_glc(&pmu->pmu); - pmu->late_ack = true; if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { pmu->num_counters = x86_pmu.num_counters + 2; pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1; @@ -6763,45 +6823,45 @@ __init int intel_pmu_init(void) pmu->unconstrained = (struct event_constraint) __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, 0, pmu->num_counters, 0, 0); - pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities; - pmu->intel_cap.perf_metrics = 1; - pmu->intel_cap.pebs_output_pt_available = 0; - pmu->extra_regs = intel_glc_extra_regs; /* Initialize Atom core specific PerfMon capabilities.*/ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; - pmu->name = "cpu_atom"; - pmu->pmu_type = hybrid_small; intel_pmu_init_grt(&pmu->pmu); - pmu->mid_ack = true; - pmu->num_counters = x86_pmu.num_counters; - pmu->num_counters_fixed = x86_pmu.num_counters_fixed; - pmu->max_pebs_events = x86_pmu.max_pebs_events; - pmu->unconstrained = (struct event_constraint) - __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, - 0, pmu->num_counters, 0, 0); - pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities; - pmu->intel_cap.perf_metrics = 0; - pmu->intel_cap.pebs_output_pt_available = 1; - - if (is_mtl(boot_cpu_data.x86_model)) { - x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].extra_regs = intel_rwc_extra_regs; - x86_pmu.pebs_latency_data = mtl_latency_data_small; - extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? - mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr; - mem_attr = mtl_hybrid_mem_attrs; - intel_pmu_pebs_data_source_mtl(); - x86_pmu.get_event_constraints = mtl_get_event_constraints; - pmu->extra_regs = intel_cmt_extra_regs; - pr_cont("Meteorlake Hybrid events, "); - name = "meteorlake_hybrid"; - } else { - x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; - intel_pmu_pebs_data_source_adl(); - pr_cont("Alderlake Hybrid events, "); - name = "alderlake_hybrid"; - } + + x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; + intel_pmu_pebs_data_source_adl(); + pr_cont("Alderlake Hybrid events, "); + name = "alderlake_hybrid"; + break; + + case INTEL_FAM6_METEORLAKE: + case INTEL_FAM6_METEORLAKE_L: + intel_pmu_init_hybrid(hybrid_big_small); + + x86_pmu.pebs_latency_data = mtl_latency_data_small; + x86_pmu.get_event_constraints = mtl_get_event_constraints; + x86_pmu.hw_config = adl_hw_config; + + td_attr = adl_hybrid_events_attrs; + mem_attr = mtl_hybrid_mem_attrs; + tsx_attr = adl_hybrid_tsx_attrs; + extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? + mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr; + + /* Initialize big core specific PerfMon capabilities.*/ + pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; + intel_pmu_init_glc(&pmu->pmu); + pmu->extra_regs = intel_rwc_extra_regs; + + /* Initialize Atom core specific PerfMon capabilities.*/ + pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; + intel_pmu_init_grt(&pmu->pmu); + pmu->extra_regs = intel_cmt_extra_regs; + + intel_pmu_pebs_data_source_mtl(); + pr_cont("Meteorlake Hybrid events, "); + name = "meteorlake_hybrid"; break; default: @@ -6913,7 +6973,7 @@ __init int intel_pmu_init(void) if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics) x86_pmu.intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; - if (is_hybrid()) + if (is_hybrid() && !boot_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT)) intel_pmu_check_hybrid_pmus((u64)fixed_mask); if (x86_pmu.intel_cap.pebs_timing_info) -- Gitee From 60bb06b11097237ad40c5ca0c424d98af4421612 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 8 Jul 2024 12:33:34 -0700 Subject: [PATCH 07/10] perf/x86/intel: Hide Topdown metrics events if the feature is not enumerated mainline inclusion from mainline-v6.11-rc1 commit 556a7c039a52c21da33eaae9269984a1ef59189b category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/IAGLFT CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=556a7c039a52c21da33eaae9269984a1ef59189b ------------------------------------- The below error is observed on Ice Lake VM. $ perf stat Error: The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (slots). /bin/dmesg | grep -i perf may provide additional information. In a virtualization env, the Topdown metrics and the slots event haven't been supported yet. The guest CPUID doesn't enumerate them. However, the current kernel unconditionally exposes the slots event and the Topdown metrics events to sysfs, which misleads the perf tool and triggers the error. Hide the perf-metrics topdown events and the slots event if the perf-metrics feature is not enumerated. The big core of a hybrid platform can also supports the perf-metrics feature. Fix the hybrid platform as well. Intel-SIG: commit 556a7c039a52 perf/x86/intel: Hide Topdown metrics events if the feature is not enumerated Backport 3 core pmu bugfixes to kernel v6.6 Closes: https://lore.kernel.org/lkml/CAM9d7cj8z+ryyzUHR+P1Dcpot2jjW+Qcc4CPQpfafTXN=LEU0Q@mail.gmail.com/ Reported-by: Dongli Zhang Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Tested-by: Dongli Zhang Link: https://lkml.kernel.org/r/20240708193336.1192217-2-kan.liang@linux.intel.com Signed-off-by: Yunying Sun --- arch/x86/events/intel/core.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index dd56bbff47fb..6aac4b80be1f 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -5675,8 +5675,22 @@ exra_is_visible(struct kobject *kobj, struct attribute *attr, int i) return x86_pmu.version >= 2 ? attr->mode : 0; } +static umode_t +td_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + /* + * Hide the perf metrics topdown events + * if the feature is not enumerated. + */ + if (x86_pmu.num_topdown_events) + return x86_pmu.intel_cap.perf_metrics ? attr->mode : 0; + + return attr->mode; +} + static struct attribute_group group_events_td = { .name = "events", + .is_visible = td_is_visible, }; static struct attribute_group group_events_mem = { @@ -5878,9 +5892,27 @@ static umode_t hybrid_format_is_visible(struct kobject *kobj, return (cpu >= 0) && (pmu->pmu_type & pmu_attr->pmu_type) ? attr->mode : 0; } +static umode_t hybrid_td_is_visible(struct kobject *kobj, + struct attribute *attr, int i) +{ + struct device *dev = kobj_to_dev(kobj); + struct x86_hybrid_pmu *pmu = + container_of(dev_get_drvdata(dev), struct x86_hybrid_pmu, pmu); + + if (!is_attr_for_this_pmu(kobj, attr)) + return 0; + + + /* Only the big core supports perf metrics */ + if (pmu->pmu_type == hybrid_big) + return pmu->intel_cap.perf_metrics ? attr->mode : 0; + + return attr->mode; +} + static struct attribute_group hybrid_group_events_td = { .name = "events", - .is_visible = hybrid_events_is_visible, + .is_visible = hybrid_td_is_visible, }; static struct attribute_group hybrid_group_events_mem = { -- Gitee From 54fa0758817d7209e9bc6b4662a7f9e62649b946 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 8 Jul 2024 12:33:35 -0700 Subject: [PATCH 08/10] perf/x86/intel: Add a distinct name for Granite Rapids MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mainline inclusion from mainline-v6.11-rc1 commit fa0c1c9d283b37fdb7fc1dcccbb88fc8f48a4aa4 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/IAGLFT CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=fa0c1c9d283b37fdb7fc1dcccbb88fc8f48a4aa4 ------------------------------------- Currently, the Sapphire Rapids and Granite Rapids share the same PMU name, sapphire_rapids. Because from the kernel’s perspective, GNR is similar to SPR. The only key difference is that they support different extra MSRs. The code path and the PMU name are shared. However, from end users' perspective, they are quite different. Besides the extra MSRs, GNR has a newer PEBS format, supports Retire Latency, supports new CPUID enumeration architecture, doesn't required the load-latency AUX event, has additional TMA Level 1 Architectural Events, etc. The differences can be enumerated by CPUID or the PERF_CAPABILITIES MSR. They weren't reflected in the model-specific kernel setup. But it is worth to have a distinct PMU name for GNR. Intel-SIG: commit fa0c1c9d283b perf/x86/intel: Add a distinct name for Granite Rapids Backport 3 core pmu bugfixes to kernel v6.6 Fixes: a6742cb90b56 ("perf/x86/intel: Fix the FRONTEND encoding on GNR and MTL") Suggested-by: Ahmad Yasin Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20240708193336.1192217-3-kan.liang@linux.intel.com Signed-off-by: Yunying Sun --- arch/x86/events/intel/core.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 6aac4b80be1f..230b45c77afe 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -6785,12 +6785,18 @@ __init int intel_pmu_init(void) case INTEL_FAM6_EMERALDRAPIDS_X: x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; x86_pmu.extra_regs = intel_glc_extra_regs; - fallthrough; + pr_cont("Sapphire Rapids events, "); + name = "sapphire_rapids"; + goto glc_common; + case INTEL_FAM6_GRANITERAPIDS_X: case INTEL_FAM6_GRANITERAPIDS_D: + x86_pmu.extra_regs = intel_rwc_extra_regs; + pr_cont("Granite Rapids events, "); + name = "granite_rapids"; + + glc_common: intel_pmu_init_glc(NULL); - if (!x86_pmu.extra_regs) - x86_pmu.extra_regs = intel_rwc_extra_regs; x86_pmu.pebs_ept = 1; x86_pmu.hw_config = hsw_hw_config; x86_pmu.get_event_constraints = glc_get_event_constraints; @@ -6801,8 +6807,6 @@ __init int intel_pmu_init(void) td_attr = glc_td_events_attrs; tsx_attr = glc_tsx_events_attrs; intel_pmu_pebs_data_source_skl(true); - pr_cont("Sapphire Rapids events, "); - name = "sapphire_rapids"; break; case INTEL_FAM6_ALDERLAKE: -- Gitee From fc18a9b216ce480c7c0497c4a5d7a3ae5d9f042e Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 11 Sep 2023 06:51:28 -0700 Subject: [PATCH 09/10] perf/x86/intel: Fix broken fixed event constraints extension mainline inclusion from mainline-v6.7-rc1 commit 950ecdc672aec9cd29036b2e2535b07c103af494 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/IAGLFT CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=950ecdc672aec9cd29036b2e2535b07c103af494 ------------------------------------- Unnecessary multiplexing is triggered when running an "instructions" event on an MTL. perf stat -e cpu_core/instructions/,cpu_core/instructions/ -a sleep 1 Performance counter stats for 'system wide': 115,489,000 cpu_core/instructions/ (50.02%) 127,433,777 cpu_core/instructions/ (49.98%) 1.002294504 seconds time elapsed Linux architectural perf events, e.g., cycles and instructions, usually have dedicated fixed counters. These events also have equivalent events which can be used in the general-purpose counters. The counters are precious. In the intel_pmu_check_event_constraints(), perf check/extend the event constraints of these events. So these events can utilize both fixed counters and general-purpose counters. The following cleanup commit: 97588df87b56 ("perf/x86/intel: Add common intel_pmu_init_hybrid()") forgot adding the intel_pmu_check_event_constraints() into update_pmu_cap(). The architectural perf events cannot utilize the general-purpose counters. The code to check and update the counters, event constraints and extra_regs is the same among hybrid systems. Move intel_pmu_check_hybrid_pmus() to init_hybrid_pmu(), and emove the duplicate check in update_pmu_cap(). Intel-SIG: commit 950ecdc672ae perf/x86/intel: Fix broken fixed event constraints extension Backport following hybrid pmu bugfixes for commit 97588df87b56 Fixes: 97588df87b56 ("perf/x86/intel: Add common intel_pmu_init_hybrid()") Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230911135128.2322833-1-kan.liang@linux.intel.com Signed-off-by: Yunying Sun --- arch/x86/events/intel/core.c | 65 +++++++++++++++--------------------- 1 file changed, 26 insertions(+), 39 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 230b45c77afe..1e2fde76780c 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4678,6 +4678,13 @@ static void intel_pmu_check_num_counters(int *num_counters, int *num_counters_fixed, u64 *intel_ctrl, u64 fixed_mask); +static void intel_pmu_check_event_constraints(struct event_constraint *event_constraints, + int num_counters, + int num_counters_fixed, + u64 intel_ctrl); + +static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs); + static inline bool intel_pmu_broken_perf_cap(void) { /* The Perf Metric (Bit 15) is always cleared */ @@ -4698,12 +4705,6 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu) &eax, &ebx, &ecx, &edx); pmu->num_counters = fls(eax); pmu->num_counters_fixed = fls(ebx); - intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed, - &pmu->intel_ctrl, ebx); - pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters); - pmu->unconstrained = (struct event_constraint) - __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, - 0, pmu->num_counters, 0, 0); } @@ -4711,6 +4712,16 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu) /* Perf Metric (Bit 15) and PEBS via PT (Bit 16) are hybrid enumeration */ rdmsrl(MSR_IA32_PERF_CAPABILITIES, pmu->intel_cap.capabilities); } +} + +static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu) +{ + intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed, + &pmu->intel_ctrl, (1ULL << pmu->num_counters_fixed) - 1); + pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters); + pmu->unconstrained = (struct event_constraint) + __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, + 0, pmu->num_counters, 0, 0); if (pmu->intel_cap.perf_metrics) pmu->intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; @@ -4721,6 +4732,13 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu) pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT; else pmu->pmu.capabilities |= ~PERF_PMU_CAP_AUX_OUTPUT; + + intel_pmu_check_event_constraints(pmu->event_constraints, + pmu->num_counters, + pmu->num_counters_fixed, + pmu->intel_ctrl); + + intel_pmu_check_extra_regs(pmu->extra_regs); } static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void) @@ -4776,6 +4794,8 @@ static bool init_hybrid_pmu(int cpu) if (this_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT)) update_pmu_cap(pmu); + intel_pmu_check_hybrid_pmus(pmu); + if (!check_hw_exists(&pmu->pmu, pmu->num_counters, pmu->num_counters_fixed)) return false; @@ -6051,36 +6071,6 @@ static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs) } } -static void intel_pmu_check_hybrid_pmus(u64 fixed_mask) -{ - struct x86_hybrid_pmu *pmu; - int i; - - for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { - pmu = &x86_pmu.hybrid_pmu[i]; - - intel_pmu_check_num_counters(&pmu->num_counters, - &pmu->num_counters_fixed, - &pmu->intel_ctrl, - fixed_mask); - - if (pmu->intel_cap.perf_metrics) { - pmu->intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; - pmu->intel_ctrl |= INTEL_PMC_MSK_FIXED_SLOTS; - } - - if (pmu->intel_cap.pebs_output_pt_available) - pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT; - - intel_pmu_check_event_constraints(pmu->event_constraints, - pmu->num_counters, - pmu->num_counters_fixed, - pmu->intel_ctrl); - - intel_pmu_check_extra_regs(pmu->extra_regs); - } -} - static const struct { enum hybrid_pmu_type id; char *name; } intel_hybrid_pmu_type_map[] __initconst = { { hybrid_small, "cpu_atom" }, { hybrid_big, "cpu_core" }, @@ -7009,9 +6999,6 @@ __init int intel_pmu_init(void) if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics) x86_pmu.intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; - if (is_hybrid() && !boot_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT)) - intel_pmu_check_hybrid_pmus((u64)fixed_mask); - if (x86_pmu.intel_cap.pebs_timing_info) x86_pmu.flags |= PMU_FL_RETIRE_LATENCY; -- Gitee From 6cedcf0d4d6538349cb2e0d713fd54feddfd0717 Mon Sep 17 00:00:00 2001 From: Dapeng Mi Date: Tue, 21 Nov 2023 09:46:28 +0800 Subject: [PATCH 10/10] perf/x86/intel: Correct incorrect 'or' operation for PMU capabilities mainline inclusion from mainline-v6.7-rc3 commit e8df9d9f4209c04161321d8c12640ae560f65939 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/IAGLFT CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e8df9d9f4209c04161321d8c12640ae560f65939 ------------------------------------- When running perf-stat command on Intel hybrid platform, perf-stat reports the following errors: sudo taskset -c 7 ./perf stat -vvvv -e cpu_atom/instructions/ sleep 1 Opening: cpu/cycles/:HG ------------------------------------------------------------ perf_event_attr: type 0 (PERF_TYPE_HARDWARE) config 0xa00000000 disabled 1 ------------------------------------------------------------ sys_perf_event_open: pid 0 cpu -1 group_fd -1 flags 0x8 sys_perf_event_open failed, error -16 Performance counter stats for 'sleep 1': cpu_atom/instructions/ It looks the cpu_atom/instructions/ event can't be enabled on atom PMU even when the process is pinned on atom core. Investigation shows that exclusive_event_init() helper always returns -EBUSY error in the perf event creation. That's strange since the atom PMU should not be an exclusive PMU. Further investigation shows the issue was introduced by commit: 97588df87b56 ("perf/x86/intel: Add common intel_pmu_init_hybrid()") The commit originally intents to clear the bit PERF_PMU_CAP_AUX_OUTPUT from PMU capabilities if intel_cap.pebs_output_pt_available is not set, but it incorrectly uses 'or' operation and leads to all PMU capabilities bits are set to 1 except bit PERF_PMU_CAP_AUX_OUTPUT. Testing this fix on Intel hybrid platforms, the observed issues disappear. Intel-SIG: commit e8df9d9f4209 perf/x86/intel: Correct incorrect 'or' operation for PMU capabilities Backport following hybrid pmu bugfixes for commit 97588df87b56 Fixes: 97588df87b56 ("perf/x86/intel: Add common intel_pmu_init_hybrid()") Signed-off-by: Dapeng Mi Signed-off-by: Ingo Molnar Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231121014628.729989-1-dapeng1.mi@linux.intel.com Signed-off-by: Yunying Sun --- arch/x86/events/intel/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 1e2fde76780c..8b8e9189fd41 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4731,7 +4731,7 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu) if (pmu->intel_cap.pebs_output_pt_available) pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT; else - pmu->pmu.capabilities |= ~PERF_PMU_CAP_AUX_OUTPUT; + pmu->pmu.capabilities &= ~PERF_PMU_CAP_AUX_OUTPUT; intel_pmu_check_event_constraints(pmu->event_constraints, pmu->num_counters, -- Gitee