diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 9a97651b7afb1eb3aaf4efbd95a8c995ae835fdc..ea9a8d0c0b21bf810da7a5d09169db8a43c092a5 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -978,6 +978,12 @@ int armpmu_register(struct arm_pmu *pmu)
 	if (ret)
 		return ret;
 
+	/*
+	 * By this stage the supported CPUs are known on both DT and ACPI
+	 * platforms, so detect whether the core is SMT.
+	 */
+	pmu->has_smt = topology_core_has_smt(cpumask_first(&pmu->supported_cpus));
+
 	if (!pmu->set_event_filter)
 		pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE;
 
diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
index 29a659d5a273e2ce983199834a9e6abf1671b3bc..dac5a4dbdf1e920cc0847f863e6ece1585013823 100644
--- a/drivers/perf/arm_pmuv3.c
+++ b/drivers/perf/arm_pmuv3.c
@@ -1195,13 +1195,40 @@ static int armv8pmu_get_hw_metric_event_idx(struct pmu_hw_events *cpuc,
 }
 #endif
 
-static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
-				  struct perf_event *event)
+static bool armv8pmu_can_use_pmccntr(struct pmu_hw_events *cpuc,
+				     struct perf_event *event)
 {
 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
 	unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT;
 
+	if (evtype != ARMV8_PMUV3_PERFCTR_CPU_CYCLES)
+		return false;
+
+	/*
+	 * A CPU_CYCLES event with threshold counting cannot use PMCCNTR_EL0
+	 * since the cycle counter lacks threshold support.
+	 */
+	if (armv8pmu_event_get_threshold(&event->attr))
+		return false;
+
+	/*
+	 * PMCCNTR_EL0 increments from the processor clock rather than the
+	 * PE clock (ARM DDI 0487 L.b, D13.1.3), so on a multi-threaded
+	 * implementation it keeps counting on a PE in WFI while any of its
+	 * SMT siblings is not idle. Don't use it on SMT cores.
+	 */
+	if (cpu_pmu->has_smt)
+		return false;
+
+	return true;
+}
+
+static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
+				  struct perf_event *event)
+{
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+
 #ifdef CONFIG_HISILICON_HW_METRIC
 	if (armv8pmu_event_is_hw_metric(event))
 		return armv8pmu_get_hw_metric_event_idx(cpuc, event);
@@ -1211,8 +1238,7 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
 #endif
 
 	/* Always prefer to place a cycle counter into the cycle counter. */
-	if ((evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) &&
-	    !armv8pmu_event_get_threshold(&event->attr)) {
+	if (armv8pmu_can_use_pmccntr(cpuc, event)) {
 		if (!test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask))
 			return ARMV8_IDX_CYCLE_COUNTER;
 		else if (armv8pmu_event_is_64bit(event) &&
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index a63d61ca55afc82ec865de9de60d8fe667964fef..c531cec149d1de4fe832951ec05d48e26c7bbad1 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -100,6 +100,17 @@ void remove_cpu_topology(unsigned int cpuid);
 void reset_cpu_topology(void);
 int parse_acpi_topology(void);
 void freq_inv_set_max_ratio(int cpu, u64 max_rate);
+
+/*
+ * Architectures like arm64 have no reliable architectural way to get SMT
+ * information and depend on what the firmware (ACPI/OF) reports. Non-SMT
+ * cores never initialize thread_id, so use it to detect an SMT implementation.
+ */
+static inline bool topology_core_has_smt(int cpu)
+{
+	return cpu_topology[cpu].thread_id != -1;
+}
+
 #endif
 
 #endif /* _LINUX_ARCH_TOPOLOGY_H_ */
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index b4964e1f66c49837d5e182cab843b48bbb7ef80f..850f363cac87c99862bc45580dfcd5e558da80ee 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -110,6 +110,7 @@ struct arm_pmu {
 	cpumask_t	supported_cpus;
 	char		*name;
 	int		pmuver;
+	bool		has_smt;
 	irqreturn_t	(*handle_irq)(struct arm_pmu *pmu);
 	void		(*enable)(struct perf_event *event);
 	void		(*disable)(struct perf_event *event);
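
For reference, below is a minimal userspace sketch of the same topology check: it treats a CPU as part of an SMT core when its thread_siblings_list in sysfs names more than one CPU, mirroring what topology_core_has_smt() derives from thread_id inside the kernel. The sysfs parsing and the cpu_has_smt() helper name are illustrative assumptions, not part of this patch.

/*
 * Userspace sketch (assumption, not part of the patch): a CPU belongs to an
 * SMT core if its thread_siblings_list names more than one CPU. The kernel
 * formats that file as a single CPU ("3"), a range ("0-1") or a list ("0,4"),
 * so any ',' or '-' implies at least one sibling.
 */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool cpu_has_smt(int cpu)
{
	char path[128];
	char buf[256];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list",
		 cpu);
	f = fopen(path, "r");
	if (!f)
		return false;

	if (!fgets(buf, sizeof(buf), f)) {
		fclose(f);
		return false;
	}
	fclose(f);

	return strchr(buf, ',') || strchr(buf, '-');
}

int main(void)
{
	printf("cpu0 %s part of an SMT core\n",
	       cpu_has_smt(0) ? "is" : "is not");
	return 0;
}

On a system where this check reports SMT, the patch makes CPU_CYCLES events fall back to a generic event counter instead of PMCCNTR_EL0, so an idle (WFI) thread no longer accumulates cycles driven by a busy sibling.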