From 0dc7bedf6e19c10febe76fee73351bc818184b73 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 10 Mar 2021 08:21:04 +0530 Subject: [PATCH 01/21] arch_topology: Allow multiple entities to provide sched_freq_tick() callback ANBZ: #26062 commit 01e055c120a46e78650b5f903088badbbdaae9ad upstream This patch attempts to make it generic enough so other parts of the kernel can also provide their own implementation of scale_freq_tick() callback, which is called by the scheduler periodically to update the per-cpu arch_freq_scale variable. The implementations now need to provide 'struct scale_freq_data' for the CPUs for which they have hardware counters available, and a callback gets registered for each possible CPU in a per-cpu variable. The arch specific (or ARM AMU) counters are updated to adapt to this and they take the highest priority if they are available, i.e. they will be used instead of CPPC based counters for example. The special code to rebuild the sched domains, in case invariance status change for the system, is moved out of arm64 specific code and is added to arch_topology.c. Note that this also defines SCALE_FREQ_SOURCE_CPUFREQ but doesn't use it and it is added to show that cpufreq is also acts as source of information for FIE and will be used by default if no other counters are supported for a platform. Reviewed-by: Ionela Voinescu Tested-by: Ionela Voinescu Acked-by: Will Deacon # for arm64 Tested-by: Vincent Guittot Signed-off-by: Viresh Kumar Signed-off-by: Lin Ruier <2878455989@qq.com> --- arch/arm64/include/asm/topology.h | 1 + arch/arm64/kernel/topology.c | 47 ----------------- drivers/base/arch_topology.c | 85 ++++++++++++++++++++++++++++--- include/linux/arch_topology.h | 18 +++++-- 4 files changed, 95 insertions(+), 56 deletions(-) diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index ecd02a7e1506..665ba58d3382 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -31,6 +31,7 @@ void topology_scale_freq_tick(void); #endif /* CONFIG_ARM64_AMU_EXTN */ /* Replace task scheduler's default frequency-invariant accounting */ +#define arch_scale_freq_tick topology_scale_freq_tick #define arch_set_freq_scale topology_set_freq_scale #define arch_scale_freq_capacity topology_get_freq_scale #define arch_scale_freq_invariant topology_scale_freq_invariant diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index ba5482201fef..4d26d005cb0d 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -415,51 +415,4 @@ static int __init init_amu_fie(void) } late_initcall_sync(init_amu_fie); -bool arch_freq_counters_available(const struct cpumask *cpus) -{ - return amu_freq_invariant() && - cpumask_subset(cpus, amu_fie_cpus); -} - -void topology_scale_freq_tick(void) -{ - u64 prev_core_cnt, prev_const_cnt; - u64 core_cnt, const_cnt, scale; - int cpu = smp_processor_id(); - - if (!amu_freq_invariant()) - return; - - if (!cpumask_test_cpu(cpu, amu_fie_cpus)) - return; - - const_cnt = read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0); - core_cnt = read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0); - prev_const_cnt = this_cpu_read(arch_const_cycles_prev); - prev_core_cnt = this_cpu_read(arch_core_cycles_prev); - - if (unlikely(core_cnt <= prev_core_cnt || - const_cnt <= prev_const_cnt)) - goto store_and_exit; - - /* - * /\core arch_max_freq_scale - * scale = ------- * -------------------- - * /\const SCHED_CAPACITY_SCALE - * - * See validate_cpu_freq_invariance_counters() for details on - * 
arch_max_freq_scale and the use of SCHED_CAPACITY_SHIFT. - */ - scale = core_cnt - prev_core_cnt; - scale *= this_cpu_read(arch_max_freq_scale); - scale = div64_u64(scale >> SCHED_CAPACITY_SHIFT, - const_cnt - prev_const_cnt); - - scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE); - this_cpu_write(freq_scale, (unsigned long)scale); - -store_and_exit: - this_cpu_write(arch_core_cycles_prev, core_cnt); - this_cpu_write(arch_const_cycles_prev, const_cnt); -} #endif /* CONFIG_ARM64_AMU_EXTN */ diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 9e5a33fa99fd..394888aabb5f 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -21,17 +21,90 @@ #include #include +static DEFINE_PER_CPU(struct scale_freq_data *, sft_data); +static struct cpumask scale_freq_counters_mask; +static bool scale_freq_invariant; + +static bool supports_scale_freq_counters(const struct cpumask *cpus) +{ + return cpumask_subset(cpus, &scale_freq_counters_mask); +} + bool topology_scale_freq_invariant(void) { return cpufreq_supports_freq_invariance() || - arch_freq_counters_available(cpu_online_mask); + supports_scale_freq_counters(cpu_online_mask); } -__weak bool arch_freq_counters_available(const struct cpumask *cpus) +static void update_scale_freq_invariant(bool status) { - return false; + if (scale_freq_invariant == status) + return; + + /* + * Task scheduler behavior depends on frequency invariance support, + * either cpufreq or counter driven. If the support status changes as + * a result of counter initialisation and use, retrigger the build of + * scheduling domains to ensure the information is propagated properly. + */ + if (topology_scale_freq_invariant() == status) { + scale_freq_invariant = status; + } } -DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE; + +void topology_set_scale_freq_source(struct scale_freq_data *data, + const struct cpumask *cpus) +{ + struct scale_freq_data *sfd; + int cpu; + + /* + * Avoid calling rebuild_sched_domains() unnecessarily if FIE is + * supported by cpufreq. + */ + if (cpumask_empty(&scale_freq_counters_mask)) + scale_freq_invariant = topology_scale_freq_invariant(); + + for_each_cpu(cpu, cpus) { + sfd = per_cpu(sft_data, cpu); + + /* Use ARCH provided counters whenever possible */ + if (!sfd || sfd->source != SCALE_FREQ_SOURCE_ARCH) { + per_cpu(sft_data, cpu) = data; + cpumask_set_cpu(cpu, &scale_freq_counters_mask); + } + } + + update_scale_freq_invariant(true); +} + +void topology_clear_scale_freq_source(enum scale_freq_source source, + const struct cpumask *cpus) +{ + struct scale_freq_data *sfd; + int cpu; + + for_each_cpu(cpu, cpus) { + sfd = per_cpu(sft_data, cpu); + + if (sfd && sfd->source == source) { + per_cpu(sft_data, cpu) = NULL; + cpumask_clear_cpu(cpu, &scale_freq_counters_mask); + } + } + + update_scale_freq_invariant(false); +} + +void topology_scale_freq_tick(void) +{ + struct scale_freq_data *sfd = *this_cpu_ptr(&sft_data); + + if (sfd) + sfd->set_freq_scale(); +} + +DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE; void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq, unsigned long max_freq) @@ -47,13 +120,13 @@ void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq, * want to update the scale factor with information from CPUFREQ. * Instead the scale factor will be updated from arch_scale_freq_tick. 
*/ - if (arch_freq_counters_available(cpus)) + if (supports_scale_freq_counters(cpus)) return; scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq; for_each_cpu(i, cpus) - per_cpu(freq_scale, i) = scale; + per_cpu(arch_freq_scale, i) = scale; } DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 414fd7f21768..820549fdb167 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -23,18 +23,30 @@ static inline unsigned long topology_get_cpu_scale(int cpu) void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity); -DECLARE_PER_CPU(unsigned long, freq_scale); +DECLARE_PER_CPU(unsigned long, arch_freq_scale); static inline unsigned long topology_get_freq_scale(int cpu) { - return per_cpu(freq_scale, cpu); + return per_cpu(arch_freq_scale, cpu); } void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq, unsigned long max_freq); bool topology_scale_freq_invariant(void); -bool arch_freq_counters_available(const struct cpumask *cpus); +enum scale_freq_source { + SCALE_FREQ_SOURCE_CPUFREQ = 0, + SCALE_FREQ_SOURCE_ARCH, +}; + +struct scale_freq_data { + enum scale_freq_source source; + void (*set_freq_scale)(void); +}; + +void topology_scale_freq_tick(void); +void topology_set_scale_freq_source(struct scale_freq_data *data, const struct cpumask *cpus); +void topology_clear_scale_freq_source(enum scale_freq_source source, const struct cpumask *cpus); DECLARE_PER_CPU(unsigned long, thermal_pressure); -- Gitee From fe54c60708d4ac58522aedc6217ce14a21cc9278 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 18 Jun 2021 13:42:23 +0530 Subject: [PATCH 02/21] cpufreq: CPPC: Pass structure instance by reference and Add support for frequency invariance ANBZ: #26062 commit 4c38f2df71c8e33c0b64865992d693f5022eeaad upstream The Frequency Invariance Engine (FIE) is providing a frequency scaling correction factor that helps achieve more accurate load-tracking. Normally, this scaling factor can be obtained directly with the help of the cpufreq drivers as they know the exact frequency the hardware is running at. But that isn't the case for CPPC cpufreq driver. Another way of obtaining that is using the arch specific counter support, which is already present in kernel, but that hardware is optional for platforms. This patch updates the CPPC driver to register itself with the topology core to provide its own implementation (cppc_scale_freq_tick()) of topology_scale_freq_tick() which gets called by the scheduler on every tick. Note that the arch specific counters have higher priority than CPPC counters, if available, though the CPPC driver doesn't need to have any special handling for that. On an invocation of cppc_scale_freq_tick(), we schedule an irq work (since we reach here from hard-irq context), which then schedules a normal work item and cppc_scale_freq_workfn() updates the per_cpu arch_freq_scale variable based on the counter updates since the last tick. To allow platforms to disable this CPPC counter-based frequency invariance support, this is all done under CONFIG_ACPI_CPPC_CPUFREQ_FIE, which is enabled by default. This also exports sched_setattr_nocheck() as the CPPC driver can be built as a module. Cc: linux-acpi@vger.kernel.org Reviewed-by: Ionela Voinescu Tested-by: Ionela Voinescu Tested-by: Vincent Guittot Acked-by: Rafael J. 
Wysocki Signed-off-by: Viresh Kumar Signed-off-by: Lin Ruier <2878455989@qq.com> --- drivers/cpufreq/Kconfig.arm | 10 ++ drivers/cpufreq/cppc_cpufreq.c | 262 ++++++++++++++++++++++++++++++--- include/linux/arch_topology.h | 1 + kernel/sched/core.c | 1 + 4 files changed, 254 insertions(+), 20 deletions(-) diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 1f73fa75b1a0..4a95897f1d67 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -19,6 +19,16 @@ config ACPI_CPPC_CPUFREQ If in doubt, say N. +config ACPI_CPPC_CPUFREQ_FIE + bool "Frequency Invariance support for CPPC cpufreq driver" + depends on ACPI_CPPC_CPUFREQ && GENERIC_ARCH_TOPOLOGY + default y + help + This extends frequency invariance support in the CPPC cpufreq driver, + by using CPPC delivered and reference performance counters. + + If in doubt, say N. + config ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM tristate "Allwinner nvmem based SUN50I CPUFreq driver" depends on ARCH_SUNXI diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 1c3f7a8d40eb..dec35155e50d 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -10,14 +10,18 @@ #define pr_fmt(fmt) "CPPC Cpufreq:" fmt +#include #include #include #include #include #include #include +#include +#include #include #include +#include #include @@ -57,6 +61,215 @@ static struct cppc_workaround_oem_info wa_info[] = { } }; +#ifdef CONFIG_ACPI_CPPC_CPUFREQ_FIE + +/* Frequency invariance support */ +struct cppc_freq_invariance { + int cpu; + struct irq_work irq_work; + struct kthread_work work; + struct cppc_perf_fb_ctrs prev_perf_fb_ctrs; + struct cppc_cpudata *cpu_data; +}; + +static DEFINE_PER_CPU(struct cppc_freq_invariance, cppc_freq_inv); +static struct kthread_worker *kworker_fie; +static bool fie_disabled; + +static struct cpufreq_driver cppc_cpufreq_driver; +static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu); +static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data, + struct cppc_perf_fb_ctrs *fb_ctrs_t0, + struct cppc_perf_fb_ctrs *fb_ctrs_t1); + +/** + * cppc_scale_freq_workfn - CPPC arch_freq_scale updater for frequency invariance + * @work: The work item. + * + * The CPPC driver register itself with the topology core to provide its own + * implementation (cppc_scale_freq_tick()) of topology_scale_freq_tick() which + * gets called by the scheduler on every tick. + * + * Note that the arch specific counters have higher priority than CPPC counters, + * if available, though the CPPC driver doesn't need to have any special + * handling for that. + * + * On an invocation of cppc_scale_freq_tick(), we schedule an irq work (since we + * reach here from hard-irq context), which then schedules a normal work item + * and cppc_scale_freq_workfn() updates the per_cpu arch_freq_scale variable + * based on the counter updates since the last tick. 
+ */ +static void cppc_scale_freq_workfn(struct kthread_work *work) +{ + struct cppc_freq_invariance *cppc_fi; + struct cppc_perf_fb_ctrs fb_ctrs = {0}; + struct cppc_cpudata *cpu_data; + unsigned long local_freq_scale; + u64 perf; + + cppc_fi = container_of(work, struct cppc_freq_invariance, work); + cpu_data = cppc_fi->cpu_data; + + if (cppc_get_perf_ctrs(cppc_fi->cpu, &fb_ctrs)) { + pr_warn("%s: failed to read perf counters\n", __func__); + return; + } + + cppc_fi->prev_perf_fb_ctrs = fb_ctrs; + perf = cppc_perf_from_fbctrs(cpu_data, &cppc_fi->prev_perf_fb_ctrs, + &fb_ctrs); + + perf <<= SCHED_CAPACITY_SHIFT; + local_freq_scale = div64_u64(perf, cpu_data->perf_caps.highest_perf); + if (WARN_ON(local_freq_scale > 1024)) + local_freq_scale = 1024; + + per_cpu(arch_freq_scale, cppc_fi->cpu) = local_freq_scale; +} + +static void cppc_irq_work(struct irq_work *irq_work) +{ + struct cppc_freq_invariance *cppc_fi; + + cppc_fi = container_of(irq_work, struct cppc_freq_invariance, irq_work); + kthread_queue_work(kworker_fie, &cppc_fi->work); +} + +static void cppc_scale_freq_tick(void) +{ + struct cppc_freq_invariance *cppc_fi = &per_cpu(cppc_freq_inv, smp_processor_id()); + + /* + * cppc_get_perf_ctrs() can potentially sleep, call that from the right + * context. + */ + irq_work_queue(&cppc_fi->irq_work); +} + +static struct scale_freq_data cppc_sftd = { + .source = SCALE_FREQ_SOURCE_CPPC, + .set_freq_scale = cppc_scale_freq_tick, +}; + +static void cppc_cpufreq_cpu_fie_init(struct cpufreq_policy *policy) +{ + struct cppc_freq_invariance *cppc_fi; + int cpu, ret; + + if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate) + return; + + for_each_cpu(cpu, policy->cpus) { + cppc_fi = &per_cpu(cppc_freq_inv, cpu); + cppc_fi->cpu = cpu; + cppc_fi->cpu_data = policy->driver_data; + kthread_init_work(&cppc_fi->work, cppc_scale_freq_workfn); + init_irq_work(&cppc_fi->irq_work, cppc_irq_work); + + ret = cppc_get_perf_ctrs(cpu, &cppc_fi->prev_perf_fb_ctrs); + if (ret) { + pr_warn("%s: failed to read perf counters for cpu:%d: %d\n", + __func__, cpu, ret); + + /* + * Don't abort if the CPU was offline while the driver + * was getting registered. + */ + if (cpu_online(cpu)) + return; + } + } + + /* Register for freq-invariance */ + topology_set_scale_freq_source(&cppc_sftd, policy->cpus); +} + +/* + * We free all the resources on policy's removal and not on CPU removal as the + * irq-work are per-cpu and the hotplug core takes care of flushing the pending + * irq-works (hint: smpcfd_dying_cpu()) on CPU hotplug. Even if the kthread-work + * fires on another CPU after the concerned CPU is removed, it won't harm. + * + * We just need to make sure to remove them all on policy->exit(). + */ +static void cppc_cpufreq_cpu_fie_exit(struct cpufreq_policy *policy) +{ + struct cppc_freq_invariance *cppc_fi; + int cpu; + + if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate) + return; + + /* policy->cpus will be empty here, use related_cpus instead */ + topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_CPPC, policy->related_cpus); + + for_each_cpu(cpu, policy->related_cpus) { + cppc_fi = &per_cpu(cppc_freq_inv, cpu); + irq_work_sync(&cppc_fi->irq_work); + kthread_cancel_work_sync(&cppc_fi->work); + } +} + +static void __init cppc_freq_invariance_init(void) +{ + struct sched_attr attr = { + .size = sizeof(struct sched_attr), + .sched_policy = SCHED_DEADLINE, + .sched_nice = 0, + .sched_priority = 0, + /* + * Fake (unused) bandwidth; workaround to "fix" + * priority inheritance. 
+ */ + .sched_runtime = 1000000, + .sched_deadline = 10000000, + .sched_period = 10000000, + }; + int ret; + + if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate) + return; + + kworker_fie = kthread_create_worker(0, "cppc_fie"); + if (IS_ERR(kworker_fie)) + return; + + ret = sched_setattr_nocheck(kworker_fie->task, &attr); + if (ret) { + pr_warn("%s: failed to set SCHED_DEADLINE: %d\n", __func__, + ret); + kthread_destroy_worker(kworker_fie); + return; + } +} + +static void cppc_freq_invariance_exit(void) +{ + if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate) + return; + + kthread_destroy_worker(kworker_fie); + kworker_fie = NULL; +} + +#else +static inline void cppc_cpufreq_cpu_fie_init(struct cpufreq_policy *policy) +{ +} + +static inline void cppc_cpufreq_cpu_fie_exit(struct cpufreq_policy *policy) +{ +} + +static inline void cppc_freq_invariance_init(void) +{ +} + +static inline void cppc_freq_invariance_exit(void) +{ +} +#endif /* CONFIG_ACPI_CPPC_CPUFREQ_FIE */ + /* Callback function used to retrieve the max frequency from DMI */ static void cppc_find_dmi_mhz(const struct dmi_header *dm, void *private) { @@ -350,10 +563,10 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) cpu_data->perf_ctrls.desired_perf = cpu_data->perf_caps.highest_perf; ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls); - if (ret) + if (ret) { pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n", cpu_data->perf_caps.highest_perf, cpu, ret); - + } return ret; } @@ -365,28 +578,25 @@ static inline u64 get_delta(u64 t1, u64 t0) return (u32)t1 - (u32)t0; } -static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data, - struct cppc_perf_fb_ctrs fb_ctrs_t0, - struct cppc_perf_fb_ctrs fb_ctrs_t1) +static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data, + struct cppc_perf_fb_ctrs *fb_ctrs_t0, + struct cppc_perf_fb_ctrs *fb_ctrs_t1) { u64 delta_reference, delta_delivered; - u64 reference_perf, delivered_perf; + u64 reference_perf; - reference_perf = fb_ctrs_t0.reference_perf; + reference_perf = fb_ctrs_t0->reference_perf; - delta_reference = get_delta(fb_ctrs_t1.reference, - fb_ctrs_t0.reference); - delta_delivered = get_delta(fb_ctrs_t1.delivered, - fb_ctrs_t0.delivered); + delta_reference = get_delta(fb_ctrs_t1->reference, + fb_ctrs_t0->reference); + delta_delivered = get_delta(fb_ctrs_t1->delivered, + fb_ctrs_t0->delivered); - /* Check to avoid divide-by zero */ - if (delta_reference || delta_delivered) - delivered_perf = (reference_perf * delta_delivered) / - delta_reference; - else - delivered_perf = cpu_data->perf_ctrls.desired_perf; + /* Check to avoid divide-by zero and invalid delivered_perf */ + if (!delta_reference || !delta_delivered) + return cpu_data->perf_ctrls.desired_perf; - return cppc_cpufreq_perf_to_khz(cpu_data, delivered_perf); + return (reference_perf * delta_delivered) / delta_reference; } static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) @@ -394,6 +604,7 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0}; struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); struct cppc_cpudata *cpu_data = policy->driver_data; + u64 delivered_perf; int ret; cpufreq_cpu_put(policy); @@ -408,7 +619,10 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) if (ret) return ret; - return cppc_get_rate_from_fbctrs(cpu_data, fb_ctrs_t0, fb_ctrs_t1); + delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0, + &fb_ctrs_t1); + + return cppc_cpufreq_perf_to_khz(cpu_data, 
delivered_perf); } static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state) @@ -494,14 +708,21 @@ static void cppc_check_hisi_workaround(void) static int __init cppc_cpufreq_init(void) { + int ret; + if ((acpi_disabled) || !acpi_cpc_valid()) return -ENODEV; INIT_LIST_HEAD(&cpu_data_list); cppc_check_hisi_workaround(); + cppc_freq_invariance_init(); - return cpufreq_register_driver(&cppc_cpufreq_driver); + ret = cpufreq_register_driver(&cppc_cpufreq_driver); + if (ret) + cppc_freq_invariance_exit(); + + return ret; } static inline void free_cpu_data(void) @@ -519,6 +740,7 @@ static inline void free_cpu_data(void) static void __exit cppc_cpufreq_exit(void) { cpufreq_unregister_driver(&cppc_cpufreq_driver); + cppc_freq_invariance_exit(); free_cpu_data(); } diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 820549fdb167..c6abbfd928ab 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -37,6 +37,7 @@ bool topology_scale_freq_invariant(void); enum scale_freq_source { SCALE_FREQ_SOURCE_CPUFREQ = 0, SCALE_FREQ_SOURCE_ARCH, + SCALE_FREQ_SOURCE_CPPC, }; struct scale_freq_data { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index cb4cb2bc16d7..7442b978997b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6770,6 +6770,7 @@ int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr) { return __sched_setscheduler(p, attr, false, true); } +EXPORT_SYMBOL_GPL(sched_setattr_nocheck); /** * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. -- Gitee From fb6cbf75d5f4a22aaa06c457f1067639425c431e Mon Sep 17 00:00:00 2001 From: Jie Zhan Date: Thu, 13 Feb 2025 11:55:10 +0800 Subject: [PATCH 03/21] cpufreq: governor: Fix negative 'idle_time' handling in dbs_update() ANBZ: #26062 commit 3698dd6b139dc37b35a9ad83d9330c1f99666c02 upstream We observed an issue that the CPU frequency can't raise up with a 100% CPU load when NOHZ is off and the 'conservative' governor is selected. 'idle_time' can be negative if it's obtained from get_cpu_idle_time_jiffy() when NOHZ is off. This was found and explained in commit 9485e4ca0b48 ("cpufreq: governor: Fix handling of special cases in dbs_update()"). However, commit 7592019634f8 ("cpufreq: governors: Fix long idle detection logic in load calculation") introduced a comparison between 'idle_time' and 'samling_rate' to detect a long idle interval. While 'idle_time' is converted to int before comparison, it's actually promoted to unsigned again when compared with an unsigned 'sampling_rate'. Hence, this leads to wrong idle interval detection when it's in fact 100% busy and sets policy_dbs->idle_periods to a very large value. 'conservative' adjusts the frequency to minimum because of the large 'idle_periods', such that the frequency can't raise up. 'Ondemand' doesn't use policy_dbs->idle_periods so it fortunately avoids the issue. Correct negative 'idle_time' to 0 before any use of it in dbs_update(). Fixes: 7592019634f8 ("cpufreq: governors: Fix long idle detection logic in load calculation") Signed-off-by: Jie Zhan Reviewed-by: Chen Yu Link: https://patch.msgid.link/20250213035510.2402076-1-zhanjie9@hisilicon.com Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Lin Ruier <2878455989@qq.com> --- drivers/cpufreq/cpufreq_governor.c | 45 +++++++++++++++--------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 63f7c219062b..d8b1a0d4cd21 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -145,7 +145,23 @@ unsigned int dbs_update(struct cpufreq_policy *policy) time_elapsed = update_time - j_cdbs->prev_update_time; j_cdbs->prev_update_time = update_time; - idle_time = cur_idle_time - j_cdbs->prev_cpu_idle; + /* + * cur_idle_time could be smaller than j_cdbs->prev_cpu_idle if + * it's obtained from get_cpu_idle_time_jiffy() when NOHZ is + * off, where idle_time is calculated by the difference between + * time elapsed in jiffies and "busy time" obtained from CPU + * statistics. If a CPU is 100% busy, the time elapsed and busy + * time should grow with the same amount in two consecutive + * samples, but in practice there could be a tiny difference, + * making the accumulated idle time decrease sometimes. Hence, + * in this case, idle_time should be regarded as 0 in order to + * make the further process correct. + */ + if (cur_idle_time > j_cdbs->prev_cpu_idle) + idle_time = cur_idle_time - j_cdbs->prev_cpu_idle; + else + idle_time = 0; + j_cdbs->prev_cpu_idle = cur_idle_time; if (ignore_nice) { @@ -162,7 +178,7 @@ unsigned int dbs_update(struct cpufreq_policy *policy) * calls, so the previous load value can be used then. */ load = j_cdbs->prev_load; - } else if (unlikely((int)idle_time > 2 * sampling_rate && + } else if (unlikely(idle_time > 2 * sampling_rate && j_cdbs->prev_load)) { /* * If the CPU had gone completely idle and a task has @@ -189,30 +205,15 @@ unsigned int dbs_update(struct cpufreq_policy *policy) load = j_cdbs->prev_load; j_cdbs->prev_load = 0; } else { - if (time_elapsed >= idle_time) { + if (time_elapsed > idle_time) load = 100 * (time_elapsed - idle_time) / time_elapsed; - } else { - /* - * That can happen if idle_time is returned by - * get_cpu_idle_time_jiffy(). In that case - * idle_time is roughly equal to the difference - * between time_elapsed and "busy time" obtained - * from CPU statistics. Then, the "busy time" - * can end up being greater than time_elapsed - * (for example, if jiffies_64 and the CPU - * statistics are updated by different CPUs), - * so idle_time may in fact be negative. That - * means, though, that the CPU was busy all - * the time (on the rough average) during the - * last sampling interval and 100 can be - * returned as the load. - */ - load = (int)idle_time < 0 ? 100 : 0; - } + else + load = 0; + j_cdbs->prev_load = load; } - if (unlikely((int)idle_time > 2 * sampling_rate)) { + if (unlikely(idle_time > 2 * sampling_rate)) { unsigned int periods = idle_time / sampling_rate; if (periods < idle_periods) -- Gitee From 9fc2471fa85c2c0dbd52be2cbd426988588cae77 Mon Sep 17 00:00:00 2001 From: Jie Zhan Date: Fri, 13 Jun 2025 15:27:50 +0800 Subject: [PATCH 04/21] cpufreq: CPPC: Fix error handling in cppc_scale_freq_workfn() ANBZ: #26062 commit 2eab37aa8a55cdb8b85a912a4b1156c39689c89d upstream driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ICEXYT ---------------------------------------------------------------------- Perf counters could be 0 if the cpu is in a low-power idle state. Just try it again next time and update the frequency scale when the cpu is active and perf counters successfully return. 
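(An aside on the dbs_update() fix in the previous patch: the root cause is integer promotion. Casting idle_time to int does not help once it is compared against the unsigned sampling_rate, because the usual arithmetic conversions turn the negative value back into a huge unsigned one. A minimal userspace demonstration, with made-up values:

#include <stdio.h>

int main(void)
{
        /* A "negative" idle time stored in an unsigned variable, as in dbs_update() */
        unsigned int idle_time = (unsigned int)-5;
        unsigned int sampling_rate = 10000;

        /* Old check: the (int) cast is undone because the other operand is unsigned */
        if ((int)idle_time > 2 * sampling_rate)
                printf("long idle interval detected (wrong: the CPU was 100%% busy)\n");

        /* With both operands signed, the comparison behaves as intended */
        if ((int)idle_time > 2 * (int)sampling_rate)
                printf("this line is not reached\n");
        else
                printf("no long idle interval (correct)\n");

        return 0;
}

Clamping idle_time to 0 before it is used, as the patch does, sidesteps the promotion entirely.)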
Also, remove the FIE source on an actual failure. Fixes: 1eb5dde674f5 ("cpufreq: CPPC: Add support for frequency invariance") Signed-off-by: Jie Zhan Signed-off-by: Hongye Lin Signed-off-by: Lin Ruier <2878455989@qq.com> --- drivers/cpufreq/cppc_cpufreq.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index dec35155e50d..4c522e3c7c56 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -106,12 +106,23 @@ static void cppc_scale_freq_workfn(struct kthread_work *work) struct cppc_cpudata *cpu_data; unsigned long local_freq_scale; u64 perf; + int ret; cppc_fi = container_of(work, struct cppc_freq_invariance, work); cpu_data = cppc_fi->cpu_data; - if (cppc_get_perf_ctrs(cppc_fi->cpu, &fb_ctrs)) { + ret = cppc_get_perf_ctrs(cppc_fi->cpu, &fb_ctrs); + /* + * Perf counters could be 0 if the cpu is in a low-power idle state. + * Just try it again next time. + */ + if (ret == -EFAULT) + return; + + if (ret) { pr_warn("%s: failed to read perf counters\n", __func__); + topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_CPPC, + cpu_data->shared_cpu_map); return; } -- Gitee From 777858660c1b0f523100af326fa19e1d16272c61 Mon Sep 17 00:00:00 2001 From: Jie Zhan Date: Fri, 13 Jun 2025 15:27:49 +0800 Subject: [PATCH 05/21] cpufreq: CPPC: Don't warn on failing to read perf counters on offline cpus ANBZ: #26062 commit d08e86c77069fbbdd7fdbdaa408c198223bc0900 upstream driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ICEXYT ---------------------------------------------------------------------- Reading perf counters on offline cpus should be expected to fail, e.g. it returns -EFAULT as counters are shown to be 0. Remove the unnecessary warning print on this failure path. Fixes: 1eb5dde674f5 ("cpufreq: CPPC: Add support for frequency invariance") Signed-off-by: Jie Zhan Signed-off-by: Hongye Lin Signed-off-by: Lin Ruier <2878455989@qq.com> --- drivers/cpufreq/cppc_cpufreq.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 4c522e3c7c56..9623f5fb730f 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -178,16 +178,14 @@ static void cppc_cpufreq_cpu_fie_init(struct cpufreq_policy *policy) init_irq_work(&cppc_fi->irq_work, cppc_irq_work); ret = cppc_get_perf_ctrs(cpu, &cppc_fi->prev_perf_fb_ctrs); - if (ret) { - pr_warn("%s: failed to read perf counters for cpu:%d: %d\n", - __func__, cpu, ret); - + if (ret && cpu_online(cpu)) { /* * Don't abort if the CPU was offline while the driver * was getting registered. */ - if (cpu_online(cpu)) - return; + pr_debug("%s: failed to read perf counters for cpu:%d: %d\n", + __func__, cpu, ret); + return; } } -- Gitee From 4d5848503c3da066e70b97f40cbce775e34a90fc Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Thu, 28 Dec 2023 11:01:02 +0800 Subject: [PATCH 06/21] arm64: cpufeature: Export cpu_has_amu_feat() ANBZ: #26062 commit 789ca6a6076bfc04b1a9261fd6af5b9df9fff4b5 upstream hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I73EPL -------------------------------- Export the cpu_has_amu_feat() function for using by cppc_cpufreq.c to check if the processor implements ARM's Activity Monitor Unit (AMU). 
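The shape of the consumer this export enables is added by the next patch in this series ("cpufreq: CPPC: Keep the target core awake when reading its cpufreq rate"), roughly:

        if (cpu_has_amu_feat(cpu))
                ret = smp_call_on_cpu(cpu, cppc_get_perf_ctrs_pair,
                                      &fb_ctrs, false);
        else
                ret = cppc_get_perf_ctrs_pair(&fb_ctrs);

so that, when the feedback counters are backed by AMU registers, they are sampled on the target CPU itself while it is awake, rather than remotely while it may be sitting in WFI/WFE with its counters stopped.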
Signed-off-by: Zeng Heng Signed-off-by: Lin Ruier <2878455989@qq.com> --- arch/arm64/kernel/cpufeature.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9c905c972545..df4ed86720a2 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1574,6 +1574,7 @@ bool cpu_has_amu_feat(int cpu) { return cpumask_test_cpu(cpu, &amu_cpus); } +EXPORT_SYMBOL(cpu_has_amu_feat); /* Initialize the use of AMU counters for frequency invariance */ extern void init_cpu_freq_invariance_counters(void); -- Gitee From 958a1126cd15ac5fd170d82654708139cd1cfde0 Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Thu, 28 Dec 2023 11:01:03 +0800 Subject: [PATCH 07/21] cpufreq: CPPC: Keep the target core awake when reading its cpufreq rate ANBZ: #26062 commit 12f136b2134d4ded731c3ef23ac08c85b9c0b1fa upstream hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I73EPL -------------------------------- As ARM AMU's document says, all counters are subject to any changes in clock frequency, including clock stopping caused by the WFI and WFE instructions. Therefore, using smp_call_on_cpu() to trigger target CPU to read self's AMU counters, which ensures the counters are working properly while cstate feature is enabled. Reported-by: Sumit Gupta Link: https://lore.kernel.org/all/20230418113459.12860-7-sumitg@nvidia.com/ Signed-off-by: Zeng Heng Signed-off-by: Lin Ruier <2878455989@qq.com> --- drivers/cpufreq/cppc_cpufreq.c | 39 ++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 9623f5fb730f..98a4eeacb8d4 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -82,6 +82,12 @@ static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data, struct cppc_perf_fb_ctrs *fb_ctrs_t0, struct cppc_perf_fb_ctrs *fb_ctrs_t1); +struct fb_ctr_pair { + u32 cpu; + struct cppc_perf_fb_ctrs fb_ctrs_t0; + struct cppc_perf_fb_ctrs fb_ctrs_t1; +}; + /** * cppc_scale_freq_workfn - CPPC arch_freq_scale updater for frequency invariance * @work: The work item. 
@@ -608,9 +614,24 @@ static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data, return (reference_perf * delta_delivered) / delta_reference; } +static int cppc_get_perf_ctrs_pair(void *val) +{ + struct fb_ctr_pair *fb_ctrs = val; + int cpu = fb_ctrs->cpu; + int ret; + + ret = cppc_get_perf_ctrs(cpu, &fb_ctrs->fb_ctrs_t0); + if (ret) + return ret; + + udelay(2); /* 2usec delay between sampling */ + + return cppc_get_perf_ctrs(cpu, &fb_ctrs->fb_ctrs_t1); +} + static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) { - struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0}; + struct fb_ctr_pair fb_ctrs = { .cpu = cpu, }; struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); struct cppc_cpudata *cpu_data = policy->driver_data; u64 delivered_perf; @@ -618,18 +639,18 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) cpufreq_cpu_put(policy); - ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0); - if (ret) - return ret; - - udelay(2); /* 2usec delay between sampling */ + if (cpu_has_amu_feat(cpu)) + ret = smp_call_on_cpu(cpu, cppc_get_perf_ctrs_pair, + &fb_ctrs, false); + else + ret = cppc_get_perf_ctrs_pair(&fb_ctrs); - ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t1); if (ret) return ret; - delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0, - &fb_ctrs_t1); + delivered_perf = cppc_perf_from_fbctrs(cpu_data, + &fb_ctrs.fb_ctrs_t0, + &fb_ctrs.fb_ctrs_t1); return cppc_cpufreq_perf_to_khz(cpu_data, delivered_perf); } -- Gitee From 21a1bfd5c697501b9979d0b07c95b15ac297cb78 Mon Sep 17 00:00:00 2001 From: Jie Zhan Date: Tue, 22 Aug 2023 20:48:37 +0800 Subject: [PATCH 08/21] cpufreq: Support per-policy performance boost ANBZ: #26062 commit 218a06a79d9a98a96ef46bb003d4d8adb0962056 upstream The boost control currently applies to the whole system. However, users may prefer to boost a subset of cores in order to provide prioritized performance to workloads running on the boosted cores. Enable per-policy boost by adding a 'boost' sysfs interface under each policy path. This can be found at: /sys/devices/system/cpu/cpufreq/policy<*>/boost Same to the global boost switch, writing 1/0 to the per-policy 'boost' enables/disables boost on a cpufreq policy respectively. The user view of global and per-policy boost controls should be: 1. Enabling global boost initially enables boost on all policies, and per-policy boost can then be enabled or disabled individually, given that the platform does support so. 2. Disabling global boost makes the per-policy boost interface illegal. Signed-off-by: Jie Zhan Reviewed-by: Wei Xu Acked-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Lin Ruier <2878455989@qq.com> --- drivers/cpufreq/cpufreq.c | 43 +++++++++++++++++++++++++++++++++++++++ include/linux/cpufreq.h | 3 +++ 2 files changed, 46 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 058609b233e0..a6455b2e66b3 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -83,6 +83,7 @@ static void cpufreq_governor_limits(struct cpufreq_policy *policy); static int cpufreq_set_policy(struct cpufreq_policy *policy, struct cpufreq_governor *new_gov, unsigned int new_pol); +static bool cpufreq_boost_supported(void); /* * Two notifier lists: the "policy" list is involved in the @@ -617,6 +618,40 @@ static ssize_t store_boost(struct kobject *kobj, struct kobj_attribute *attr, } define_one_global_rw(boost); +static ssize_t show_local_boost(struct cpufreq_policy *policy, char *buf) +{ + return sysfs_emit(buf, "%d\n", policy->boost_enabled); +} + +static ssize_t store_local_boost(struct cpufreq_policy *policy, + const char *buf, size_t count) +{ + int ret, enable; + + ret = kstrtoint(buf, 10, &enable); + if (ret || enable < 0 || enable > 1) + return -EINVAL; + + if (!cpufreq_driver->boost_enabled) + return -EINVAL; + + if (policy->boost_enabled == enable) + return count; + + cpus_read_lock(); + ret = cpufreq_driver->set_boost(policy, enable); + cpus_read_unlock(); + + if (ret) + return ret; + + policy->boost_enabled = enable; + + return count; +} + +static struct freq_attr local_boost = __ATTR(boost, 0644, show_local_boost, store_local_boost); + static struct cpufreq_governor *find_governor(const char *str_governor) { struct cpufreq_governor *t; @@ -1058,6 +1093,12 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy) return ret; } + if (cpufreq_boost_supported()) { + ret = sysfs_create_file(&policy->kobj, &local_boost.attr); + if (ret) + return ret; + } + return 0; } @@ -2675,6 +2716,8 @@ int cpufreq_boost_trigger_state(int state) ret = cpufreq_driver->set_boost(policy, state); if (ret) goto err_reset_state; + + policy->boost_enabled = state; } put_online_cpus(); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index e5be3e61edaa..1cc3f8b2b8f4 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -131,6 +131,9 @@ struct cpufreq_policy { */ bool dvfs_possible_from_any_cpu; + /* Per policy boost enabled flag. */ + bool boost_enabled; + /* Cached frequency lookup from cpufreq_driver_resolve_freq. */ unsigned int cached_target_freq; unsigned int cached_resolved_idx; -- Gitee From 408134709d4174a699df35513642251490d7ee84 Mon Sep 17 00:00:00 2001 From: Lifeng Zheng Date: Fri, 17 Jan 2025 18:14:54 +0800 Subject: [PATCH 09/21] cpufreq: Fix re-boost issue after hotplugging a CPU ANBZ: #26062 commit 1608f0230510489d74a2e24e47054233b7e4678a upstream It turns out that CPUX will stay on the base frequency after performing these operations: 1. boost all CPUs: echo 1 > /sys/devices/system/cpu/cpufreq/boost 2. offline one CPU: echo 0 > /sys/devices/system/cpu/cpuX/online 3. deboost all CPUs: echo 0 > /sys/devices/system/cpu/cpufreq/boost 4. online CPUX: echo 1 > /sys/devices/system/cpu/cpuX/online 5. boost all CPUs again: echo 1 > /sys/devices/system/cpu/cpufreq/boost This is because max_freq_req of the policy is not updated during the online process, and the value of max_freq_req before the last offline is retained. When the CPU is boosted again, freq_qos_update_request() will do nothing because the old value is the same as the new one. 
This causes the CPU to stay at the base frequency. Updating max_freq_req in cpufreq_online() will solve this problem. Signed-off-by: Lifeng Zheng Acked-by: Viresh Kumar Link: https://patch.msgid.link/20250117101457.1530653-2-zhenglifeng1@huawei.com [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Lin Ruier <2878455989@qq.com> --- drivers/cpufreq/cpufreq.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index a6455b2e66b3..0e1a8eda8eb3 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1479,6 +1479,10 @@ static int cpufreq_online(unsigned int cpu) blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_CREATE_POLICY, policy); + } else { + ret = freq_qos_update_request(policy->max_freq_req, policy->max); + if (ret < 0) + goto out_destroy_policy; } if (cpufreq_driver->get && has_target()) { -- Gitee From 834b5e89d13f900102474e0f29d1360d270c4dfb Mon Sep 17 00:00:00 2001 From: Lifeng Zheng Date: Fri, 17 Jan 2025 18:14:56 +0800 Subject: [PATCH 10/21] cpufreq: CPPC: Fix wrong max_freq in policy initialization ANBZ: #26062 commit 03d8b4e76266e11662c5e544854b737843173e2d upstream In policy initialization, policy->max and policy->cpuinfo.max_freq are always set to the value calculated from caps->nominal_perf. This will cause the frequency stay on base frequency even if the policy is already boosted when a CPU is going online. Fix this by using policy->boost_enabled to determine which value should be set. Signed-off-by: Lifeng Zheng Acked-by: Viresh Kumar Link: https://patch.msgid.link/20250117101457.1530653-4-zhenglifeng1@huawei.com [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Lin Ruier <2878455989@qq.com> --- drivers/cpufreq/cppc_cpufreq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 98a4eeacb8d4..fa11ae50a325 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -533,7 +533,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) * Section 8.4.7.1.1.5 of ACPI 6.1 spec) */ policy->min = cppc_cpufreq_perf_to_khz(cpu_data, cpu_data->perf_caps.lowest_nonlinear_perf); - policy->max = cppc_cpufreq_perf_to_khz(cpu_data, cpu_data->perf_caps.nominal_perf); + policy->max = cppc_cpufreq_perf_to_khz(cpu_data, policy->boost_enabled ? cpu_data->perf_caps.highest_perf : cpu_data->perf_caps.nominal_perf); /* * Set cpuinfo.min_freq to Lowest to make the full range of performance @@ -541,7 +541,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) * nonlinear perf */ policy->cpuinfo.min_freq = cppc_cpufreq_perf_to_khz(cpu_data, cpu_data->perf_caps.lowest_perf); - policy->cpuinfo.max_freq = cppc_cpufreq_perf_to_khz(cpu_data, cpu_data->perf_caps.nominal_perf); + policy->cpuinfo.max_freq = policy->max; policy->transition_delay_us = cppc_cpufreq_get_transition_delay_us(cpu); policy->shared_type = cpu_data->shared_type; -- Gitee From e90e1e3069ca638e28c188eb05c557b65f2a5489 Mon Sep 17 00:00:00 2001 From: Lifeng Zheng Date: Fri, 17 Jan 2025 18:14:55 +0800 Subject: [PATCH 11/21] cpufreq: Introduce a more generic way to set default per-policy boost flag ANBZ: #26062 commit dd016f379ebc2d43a9405742d1a6066577509bd7 upstream In cpufreq_online() of cpufreq.c, the per-policy boost flag is already set to mirror the cpufreq_driver boost during init but using freq_table to judge if the policy has boost frequency. 
There are two drawbacks to this approach: 1. It doesn't work for the cpufreq drivers that do not use a frequency table. For now, acpi-cpufreq and amd-pstate have to enable boost in policy initialization. And cppc_cpufreq never set policy to boost when going online no matter what the cpufreq_driver boost flag is. 2. If the CPU goes offline when cpufreq_driver boost is enabled and then goes online when cpufreq_driver boost is disabled, the per-policy boost flag will incorrectly remain true. Running set_boost at the end of the online process is a more generic way for all cpufreq drivers. Signed-off-by: Lifeng Zheng Link: https://patch.msgid.link/20250117101457.1530653-3-zhenglifeng1@huawei.com Acked-by: Viresh Kumar [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Lin Ruier <2878455989@qq.com> --- drivers/cpufreq/cpufreq.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 0e1a8eda8eb3..0b362237e68a 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1562,6 +1562,18 @@ static int cpufreq_online(unsigned int cpu) if (cpufreq_thermal_control_enabled(cpufreq_driver)) policy->cdev = of_cpufreq_cooling_register(policy); + /* Let the per-policy boost flag mirror the cpufreq_driver boost during init */ + if (policy->boost_enabled != cpufreq_boost_enabled()) { + policy->boost_enabled = cpufreq_boost_enabled(); + ret = cpufreq_driver->set_boost(policy, policy->boost_enabled); + if (ret) { + /* If the set_boost fails, the online operation is not affected */ + pr_info("%s: CPU%d: Cannot %s BOOST\n", __func__, policy->cpu, + policy->boost_enabled ? "enable" : "disable"); + policy->boost_enabled = !policy->boost_enabled; + } + } + pr_debug("initialization complete\n"); return 0; -- Gitee From 971dd91d352309ef37b642d49ee896422f802ba3 Mon Sep 17 00:00:00 2001 From: Aboorva Devarajan Date: Wed, 5 Feb 2025 23:43:47 +0530 Subject: [PATCH 12/21] cpufreq: prevent NULL dereference in cpufreq_online() ANBZ: #26062 commit 0813fd2e14ca6ecd4e6ba005a9766f08e26020d7 upstream Ensure cpufreq_driver->set_boost is non-NULL before using it in cpufreq_online() to prevent a potential NULL pointer dereference. Reported-by: Gautam Menghani Closes: https://lore.kernel.org/all/c9e56c5f54cc33338762c94e9bed7b5a0d5de812.camel@linux.ibm.com/ Fixes: dd016f379ebc ("cpufreq: Introduce a more generic way to set default per-policy boost flag") Suggested-by: Viresh Kumar Signed-off-by: Aboorva Devarajan Link: https://patch.msgid.link/20250205181347.2079272-1-aboorvad@linux.ibm.com [ rjw: Minor edits in the subject and changelog ] Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Lin Ruier <2878455989@qq.com> --- drivers/cpufreq/cpufreq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 0b362237e68a..e2b2fd4e654b 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1563,7 +1563,8 @@ static int cpufreq_online(unsigned int cpu) policy->cdev = of_cpufreq_cooling_register(policy); /* Let the per-policy boost flag mirror the cpufreq_driver boost during init */ - if (policy->boost_enabled != cpufreq_boost_enabled()) { + if (cpufreq_driver->set_boost && + policy->boost_enabled != cpufreq_boost_enabled()) { policy->boost_enabled = cpufreq_boost_enabled(); ret = cpufreq_driver->set_boost(policy, policy->boost_enabled); if (ret) { -- Gitee From 43141b160f4e52b940945962f8b4bbd7c9d886da Mon Sep 17 00:00:00 2001 From: Aleksandr Mishin Date: Tue, 8 Oct 2024 11:09:18 +0000 Subject: [PATCH 13/21] cppc_cpufreq: Fix possible null pointer dereference ANBZ: #26062 commit cf7de25878a1f4508c69dc9f6819c21ba177dbfe upstream stable inclusion from stable-v6.6.33 commit f84b9b25d045e67a7eee5e73f21278c8ab06713c category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IA74DQ Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=f84b9b25d045e67a7eee5e73f21278c8ab06713c -------------------------------- [ Upstream commit cf7de25878a1f4508c69dc9f6819c21ba177dbfe ] cppc_cpufreq_get_rate() and hisi_cppc_cpufreq_get_rate() can be called from different places with various parameters. So cpufreq_cpu_get() can return null as 'policy' in some circumstances. Fix this bug by adding null return check. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: a28b2bfc099c ("cppc_cpufreq: replace per-cpu data array with a list") Signed-off-by: Aleksandr Mishin Signed-off-by: Viresh Kumar Signed-off-by: Sasha Levin Conflicts: drivers/cpufreq/cppc_cpufreq.c [Context conflict] Signed-off-by: ZhangPeng Signed-off-by: Liu Mingrui Signed-off-by: Lin Ruier <2878455989@qq.com> --- drivers/cpufreq/cppc_cpufreq.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index fa11ae50a325..a459fd0d12a0 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -637,6 +637,11 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) u64 delivered_perf; int ret; + if (!policy) + return -ENODEV; + + cpu_data = policy->driver_data; + cpufreq_cpu_put(policy); if (cpu_has_amu_feat(cpu)) @@ -700,10 +705,15 @@ static struct cpufreq_driver cppc_cpufreq_driver = { static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu) { struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); - struct cppc_cpudata *cpu_data = policy->driver_data; + struct cppc_cpudata *cpu_data; u64 desired_perf; int ret; + if (!policy) + return -ENODEV; + + cpu_data = policy->driver_data; + cpufreq_cpu_put(policy); ret = cppc_get_desired_perf(cpu, &desired_perf); -- Gitee From c7b036b3696b41231e01150d0abaea0d8b1dce21 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 21 Aug 2025 15:15:57 +0800 Subject: [PATCH 14/21] cpufreq: cppc: Fix invalid return value in .get() callback ANBZ: #26062 commit 2b8e6b58889c672e1ae3601d9b2b070be4dc2fbc upstream mainline inclusion from mainline-v6.16-rc1 commit 2b8e6b58889c672e1ae3601d9b2b070be4dc2fbc category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ICTP7R CVE: NA Reference: 
https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=2b8e6b58889c672e1ae3601d9b2b070be4dc2fbc ---------------------------------------------------------------------- Returning a negative error code in a function with an unsigned return type is a pretty bad idea. It is probably worse when the justification for the change is "our static analisys tool found it". Fixes: cf7de25878a1 ("cppc_cpufreq: Fix possible null pointer dereference") Signed-off-by: Marc Zyngier Cc: "Rafael J. Wysocki" Cc: Viresh Kumar Reviewed-by: Lifeng Zheng Signed-off-by: Viresh Kumar Signed-off-by: Hongye Lin Signed-off-by: Lin Ruier <2878455989@qq.com> --- drivers/cpufreq/cppc_cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index a459fd0d12a0..83f69de3dfc9 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -638,7 +638,7 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) int ret; if (!policy) - return -ENODEV; + return 0; cpu_data = policy->driver_data; -- Gitee From f804e624cca9beac5e46d1813291a0907bf4a191 Mon Sep 17 00:00:00 2001 From: Lifeng Zheng Date: Thu, 21 Aug 2025 15:15:58 +0800 Subject: [PATCH 15/21] cpufreq: cppc: Fix invalid return value in hisi_cppc_cpufreq_get_rate() ANBZ: #26062 commit d7c560f56e528fbb009f5f2b70cc813aad66661d upstream driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ICTP7R ---------------------------------------------------------------------- Returning a negative error code in a function with an unsigned return type is a pretty bad idea. Return 0 is enough when something wrong. Fixes: cf7de25878a1 ("cppc_cpufreq: Fix possible null pointer dereference") Signed-off-by: Lifeng Zheng Signed-off-by: Hongye Lin Signed-off-by: Lin Ruier <2878455989@qq.com> --- drivers/cpufreq/cppc_cpufreq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 83f69de3dfc9..fdcb87b603fe 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -710,7 +710,7 @@ static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu) int ret; if (!policy) - return -ENODEV; + return 0; cpu_data = policy->driver_data; @@ -718,7 +718,7 @@ static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu) ret = cppc_get_desired_perf(cpu, &desired_perf); if (ret < 0) - return -EIO; + return 0; return cppc_cpufreq_perf_to_khz(cpu_data, desired_perf); } -- Gitee From 7049f39e4fa6b81daefbe29f992bd2de5867898c Mon Sep 17 00:00:00 2001 From: Lifeng Zheng Date: Thu, 21 Aug 2025 15:16:03 +0800 Subject: [PATCH 16/21] cpufreq: Contain scaling_cur_freq.attr in cpufreq_attrs ANBZ: #26062 commit e96bcc889f35e7302734817065be76e481e48527 upstream mainline inclusion from mainline-v6.17-rc2 commit 2e554cfa259fe07085a4fcff7d2ec4b7041bbd9c category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ICTP7R CVE: NA Reference: https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=2e554cfa259fe07085a4fcff7d2ec4b7041bbd9c ---------------------------------------------------------------------- After commit c034b02e213d ("cpufreq: expose scaling_cur_freq sysfs file for set_policy() drivers"), the file scaling_cur_freq is exposed to all drivers. No need to create this file separately. It's better to be contained in cpufreq_attrs. 
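(Stepping back to the two .get() fixes above: a minimal userspace illustration of why a callback with an unsigned return type must not hand back a negative errno. The caller sees a huge, plausible-looking value rather than an error; the names and values below are made up:

#include <stdio.h>
#include <errno.h>

/* Mimics a cpufreq .get() callback, whose return type is unsigned int */
static unsigned int broken_get_rate(void)
{
        return -ENODEV;         /* meant as an error... */
}

int main(void)
{
        unsigned int khz = broken_get_rate();

        /* ...but the caller sees a gigantic "frequency" instead */
        printf("reported rate: %u kHz\n", khz);
        return 0;
}

Returning 0 on failure, which the core already treats as "could not determine the frequency", is what both patches switch to.)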
Signed-off-by: Lifeng Zheng Link: https://patch.msgid.link/20250623133402.3120230-4-zhenglifeng1@huawei.com Signed-off-by: Rafael J. Wysocki Signed-off-by: Hongye Lin Signed-off-by: Lin Ruier <2878455989@qq.com> --- drivers/cpufreq/cpufreq.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index e2b2fd4e654b..2ae24592a7b2 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -965,6 +965,7 @@ static struct attribute *default_attrs[] = { &cpuinfo_min_freq.attr, &cpuinfo_max_freq.attr, &cpuinfo_transition_latency.attr, + &scaling_cur_freq.attr, &scaling_min_freq.attr, &scaling_max_freq.attr, &affected_cpus.attr, @@ -1083,10 +1084,6 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy) return ret; } - ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr); - if (ret) - return ret; - if (cpufreq_driver->bios_limit) { ret = sysfs_create_file(&policy->kobj, &bios_limit.attr); if (ret) -- Gitee From 1639385c95d8ae86f764f33025bb2409c00277e7 Mon Sep 17 00:00:00 2001 From: Lifeng Zheng Date: Thu, 21 Aug 2025 15:16:05 +0800 Subject: [PATCH 17/21] cpufreq: Hold cpufreq_driver_lock when assigning cpufreq_driver->set_boost ANBZ: #26062 commit efc1ef3222b0c34a14395f84330fa890cfd4ec3f upstream driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ICTP7R ---------------------------------------------------------------------- Hold the lock to avoid concurrency problems in cpufreq_enable_boost_support() when assigning cpufreq_driver->set_boost. Fixes: 7a6c79f2fe53 ("cpufreq: Simplify core code related to boost support") Signed-off-by: Lifeng Zheng Signed-off-by: Hongye Lin Signed-off-by: Lin Ruier <2878455989@qq.com> --- drivers/cpufreq/cpufreq.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 2ae24592a7b2..2fc91b5306e5 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2775,13 +2775,17 @@ static void remove_boost_sysfs_file(void) int cpufreq_enable_boost_support(void) { + unsigned long flags; + if (!cpufreq_driver) return -EINVAL; if (cpufreq_boost_supported()) return 0; + write_lock_irqsave(&cpufreq_driver_lock, flags); cpufreq_driver->set_boost = cpufreq_boost_set_sw; + write_unlock_irqrestore(&cpufreq_driver_lock, flags); /* This will get removed on driver unregister */ return create_boost_sysfs_file(); -- Gitee From 9f6bda8b859933ece5676e949e3574be301ef161 Mon Sep 17 00:00:00 2001 From: Lifeng Zheng Date: Thu, 21 Aug 2025 15:16:06 +0800 Subject: [PATCH 18/21] cpufreq: Initialize cpufreq-based frequency-invariance later ANBZ: #26062 commit c7ae9932eabda5e424003f0a3bc7dc5e59043996 upstream mainline inclusion from mainline-v6.17-rc2 commit 2a6c727387062a2ea79eb6cf5004820cb1b0afe2 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ICTP7R CVE: NA Reference: https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=2a6c727387062a2ea79eb6cf5004820cb1b0afe2 ---------------------------------------------------------------------- The cpufreq-based invariance is enabled in cpufreq_register_driver(), but never disabled after registration fails. Move the invariance initialization to where all other initializations have been successfully done to solve this problem. 
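A standalone model of the ordering issue (the names below are illustrative, not kernel API): a global side effect committed before the last fallible step is left stuck on when registration fails, while committing it last needs no undo path at all.

#include <stdio.h>

static int invariance_enabled;

static int fallible_setup(int fail)
{
        return fail ? -1 : 0;
}

/* Old ordering: enable first, so a failed registration leaves the flag stale */
static int register_early_enable(int fail)
{
        invariance_enabled = 1;
        return fallible_setup(fail);
}

/* New ordering: enable only once every fallible step has succeeded */
static int register_late_enable(int fail)
{
        if (fallible_setup(fail))
                return -1;
        invariance_enabled = 1;
        return 0;
}

int main(void)
{
        invariance_enabled = 0;
        register_early_enable(1);
        printf("early enable, failed registration: flag=%d (stale)\n", invariance_enabled);

        invariance_enabled = 0;
        register_late_enable(1);
        printf("late enable, failed registration: flag=%d\n", invariance_enabled);
        return 0;
}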
Fixes: 874f63531064 ("cpufreq: report whether cpufreq supports Frequency Invariance (FI)") Signed-off-by: Lifeng Zheng Link: https://patch.msgid.link/20250709104145.2348017-2-zhenglifeng1@huawei.com [ rjw: New subject ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Hongye Lin Signed-off-by: Lin Ruier <2878455989@qq.com> --- drivers/cpufreq/cpufreq.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 2fc91b5306e5..fa2c45f4201f 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2865,15 +2865,6 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) cpufreq_driver = driver_data; write_unlock_irqrestore(&cpufreq_driver_lock, flags); - /* - * Mark support for the scheduler's frequency invariance engine for - * drivers that implement target(), target_index() or fast_switch(). - */ - if (!cpufreq_driver->setpolicy) { - static_branch_enable_cpuslocked(&cpufreq_freq_invariance); - pr_debug("supports frequency invariance"); - } - if (driver_data->setpolicy) driver_data->flags |= CPUFREQ_CONST_LOOPS; @@ -2905,6 +2896,15 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) hp_online = ret; ret = 0; + /* + * Mark support for the scheduler's frequency invariance engine for + * drivers that implement target(), target_index() or fast_switch(). + */ + if (!cpufreq_driver->setpolicy) { + static_branch_enable_cpuslocked(&cpufreq_freq_invariance); + pr_debug("supports frequency invariance"); + } + pr_debug("driver %s up and running\n", driver_data->name); goto out; -- Gitee From d73e528a21c8643657c4a56a505fb3a975db3c46 Mon Sep 17 00:00:00 2001 From: Lifeng Zheng Date: Thu, 21 Aug 2025 15:16:07 +0800 Subject: [PATCH 19/21] cpufreq: Init policy->rwsem before it may be possibly used ANBZ: #26062 commit 8289834528413ea26acc3dff17fb6ac916f98dd6 upstream mainline inclusion from mainline-v6.17-rc2 commit d1378d1d7edb3a4c4935a44fe834ae135be03564 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ICTP7R CVE: NA Reference: https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d1378d1d7edb3a4c4935a44fe834ae135be03564 ---------------------------------------------------------------------- In cpufreq_policy_put_kobj(), policy->rwsem is used. But in cpufreq_policy_alloc(), if freq_qos_add_notifier() returns an error, error path via err_kobj_remove or err_min_qos_notifier will be reached and cpufreq_policy_put_kobj() will be called before policy->rwsem is initialized. Thus, the calling of init_rwsem() should be moved to where before these two error paths can be reached. Fixes: 67d874c3b2c6 ("cpufreq: Register notifiers with the PM QoS framework") Signed-off-by: Lifeng Zheng Link: https://patch.msgid.link/20250709104145.2348017-3-zhenglifeng1@huawei.com Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Hongye Lin Signed-off-by: Lin Ruier <2878455989@qq.com> --- drivers/cpufreq/cpufreq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index fa2c45f4201f..c49ee74a9f34 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1267,6 +1267,8 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) goto err_free_real_cpus; } + init_rwsem(&policy->rwsem); + freq_constraints_init(&policy->constraints); policy->nb_min.notifier_call = cpufreq_notifier_min; @@ -1289,7 +1291,6 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) } INIT_LIST_HEAD(&policy->policy_list); - init_rwsem(&policy->rwsem); spin_lock_init(&policy->transition_lock); init_waitqueue_head(&policy->transition_wait); INIT_WORK(&policy->update, handle_update); -- Gitee From f5816b90c191975149d549db15c47d8c0290210d Mon Sep 17 00:00:00 2001 From: Lifeng Zheng Date: Thu, 21 Aug 2025 15:16:08 +0800 Subject: [PATCH 20/21] cpufreq: Move the check of cpufreq_driver->get into cpufreq_verify_current_freq() ANBZ: #26062 commit 1eda0d49800ae9f8508bc5aa2cc679900e32a528 upstream mainline inclusion from mainline-v6.17-rc2 commit 908981d85f86c5e2b39dfe0b2267c6d44d9c48f7 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ICTP7R CVE: NA Reference: https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=908981d85f86c5e2b39dfe0b2267c6d44d9c48f7 ---------------------------------------------------------------------- Move the check of cpufreq_driver->get into cpufreq_verify_current_freq() in case of calling it without check. Signed-off-by: Lifeng Zheng Link: https://patch.msgid.link/20250709104145.2348017-4-zhenglifeng1@huawei.com Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Hongye Lin Signed-off-by: Lin Ruier <2878455989@qq.com> --- drivers/cpufreq/cpufreq.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index c49ee74a9f34..5b0f44edbee3 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1748,6 +1748,9 @@ static unsigned int cpufreq_verify_current_freq(struct cpufreq_policy *policy, b { unsigned int new_freq; + if (!cpufreq_driver->get) + return 0; + new_freq = cpufreq_driver->get(policy->cpu); if (!new_freq) return 0; @@ -1862,8 +1865,7 @@ unsigned int cpufreq_get(unsigned int cpu) if (policy) { down_read(&policy->rwsem); - if (cpufreq_driver->get) - ret_freq = __cpufreq_get(policy); + ret_freq = __cpufreq_get(policy); up_read(&policy->rwsem); cpufreq_cpu_put(policy); @@ -2425,8 +2427,7 @@ int cpufreq_start_governor(struct cpufreq_policy *policy) pr_debug("%s: for CPU %u\n", __func__, policy->cpu); - if (cpufreq_driver->get) - cpufreq_verify_current_freq(policy, false); + cpufreq_verify_current_freq(policy, false); if (policy->governor->start) { ret = policy->governor->start(policy); -- Gitee From 20873b9a5534205232f488d46eadc44d9dd9341d Mon Sep 17 00:00:00 2001 From: Lifeng Zheng Date: Thu, 21 Aug 2025 15:16:09 +0800 Subject: [PATCH 21/21] cpufreq: Exit governor when failed to start old governor ANBZ: #26062 commit 0233cbaed3a2dcc458bb46b5bc499b1906bf31e9 upstream mainline inclusion from mainline-v6.17-rc2 commit 0ae204405095abfbc2d694ee0fbb49bcbbe55c57 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ICTP7R CVE: NA Reference: https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=0ae204405095abfbc2d694ee0fbb49bcbbe55c57 ---------------------------------------------------------------------- Detect the result of starting old governor in cpufreq_set_policy(). If it fails, exit the governor and clear policy->governor. Signed-off-by: Lifeng Zheng Link: https://patch.msgid.link/20250709104145.2348017-5-zhenglifeng1@huawei.com Signed-off-by: Rafael J. Wysocki Signed-off-by: Hongye Lin Signed-off-by: Lin Ruier <2878455989@qq.com> --- drivers/cpufreq/cpufreq.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 5b0f44edbee3..3301229f6a13 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2635,10 +2635,12 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, pr_debug("starting governor %s failed\n", policy->governor->name); if (old_gov) { policy->governor = old_gov; - if (cpufreq_init_governor(policy)) + if (cpufreq_init_governor(policy)) { policy->governor = NULL; - else - cpufreq_start_governor(policy); + } else if (cpufreq_start_governor(policy)) { + cpufreq_exit_governor(policy); + policy->governor = NULL; + } } return ret; -- Gitee
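To close the series, a minimal standalone model of the fallback flow this last patch establishes in cpufreq_set_policy(). Everything below is a simplified illustration (stub names, hard-coded failures), not kernel code:

#include <stdio.h>

struct policy {
        const char *governor;
};

/* Stand-ins for the governor hooks: 0 on success, negative on failure.
 * gov_start() is hard-coded to fail so the fallback path is exercised. */
static int gov_init(const char *g)   { (void)g; return 0; }
static int gov_start(const char *g)  { (void)g; return -1; }
static void gov_exit(const char *g)  { printf("exiting %s\n", g); }

static int set_policy(struct policy *p, const char *new_gov)
{
        const char *old_gov = p->governor;

        p->governor = new_gov;
        if (!gov_init(new_gov) && !gov_start(new_gov))
                return 0;

        printf("starting governor %s failed\n", new_gov);
        if (old_gov) {
                p->governor = old_gov;
                if (gov_init(old_gov)) {
                        p->governor = NULL;
                } else if (gov_start(old_gov)) {
                        /* the new part: do not leave a governor that failed
                         * to start half-initialized behind */
                        gov_exit(old_gov);
                        p->governor = NULL;
                }
        }
        return -1;
}

int main(void)
{
        struct policy p = { .governor = "ondemand" };

        set_policy(&p, "conservative");
        printf("policy->governor is now %s\n", p.governor ? p.governor : "NULL");
        return 0;
}

Before the patch, a failed restart of the old governor went undetected, leaving policy->governor pointing at a governor that had been initialized but never started.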