From bedb19f66ef42783a82ced6a49cfec1d17b6768a Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki"
Date: Thu, 7 Jan 2021 19:42:15 +0100
Subject: [PATCH 01/21] cpufreq: intel_pstate: Always read hwp_cap_cached with
 READ_ONCE()

ANBZ: #9550

commit 9dd04ec6bc6fa7b310e5595f2ad9bef13eacd3a0 upstream.

Because intel_pstate_get_hwp_max(), which updates hwp_cap_cached, may
run in parallel with the readers of it, annotate all of the read
accesses to it with READ_ONCE().

Intel-SIG: commit 9dd04ec6bc6f cpufreq: intel_pstate: Always read hwp_cap_cached with.
Backport intel_pstate driver update for 5.10

Signed-off-by: Rafael J. Wysocki
Tested-by: Chen Yu
[ Yingbao Jia: amend commit log ]
Signed-off-by: Yingbao Jia
---
 drivers/cpufreq/intel_pstate.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index d43a053125a1..78b9b555c3b0 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -924,7 +924,7 @@ static void intel_pstate_hwp_offline(struct cpudata *cpu)
 	}
 
 	value &= ~GENMASK_ULL(31, 0);
-	min_perf = HWP_LOWEST_PERF(cpu->hwp_cap_cached);
+	min_perf = HWP_LOWEST_PERF(READ_ONCE(cpu->hwp_cap_cached));
 
 	/* Set hwp_max = hwp_min */
 	value |= HWP_MAX_PERF(min_perf);
@@ -1762,6 +1762,7 @@ static int hwp_boost_hold_time_ns = 3 * NSEC_PER_MSEC;
 static inline void intel_pstate_hwp_boost_up(struct cpudata *cpu)
 {
 	u64 hwp_req = READ_ONCE(cpu->hwp_req_cached);
+	u64 hwp_cap = READ_ONCE(cpu->hwp_cap_cached);
 	u32 max_limit = (hwp_req & 0xff00) >> 8;
 	u32 min_limit = (hwp_req & 0xff);
 	u32 boost_level1;
@@ -1788,14 +1789,14 @@ static inline void intel_pstate_hwp_boost_up(struct cpudata *cpu)
 	cpu->hwp_boost_min = min_limit;
 
 	/* level at half way mark between min and guranteed */
-	boost_level1 = (HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) + min_limit) >> 1;
+	boost_level1 = (HWP_GUARANTEED_PERF(hwp_cap) + min_limit) >> 1;
 
 	if (cpu->hwp_boost_min < boost_level1)
 		cpu->hwp_boost_min = boost_level1;
-	else if (cpu->hwp_boost_min < HWP_GUARANTEED_PERF(cpu->hwp_cap_cached))
-		cpu->hwp_boost_min = HWP_GUARANTEED_PERF(cpu->hwp_cap_cached);
-	else if (cpu->hwp_boost_min == HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) &&
-		 max_limit != HWP_GUARANTEED_PERF(cpu->hwp_cap_cached))
+	else if (cpu->hwp_boost_min < HWP_GUARANTEED_PERF(hwp_cap))
+		cpu->hwp_boost_min = HWP_GUARANTEED_PERF(hwp_cap);
+	else if (cpu->hwp_boost_min == HWP_GUARANTEED_PERF(hwp_cap) &&
+		 max_limit != HWP_GUARANTEED_PERF(hwp_cap))
 		cpu->hwp_boost_min = max_limit;
 	else
 		return;
--
Gitee
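A note on the READ_ONCE()/WRITE_ONCE() pairing used above: the writer
publishes a fresh MSR_HWP_CAPABILITIES value with a single, non-torn
store, and each reader snapshots the word once into a local and reuses
that copy. A minimal hedged sketch of the same pattern (standalone,
simplified names, not part of the patch):

	/* Writer side: publish the cached capability word atomically. */
	static u64 hwp_cap_cached;	/* shared with concurrent readers */

	static void cache_hwp_cap(u64 cap)
	{
		WRITE_ONCE(hwp_cap_cached, cap);
	}

	/* Reader side: read the shared word exactly once, then reuse it. */
	static int lowest_perf(void)
	{
		u64 hwp_cap = READ_ONCE(hwp_cap_cached);

		return HWP_LOWEST_PERF(hwp_cap);
	}

Reading into a local also guarantees that the several
HWP_GUARANTEED_PERF() comparisons in intel_pstate_hwp_boost_up() all
operate on the same snapshot.
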
From adead4eacffced7f9d6ad1309176a54d0817414c Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki"
Date: Tue, 16 Mar 2021 16:52:43 +0100
Subject: [PATCH 02/21] cpufreq: intel_pstate: Clean up frequency computations

ANBZ: #9550

commit de5bcf404acee6218f8f49c9d6dc1f6031a62fa6 upstream.

Notice that some computations related to frequency in intel_pstate can
be simplified if (a) intel_pstate_get_hwp_max() updates the relevant
members of struct cpudata by itself and (b) the "turbo disabled" check
is moved from it to its callers, so modify the code accordingly and,
while at it, rename intel_pstate_get_hwp_max() to
intel_pstate_get_hwp_cap(), which better reflects its purpose, and
provide a simplified variant of it, __intel_pstate_get_hwp_cap(),
suitable for the initialization path.

No intentional functional impact.

Intel-SIG: commit de5bcf404ace cpufreq: intel_pstate: Clean up frequency computations.
Backport intel_pstate driver update for 5.10

Signed-off-by: Rafael J. Wysocki
Tested-by: Chen Yu
[ Yingbao Jia: amend commit log ]
Signed-off-by: Yingbao Jia
---
 drivers/cpufreq/intel_pstate.c | 101 +++++++++++++++------------------
 1 file changed, 45 insertions(+), 56 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 78b9b555c3b0..313fe60abf81 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -829,19 +829,21 @@ static struct freq_attr *hwp_cpufreq_attrs[] = {
 	NULL,
 };
 
-static void intel_pstate_get_hwp_max(struct cpudata *cpu, int *phy_max,
-				     int *current_max)
+static void __intel_pstate_get_hwp_cap(struct cpudata *cpu)
 {
 	u64 cap;
 
 	rdmsrl_on_cpu(cpu->cpu, MSR_HWP_CAPABILITIES, &cap);
 	WRITE_ONCE(cpu->hwp_cap_cached, cap);
-	if (global.no_turbo || global.turbo_disabled)
-		*current_max = HWP_GUARANTEED_PERF(cap);
-	else
-		*current_max = HWP_HIGHEST_PERF(cap);
+	cpu->pstate.max_pstate = HWP_GUARANTEED_PERF(cap);
+	cpu->pstate.turbo_pstate = HWP_HIGHEST_PERF(cap);
+}
 
-	*phy_max = HWP_HIGHEST_PERF(cap);
+static void intel_pstate_get_hwp_cap(struct cpudata *cpu)
+{
+	__intel_pstate_get_hwp_cap(cpu);
+	cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling;
+	cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
 }
 
 static void intel_pstate_hwp_set(unsigned int cpu)
@@ -1206,12 +1208,13 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b,
 
 static void update_qos_request(enum freq_qos_req_type type)
 {
-	int max_state, turbo_max, freq, i, perf_pct;
 	struct freq_qos_request *req;
 	struct cpufreq_policy *policy;
+	int i;
 
 	for_each_possible_cpu(i) {
 		struct cpudata *cpu = all_cpu_data[i];
+		unsigned int freq, perf_pct;
 
 		policy = cpufreq_cpu_get(i);
 		if (!policy)
@@ -1224,9 +1227,7 @@ static void update_qos_request(enum freq_qos_req_type type)
 			continue;
 
 		if (hwp_active)
-			intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state);
-		else
-			turbo_max = cpu->pstate.turbo_pstate;
+			intel_pstate_get_hwp_cap(cpu);
 
 		if (type == FREQ_QOS_MIN) {
 			perf_pct = global.min_perf_pct;
@@ -1235,8 +1236,7 @@ static void update_qos_request(enum freq_qos_req_type type)
 			perf_pct = global.max_perf_pct;
 		}
 
-		freq = DIV_ROUND_UP(turbo_max * perf_pct, 100);
-		freq *= cpu->pstate.scaling;
+		freq = DIV_ROUND_UP(cpu->pstate.turbo_freq * perf_pct, 100);
 
 		if (freq_qos_update_request(req, freq) < 0)
 			pr_warn("Failed to update freq constraint: CPU%d\n", i);
@@ -1726,21 +1726,17 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
 {
 	cpu->pstate.min_pstate = pstate_funcs.get_min();
 	cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical();
-	cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
 	cpu->pstate.scaling = pstate_funcs.get_scaling();
 
 	if (hwp_active && !hwp_mode_bdw) {
-		unsigned int phy_max, current_max;
-
-		intel_pstate_get_hwp_max(cpu, &phy_max, &current_max);
-		cpu->pstate.turbo_freq = phy_max * cpu->pstate.scaling;
-		cpu->pstate.turbo_pstate = phy_max;
-		cpu->pstate.max_pstate = HWP_GUARANTEED_PERF(READ_ONCE(cpu->hwp_cap_cached));
+		__intel_pstate_get_hwp_cap(cpu);
 	} else {
-		cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
 		cpu->pstate.max_pstate = pstate_funcs.get_max();
+		cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
 	}
+	cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling;
+	cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
 
 	if (pstate_funcs.get_aperf_mperf_shift)
 		cpu->aperf_mperf_shift = pstate_funcs.get_aperf_mperf_shift();
@@ -2210,18 +2206,21 @@ static void intel_pstate_update_perf_limits(struct cpudata *cpu,
 	int max_freq;
 
 	/*
-	 * HWP needs some special consideration, because on BDX the
-	 * HWP_REQUEST uses abstract value to represent performance
-	 * rather than pure ratios.
+	 * HWP needs some special consideration, because HWP_REQUEST uses
+	 * abstract values to represent performance rather than pure ratios.
 	 */
-	if (hwp_active) {
-		intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state);
+	if (hwp_active)
+		intel_pstate_get_hwp_cap(cpu);
+
+	if (global.no_turbo || global.turbo_disabled) {
+		max_state = cpu->pstate.max_pstate;
+		max_freq = cpu->pstate.max_freq;
 	} else {
-		max_state = global.no_turbo || global.turbo_disabled ?
-			cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
-		turbo_max = cpu->pstate.turbo_pstate;
+		max_state = cpu->pstate.turbo_pstate;
+		max_freq = cpu->pstate.turbo_freq;
 	}
-	max_freq = max_state * cpu->pstate.scaling;
+
+	turbo_max = cpu->pstate.turbo_pstate;
 
 	max_policy_perf = max_state * policy_max / max_freq;
 	if (policy_max == policy_min) {
@@ -2328,10 +2327,9 @@ static void intel_pstate_verify_cpu_policy(struct cpudata *cpu,
 
 	update_turbo_state();
 	if (hwp_active) {
-		int max_state, turbo_max;
-
-		intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state);
-		max_freq = max_state * cpu->pstate.scaling;
+		intel_pstate_get_hwp_cap(cpu);
+		max_freq = global.no_turbo || global.turbo_disabled ?
+				cpu->pstate.max_freq : cpu->pstate.turbo_freq;
 	} else {
 		max_freq = intel_pstate_get_max_freq(cpu);
 	}
@@ -2422,25 +2420,15 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
 	cpu->max_perf_ratio = 0xFF;
 	cpu->min_perf_ratio = 0;
 
-	policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
-	policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
-
 	/* cpuinfo and default policy values */
 	policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
 	update_turbo_state();
 	global.turbo_disabled_mf = global.turbo_disabled;
 	policy->cpuinfo.max_freq = global.turbo_disabled ?
-			cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
-	policy->cpuinfo.max_freq *= cpu->pstate.scaling;
-
-	if (hwp_active) {
-		unsigned int max_freq;
-
-		max_freq = global.turbo_disabled ?
 			cpu->pstate.max_freq : cpu->pstate.turbo_freq;
-		if (max_freq < policy->cpuinfo.max_freq)
-			policy->cpuinfo.max_freq = max_freq;
-	}
+
+	policy->min = policy->cpuinfo.min_freq;
+	policy->max = policy->cpuinfo.max_freq;
 
 	intel_pstate_init_acpi_perf_limits(policy);
@@ -2645,10 +2633,10 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy,
 
 static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
 {
-	int max_state, turbo_max, min_freq, max_freq, ret;
 	struct freq_qos_request *req;
 	struct cpudata *cpu;
 	struct device *dev;
+	int ret, freq;
 
 	dev = get_cpu_device(policy->cpu);
 	if (!dev)
@@ -2673,30 +2661,31 @@ static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	if (hwp_active) {
 		u64 value;
 
-		intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state);
 		policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY_HWP;
+
+		intel_pstate_get_hwp_cap(cpu);
+
 		rdmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, &value);
 		WRITE_ONCE(cpu->hwp_req_cached, value);
+
 		cpu->epp_cached = intel_pstate_get_epp(cpu, value);
 	} else {
-		turbo_max = cpu->pstate.turbo_pstate;
 		policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY;
 	}
 
-	min_freq = DIV_ROUND_UP(turbo_max * global.min_perf_pct, 100);
-	min_freq *= cpu->pstate.scaling;
-	max_freq = DIV_ROUND_UP(turbo_max * global.max_perf_pct, 100);
-	max_freq *= cpu->pstate.scaling;
+	freq = DIV_ROUND_UP(cpu->pstate.turbo_freq * global.min_perf_pct, 100);
 
 	ret = freq_qos_add_request(&policy->constraints, req, FREQ_QOS_MIN,
-				   min_freq);
+				   freq);
 	if (ret < 0) {
 		dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret);
 		goto free_req;
 	}
 
+	freq = DIV_ROUND_UP(cpu->pstate.turbo_freq * global.max_perf_pct, 100);
+
 	ret = freq_qos_add_request(&policy->constraints, req + 1, FREQ_QOS_MAX,
-				   max_freq);
+				   freq);
 	if (ret < 0) {
 		dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret);
 		goto remove_min_req;
--
Gitee
From c7c848974e5fda994664a5c30407adf019168a57 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki"
Date: Wed, 7 Apr 2021 16:21:55 +0200
Subject: [PATCH 03/21] cpufreq: intel_pstate: Simplify
 intel_pstate_update_perf_limits()

ANBZ: #9550

commit b989bc0f3cf24122ec700e66eb8ffb93432f18c5 upstream.

Because pstate.max_freq is always equal to the product of
pstate.max_pstate and pstate.scaling and, analogously, pstate.turbo_freq
is always equal to the product of pstate.turbo_pstate and
pstate.scaling, the result of the max_policy_perf computation in
intel_pstate_update_perf_limits() is always equal to the quotient of
policy_max and pstate.scaling, regardless of whether or not turbo is
disabled. Analogously, the result of min_policy_perf in
intel_pstate_update_perf_limits() is always equal to the quotient of
policy_min and pstate.scaling.

Accordingly, intel_pstate_update_perf_limits() need not check whether
or not turbo is enabled at all and in order to compute max_policy_perf
and min_policy_perf it can always divide policy_max and policy_min,
respectively, by pstate.scaling. Make it do so.

While at it, move the definition and initialization of the turbo_max
local variable to the code branch using it.

No intentional functional impact.

Intel-SIG: commit b989bc0f3cf2 cpufreq: intel_pstate: Simplify.
Backport intel_pstate driver update for 5.10

Signed-off-by: Rafael J. Wysocki
Tested-by: Chen Yu
[ Yingbao Jia: amend commit log ]
Signed-off-by: Yingbao Jia
---
 drivers/cpufreq/intel_pstate.c | 22 ++++++----------------
 1 file changed, 6 insertions(+), 16 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 313fe60abf81..bf4c2ea12019 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2201,9 +2201,8 @@ static void intel_pstate_update_perf_limits(struct cpudata *cpu,
 					    unsigned int policy_min,
 					    unsigned int policy_max)
 {
+	int scaling = cpu->pstate.scaling;
 	int32_t max_policy_perf, min_policy_perf;
-	int max_state, turbo_max;
-	int max_freq;
 
 	/*
 	 * HWP needs some special consideration, because HWP_REQUEST uses
@@ -2212,33 +2211,24 @@ static void intel_pstate_update_perf_limits(struct cpudata *cpu,
 	if (hwp_active)
 		intel_pstate_get_hwp_cap(cpu);
 
-	if (global.no_turbo || global.turbo_disabled) {
-		max_state = cpu->pstate.max_pstate;
-		max_freq = cpu->pstate.max_freq;
-	} else {
-		max_state = cpu->pstate.turbo_pstate;
-		max_freq = cpu->pstate.turbo_freq;
-	}
-
-	turbo_max = cpu->pstate.turbo_pstate;
-
-	max_policy_perf = max_state * policy_max / max_freq;
+	max_policy_perf = policy_max / scaling;
 	if (policy_max == policy_min) {
 		min_policy_perf = max_policy_perf;
 	} else {
-		min_policy_perf = max_state * policy_min / max_freq;
+		min_policy_perf = policy_min / scaling;
 		min_policy_perf = clamp_t(int32_t, min_policy_perf,
 					  0, max_policy_perf);
 	}
 
-	pr_debug("cpu:%d max_state %d min_policy_perf:%d max_policy_perf:%d\n",
-		 cpu->cpu, max_state, min_policy_perf, max_policy_perf);
+	pr_debug("cpu:%d min_policy_perf:%d max_policy_perf:%d\n",
+		 cpu->cpu, min_policy_perf, max_policy_perf);
 
 	/* Normalize user input to [min_perf, max_perf] */
 	if (per_cpu_limits) {
 		cpu->min_perf_ratio = min_policy_perf;
 		cpu->max_perf_ratio = max_policy_perf;
 	} else {
+		int turbo_max = cpu->pstate.turbo_pstate;
 		int32_t global_min, global_max;
 
 		/* Global limits are in percent of the maximum turbo P-state. */
--
Gitee
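The two clean-ups above rest on one arithmetic identity: once the driver
itself maintains pstate.max_freq = max_pstate * scaling and
pstate.turbo_freq = turbo_pstate * scaling, the old expression
max_state * policy_max / max_freq collapses to policy_max / scaling in
both the turbo and no-turbo branches. A hedged worked example in C, with
made-up numbers (the 100000 kHz scaling step is illustrative, not taken
from the patches):

	/* Illustration only: why the turbo check could be dropped. */
	static void freq_identity_demo(void)
	{
		int scaling = 100000;		/* assumed kHz per P-state step */
		int max_state = 36;		/* example max (or turbo) P-state */
		int max_freq = max_state * scaling;	/* 3600000 kHz by construction */
		int policy_max = 2800000;	/* example policy limit, kHz */

		int old_way = max_state * policy_max / max_freq;	/* 28 */
		int new_way = policy_max / scaling;			/* 28 */
	}

Either branch of the removed check computed max_freq as
max_state * scaling, so the quotient is identical both ways.
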
From d0f32b7a097320e22fc605722e7b9c134d92fbb1 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada
Date: Tue, 28 Sep 2021 09:42:17 -0700
Subject: [PATCH 04/21] cpufreq: intel_pstate: Process HWP Guaranteed change
 notification

ANBZ: #9550

commit 57577c996d731ce1e5a4a488e64e6e201b360847 upstream.

It is possible that the HWP guaranteed ratio is changed in response to
a change in power and thermal limits. For example, when the Intel Speed
Select performance profile is changed or there is a change in TDP,
hardware can send notifications. It is possible that the guaranteed
ratio is increased. This creates an issue when turbo is disabled, as
the old limits set in MSR_HWP_REQUEST are still lower and hardware will
clip to the older limits.

This change enables the HWP interrupt and processes HWP interrupts.
When the guaranteed ratio is changed, it calls cpufreq_update_policy()
so that driver callbacks are called to update to the new HWP limits.
This callback is called from a delayed workqueue of 10ms to avoid
frequent updates.

Although the scope of IA32_HWP_INTERRUPT is per logical cpu, on some
platforms the interrupt is generated on all CPUs. This is particularly
a problem during initialization, when the driver has not yet allocated
data for the other CPUs. So this change uses a cpumask of enabled CPUs
and processes interrupts on those CPUs only.

When the cpufreq offline() or suspend() callback is called, the HWP
interrupt is disabled on those CPUs and any pending work item is also
canceled. A spin lock is used to protect data and processing shared
with the interrupt handler. Here the READ_ONCE() and WRITE_ONCE()
macros are used to designate shared data, even though the spin lock
acts as an optimization barrier here.

Intel-SIG: commit 57577c996d73 cpufreq: intel_pstate: Process HWP Guaranteed change.
Backport intel_pstate driver update for 5.10

Signed-off-by: Srinivas Pandruvada
Tested-by: pablomh@gmail.com
Signed-off-by: Rafael J. Wysocki
[ Yingbao Jia: amend commit log ]
Signed-off-by: Yingbao Jia
---
 drivers/cpufreq/intel_pstate.c | 117 +++++++++++++++++++++++++++++++--
 1 file changed, 111 insertions(+), 6 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index bf4c2ea12019..4667b160f3c5 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -32,6 +32,7 @@
 #include <asm/cpu_device_id.h>
 #include <asm/cpufeature.h>
 #include <asm/intel-family.h>
+#include "../drivers/thermal/intel/thermal_interrupt.h"
 
 #define INTEL_PSTATE_SAMPLING_INTERVAL	(10 * NSEC_PER_MSEC)
@@ -226,6 +227,7 @@ struct global_params {
 * @sched_flags:	Store scheduler flags for possible cross CPU update
 * @hwp_boost_min:	Last HWP boosted min performance
 * @suspended:		Whether or not the driver has been suspended.
+ * @hwp_notify_work:	workqueue for HWP notifications.
 *
 * This structure stores per CPU instance data for all CPUs.
 */
@@ -264,6 +266,7 @@ struct cpudata {
 	unsigned int sched_flags;
 	u32 hwp_boost_min;
 	bool suspended;
+	struct delayed_work hwp_notify_work;
 };
 
 static struct cpudata **all_cpu_data;
@@ -909,11 +912,15 @@ static void intel_pstate_hwp_set(unsigned int cpu)
 	wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
 }
 
+static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata);
+
 static void intel_pstate_hwp_offline(struct cpudata *cpu)
 {
 	u64 value = READ_ONCE(cpu->hwp_req_cached);
 	int min_perf;
 
+	intel_pstate_disable_hwp_interrupt(cpu);
+
 	if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
 		/*
 		 * In case the EPP has been set to "performance" by the
@@ -977,6 +984,9 @@ static int intel_pstate_suspend(struct cpufreq_policy *policy)
 
 	cpu->suspended = true;
 
+	/* disable HWP interrupt and cancel any pending work */
+	intel_pstate_disable_hwp_interrupt(cpu);
+
 	return 0;
 }
@@ -1460,15 +1470,105 @@ static void intel_pstate_sysfs_hide_hwp_dynamic_boost(void)
 
 /************************** sysfs end ************************/
 
+static void intel_pstate_notify_work(struct work_struct *work)
+{
+	struct cpudata *cpudata =
+		container_of(to_delayed_work(work), struct cpudata, hwp_notify_work);
+
+	cpufreq_update_policy(cpudata->cpu);
+	wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0);
+}
+
+static DEFINE_SPINLOCK(hwp_notify_lock);
+static cpumask_t hwp_intr_enable_mask;
+
+void notify_hwp_interrupt(void)
+{
+	unsigned int this_cpu = smp_processor_id();
+	struct cpudata *cpudata;
+	unsigned long flags;
+	u64 value;
+
+	if (!READ_ONCE(hwp_active) || !boot_cpu_has(X86_FEATURE_HWP_NOTIFY))
+		return;
+
+	rdmsrl_safe(MSR_HWP_STATUS, &value);
+	if (!(value & 0x01))
+		return;
+
+	spin_lock_irqsave(&hwp_notify_lock, flags);
+
+	if (!cpumask_test_cpu(this_cpu, &hwp_intr_enable_mask))
+		goto ack_intr;
+
+	/*
+	 * Currently we never free all_cpu_data. And we can't reach here
+	 * without this allocated. But for safety for future changes, added
+	 * check.
+	 */
+	if (unlikely(!READ_ONCE(all_cpu_data)))
+		goto ack_intr;
+
+	/*
+	 * The free is done during cleanup, when cpufreq registry is failed.
+	 * We wouldn't be here if it fails on init or switch status. But for
+	 * future changes, added check.
+	 */
+	cpudata = READ_ONCE(all_cpu_data[this_cpu]);
+	if (unlikely(!cpudata))
+		goto ack_intr;
+
+	schedule_delayed_work(&cpudata->hwp_notify_work, msecs_to_jiffies(10));
+
+	spin_unlock_irqrestore(&hwp_notify_lock, flags);
+
+	return;
+
+ack_intr:
+	wrmsrl_safe(MSR_HWP_STATUS, 0);
+	spin_unlock_irqrestore(&hwp_notify_lock, flags);
+}
+
+static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata)
+{
+	unsigned long flags;
+
+	/* wrmsrl_on_cpu has to be outside spinlock as this can result in IPC */
+	wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
+
+	spin_lock_irqsave(&hwp_notify_lock, flags);
+	if (cpumask_test_and_clear_cpu(cpudata->cpu, &hwp_intr_enable_mask))
+		cancel_delayed_work(&cpudata->hwp_notify_work);
+	spin_unlock_irqrestore(&hwp_notify_lock, flags);
+}
+
+static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata)
+{
+	/* Enable HWP notification interrupt for guaranteed performance change */
+	if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&hwp_notify_lock, flags);
+		INIT_DELAYED_WORK(&cpudata->hwp_notify_work, intel_pstate_notify_work);
+		cpumask_set_cpu(cpudata->cpu, &hwp_intr_enable_mask);
+		spin_unlock_irqrestore(&hwp_notify_lock, flags);
+
+		/* wrmsrl_on_cpu has to be outside spinlock as this can result in IPC */
+		wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x01);
+	}
+}
+
 static void intel_pstate_hwp_enable(struct cpudata *cpudata)
 {
-	/* First disable HWP notification interrupt as we don't process them */
+	/* First disable HWP notification interrupt till we activate again */
 	if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY))
 		wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
 
 	wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
 	if (cpudata->epp_default == -EINVAL)
 		cpudata->epp_default = intel_pstate_get_epp(cpudata, 0);
+
+	intel_pstate_enable_hwp_interrupt(cpudata);
 }
 
 static int atom_get_min_pstate(void)
@@ -2129,7 +2229,7 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
 		if (!cpu)
 			return -ENOMEM;
 
-		all_cpu_data[cpunum] = cpu;
+		WRITE_ONCE(all_cpu_data[cpunum], cpu);
 
 		cpu->cpu = cpunum;
@@ -2735,8 +2835,10 @@ static void intel_pstate_driver_cleanup(void)
 			if (intel_pstate_driver == &intel_pstate)
 				intel_pstate_clear_update_util_hook(cpu);
 
+			spin_lock(&hwp_notify_lock);
 			kfree(all_cpu_data[cpu]);
-			all_cpu_data[cpu] = NULL;
+			WRITE_ONCE(all_cpu_data[cpu], NULL);
+			spin_unlock(&hwp_notify_lock);
 		}
 	}
 	put_online_cpus();
@@ -3005,6 +3107,7 @@ static bool intel_pstate_hwp_is_enabled(void)
 
 static int __init intel_pstate_init(void)
 {
+	static struct cpudata **_all_cpu_data;
 	const struct x86_cpu_id *id;
 	int rc;
@@ -3030,7 +3133,7 @@ static int __init intel_pstate_init(void)
 	 * deal with it.
 	 */
 	if ((!no_hwp && boot_cpu_has(X86_FEATURE_HWP_EPP)) || hwp_forced) {
-		hwp_active++;
+		WRITE_ONCE(hwp_active, 1);
 		hwp_mode_bdw = id->driver_data;
 		intel_pstate.attr = hwp_cpufreq_attrs;
 		intel_cpufreq.attr = hwp_cpufreq_attrs;
@@ -3077,10 +3180,12 @@ static int __init intel_pstate_init(void)
 
 	pr_info("Intel P-state driver initializing\n");
 
-	all_cpu_data = vzalloc(array_size(sizeof(void *), num_possible_cpus()));
-	if (!all_cpu_data)
+	_all_cpu_data = vzalloc(array_size(sizeof(void *), num_possible_cpus()));
+	if (!_all_cpu_data)
 		return -ENOMEM;
 
+	WRITE_ONCE(all_cpu_data, _all_cpu_data);
+
 	intel_pstate_request_control_from_smm();
 
 	intel_pstate_sysfs_expose_params();
--
Gitee

From 57db3a271855ec506d20db11fe64ac663d82335e Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada
Date: Wed, 3 Nov 2021 22:19:25 -0700
Subject: [PATCH 05/21] cpufreq: intel_pstate: Fix unchecked MSR 0x773 access

ANBZ: #9550

commit 5521055670a53bd495676d1163b40ecb0a37af9c upstream.

It is possible that on some platforms HWP interrupts are disabled. In
that case accessing MSR 0x773 will result in a warning. So check the
X86_FEATURE_HWP_NOTIFY feature before accessing MSR 0x773. The other
places in the code where this MSR is accessed already check this
feature, except the disable path called from the cpufreq offline and
suspend callbacks.

Intel-SIG: commit 5521055670a5 cpufreq: intel_pstate: Fix unchecked MSR 0x773 access.
Backport intel_pstate driver update for 5.10

Fixes: 57577c996d73 ("cpufreq: intel_pstate: Process HWP Guaranteed change notification")
Reported-by: Steven Rostedt
Signed-off-by: Srinivas Pandruvada
Tested-by: Steven Rostedt (VMware)
Signed-off-by: Rafael J. Wysocki
[ Yingbao Jia: amend commit log ]
Signed-off-by: Yingbao Jia
---
 drivers/cpufreq/intel_pstate.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 4667b160f3c5..f8281129ab30 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1533,6 +1533,9 @@ static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata)
 {
 	unsigned long flags;
 
+	if (!boot_cpu_has(X86_FEATURE_HWP_NOTIFY))
+		return;
+
 	/* wrmsrl_on_cpu has to be outside spinlock as this can result in IPC */
 	wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
--
Gitee
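The core of patch 04 is a standard hardirq-to-workqueue handoff: the
interrupt handler does the minimum (check, ack, defer) and the expensive
cpufreq policy update runs later in process context, with the 10ms delay
rate-limiting bursts of notifications. A condensed, hedged sketch of
that split (locking and the cpumask/NULL checks from the real code
omitted for brevity):

	/* Process context: may sleep, so the heavy lifting goes here. */
	static void hwp_notify_work_fn(struct work_struct *work)
	{
		struct cpudata *cpudata =
			container_of(to_delayed_work(work), struct cpudata,
				     hwp_notify_work);

		cpufreq_update_policy(cpudata->cpu);		/* may sleep */
		wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0);	/* re-arm */
	}

	/* Hardirq context: no sleeping, so only schedule the work. */
	static void hwp_irq_sketch(void)
	{
		struct cpudata *cpudata =
			READ_ONCE(all_cpu_data[smp_processor_id()]);

		if (cpudata)
			schedule_delayed_work(&cpudata->hwp_notify_work,
					      msecs_to_jiffies(10));
	}
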
From 14632766c9a780a4fc8187459ce9f2e7bae8912b Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada
Date: Thu, 4 Nov 2021 03:22:30 -0700
Subject: [PATCH 06/21] cpufreq: intel_pstate: Clear HWP Status during HWP
 Interrupt enable

ANBZ: #9550

commit 074d0cdfbb2f5985c5748fe80f6f8a2a7db8b63f upstream.

It is possible that some performance excursions happened before the OS
booted or enabled HWP interrupts. So clear the MSR_HWP_STATUS bits when
we enable the HWP interrupt. In this way the next excursion will result
in an HWP interrupt.

The status bits of MSR_HWP_STATUS must be cleared (0) by software so
that a new status condition change will cause the hardware to set the
bit again and issue the notification.

Intel-SIG: commit 074d0cdfbb2f cpufreq: intel_pstate: Clear HWP Status during HWP.
Backport intel_pstate driver update for 5.10

Fixes: 57577c996d73 ("cpufreq: intel_pstate: Process HWP Guaranteed change notification")
Signed-off-by: Srinivas Pandruvada
Signed-off-by: Rafael J. Wysocki
[ Yingbao Jia: amend commit log ]
Signed-off-by: Yingbao Jia
---
 drivers/cpufreq/intel_pstate.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index f8281129ab30..c073a7b72533 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1558,6 +1558,7 @@ static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata)
 
 		/* wrmsrl_on_cpu has to be outside spinlock as this can result in IPC */
 		wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x01);
+		wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0);
 	}
 }
--
Gitee

From 8a9838a6ce94bc0441779e53ff0d2f20efca6dd7 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki"
Date: Wed, 17 Nov 2021 14:57:31 +0100
Subject: [PATCH 07/21] cpufreq: intel_pstate: Fix active mode offline/online
 EPP handling

ANBZ: #9550

commit ed38eb49d101e829ae0f8c0a0d3bf5cb6bcbc6b2 upstream.

After commit 4adcf2e5829f ("cpufreq: intel_pstate: Add ->offline and
->online callbacks") the EPP value set by the "performance" scaling
algorithm in the active mode is not restored after an offline/online
cycle, which replaces it with the saved EPP value coming from user
space.

Address this issue by forcing intel_pstate_hwp_set() to set a new EPP
value the first time it runs after the CPU goes back online.

Intel-SIG: commit ed38eb49d101 cpufreq: intel_pstate: Fix active mode offline/online.
Backport intel_pstate driver update for 5.10

Fixes: 4adcf2e5829f ("cpufreq: intel_pstate: Add ->offline and ->online callbacks")
Link: https://lore.kernel.org/linux-pm/adc7132c8655bd4d1c8b6129578e931a14fe1db2.camel@linux.intel.com/
Reported-by: Srinivas Pandruvada
Cc: 5.9+ # 5.9+
Signed-off-by: Rafael J. Wysocki
[ Yingbao Jia: amend commit log ]
Signed-off-by: Yingbao Jia
---
 drivers/cpufreq/intel_pstate.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index c073a7b72533..0296fdeb751d 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -930,6 +930,13 @@ static void intel_pstate_hwp_offline(struct cpudata *cpu)
 		value &= ~GENMASK_ULL(31, 24);
 		value |= HWP_ENERGY_PERF_PREFERENCE(cpu->epp_cached);
 		WRITE_ONCE(cpu->hwp_req_cached, value);
+
+		/*
+		 * However, make sure that EPP will be set to "performance" when
+		 * the CPU is brought back online again and the "performance"
+		 * scaling algorithm is still in effect.
+		 */
+		cpu->epp_policy = CPUFREQ_POLICY_UNKNOWN;
 	}
 
 	value &= ~GENMASK_ULL(31, 0);
--
Gitee

From 6d144a1e322da2507701f13f48fe0505e34d7f9b Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada
Date: Thu, 18 Nov 2021 21:18:01 -0800
Subject: [PATCH 08/21] cpufreq: intel_pstate: ITMT support for overclocked
 system

ANBZ: #9550

commit 03c83982a0278207709143ba78c5a470179febee upstream.

On systems with overclocking enabled, CPPC Highest Performance can be
hard coded to 0xff. In this case, even if we have cores with different
highest performance, ITMT can't be enabled, as the current
implementation depends on CPPC Highest Performance.

On such systems we can use the MSR_HWP_CAPABILITIES maximum performance
field when CPPC Highest Performance is 0xff.

Due to legacy reasons, we can't solely depend on MSR_HWP_CAPABILITIES,
as on some older systems CPPC Highest Performance is the only way to
identify cores with different performance.

Intel-SIG: commit 03c83982a027 cpufreq: intel_pstate: ITMT support for overclocked.
Backport intel_pstate driver update for 5.10

Reported-by: Michael Larabel
Signed-off-by: Srinivas Pandruvada
Tested-by: Michael Larabel
Signed-off-by: Rafael J. Wysocki
[ Yingbao Jia: amend commit log ]
Signed-off-by: Yingbao Jia
---
 drivers/cpufreq/intel_pstate.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 0296fdeb751d..3653e578f832 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -343,6 +343,8 @@ static void intel_pstste_sched_itmt_work_fn(struct work_struct *work)
 
 static DECLARE_WORK(sched_itmt_work, intel_pstste_sched_itmt_work_fn);
 
+#define CPPC_MAX_PERF	U8_MAX
+
 static void intel_pstate_set_itmt_prio(int cpu)
 {
 	struct cppc_perf_caps cppc_perf;
@@ -353,6 +355,14 @@ static void intel_pstate_set_itmt_prio(int cpu)
 	if (ret)
 		return;
 
+	/*
+	 * On some systems with overclocking enabled, CPPC.highest_perf is hardcoded to 0xff.
+	 * In this case we can't use CPPC.highest_perf to enable ITMT.
+	 * In this case we can look at MSR_HWP_CAPABILITIES bits [8:0] to decide.
+	 */
+	if (cppc_perf.highest_perf == CPPC_MAX_PERF)
+		cppc_perf.highest_perf = HWP_HIGHEST_PERF(READ_ONCE(all_cpu_data[cpu]->hwp_cap_cached));
+
 	/*
 	 * The priorities can be set regardless of whether or not
 	 * sched_set_itmt_support(true) has been called and it is valid to
--
Gitee
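Patch 08 boils down to a one-line fallback: when the CPPC value is
pegged at 0xff, per-core differences can only be seen in the HWP
capability MSR. A hedged sketch of the decision, with an invented
helper name (itmt_priority() is illustrative only):

	#define CPPC_MAX_PERF	U8_MAX	/* 0xff: pegged value on overclocked parts */

	static int itmt_priority(struct cppc_perf_caps *cppc_perf, u64 hwp_cap)
	{
		int prio = cppc_perf->highest_perf;

		/* 0xff cannot distinguish cores; use HWP capabilities instead */
		if (prio == CPPC_MAX_PERF)
			prio = HWP_HIGHEST_PERF(hwp_cap);	/* bits [8:0] */

		return prio;
	}
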
From 4044b092f65069bee16b46a67b3ef28c81676824 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki"
Date: Fri, 17 Dec 2021 20:06:08 +0100
Subject: [PATCH 09/21] cpufreq: intel_pstate: Update cpuinfo.max_freq on
 HWP_CAP changes

ANBZ: #9550

commit dfeeedc1bf5772226bddf51ed3f853e5a6707bf1 upstream.

With HWP enabled, when the turbo range of performance levels is
disabled by the platform firmware, the CPU capacity is given by the
"guaranteed performance" field in MSR_HWP_CAPABILITIES, which is
generally dynamic. When it changes, the kernel receives an HWP
notification interrupt handled by notify_hwp_interrupt().

When the "guaranteed performance" value changes in the above
configuration, the CPU performance scaling needs to be adjusted so as
to use the new CPU capacity in computations, which means that the
cpuinfo.max_freq value needs to be updated for that CPU.

Accordingly, modify intel_pstate_notify_work() to read
MSR_HWP_CAPABILITIES and update cpuinfo.max_freq to reflect the new
configuration (this update can be carried out even if the configuration
doesn't actually change, because it simply doesn't matter then and it
takes less time to update it than to do extra checks to decide whether
or not a change has really occurred).

Intel-SIG: commit dfeeedc1bf57 cpufreq: intel_pstate: Update cpuinfo.max_freq on.
Backport intel_pstate driver update for 5.10

Reported-by: Srinivas Pandruvada
Tested-by: Srinivas Pandruvada
Signed-off-by: Rafael J. Wysocki
[ Yingbao Jia: amend commit log ]
Signed-off-by: Yingbao Jia
---
 drivers/cpufreq/intel_pstate.c | 24 +++++++++++++------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 3653e578f832..11103d394f72 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1041,19 +1041,22 @@ static void intel_pstate_update_policies(void)
 		cpufreq_update_policy(cpu);
 }
 
+static void __intel_pstate_update_max_freq(struct cpudata *cpudata,
+					   struct cpufreq_policy *policy)
+{
+	policy->cpuinfo.max_freq = global.turbo_disabled_mf ?
+			cpudata->pstate.max_freq : cpudata->pstate.turbo_freq;
+	refresh_frequency_limits(policy);
+}
+
 static void intel_pstate_update_max_freq(unsigned int cpu)
 {
 	struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu);
-	struct cpudata *cpudata;
 
 	if (!policy)
 		return;
 
-	cpudata = all_cpu_data[cpu];
-	policy->cpuinfo.max_freq = global.turbo_disabled_mf ?
-			cpudata->pstate.max_freq : cpudata->pstate.turbo_freq;
-
-	refresh_frequency_limits(policy);
+	__intel_pstate_update_max_freq(all_cpu_data[cpu], policy);
 
 	cpufreq_cpu_release(policy);
 }
@@ -1491,8 +1494,15 @@ static void intel_pstate_notify_work(struct work_struct *work)
 {
 	struct cpudata *cpudata =
 		container_of(to_delayed_work(work), struct cpudata, hwp_notify_work);
+	struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpudata->cpu);
+
+	if (policy) {
+		intel_pstate_get_hwp_cap(cpudata);
+		__intel_pstate_update_max_freq(cpudata, policy);
+
+		cpufreq_cpu_release(policy);
+	}
 
-	cpufreq_update_policy(cpudata->cpu);
 	wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0);
 }
--
Gitee

From e0459ba2218d0acdb4d4b2da18d0d88a21bbaa90 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki"
Date: Wed, 28 Dec 2022 22:26:04 +0100
Subject: [PATCH 10/21] cpufreq: intel_pstate: Drop ACPI _PSS states table
 patching

ANBZ: #9550

commit e8a0e30b742f76ebd0f3b196973df4bf65d8fbbb upstream.

After making acpi_processor_get_platform_limit() use the "no limit"
value for its frequency QoS request when _PPC returns 0, it is not
necessary to replace the frequency corresponding to the first _PSS
return package entry with the maximum turbo frequency of the given CPU
in intel_pstate_init_acpi_perf_limits() any more, so drop the code
doing that along with the comment explaining it.

Intel-SIG: commit e8a0e30b742f cpufreq: intel_pstate: Drop ACPI _PSS states table.
Backport intel_pstate driver update for 5.10

Signed-off-by: Rafael J. Wysocki
[ Yingbao Jia: amend commit log ]
Signed-off-by: Yingbao Jia
---
 drivers/cpufreq/intel_pstate.c | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 11103d394f72..b13cd8c5fb66 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -456,20 +456,6 @@ static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
 			 (u32) cpu->acpi_perf_data.states[i].control);
 	}
 
-	/*
-	 * The _PSS table doesn't contain whole turbo frequency range.
-	 * This just contains +1 MHZ above the max non turbo frequency,
-	 * with control value corresponding to max turbo ratio. But
-	 * when cpufreq set policy is called, it will call with this
-	 * max frequency, which will cause a reduced performance as
-	 * this driver uses real max turbo frequency as the max
-	 * frequency. So correct this frequency in _PSS table to
-	 * correct max turbo frequency based on the turbo state.
-	 * Also need to convert to MHz as _PSS freq is in MHz.
-	 */
-	if (!global.turbo_disabled)
-		cpu->acpi_perf_data.states[0].core_frequency =
-					policy->cpuinfo.max_freq / 1000;
 	cpu->valid_pss_table = true;
 	pr_debug("_PPC limits will be enforced\n");
--
Gitee

From 4fcc880be8b3ae8d959a06eac0fa4114ec4129da Mon Sep 17 00:00:00 2001
From: Nick Alcock
Date: Fri, 24 Feb 2023 15:07:49 +0000
Subject: [PATCH 11/21] cpufreq: intel_pstate: remove MODULE_LICENSE in
 non-modules

ANBZ: #9550

commit 5bd289f69bc145fa10927a6883502a0e7cbcb811 upstream.

Since commit 8b41fc4454e ("kbuild: create modules.builtin without
Makefile.modbuiltin or tristate.conf"), MODULE_LICENSE declarations are
used to identify modules. As a consequence, uses of the macro in
non-modules will cause modprobe to misidentify their containing object
file as a module when it is not (false positives), and modprobe might
succeed rather than failing with a suitable error message.

So remove it in the files in this commit, none of which can be built as
modules.

Intel-SIG: commit 5bd289f69bc1 cpufreq: intel_pstate: remove MODULE_LICENSE in.
Backport intel_pstate driver update for 5.10

Signed-off-by: Nick Alcock
Suggested-by: Luis Chamberlain
Signed-off-by: Rafael J. Wysocki
[ Yingbao Jia: amend commit log ]
Signed-off-by: Yingbao Jia
---
 drivers/cpufreq/intel_pstate.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index b13cd8c5fb66..7baf626cc020 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -3264,4 +3264,3 @@ early_param("intel_pstate", intel_pstate_setup);
 
 MODULE_AUTHOR("Dirk Brandewie ");
 MODULE_DESCRIPTION("'intel_pstate' - P state driver Intel Core processors");
-MODULE_LICENSE("GPL");
--
Gitee

From 05e70e073b62886678427ec6209bba1095c96c66 Mon Sep 17 00:00:00 2001
From: Tero Kristo
Date: Wed, 21 Jun 2023 09:58:39 +0300
Subject: [PATCH 12/21] cpufreq: intel_pstate: Fix energy_performance_preference
 for passive

ANBZ: #9550

commit 03f44ffb3d5be2fceda375d92c70ab6de4df7081 upstream.

If the intel_pstate driver is set to passive mode, then writing the
same value to the energy_performance_preference sysfs file twice will
fail. This is caused by the wrong return value being used (the index of
the matched energy_perf_string instead of the length of the passed-in
parameter).

Fix by forcing the internal return value to zero when the same
preference is passed in by the user. This issue is not present when the
driver is in active mode.

Intel-SIG: commit 03f44ffb3d5b cpufreq: intel_pstate: Fix energy_performance_preference for passive.
Backport intel_pstate driver update for 5.10

Fixes: f6ebbcf08f37 ("cpufreq: intel_pstate: Implement passive mode with HWP enabled")
Reported-by: Niklas Neronin
Signed-off-by: Tero Kristo
Signed-off-by: Rafael J. Wysocki
[ Yingbao Jia: amend commit log ]
Signed-off-by: Yingbao Jia
---
 drivers/cpufreq/intel_pstate.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 7baf626cc020..fccc3f00746d 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -776,6 +776,8 @@ static ssize_t store_energy_performance_preference(
 			err = cpufreq_start_governor(policy);
 			if (!ret)
 				ret = err;
+		} else {
+			ret = 0;
 		}
 	}
--
Gitee
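The bug fixed in patch 12 is a sysfs ->store() convention issue: a
store callback must return a negative errno or the number of bytes it
consumed, never an internal index, since user space treats a short
return as a partial write and retries. A hedged sketch with invented
names (apply_pref() is a hypothetical helper, not from the patch):

	static ssize_t store_pref(struct kobject *kobj,
				  struct kobj_attribute *attr,
				  const char *buf, size_t count)
	{
		int err = apply_pref(buf);	/* hypothetical: 0 or -errno */

		if (err)
			return err;

		/* Success: report all bytes consumed, even if nothing changed. */
		return count;
	}
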
From 50fadd624bc0018691e8f9d2bf94def4d5418641 Mon Sep 17 00:00:00 2001
From: Doug Smythies
Date: Sun, 20 Aug 2023 13:46:49 -0700
Subject: [PATCH 13/21] cpufreq: intel_pstate: set stale CPU frequency to
 minimum

ANBZ: #9550

commit d51847acb018d83186e4af67bc93f9a00a8644f7 upstream.

The intel_pstate CPU frequency scaling driver does not use policy->cur
and it is 0. When the CPU frequency is outdated, arch_freq_get_on_cpu()
will default to the nominal clock frequency when its call to
cpufreq_quick_get() returns the never-updated policy->cur of 0. Thus,
the listed frequency might be outside of the currently set limits. Some
users are complaining about the high reported frequency, albeit stale,
when their system is idle and/or it is above the reduced maximum they
have set.

This patch will maintain policy->cur for the intel_pstate driver at the
current minimum CPU frequency.

Intel-SIG: commit d51847acb018 cpufreq: intel_pstate: set stale CPU frequency to minimum.
Backport intel_pstate driver update for 5.10

Reported-by: Yang Jie
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217597
Signed-off-by: Doug Smythies
[ rjw: White space damage fixes and comment adjustment ]
Signed-off-by: Rafael J. Wysocki
[ Yingbao Jia: amend commit log ]
Signed-off-by: Yingbao Jia
---
 drivers/cpufreq/intel_pstate.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index fccc3f00746d..25f1cf2aabc5 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2411,6 +2411,11 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 		intel_pstate_clear_update_util_hook(policy->cpu);
 		intel_pstate_hwp_set(policy->cpu);
 	}
+	/*
+	 * policy->cur is never updated with the intel_pstate driver, but it
+	 * is used as a stale frequency value. So, keep it within limits.
+	 */
+	policy->cur = policy->min;
 
 	mutex_unlock(&intel_pstate_limits_lock);
--
Gitee
From a1855c32b1ee668cfdbb0b78abed67e68a46ca7a Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada
Date: Thu, 7 Sep 2023 11:02:07 -0700
Subject: [PATCH 14/21] cpufreq: intel_pstate: Revise global turbo disable
 check

ANBZ: #9550

commit 37b6ddba967c601479bea418a7ac6ff16b6232b7 upstream.

Setting the global turbo flag based on CPU 0 P-state limits is
problematic, as it limits the max P-state request on every CPU on the
system based just on CPU 0's P-state limits.

There are two cases in which the global.turbo_disabled flag is set:

- When the MSR_IA32_MISC_ENABLE_TURBO_DISABLE bit is set to 1 in the
  MSR MSR_IA32_MISC_ENABLE. This bit can only be changed by the system
  BIOS before power up.
- When the max non-turbo P-state is the same as the max turbo P-state
  for CPU 0.

The second check is not valid for deciding the global turbo state based
on CPU 0. CPU 0's max turbo P-state can be the same as its max
non-turbo P-state, but this may not be true for other CPUs. There is no
guarantee that the max P-state limits are the same for every CPU. It is
possible that the max P-state for a CPU is constrained during fusing.

Also, with an Intel Speed Select performance profile, CPU 0 may not be
present in all profiles. In this case the max non-turbo and turbo
P-states can be set to the lowest possible P-state by the hardware when
switched to such a profile. Since the max non-turbo and turbo P-states
are the same, the global.turbo_disabled flag will be set. Once
global.turbo_disabled is set, any scaling max and min frequency update
for any CPU will result in its max P-state being constrained to the max
non-turbo P-state.

Hence remove the check for the max non-turbo P-state being equal to the
max turbo P-state of CPU 0 when setting the global turbo disabled flag.

Intel-SIG: commit 37b6ddba967c cpufreq: intel_pstate: Revise global turbo disable check.
Backport intel_pstate driver update for 5.10

Signed-off-by: Srinivas Pandruvada
[ rjw: Subject edit ]
Signed-off-by: Rafael J. Wysocki
[ Yingbao Jia: amend commit log ]
Signed-off-by: Yingbao Jia
---
 drivers/cpufreq/intel_pstate.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 25f1cf2aabc5..9feb8501c1ce 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -501,13 +501,9 @@ static int intel_pstate_get_cppc_guranteed(int cpu)
 static inline void update_turbo_state(void)
 {
 	u64 misc_en;
-	struct cpudata *cpu;
 
-	cpu = all_cpu_data[0];
 	rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
-	global.turbo_disabled =
-		(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
-		 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
+	global.turbo_disabled = misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
 }
 
 static int min_perf_pct_min(void)
--
Gitee

From e66d55db95e83690b17c659afa12c04c8c75b404 Mon Sep 17 00:00:00 2001
From: Zhenguo Yao
Date: Wed, 13 Dec 2023 18:28:08 +0800
Subject: [PATCH 15/21] cpufreq: intel_pstate: Add Emerald Rapids support in
 no-HWP mode

ANBZ: #9550

commit e95013156ad88e6a1e1db6545881f49183e2ee0a upstream.

Users may disable HWP in firmware, in which case intel_pstate will give
up unless the CPU model is explicitly supported.

See also the following past commits:

- commit df51f287b5de ("cpufreq: intel_pstate: Add Sapphire Rapids
  support in no-HWP mode")
- commit d8de7a44e11f ("cpufreq: intel_pstate: Add Skylake servers
  support")
- commit 706c5328851d ("cpufreq: intel_pstate: Add Cometlake support
  in no-HWP mode")
- commit fbdc21e9b038 ("cpufreq: intel_pstate: Add Icelake servers
  support in no-HWP mode")
- commit 71bb5c82aaae ("cpufreq: intel_pstate: Add Tigerlake support
  in no-HWP mode")

Intel-SIG: commit e95013156ad8 cpufreq: intel_pstate: Add Emerald Rapids support in no-HWP mode.
Backport intel_pstate driver update for 5.10

Signed-off-by: Zhenguo Yao
Acked-by: Srinivas Pandruvada
[ rjw: Changelog edits ]
Signed-off-by: Rafael J. Wysocki
[ Yingbao Jia: amend commit log ]
Signed-off-by: Yingbao Jia
---
 drivers/cpufreq/intel_pstate.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 9feb8501c1ce..936db3bfda5e 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2215,6 +2215,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
 	X86_MATCH(SKYLAKE_X,		core_funcs),
 	X86_MATCH(ICELAKE_X,		core_funcs),
 	X86_MATCH(SAPPHIRERAPIDS_X,	core_funcs),
+	X86_MATCH(EMERALDRAPIDS_X,	core_funcs),
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
--
Gitee
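After patch 14 above, only the BIOS-owned MSR bit decides the global
turbo state; P-state comparisons on CPU 0 are no longer consulted. A
minimal sketch of the surviving check (the helper name is illustrative
here, though it is essentially what patch 19 below factors out as
turbo_is_disabled()):

	static bool turbo_disabled_by_bios(void)	/* illustrative name */
	{
		u64 misc_en;

		/* Bit is set by firmware before power-up and then fixed. */
		rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);

		return !!(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
	}
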
From 2e25e2734699e26cfe70afd888e3961246dcd781 Mon Sep 17 00:00:00 2001
From: "Jiri Slaby (SUSE)"
Date: Tue, 13 Feb 2024 12:16:00 +0100
Subject: [PATCH 16/21] cpufreq: intel_pstate: remove
 cpudata::prev_cummulative_iowait

ANBZ: #9550

commit 4615ac9010be84d676f9e893e5a7ea4b5febd1e8 upstream.

Commit 09c448d3c61f ("cpufreq: intel_pstate: Use IOWAIT flag in Atom
algorithm") removed the last user of cpudata::prev_cummulative_iowait.
Remove the member too.

Found by https://github.com/jirislaby/clang-struct.

Intel-SIG: commit 4615ac9010be cpufreq: intel_pstate: remove cpudata.
Backport intel_pstate driver update for 5.10

Signed-off-by: Jiri Slaby (SUSE)
Signed-off-by: Rafael J. Wysocki
[ Yingbao Jia: amend commit log ]
Signed-off-by: Yingbao Jia
---
 drivers/cpufreq/intel_pstate.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 936db3bfda5e..d1458891a0ae 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -207,8 +207,6 @@ struct global_params {
 * @prev_aperf:		Last APERF value read from APERF MSR
 * @prev_mperf:		Last MPERF value read from MPERF MSR
 * @prev_tsc:		Last timestamp counter (TSC) value
- * @prev_cummulative_iowait: IO Wait time difference from last and
- *			current sample
 * @sample:		Storage for storing last Sample data
 * @min_perf_ratio:	Minimum capacity in terms of PERF or HWP ratios
 * @max_perf_ratio:	Maximum capacity in terms of PERF or HWP ratios
@@ -247,7 +245,6 @@ struct cpudata {
 	u64	prev_aperf;
 	u64	prev_mperf;
 	u64	prev_tsc;
-	u64	prev_cummulative_iowait;
 	struct sample sample;
 	int32_t	min_perf_ratio;
 	int32_t	max_perf_ratio;
--
Gitee

From 6326e97364822d5652366358f98cd248226dc921 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki"
Date: Thu, 21 Mar 2024 20:29:43 +0100
Subject: [PATCH 17/21] cpufreq: intel_pstate: Drop redundant locking from
 intel_pstate_driver_cleanup()

ANBZ: #9550

commit f186b2dace86f36cc08872b693185eaf71128898 upstream.

Remove the spinlock locking from intel_pstate_driver_cleanup() as it is
not necessary because no other code accessing all_cpu_data[] can run in
parallel with that function.

Had the locking been necessary, though, it would have been incorrect
because the lock in question is acquired from a hardirq handler and it
cannot be acquired from thread context without disabling interrupts.

Intel-SIG: commit f186b2dace86 cpufreq: intel_pstate: Drop redundant locking from.
Backport intel_pstate driver update for 5.10

Signed-off-by: Rafael J. Wysocki
[ Yingbao Jia: amend commit log ]
Signed-off-by: Yingbao Jia
---
 drivers/cpufreq/intel_pstate.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index d1458891a0ae..90f2150eb5a1 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2853,10 +2853,8 @@ static void intel_pstate_driver_cleanup(void)
 			if (intel_pstate_driver == &intel_pstate)
 				intel_pstate_clear_update_util_hook(cpu);
 
-			spin_lock(&hwp_notify_lock);
 			kfree(all_cpu_data[cpu]);
 			WRITE_ONCE(all_cpu_data[cpu], NULL);
-			spin_unlock(&hwp_notify_lock);
 		}
 	}
 	put_online_cpus();
--
Gitee
From 34c136cd5dfa32e811e2dd57db63fed8ac9330dc Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki"
Date: Thu, 21 Mar 2024 20:30:42 +0100
Subject: [PATCH 18/21] cpufreq: intel_pstate: Simplify spinlock locking

ANBZ: #9550

commit 12ebba42d2f1eadc0f897ffeb6dbcfaf2449e107 upstream.

Because intel_pstate_enable/disable_hwp_interrupt() are only called
from thread context, they need not save the IRQ flags when using a
spinlock as interrupts are guaranteed to be enabled when they run, so
make them use spin_lock/unlock_irq().

Intel-SIG: commit 12ebba42d2f1 cpufreq: intel_pstate: Simplify spinlock locking.
Backport intel_pstate driver update for 5.10

Signed-off-by: Rafael J. Wysocki
[ Yingbao Jia: amend commit log ]
Signed-off-by: Yingbao Jia
---
 drivers/cpufreq/intel_pstate.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 90f2150eb5a1..7650423f0258 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1539,30 +1539,26 @@ void notify_hwp_interrupt(void)
 
 static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata)
 {
-	unsigned long flags;
-
 	if (!boot_cpu_has(X86_FEATURE_HWP_NOTIFY))
 		return;
 
 	/* wrmsrl_on_cpu has to be outside spinlock as this can result in IPC */
 	wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
 
-	spin_lock_irqsave(&hwp_notify_lock, flags);
+	spin_lock_irq(&hwp_notify_lock);
 	if (cpumask_test_and_clear_cpu(cpudata->cpu, &hwp_intr_enable_mask))
 		cancel_delayed_work(&cpudata->hwp_notify_work);
-	spin_unlock_irqrestore(&hwp_notify_lock, flags);
+	spin_unlock_irq(&hwp_notify_lock);
 }
 
 static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata)
 {
 	/* Enable HWP notification interrupt for guaranteed performance change */
 	if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&hwp_notify_lock, flags);
+		spin_lock_irq(&hwp_notify_lock);
 		INIT_DELAYED_WORK(&cpudata->hwp_notify_work, intel_pstate_notify_work);
 		cpumask_set_cpu(cpudata->cpu, &hwp_intr_enable_mask);
-		spin_unlock_irqrestore(&hwp_notify_lock, flags);
+		spin_unlock_irq(&hwp_notify_lock);
 
 		/* wrmsrl_on_cpu has to be outside spinlock as this can result in IPC */
 		wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x01);
--
Gitee
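The rule behind patch 18: spin_lock_irqsave() is for callers that may
already run with interrupts disabled, while code known to run in plain
thread context (IRQs on) can use the cheaper spin_lock_irq() pair. A
hedged side-by-side sketch (function names are illustrative):

	/* Thread context only: IRQs are known to be enabled on entry. */
	static void from_thread_context(void)
	{
		spin_lock_irq(&hwp_notify_lock);	/* disables local IRQs */
		/* ... touch data shared with the interrupt handler ... */
		spin_unlock_irq(&hwp_notify_lock);	/* unconditionally re-enables */
	}

	/* Arbitrary context: preserve whatever IRQ state the caller had. */
	static void from_any_context(void)
	{
		unsigned long flags;

		spin_lock_irqsave(&hwp_notify_lock, flags);
		/* ... */
		spin_unlock_irqrestore(&hwp_notify_lock, flags);
	}

The irq-disabling form is required at all because, as patch 17's log
notes, this lock is also taken from a hardirq handler.
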
* @max_perf_pct: Maximum capacity limit in percent of the maximum turbo @@ -187,7 +186,6 @@ struct vid_data { struct global_params { bool no_turbo; bool turbo_disabled; - bool turbo_disabled_mf; int max_perf_pct; int min_perf_pct; }; @@ -495,12 +493,13 @@ static int intel_pstate_get_cppc_guranteed(int cpu) } #endif /* CONFIG_ACPI_CPPC_LIB */ -static inline void update_turbo_state(void) +static bool turbo_is_disabled(void) { u64 misc_en; rdmsrl(MSR_IA32_MISC_ENABLE, misc_en); - global.turbo_disabled = misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE; + + return !!(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE); } static int min_perf_pct_min(void) @@ -1025,40 +1024,16 @@ static void intel_pstate_update_policies(void) static void __intel_pstate_update_max_freq(struct cpudata *cpudata, struct cpufreq_policy *policy) { - policy->cpuinfo.max_freq = global.turbo_disabled_mf ? + policy->cpuinfo.max_freq = global.turbo_disabled ? cpudata->pstate.max_freq : cpudata->pstate.turbo_freq; refresh_frequency_limits(policy); } -static void intel_pstate_update_max_freq(unsigned int cpu) -{ - struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu); - - if (!policy) - return; - - __intel_pstate_update_max_freq(all_cpu_data[cpu], policy); - - cpufreq_cpu_release(policy); -} - static void intel_pstate_update_limits(unsigned int cpu) { mutex_lock(&intel_pstate_driver_lock); - update_turbo_state(); - /* - * If turbo has been turned on or off globally, policy limits for - * all CPUs need to be updated to reflect that. - */ - if (global.turbo_disabled_mf != global.turbo_disabled) { - global.turbo_disabled_mf = global.turbo_disabled; - arch_set_max_freq_ratio(global.turbo_disabled); - for_each_possible_cpu(cpu) - intel_pstate_update_max_freq(cpu); - } else { - cpufreq_update_policy(cpu); - } + cpufreq_update_policy(cpu); mutex_unlock(&intel_pstate_driver_lock); } @@ -1158,7 +1133,6 @@ static ssize_t show_no_turbo(struct kobject *kobj, return -EAGAIN; } - update_turbo_state(); if (global.turbo_disabled) ret = sprintf(buf, "%u\n", global.turbo_disabled); else @@ -1188,7 +1162,6 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b, mutex_lock(&intel_pstate_limits_lock); - update_turbo_state(); if (global.turbo_disabled) { pr_notice_once("Turbo disabled by BIOS or unavailable on processor\n"); mutex_unlock(&intel_pstate_limits_lock); @@ -1826,7 +1799,6 @@ static void intel_pstate_max_within_limits(struct cpudata *cpu) { int pstate = max(cpu->pstate.min_pstate, cpu->max_perf_ratio); - update_turbo_state(); intel_pstate_set_pstate(cpu, pstate); } @@ -2084,8 +2056,6 @@ static void intel_pstate_adjust_pstate(struct cpudata *cpu) struct sample *sample; int target_pstate; - update_turbo_state(); - target_pstate = get_target_pstate(cpu); target_pstate = intel_pstate_prepare_request(cpu, target_pstate); trace_cpu_frequency(target_pstate * cpu->pstate.scaling, cpu->cpu); @@ -2429,7 +2399,6 @@ static void intel_pstate_verify_cpu_policy(struct cpudata *cpu, { int max_freq; - update_turbo_state(); if (hwp_active) { intel_pstate_get_hwp_cap(cpu); max_freq = global.no_turbo || global.turbo_disabled ? @@ -2526,8 +2495,6 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy) /* cpuinfo and default policy values */ policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling; - update_turbo_state(); - global.turbo_disabled_mf = global.turbo_disabled; policy->cpuinfo.max_freq = global.turbo_disabled ? 
cpu->pstate.max_freq : cpu->pstate.turbo_freq; @@ -2692,8 +2659,6 @@ static int intel_cpufreq_target(struct cpufreq_policy *policy, struct cpufreq_freqs freqs; int target_pstate; - update_turbo_state(); - freqs.old = policy->cur; freqs.new = target_freq; @@ -2726,8 +2691,6 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy, struct cpudata *cpu = all_cpu_data[policy->cpu]; int target_pstate; - update_turbo_state(); - target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling); target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, true); @@ -2867,6 +2830,9 @@ static int intel_pstate_register_driver(struct cpufreq_driver *driver) memset(&global, 0, sizeof(global)); global.max_perf_pct = 100; + global.turbo_disabled = turbo_is_disabled(); + + arch_set_max_freq_ratio(global.turbo_disabled); intel_pstate_driver = driver; ret = cpufreq_register_driver(intel_pstate_driver); -- Gitee From 0d3fbe90b53b90492a1dbf77051362016059f3f8 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 24 Jun 2024 09:11:08 -0700 Subject: [PATCH 20/21] x86/cpufeatures: Add HWP highest perf change feature flag ANBZ: #9550 commit 7ea81936b85317aee8a73cd35d7f9cd6ce654dee upstream. When CPUID[6].EAX[15] is set to 1, this CPU supports notification for HWP (Hardware P-states) highest performance change. Add a feature flag to check if the CPU supports HWP highest performance change. Intel-SIG: commit 7ea81936b853 x86/cpufeatures: Add HWP highest perf change feature flag. Backport intel_pstate driver update for 5.10 Signed-off-by: Srinivas Pandruvada Link: https://patch.msgid.link/20240624161109.1427640-2-srinivas.pandruvada@linux.intel.com Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- arch/x86/include/asm/cpufeatures.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 3944928998e9..463b23f0354f 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -376,6 +376,7 @@ #define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ #define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ #define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ +#define X86_FEATURE_HWP_HIGHEST_PERF_CHANGE (14*32+15) /* "" HWP Highest perf change */ #define X86_FEATURE_HFI (14*32+19) /* Hardware Feedback Interface */ /* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */ -- Gitee From 6b13eb8526c5d5cd432de86c4eeb9799e00337aa Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 24 Jun 2024 09:11:09 -0700 Subject: [PATCH 21/21] cpufreq: intel_pstate: Support highest performance change interrupt ANBZ: #9550 commit d845cd901b28f1b6c02a208b864fc3fc46d14536 upstream. On some systems, the HWP (Hardware P-states) highest performance level can change from the value set at boot-up. This behavior can lead to two issues: - The 'cpuinfo_max_freq' within the 'cpufreq' sysfs will not reflect the CPU's highest achievable performance. - Even if the CPU's highest performance level is increased after booting, the CPU may not reach the full expected performance. The availability of this feature is indicated by the CPUID instruction: if CPUID[6].EAX[15] is set to 1, the feature is supported. When supported, setting bit 2 of the MSR_HWP_INTERRUPT register enables notifications of the highest performance level changes. 
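Patches 19 and 20 pair an initialize-once policy with a new feature
bit. A hedged sketch of both ideas: the BIOS-owned turbo bit is sampled
a single time at driver registration (condensed from patch 19's diff),
and the new word-14 flag is tested with cpu_feature_enabled(), which
compiles down to a static branch. The helper names below are
illustrative:

	static int register_driver_sketch(struct cpufreq_driver *driver)
	{
		memset(&global, 0, sizeof(global));
		global.max_perf_pct = 100;

		/* One slow MSR read here instead of one per scheduler callback. */
		global.turbo_disabled = turbo_is_disabled();
		arch_set_max_freq_ratio(global.turbo_disabled);

		return cpufreq_register_driver(driver);
	}

	static bool highest_perf_change_supported(void)
	{
		/* CPUID[6].EAX[15], exposed as an X86_FEATURE bit by patch 20 */
		return cpu_feature_enabled(X86_FEATURE_HWP_HIGHEST_PERF_CHANGE);
	}
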
From 6b13eb8526c5d5cd432de86c4eeb9799e00337aa Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada
Date: Mon, 24 Jun 2024 09:11:09 -0700
Subject: [PATCH 21/21] cpufreq: intel_pstate: Support highest performance
 change interrupt

ANBZ: #9550

commit d845cd901b28f1b6c02a208b864fc3fc46d14536 upstream.

On some systems, the HWP (Hardware P-states) highest performance level
can change from the value set at boot-up. This behavior can lead to two
issues:

- The 'cpuinfo_max_freq' within the 'cpufreq' sysfs will not reflect
  the CPU's highest achievable performance.
- Even if the CPU's highest performance level is increased after
  booting, the CPU may not reach the full expected performance.

The availability of this feature is indicated by the CPUID instruction:
if CPUID[6].EAX[15] is set to 1, the feature is supported. When
supported, setting bit 2 of the MSR_HWP_INTERRUPT register enables
notifications of highest performance level changes. Therefore, as part
of enabling the HWP interrupt, bit 2 of MSR_HWP_INTERRUPT should also
be set when this feature is supported.

Upon a change in the highest performance level, a new HWP interrupt is
generated, with bit 3 of the MSR_HWP_STATUS register set, and the
MSR_HWP_CAPABILITIES register is updated with the new highest
performance limit.

The processing of the interrupt is the same as for a guaranteed
performance change: notify the change to the cpufreq core and update
MSR_HWP_REQUEST with the new performance limits.

The current driver implementation already takes care of the highest
performance change as part of:

commit dfeeedc1bf57 ("cpufreq: intel_pstate: Update cpuinfo.max_freq on
HWP_CAP changes")

For example:

Before the highest performance change interrupt:
 cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
 3700000
 cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq
 3700000

After the highest performance change interrupt:
 cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq
 3900000
 cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
 3900000

Intel-SIG: commit d845cd901b28 cpufreq: intel_pstate: Support highest performance change interrupt.
Backport intel_pstate driver update for 5.10

Signed-off-by: Srinivas Pandruvada
Link: https://patch.msgid.link/20240624161109.1427640-3-srinivas.pandruvada@linux.intel.com
Signed-off-by: Rafael J. Wysocki
[ Yingbao Jia: amend commit log ]
Signed-off-by: Yingbao Jia
---
 drivers/cpufreq/intel_pstate.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 33cd2b4004bb..ea47a81801d2 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1463,18 +1463,25 @@ static void intel_pstate_notify_work(struct work_struct *work)
 static DEFINE_SPINLOCK(hwp_notify_lock);
 static cpumask_t hwp_intr_enable_mask;
 
+#define HWP_GUARANTEED_PERF_CHANGE_STATUS	BIT(0)
+#define HWP_HIGHEST_PERF_CHANGE_STATUS		BIT(3)
+
 void notify_hwp_interrupt(void)
 {
 	unsigned int this_cpu = smp_processor_id();
+	u64 value, status_mask;
 	struct cpudata *cpudata;
 	unsigned long flags;
-	u64 value;
 
 	if (!READ_ONCE(hwp_active) || !boot_cpu_has(X86_FEATURE_HWP_NOTIFY))
 		return;
 
+	status_mask = HWP_GUARANTEED_PERF_CHANGE_STATUS;
+	if (cpu_feature_enabled(X86_FEATURE_HWP_HIGHEST_PERF_CHANGE))
+		status_mask |= HWP_HIGHEST_PERF_CHANGE_STATUS;
+
 	rdmsrl_safe(MSR_HWP_STATUS, &value);
-	if (!(value & 0x01))
+	if (!(value & status_mask))
 		return;
 
 	spin_lock_irqsave(&hwp_notify_lock, flags);
@@ -1524,17 +1531,24 @@ static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata)
 	spin_unlock_irq(&hwp_notify_lock);
 }
 
+#define HWP_GUARANTEED_PERF_CHANGE_REQ	BIT(0)
+#define HWP_HIGHEST_PERF_CHANGE_REQ	BIT(2)
+
 static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata)
 {
-	/* Enable HWP notification interrupt for guaranteed performance change */
+	/* Enable HWP notification interrupt for performance change */
 	if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) {
+		u64 interrupt_mask = HWP_GUARANTEED_PERF_CHANGE_REQ;
+
 		spin_lock_irq(&hwp_notify_lock);
 		INIT_DELAYED_WORK(&cpudata->hwp_notify_work, intel_pstate_notify_work);
 		cpumask_set_cpu(cpudata->cpu, &hwp_intr_enable_mask);
 		spin_unlock_irq(&hwp_notify_lock);
 
+		if (cpu_feature_enabled(X86_FEATURE_HWP_HIGHEST_PERF_CHANGE))
+			interrupt_mask |= HWP_HIGHEST_PERF_CHANGE_REQ;
+
 		/* wrmsrl_on_cpu has to be outside spinlock as this can result in IPC */
-		wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x01);
+		wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, interrupt_mask);
 		wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0);
 	}
 }
--
Gitee
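To summarize the bit layout patch 21 relies on (taken from its commit
message and diff): the enable bits live in MSR_HWP_INTERRUPT (BIT(0)
guaranteed-perf change, BIT(2) highest-perf change), while the matching
status bits live in MSR_HWP_STATUS (BIT(0) guaranteed, BIT(3) highest).
A hedged sketch of composing the enable mask, condensed from the code
above (the wrapper function name is illustrative):

	static void enable_hwp_interrupts_sketch(int cpu)
	{
		u64 interrupt_mask = HWP_GUARANTEED_PERF_CHANGE_REQ;	/* BIT(0) */

		if (cpu_feature_enabled(X86_FEATURE_HWP_HIGHEST_PERF_CHANGE))
			interrupt_mask |= HWP_HIGHEST_PERF_CHANGE_REQ;	/* BIT(2) */

		wrmsrl_on_cpu(cpu, MSR_HWP_INTERRUPT, interrupt_mask);
		wrmsrl_on_cpu(cpu, MSR_HWP_STATUS, 0);	/* clear stale status bits */
	}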