diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 3944928998e9e9e0a6156a0ab6d66a48c7554fe3..463b23f0354ff9e53ec415881de4d43f46103526 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -376,6 +376,7 @@
 #define X86_FEATURE_HWP_ACT_WINDOW	(14*32+ 9) /* HWP Activity Window */
 #define X86_FEATURE_HWP_EPP		(14*32+10) /* HWP Energy Perf. Preference */
 #define X86_FEATURE_HWP_PKG_REQ		(14*32+11) /* HWP Package Level Request */
+#define X86_FEATURE_HWP_HIGHEST_PERF_CHANGE (14*32+15) /* "" HWP Highest perf change */
 #define X86_FEATURE_HFI			(14*32+19) /* Hardware Feedback Interface */
 
 /* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index d43a053125a1752c8899545cd79fc0dddf7b9fc2..ea47a81801d29e8cfe8c778bbb4b2a3d79fa050e 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -32,6 +32,7 @@
 #include <asm/cpu_device_id.h>
 #include <asm/cpufeature.h>
 #include <asm/intel-family.h>
+#include "../drivers/thermal/intel/thermal_interrupt.h"
 
 #define INTEL_PSTATE_SAMPLING_INTERVAL	(10 * NSEC_PER_MSEC)
 
@@ -177,7 +178,6 @@ struct vid_data {
  *			based on the MSR_IA32_MISC_ENABLE value and whether or
  *			not the maximum reported turbo P-state is different from
  *			the maximum reported non-turbo one.
- * @turbo_disabled_mf:	The @turbo_disabled value reflected by cpuinfo.max_freq.
  * @min_perf_pct:	Minimum capacity limit in percent of the maximum turbo
  *			P-state capacity.
  * @max_perf_pct:	Maximum capacity limit in percent of the maximum turbo
@@ -186,7 +186,6 @@ struct vid_data {
 struct global_params {
 	bool no_turbo;
 	bool turbo_disabled;
-	bool turbo_disabled_mf;
 	int max_perf_pct;
 	int min_perf_pct;
 };
@@ -206,8 +205,6 @@ struct global_params {
  * @prev_aperf:		Last APERF value read from APERF MSR
  * @prev_mperf:		Last MPERF value read from MPERF MSR
  * @prev_tsc:		Last timestamp counter (TSC) value
- * @prev_cummulative_iowait: IO Wait time difference from last and
- *			current sample
  * @sample:		Storage for storing last Sample data
  * @min_perf_ratio:	Minimum capacity in terms of PERF or HWP ratios
  * @max_perf_ratio:	Maximum capacity in terms of PERF or HWP ratios
@@ -226,6 +223,7 @@ struct global_params {
  * @sched_flags:	Store scheduler flags for possible cross CPU update
  * @hwp_boost_min:	Last HWP boosted min performance
  * @suspended:		Whether or not the driver has been suspended.
+ * @hwp_notify_work:	workqueue for HWP notifications.
  *
  * This structure stores per CPU instance data for all CPUs.
  */
@@ -245,7 +243,6 @@ struct cpudata {
 	u64	prev_aperf;
 	u64	prev_mperf;
 	u64	prev_tsc;
-	u64	prev_cummulative_iowait;
 	struct sample sample;
 	int32_t	min_perf_ratio;
 	int32_t	max_perf_ratio;
@@ -264,6 +261,7 @@ struct cpudata {
 	unsigned int sched_flags;
 	u32 hwp_boost_min;
 	bool suspended;
+	struct delayed_work hwp_notify_work;
 };
 
 static struct cpudata **all_cpu_data;
@@ -340,6 +338,8 @@ static void intel_pstste_sched_itmt_work_fn(struct work_struct *work)
 
 static DECLARE_WORK(sched_itmt_work, intel_pstste_sched_itmt_work_fn);
 
+#define CPPC_MAX_PERF	U8_MAX
+
 static void intel_pstate_set_itmt_prio(int cpu)
 {
 	struct cppc_perf_caps cppc_perf;
@@ -350,6 +350,14 @@ static void intel_pstate_set_itmt_prio(int cpu)
 	if (ret)
 		return;
 
+	/*
+	 * On some systems with overclocking enabled, CPPC.highest_perf is
+	 * hardcoded to 0xff and therefore cannot be used to enable ITMT. In
+	 * that case, look at MSR_HWP_CAPABILITIES bits [8:0] to decide.
+ */ + if (cppc_perf.highest_perf == CPPC_MAX_PERF) + cppc_perf.highest_perf = HWP_HIGHEST_PERF(READ_ONCE(all_cpu_data[cpu]->hwp_cap_cached)); + /* * The priorities can be set regardless of whether or not * sched_set_itmt_support(true) has been called and it is valid to @@ -443,20 +451,6 @@ static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy) (u32) cpu->acpi_perf_data.states[i].control); } - /* - * The _PSS table doesn't contain whole turbo frequency range. - * This just contains +1 MHZ above the max non turbo frequency, - * with control value corresponding to max turbo ratio. But - * when cpufreq set policy is called, it will call with this - * max frequency, which will cause a reduced performance as - * this driver uses real max turbo frequency as the max - * frequency. So correct this frequency in _PSS table to - * correct max turbo frequency based on the turbo state. - * Also need to convert to MHz as _PSS freq is in MHz. - */ - if (!global.turbo_disabled) - cpu->acpi_perf_data.states[0].core_frequency = - policy->cpuinfo.max_freq / 1000; cpu->valid_pss_table = true; pr_debug("_PPC limits will be enforced\n"); @@ -499,16 +493,13 @@ static int intel_pstate_get_cppc_guranteed(int cpu) } #endif /* CONFIG_ACPI_CPPC_LIB */ -static inline void update_turbo_state(void) +static bool turbo_is_disabled(void) { u64 misc_en; - struct cpudata *cpu; - cpu = all_cpu_data[0]; rdmsrl(MSR_IA32_MISC_ENABLE, misc_en); - global.turbo_disabled = - (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE || - cpu->pstate.max_pstate == cpu->pstate.turbo_pstate); + + return !!(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE); } static int min_perf_pct_min(void) @@ -777,6 +768,8 @@ static ssize_t store_energy_performance_preference( err = cpufreq_start_governor(policy); if (!ret) ret = err; + } else { + ret = 0; } } @@ -829,19 +822,21 @@ static struct freq_attr *hwp_cpufreq_attrs[] = { NULL, }; -static void intel_pstate_get_hwp_max(struct cpudata *cpu, int *phy_max, - int *current_max) +static void __intel_pstate_get_hwp_cap(struct cpudata *cpu) { u64 cap; rdmsrl_on_cpu(cpu->cpu, MSR_HWP_CAPABILITIES, &cap); WRITE_ONCE(cpu->hwp_cap_cached, cap); - if (global.no_turbo || global.turbo_disabled) - *current_max = HWP_GUARANTEED_PERF(cap); - else - *current_max = HWP_HIGHEST_PERF(cap); + cpu->pstate.max_pstate = HWP_GUARANTEED_PERF(cap); + cpu->pstate.turbo_pstate = HWP_HIGHEST_PERF(cap); +} - *phy_max = HWP_HIGHEST_PERF(cap); +static void intel_pstate_get_hwp_cap(struct cpudata *cpu) +{ + __intel_pstate_get_hwp_cap(cpu); + cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling; + cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling; } static void intel_pstate_hwp_set(unsigned int cpu) @@ -907,11 +902,15 @@ static void intel_pstate_hwp_set(unsigned int cpu) wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value); } +static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata); + static void intel_pstate_hwp_offline(struct cpudata *cpu) { u64 value = READ_ONCE(cpu->hwp_req_cached); int min_perf; + intel_pstate_disable_hwp_interrupt(cpu); + if (boot_cpu_has(X86_FEATURE_HWP_EPP)) { /* * In case the EPP has been set to "performance" by the @@ -921,10 +920,17 @@ static void intel_pstate_hwp_offline(struct cpudata *cpu) value &= ~GENMASK_ULL(31, 24); value |= HWP_ENERGY_PERF_PREFERENCE(cpu->epp_cached); WRITE_ONCE(cpu->hwp_req_cached, value); + + /* + * However, make sure that EPP will be set to "performance" when + * the CPU is brought back online again and the "performance" + * 
scaling algorithm is still in effect. + */ + cpu->epp_policy = CPUFREQ_POLICY_UNKNOWN; } value &= ~GENMASK_ULL(31, 0); - min_perf = HWP_LOWEST_PERF(cpu->hwp_cap_cached); + min_perf = HWP_LOWEST_PERF(READ_ONCE(cpu->hwp_cap_cached)); /* Set hwp_max = hwp_min */ value |= HWP_MAX_PERF(min_perf); @@ -975,6 +981,9 @@ static int intel_pstate_suspend(struct cpufreq_policy *policy) cpu->suspended = true; + /* disable HWP interrupt and cancel any pending work */ + intel_pstate_disable_hwp_interrupt(cpu); + return 0; } @@ -1012,40 +1021,19 @@ static void intel_pstate_update_policies(void) cpufreq_update_policy(cpu); } -static void intel_pstate_update_max_freq(unsigned int cpu) +static void __intel_pstate_update_max_freq(struct cpudata *cpudata, + struct cpufreq_policy *policy) { - struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu); - struct cpudata *cpudata; - - if (!policy) - return; - - cpudata = all_cpu_data[cpu]; - policy->cpuinfo.max_freq = global.turbo_disabled_mf ? + policy->cpuinfo.max_freq = global.turbo_disabled ? cpudata->pstate.max_freq : cpudata->pstate.turbo_freq; - refresh_frequency_limits(policy); - - cpufreq_cpu_release(policy); } static void intel_pstate_update_limits(unsigned int cpu) { mutex_lock(&intel_pstate_driver_lock); - update_turbo_state(); - /* - * If turbo has been turned on or off globally, policy limits for - * all CPUs need to be updated to reflect that. - */ - if (global.turbo_disabled_mf != global.turbo_disabled) { - global.turbo_disabled_mf = global.turbo_disabled; - arch_set_max_freq_ratio(global.turbo_disabled); - for_each_possible_cpu(cpu) - intel_pstate_update_max_freq(cpu); - } else { - cpufreq_update_policy(cpu); - } + cpufreq_update_policy(cpu); mutex_unlock(&intel_pstate_driver_lock); } @@ -1145,7 +1133,6 @@ static ssize_t show_no_turbo(struct kobject *kobj, return -EAGAIN; } - update_turbo_state(); if (global.turbo_disabled) ret = sprintf(buf, "%u\n", global.turbo_disabled); else @@ -1175,7 +1162,6 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b, mutex_lock(&intel_pstate_limits_lock); - update_turbo_state(); if (global.turbo_disabled) { pr_notice_once("Turbo disabled by BIOS or unavailable on processor\n"); mutex_unlock(&intel_pstate_limits_lock); @@ -1206,12 +1192,13 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b, static void update_qos_request(enum freq_qos_req_type type) { - int max_state, turbo_max, freq, i, perf_pct; struct freq_qos_request *req; struct cpufreq_policy *policy; + int i; for_each_possible_cpu(i) { struct cpudata *cpu = all_cpu_data[i]; + unsigned int freq, perf_pct; policy = cpufreq_cpu_get(i); if (!policy) @@ -1224,9 +1211,7 @@ static void update_qos_request(enum freq_qos_req_type type) continue; if (hwp_active) - intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state); - else - turbo_max = cpu->pstate.turbo_pstate; + intel_pstate_get_hwp_cap(cpu); if (type == FREQ_QOS_MIN) { perf_pct = global.min_perf_pct; @@ -1235,8 +1220,7 @@ static void update_qos_request(enum freq_qos_req_type type) perf_pct = global.max_perf_pct; } - freq = DIV_ROUND_UP(turbo_max * perf_pct, 100); - freq *= cpu->pstate.scaling; + freq = DIV_ROUND_UP(cpu->pstate.turbo_freq * perf_pct, 100); if (freq_qos_update_request(req, freq) < 0) pr_warn("Failed to update freq constraint: CPU%d\n", i); @@ -1460,15 +1444,126 @@ static void intel_pstate_sysfs_hide_hwp_dynamic_boost(void) /************************** sysfs end ************************/ +static void intel_pstate_notify_work(struct work_struct *work) +{ + 
struct cpudata *cpudata =
+		container_of(to_delayed_work(work), struct cpudata, hwp_notify_work);
+	struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpudata->cpu);
+
+	if (policy) {
+		intel_pstate_get_hwp_cap(cpudata);
+		__intel_pstate_update_max_freq(cpudata, policy);
+
+		cpufreq_cpu_release(policy);
+	}
+
+	wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0);
+}
+
+static DEFINE_SPINLOCK(hwp_notify_lock);
+static cpumask_t hwp_intr_enable_mask;
+
+#define HWP_GUARANTEED_PERF_CHANGE_STATUS	BIT(0)
+#define HWP_HIGHEST_PERF_CHANGE_STATUS		BIT(3)
+
+void notify_hwp_interrupt(void)
+{
+	unsigned int this_cpu = smp_processor_id();
+	u64 value, status_mask;
+	struct cpudata *cpudata;
+	unsigned long flags;
+
+	if (!READ_ONCE(hwp_active) || !boot_cpu_has(X86_FEATURE_HWP_NOTIFY))
+		return;
+
+	status_mask = HWP_GUARANTEED_PERF_CHANGE_STATUS;
+	if (cpu_feature_enabled(X86_FEATURE_HWP_HIGHEST_PERF_CHANGE))
+		status_mask |= HWP_HIGHEST_PERF_CHANGE_STATUS;
+
+	rdmsrl_safe(MSR_HWP_STATUS, &value);
+	if (!(value & status_mask))
+		return;
+
+	spin_lock_irqsave(&hwp_notify_lock, flags);
+
+	if (!cpumask_test_cpu(this_cpu, &hwp_intr_enable_mask))
+		goto ack_intr;
+
+	/*
+	 * all_cpu_data is never freed at run time, so it cannot be NULL when
+	 * this interrupt fires, but keep the check for safety against future
+	 * changes.
+	 */
+	if (unlikely(!READ_ONCE(all_cpu_data)))
+		goto ack_intr;
+
+	/*
+	 * The per-CPU data is only freed during cleanup after cpufreq
+	 * registration fails, which cannot have happened if this interrupt
+	 * fires, but keep the check for safety against future changes.
+	 */
+	cpudata = READ_ONCE(all_cpu_data[this_cpu]);
+	if (unlikely(!cpudata))
+		goto ack_intr;
+
+	schedule_delayed_work(&cpudata->hwp_notify_work, msecs_to_jiffies(10));
+
+	spin_unlock_irqrestore(&hwp_notify_lock, flags);
+
+	return;
+
+ack_intr:
+	wrmsrl_safe(MSR_HWP_STATUS, 0);
+	spin_unlock_irqrestore(&hwp_notify_lock, flags);
+}
+
+static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata)
+{
+	if (!boot_cpu_has(X86_FEATURE_HWP_NOTIFY))
+		return;
+
+	/* wrmsrl_on_cpu() has to run outside the spinlock, as it can trigger an IPI */
+	wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
+
+	spin_lock_irq(&hwp_notify_lock);
+	if (cpumask_test_and_clear_cpu(cpudata->cpu, &hwp_intr_enable_mask))
+		cancel_delayed_work(&cpudata->hwp_notify_work);
+	spin_unlock_irq(&hwp_notify_lock);
+}
+
+#define HWP_GUARANTEED_PERF_CHANGE_REQ	BIT(0)
+#define HWP_HIGHEST_PERF_CHANGE_REQ	BIT(2)
+
+static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata)
+{
+	/* Enable HWP notification interrupts for performance level changes */
+	if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) {
+		u64 interrupt_mask = HWP_GUARANTEED_PERF_CHANGE_REQ;
+
+		spin_lock_irq(&hwp_notify_lock);
+		INIT_DELAYED_WORK(&cpudata->hwp_notify_work, intel_pstate_notify_work);
+		cpumask_set_cpu(cpudata->cpu, &hwp_intr_enable_mask);
+		spin_unlock_irq(&hwp_notify_lock);
+
+		if (cpu_feature_enabled(X86_FEATURE_HWP_HIGHEST_PERF_CHANGE))
+			interrupt_mask |= HWP_HIGHEST_PERF_CHANGE_REQ;
+		/* wrmsrl_on_cpu() has to run outside the spinlock, as it can trigger an IPI */
+		wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, interrupt_mask);
+		wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0);
+	}
+}
+
 static void intel_pstate_hwp_enable(struct cpudata *cpudata)
 {
-	/* First disable HWP notification interrupt as we don't process them */
+	/* First disable the HWP notification interrupt until it is enabled again below */
 	if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY))
 		wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
 
 	wrmsrl_on_cpu(cpudata->cpu,
MSR_PM_ENABLE, 0x1); if (cpudata->epp_default == -EINVAL) cpudata->epp_default = intel_pstate_get_epp(cpudata, 0); + + intel_pstate_enable_hwp_interrupt(cpudata); } static int atom_get_min_pstate(void) @@ -1718,7 +1813,6 @@ static void intel_pstate_max_within_limits(struct cpudata *cpu) { int pstate = max(cpu->pstate.min_pstate, cpu->max_perf_ratio); - update_turbo_state(); intel_pstate_set_pstate(cpu, pstate); } @@ -1726,21 +1820,17 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) { cpu->pstate.min_pstate = pstate_funcs.get_min(); cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical(); - cpu->pstate.turbo_pstate = pstate_funcs.get_turbo(); cpu->pstate.scaling = pstate_funcs.get_scaling(); if (hwp_active && !hwp_mode_bdw) { - unsigned int phy_max, current_max; - - intel_pstate_get_hwp_max(cpu, &phy_max, ¤t_max); - cpu->pstate.turbo_freq = phy_max * cpu->pstate.scaling; - cpu->pstate.turbo_pstate = phy_max; - cpu->pstate.max_pstate = HWP_GUARANTEED_PERF(READ_ONCE(cpu->hwp_cap_cached)); + __intel_pstate_get_hwp_cap(cpu); } else { - cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling; cpu->pstate.max_pstate = pstate_funcs.get_max(); + cpu->pstate.turbo_pstate = pstate_funcs.get_turbo(); } + cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling; + cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling; if (pstate_funcs.get_aperf_mperf_shift) cpu->aperf_mperf_shift = pstate_funcs.get_aperf_mperf_shift(); @@ -1762,6 +1852,7 @@ static int hwp_boost_hold_time_ns = 3 * NSEC_PER_MSEC; static inline void intel_pstate_hwp_boost_up(struct cpudata *cpu) { u64 hwp_req = READ_ONCE(cpu->hwp_req_cached); + u64 hwp_cap = READ_ONCE(cpu->hwp_cap_cached); u32 max_limit = (hwp_req & 0xff00) >> 8; u32 min_limit = (hwp_req & 0xff); u32 boost_level1; @@ -1788,14 +1879,14 @@ static inline void intel_pstate_hwp_boost_up(struct cpudata *cpu) cpu->hwp_boost_min = min_limit; /* level at half way mark between min and guranteed */ - boost_level1 = (HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) + min_limit) >> 1; + boost_level1 = (HWP_GUARANTEED_PERF(hwp_cap) + min_limit) >> 1; if (cpu->hwp_boost_min < boost_level1) cpu->hwp_boost_min = boost_level1; - else if (cpu->hwp_boost_min < HWP_GUARANTEED_PERF(cpu->hwp_cap_cached)) - cpu->hwp_boost_min = HWP_GUARANTEED_PERF(cpu->hwp_cap_cached); - else if (cpu->hwp_boost_min == HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) && - max_limit != HWP_GUARANTEED_PERF(cpu->hwp_cap_cached)) + else if (cpu->hwp_boost_min < HWP_GUARANTEED_PERF(hwp_cap)) + cpu->hwp_boost_min = HWP_GUARANTEED_PERF(hwp_cap); + else if (cpu->hwp_boost_min == HWP_GUARANTEED_PERF(hwp_cap) && + max_limit != HWP_GUARANTEED_PERF(hwp_cap)) cpu->hwp_boost_min = max_limit; else return; @@ -1979,8 +2070,6 @@ static void intel_pstate_adjust_pstate(struct cpudata *cpu) struct sample *sample; int target_pstate; - update_turbo_state(); - target_pstate = get_target_pstate(cpu); target_pstate = intel_pstate_prepare_request(cpu, target_pstate); trace_cpu_frequency(target_pstate * cpu->pstate.scaling, cpu->cpu); @@ -2103,6 +2192,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = { X86_MATCH(SKYLAKE_X, core_funcs), X86_MATCH(ICELAKE_X, core_funcs), X86_MATCH(SAPPHIRERAPIDS_X, core_funcs), + X86_MATCH(EMERALDRAPIDS_X, core_funcs), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); @@ -2132,7 +2222,7 @@ static int intel_pstate_init_cpu(unsigned int cpunum) if (!cpu) return -ENOMEM; - all_cpu_data[cpunum] = cpu; + WRITE_ONCE(all_cpu_data[cpunum], 
cpu); cpu->cpu = cpunum; @@ -2204,41 +2294,34 @@ static void intel_pstate_update_perf_limits(struct cpudata *cpu, unsigned int policy_min, unsigned int policy_max) { + int scaling = cpu->pstate.scaling; int32_t max_policy_perf, min_policy_perf; - int max_state, turbo_max; - int max_freq; /* - * HWP needs some special consideration, because on BDX the - * HWP_REQUEST uses abstract value to represent performance - * rather than pure ratios. + * HWP needs some special consideration, because HWP_REQUEST uses + * abstract values to represent performance rather than pure ratios. */ - if (hwp_active) { - intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state); - } else { - max_state = global.no_turbo || global.turbo_disabled ? - cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; - turbo_max = cpu->pstate.turbo_pstate; - } - max_freq = max_state * cpu->pstate.scaling; + if (hwp_active) + intel_pstate_get_hwp_cap(cpu); - max_policy_perf = max_state * policy_max / max_freq; + max_policy_perf = policy_max / scaling; if (policy_max == policy_min) { min_policy_perf = max_policy_perf; } else { - min_policy_perf = max_state * policy_min / max_freq; + min_policy_perf = policy_min / scaling; min_policy_perf = clamp_t(int32_t, min_policy_perf, 0, max_policy_perf); } - pr_debug("cpu:%d max_state %d min_policy_perf:%d max_policy_perf:%d\n", - cpu->cpu, max_state, min_policy_perf, max_policy_perf); + pr_debug("cpu:%d min_policy_perf:%d max_policy_perf:%d\n", + cpu->cpu, min_policy_perf, max_policy_perf); /* Normalize user input to [min_perf, max_perf] */ if (per_cpu_limits) { cpu->min_perf_ratio = min_policy_perf; cpu->max_perf_ratio = max_policy_perf; } else { + int turbo_max = cpu->pstate.turbo_pstate; int32_t global_min, global_max; /* Global limits are in percent of the maximum turbo P-state. */ @@ -2302,6 +2385,11 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) intel_pstate_clear_update_util_hook(policy->cpu); intel_pstate_hwp_set(policy->cpu); } + /* + * policy->cur is never updated with the intel_pstate driver, but it + * is used as a stale frequency value. So, keep it within limits. + */ + policy->cur = policy->min; mutex_unlock(&intel_pstate_limits_lock); @@ -2325,12 +2413,10 @@ static void intel_pstate_verify_cpu_policy(struct cpudata *cpu, { int max_freq; - update_turbo_state(); if (hwp_active) { - int max_state, turbo_max; - - intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state); - max_freq = max_state * cpu->pstate.scaling; + intel_pstate_get_hwp_cap(cpu); + max_freq = global.no_turbo || global.turbo_disabled ? + cpu->pstate.max_freq : cpu->pstate.turbo_freq; } else { max_freq = intel_pstate_get_max_freq(cpu); } @@ -2421,25 +2507,13 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy) cpu->max_perf_ratio = 0xFF; cpu->min_perf_ratio = 0; - policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling; - policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling; - /* cpuinfo and default policy values */ policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling; - update_turbo_state(); - global.turbo_disabled_mf = global.turbo_disabled; policy->cpuinfo.max_freq = global.turbo_disabled ? - cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; - policy->cpuinfo.max_freq *= cpu->pstate.scaling; - - if (hwp_active) { - unsigned int max_freq; - - max_freq = global.turbo_disabled ? 
cpu->pstate.max_freq : cpu->pstate.turbo_freq; - if (max_freq < policy->cpuinfo.max_freq) - policy->cpuinfo.max_freq = max_freq; - } + + policy->min = policy->cpuinfo.min_freq; + policy->max = policy->cpuinfo.max_freq; intel_pstate_init_acpi_perf_limits(policy); @@ -2599,8 +2673,6 @@ static int intel_cpufreq_target(struct cpufreq_policy *policy, struct cpufreq_freqs freqs; int target_pstate; - update_turbo_state(); - freqs.old = policy->cur; freqs.new = target_freq; @@ -2633,8 +2705,6 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy, struct cpudata *cpu = all_cpu_data[policy->cpu]; int target_pstate; - update_turbo_state(); - target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling); target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, true); @@ -2644,10 +2714,10 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy, static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy) { - int max_state, turbo_max, min_freq, max_freq, ret; struct freq_qos_request *req; struct cpudata *cpu; struct device *dev; + int ret, freq; dev = get_cpu_device(policy->cpu); if (!dev) @@ -2672,30 +2742,31 @@ static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy) if (hwp_active) { u64 value; - intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state); policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY_HWP; + + intel_pstate_get_hwp_cap(cpu); + rdmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, &value); WRITE_ONCE(cpu->hwp_req_cached, value); + cpu->epp_cached = intel_pstate_get_epp(cpu, value); } else { - turbo_max = cpu->pstate.turbo_pstate; policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY; } - min_freq = DIV_ROUND_UP(turbo_max * global.min_perf_pct, 100); - min_freq *= cpu->pstate.scaling; - max_freq = DIV_ROUND_UP(turbo_max * global.max_perf_pct, 100); - max_freq *= cpu->pstate.scaling; + freq = DIV_ROUND_UP(cpu->pstate.turbo_freq * global.min_perf_pct, 100); ret = freq_qos_add_request(&policy->constraints, req, FREQ_QOS_MIN, - min_freq); + freq); if (ret < 0) { dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret); goto free_req; } + freq = DIV_ROUND_UP(cpu->pstate.turbo_freq * global.max_perf_pct, 100); + ret = freq_qos_add_request(&policy->constraints, req + 1, FREQ_QOS_MAX, - max_freq); + freq); if (ret < 0) { dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret); goto remove_min_req; @@ -2756,7 +2827,7 @@ static void intel_pstate_driver_cleanup(void) intel_pstate_clear_update_util_hook(cpu); kfree(all_cpu_data[cpu]); - all_cpu_data[cpu] = NULL; + WRITE_ONCE(all_cpu_data[cpu], NULL); } } put_online_cpus(); @@ -2773,6 +2844,9 @@ static int intel_pstate_register_driver(struct cpufreq_driver *driver) memset(&global, 0, sizeof(global)); global.max_perf_pct = 100; + global.turbo_disabled = turbo_is_disabled(); + + arch_set_max_freq_ratio(global.turbo_disabled); intel_pstate_driver = driver; ret = cpufreq_register_driver(intel_pstate_driver); @@ -3025,6 +3099,7 @@ static bool intel_pstate_hwp_is_enabled(void) static int __init intel_pstate_init(void) { + static struct cpudata **_all_cpu_data; const struct x86_cpu_id *id; int rc; @@ -3050,7 +3125,7 @@ static int __init intel_pstate_init(void) * deal with it. 
	 */
 	if ((!no_hwp && boot_cpu_has(X86_FEATURE_HWP_EPP)) || hwp_forced) {
-		hwp_active++;
+		WRITE_ONCE(hwp_active, 1);
 		hwp_mode_bdw = id->driver_data;
 		intel_pstate.attr = hwp_cpufreq_attrs;
 		intel_cpufreq.attr = hwp_cpufreq_attrs;
@@ -3097,10 +3172,12 @@ static int __init intel_pstate_init(void)
 	pr_info("Intel P-state driver initializing\n");
 
-	all_cpu_data = vzalloc(array_size(sizeof(void *), num_possible_cpus()));
-	if (!all_cpu_data)
+	_all_cpu_data = vzalloc(array_size(sizeof(void *), num_possible_cpus()));
+	if (!_all_cpu_data)
 		return -ENOMEM;
 
+	WRITE_ONCE(all_cpu_data, _all_cpu_data);
+
 	intel_pstate_request_control_from_smm();
 
 	intel_pstate_sysfs_expose_params();
@@ -3162,4 +3239,3 @@ early_param("intel_pstate", intel_pstate_setup);
 
 MODULE_AUTHOR("Dirk Brandewie <dirk.j.brandewie@intel.com>");
 MODULE_DESCRIPTION("'intel_pstate' - P state driver Intel Core processors");
-MODULE_LICENSE("GPL");
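
Illustration (not part of the patch): the notification path added above runs notify_hwp_interrupt() in interrupt context, checks MSR_HWP_STATUS against the change bits it handles (guaranteed-performance change in bit 0, highest-performance change in bit 3), and defers the real work to hwp_notify_work, which re-reads MSR_HWP_CAPABILITIES and refreshes cpuinfo.max_freq before acknowledging the status MSR. The stand-alone C sketch below only decodes the register layout the patch relies on, mirroring the kernel's HWP_*_PERF() field accessors from msr-index.h and the two status bits defined in this patch; it takes raw MSR values on the command line, which can be obtained (as root) with the msr-tools rdmsr utility for MSR 0x771 (IA32_HWP_CAPABILITIES) and MSR 0x777 (IA32_HWP_STATUS).

/*
 * hwp_decode.c - decode raw IA32_HWP_CAPABILITIES / IA32_HWP_STATUS values.
 *
 * Illustrative user-space sketch, not part of the kernel patch.
 *
 * Build: cc -o hwp_decode hwp_decode.c
 * Usage: ./hwp_decode <hwp_capabilities_hex> [hwp_status_hex]
 */
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>

/* Same field layout as the kernel's HWP_*_PERF() macros in msr-index.h. */
#define HWP_HIGHEST_PERF(cap)		(((cap) >> 0) & 0xff)
#define HWP_GUARANTEED_PERF(cap)	(((cap) >> 8) & 0xff)
#define HWP_MOSTEFFICIENT_PERF(cap)	(((cap) >> 16) & 0xff)
#define HWP_LOWEST_PERF(cap)		(((cap) >> 24) & 0xff)

/* Status bits handled by notify_hwp_interrupt() in the patch above. */
#define HWP_GUARANTEED_PERF_CHANGE_STATUS	(1ULL << 0)
#define HWP_HIGHEST_PERF_CHANGE_STATUS		(1ULL << 3)

int main(int argc, char **argv)
{
	uint64_t cap, status;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <hwp_capabilities_hex> [hwp_status_hex]\n",
			argv[0]);
		return 1;
	}

	cap = strtoull(argv[1], NULL, 16);

	/* Performance levels are abstract ratios, not frequencies. */
	printf("highest_perf:        %" PRIu64 "\n", HWP_HIGHEST_PERF(cap));
	printf("guaranteed_perf:     %" PRIu64 "\n", HWP_GUARANTEED_PERF(cap));
	printf("most_efficient_perf: %" PRIu64 "\n", HWP_MOSTEFFICIENT_PERF(cap));
	printf("lowest_perf:         %" PRIu64 "\n", HWP_LOWEST_PERF(cap));

	if (argc > 2) {
		status = strtoull(argv[2], NULL, 16);
		printf("guaranteed changed:  %s\n",
		       status & HWP_GUARANTEED_PERF_CHANGE_STATUS ? "yes" : "no");
		printf("highest changed:     %s\n",
		       status & HWP_HIGHEST_PERF_CHANGE_STATUS ? "yes" : "no");
	}

	return 0;
}

For example, "./hwp_decode 0x0c0c1828 0x9" reports highest_perf 40, guaranteed_perf 24, most_efficient_perf 12, lowest_perf 12, with both change bits set in the status value; that is exactly the case in which the patch's notify_hwp_interrupt() schedules hwp_notify_work instead of just acknowledging the interrupt. On many Intel Core parts one performance unit corresponds to 100 MHz, but the scaling is not architectural; intel_pstate obtains the actual scaling factor separately (pstate_funcs.get_scaling() in the driver).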