diff --git a/arch/Kconfig b/arch/Kconfig index 98116fbfcff667a7ab8cb4a7d112fef45ff52e85..b30284a2397d27ad04e37922aad19393aacc6efa 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -264,6 +264,9 @@ config HAVE_ARCH_TRACEHOOK config HAVE_DMA_CONTIGUOUS bool +config ARCH_HAS_OPTIMIZED_POLL + bool + config GENERIC_SMP_IDLE_THREAD bool diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c11aa218c08e7f40a7a0a7c71aa6350978d7a74a..bb2d63aa2dd72ade95b1788210582a86219401a2 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -36,6 +36,9 @@ config ARM64 select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_NONLEAF_PMD_YOUNG if ARM64_HAFT + select ARCH_HAS_OPTIMIZED_POLL + select ARCH_CPUIDLE_HALTPOLL + select HALTPOLL_CPUIDLE select ARCH_HAS_PTE_DEVMAP select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SETUP_DMA_OPS @@ -2617,6 +2620,12 @@ config ARCH_HIBERNATION_HEADER config ARCH_SUSPEND_POSSIBLE def_bool y +config ARCH_CPUIDLE_HALTPOLL + bool "Enable selection of the cpuidle-haltpoll driver" + help + cpuidle-haltpoll allows for adaptive polling based on + current load before entering the idle state. 
+ endmenu # "Power management options" menu "CPU Power Management" diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 41fba69b84e2d8cec1606bde0b3295fbbd582809..95ddc85f6ddf0a2f429ffdca49d3752823ce0c55 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -628,6 +628,7 @@ CONFIG_CPU_PM=y CONFIG_ARCH_HIBERNATION_POSSIBLE=y CONFIG_ARCH_HIBERNATION_HEADER=y CONFIG_ARCH_SUSPEND_POSSIBLE=y +CONFIG_ARCH_CPUIDLE_HALTPOLL=y # end of Power management options # @@ -647,6 +648,7 @@ CONFIG_CPU_IDLE_GOV_TEO=y # # CONFIG_ARM_PSCI_CPUIDLE is not set # end of ARM CPU Idle Drivers +CONFIG_HALTPOLL_CPUIDLE=y # end of CPU Idle # @@ -789,6 +791,7 @@ CONFIG_TRACE_IRQFLAGS_SUPPORT=y CONFIG_TRACE_IRQFLAGS_NMI_SUPPORT=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_DMA_CONTIGUOUS=y +CONFIG_ARCH_HAS_OPTIMIZED_POLL=y CONFIG_GENERIC_SMP_IDLE_THREAD=y CONFIG_GENERIC_IDLE_POLL_SETUP=y CONFIG_ARCH_HAS_FORTIFY_SOURCE=y diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 1ca947d5c93963d33fe8fb02d6037fc71bd9fd7a..8f7861ccab7ee9146273d1d061b3c8fa685a0a9e 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -12,6 +12,7 @@ #include #include +#include #define __nops(n) ".rept " #n "\nnop\n.endr\n" #define nops(n) asm volatile(__nops(n)) @@ -198,7 +199,7 @@ do { \ VAL = READ_ONCE(*__PTR); \ if (cond_expr) \ break; \ - __cmpwait_relaxed(__PTR, VAL); \ + __cmpwait_relaxed(__PTR, VAL, ~0UL); \ } \ (typeof(*ptr))VAL; \ }) @@ -211,11 +212,68 @@ do { \ VAL = smp_load_acquire(__PTR); \ if (cond_expr) \ break; \ - __cmpwait_relaxed(__PTR, VAL); \ + __cmpwait_relaxed(__PTR, VAL, ~0UL); \ } \ (typeof(*ptr))VAL; \ }) +#define __smp_cond_load_timeout_spin(ptr, cond_expr, \ + time_expr_ns, time_limit_ns) \ +({ \ + typeof(ptr) __PTR = (ptr); \ + __unqual_scalar_typeof(*ptr) VAL; \ + unsigned int __count = 0; \ + for (;;) { \ + VAL = READ_ONCE(*__PTR); \ + if 
(cond_expr) \ + break; \ + cpu_relax(); \ + if (__count++ < smp_cond_time_check_count) \ + continue; \ + if ((time_expr_ns) >= time_limit_ns) \ + break; \ + __count = 0; \ + } \ + (typeof(*ptr))VAL; \ +}) + +#define __smp_cond_load_timeout_wait(ptr, cond_expr, \ + time_expr_ns, time_limit_ns) \ +({ \ + typeof(ptr) __PTR = (ptr); \ + __unqual_scalar_typeof(*ptr) VAL; \ + const unsigned long __time_limit_cycles = \ + NSECS_TO_CYCLES(time_limit_ns); \ + for (;;) { \ + VAL = READ_ONCE(*__PTR); \ + if (cond_expr) \ + break; \ + __cmpwait_relaxed(__PTR, VAL, __time_limit_cycles); \ + if ((time_expr_ns) >= time_limit_ns) \ + break; \ + } \ + (typeof(*ptr))VAL; \ +}) + +#define smp_cond_load_relaxed_timeout(ptr, cond_expr, \ + time_expr_ns, time_limit_ns) \ +({ \ + __unqual_scalar_typeof(*ptr) _val; \ + \ + int __wfe = arch_timer_evtstrm_available() || \ + alternative_has_cap_unlikely(ARM64_HAS_WFXT); \ + if (likely(__wfe)) \ + _val = __smp_cond_load_timeout_wait(ptr, cond_expr, \ + time_expr_ns, \ + time_limit_ns); \ + else \ + _val = __smp_cond_load_timeout_spin(ptr, cond_expr, \ + time_expr_ns, \ + time_limit_ns); \ + (typeof(*ptr))_val; \ +}) + + #include #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index d7a5407367414df69a898bef948f7c896778f38b..bb842dab5d0efe3f5e8dfdcb2c45087e675dca12 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -210,7 +210,8 @@ __CMPXCHG_GEN(_mb) #define __CMPWAIT_CASE(w, sfx, sz) \ static inline void __cmpwait_case_##sz(volatile void *ptr, \ - unsigned long val) \ + unsigned long val, \ + unsigned long time_limit_cycles) \ { \ unsigned long tmp; \ \ @@ -220,10 +221,12 @@ static inline void __cmpwait_case_##sz(volatile void *ptr, \ " ldxr" #sfx "\t%" #w "[tmp], %[v]\n" \ " eor %" #w "[tmp], %" #w "[tmp], %" #w "[val]\n" \ " cbnz %" #w "[tmp], 1f\n" \ - " wfe\n" \ + ALTERNATIVE("wfe\n", \ + "msr s0_3_c1_c0_0, %[time_limit_cycles]\n", \ + 
ARM64_HAS_WFXT) \ "1:" \ : [tmp] "=&r" (tmp), [v] "+Q" (*(u##sz *)ptr) \ - : [val] "r" (val)); \ + : [val] "r" (val), [time_limit_cycles] "r" (time_limit_cycles));\ } __CMPWAIT_CASE(w, b, 8); @@ -236,17 +239,22 @@ __CMPWAIT_CASE( , , 64); #define __CMPWAIT_GEN(sfx) \ static __always_inline void __cmpwait##sfx(volatile void *ptr, \ unsigned long val, \ + unsigned long time_limit_cycles, \ int size) \ { \ switch (size) { \ case 1: \ - return __cmpwait_case##sfx##_8(ptr, (u8)val); \ + return __cmpwait_case##sfx##_8(ptr, (u8)val, \ + time_limit_cycles); \ case 2: \ - return __cmpwait_case##sfx##_16(ptr, (u16)val); \ + return __cmpwait_case##sfx##_16(ptr, (u16)val, \ + time_limit_cycles); \ case 4: \ - return __cmpwait_case##sfx##_32(ptr, val); \ + return __cmpwait_case##sfx##_32(ptr, val, \ + time_limit_cycles); \ case 8: \ - return __cmpwait_case##sfx##_64(ptr, val); \ + return __cmpwait_case##sfx##_64(ptr, val, \ + time_limit_cycles); \ default: \ BUILD_BUG(); \ } \ @@ -258,7 +266,7 @@ __CMPWAIT_GEN() #undef __CMPWAIT_GEN -#define __cmpwait_relaxed(ptr, val) \ - __cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr))) +#define __cmpwait_relaxed(ptr, val, time_limit_cycles) \ + __cmpwait((ptr), (unsigned long)(val), time_limit_cycles, sizeof(*(ptr))) #endif /* __ASM_CMPXCHG_H */ diff --git a/arch/arm64/include/asm/cpuidle_haltpoll.h b/arch/arm64/include/asm/cpuidle_haltpoll.h new file mode 100644 index 0000000000000000000000000000000000000000..aa01ae9ad5ddd57053ea454d6220e21fcd4ca938 --- /dev/null +++ b/arch/arm64/include/asm/cpuidle_haltpoll.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ARCH_HALTPOLL_H +#define _ARCH_HALTPOLL_H + +static inline void arch_haltpoll_enable(unsigned int cpu) { } +static inline void arch_haltpoll_disable(unsigned int cpu) { } + +static inline bool arch_haltpoll_want(bool force) +{ + /* + * Enabling haltpoll requires KVM support for arch_haltpoll_enable(), + * arch_haltpoll_disable(). 
+ * + * Given that that's missing right now, only allow force loading for + * haltpoll. + */ + return force; +} +#endif diff --git a/arch/arm64/include/asm/delay-const.h b/arch/arm64/include/asm/delay-const.h new file mode 100644 index 0000000000000000000000000000000000000000..610283ba8712ac1b9c10e05836931d1f37d29eb6 --- /dev/null +++ b/arch/arm64/include/asm/delay-const.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASM_DELAY_CONST_H +#define _ASM_DELAY_CONST_H + +#include <asm/param.h> /* For HZ */ + +/* 2**32 / 1000000000 (rounded up) */ +#define __nsecs_to_xloops_mult 0x5UL + +extern unsigned long loops_per_jiffy; + +#define NSECS_TO_CYCLES(time_nsecs) \ + ((((time_nsecs) * __nsecs_to_xloops_mult) * loops_per_jiffy * HZ) >> 32) + +#endif /* _ASM_DELAY_CONST_H */ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 2dd890c8e4f8d287403ebbc5b216399ca8c23918..379d24059f5b140ebaac142054706c1e59f7ee17 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -76,6 +76,7 @@ void arch_setup_new_exec(void); #define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */ #define TIF_SECCOMP 11 /* syscall secure computing */ #define TIF_SYSCALL_EMU 12 /* syscall emulation active */ +#define TIF_POLLING_NRFLAG 16 /* set while polling in poll_idle() */ #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ #define TIF_FREEZE 19 #define TIF_RESTORE_SIGMASK 20 @@ -98,6 +99,7 @@ void arch_setup_new_exec(void); #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) +#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_32BIT (1 << TIF_32BIT) diff --git a/arch/arm64/kernel/idle.c b/arch/arm64/kernel/idle.c index 
c1125753fe9b5d36267c0c90174f35dede204fe3..3a0b59aa12e27153d0150699024fc068d769da8a 100644 --- a/arch/arm64/kernel/idle.c +++ b/arch/arm64/kernel/idle.c @@ -43,3 +43,4 @@ void noinstr arch_cpu_idle(void) */ cpu_do_idle(); } +EXPORT_SYMBOL_GPL(arch_cpu_idle); diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9be87b701f0f3fb4145df9c4c13ac885b4188217..df023e1cb5ddb411d4a83f2eb71a924bf4f2cfcb 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -133,6 +133,7 @@ config X86 select ARCH_WANTS_NO_INSTR select ARCH_WANT_GENERAL_HUGETLB select ARCH_WANT_HUGE_PMD_SHARE + select ARCH_HAS_OPTIMIZED_POLL select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WANT_OPTIMIZE_DAX_VMEMMAP if X86_64 select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP if X86_64 @@ -370,9 +371,6 @@ config ARCH_MAY_HAVE_PC_FDC config GENERIC_CALIBRATE_DELAY def_bool y -config ARCH_HAS_CPU_RELAX - def_bool y - config ARCH_HIBERNATION_POSSIBLE def_bool y @@ -841,6 +839,7 @@ config KVM_GUEST config ARCH_CPUIDLE_HALTPOLL def_bool n + depends on KVM_GUEST prompt "Disable host haltpoll when loading haltpoll driver" help If virtualized under KVM, disable host haltpoll. 
diff --git a/arch/x86/include/asm/cpuidle_haltpoll.h b/arch/x86/include/asm/cpuidle_haltpoll.h index c8b39c6716ff1798ab3e1391722e9b4f99aab2aa..8a0a12769c2e5976dd3acad8c09d392cf669df18 100644 --- a/arch/x86/include/asm/cpuidle_haltpoll.h +++ b/arch/x86/include/asm/cpuidle_haltpoll.h @@ -4,5 +4,6 @@ void arch_haltpoll_enable(unsigned int cpu); void arch_haltpoll_disable(unsigned int cpu); +bool arch_haltpoll_want(bool force); #endif diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index b8ab9ee5896c191611b89ab6aa0a672304bbdb06..4d45b1db340a3e72252e5bdc3e9b7f6aa99e3f13 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -1149,4 +1149,17 @@ void arch_haltpoll_disable(unsigned int cpu) smp_call_function_single(cpu, kvm_enable_host_haltpoll, NULL, 1); } EXPORT_SYMBOL_GPL(arch_haltpoll_disable); + +bool arch_haltpoll_want(bool force) +{ + /* Do not load haltpoll if idle= is passed */ + if (boot_option_idle_override != IDLE_NO_OVERRIDE) + return false; + + if (!kvm_para_available()) + return false; + + return kvm_para_has_hint(KVM_HINTS_REALTIME) || force; +} +EXPORT_SYMBOL_GPL(arch_haltpoll_want); #endif diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 831fa4a121598545749a64b518128ab538834a55..d154b5d7732827ab178bffd57f470a45ecbdb717 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -35,7 +35,7 @@ #include #endif -#define ACPI_IDLE_STATE_START (IS_ENABLED(CONFIG_ARCH_HAS_CPU_RELAX) ? 1 : 0) +#define ACPI_IDLE_STATE_START (IS_ENABLED(CONFIG_ARCH_HAS_OPTIMIZED_POLL) ? 
1 : 0) static unsigned int max_cstate __read_mostly = ACPI_PROCESSOR_MAX_POWER; module_param(max_cstate, uint, 0400); @@ -782,7 +782,7 @@ static int acpi_processor_setup_cstates(struct acpi_processor *pr) if (max_cstate == 0) max_cstate = 1; - if (IS_ENABLED(CONFIG_ARCH_HAS_CPU_RELAX)) { + if (IS_ENABLED(CONFIG_ARCH_HAS_OPTIMIZED_POLL)) { cpuidle_poll_state_init(drv); count = 1; } else { @@ -1194,20 +1194,46 @@ static int acpi_idle_lpi_enter(struct cpuidle_device *dev, return -EINVAL; } +/* To correctly reflect the entered state if the poll state is enabled. */ +static int acpi_idle_lpi_enter_with_poll_state(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + int entered_state; + + if (unlikely(index < 1)) + return -EINVAL; + + entered_state = acpi_idle_lpi_enter(dev, drv, index - 1); + if (entered_state < 0) + return entered_state; + + return entered_state + 1; +} + static int acpi_processor_setup_lpi_states(struct acpi_processor *pr) { - int i; + int i, count; struct acpi_lpi_state *lpi; struct cpuidle_state *state; struct cpuidle_driver *drv = &acpi_idle_driver; + typeof(state->enter) enter_method; if (!pr->flags.has_lpi) return -EOPNOTSUPP; + if (IS_ENABLED(CONFIG_ARCH_HAS_OPTIMIZED_POLL)) { + cpuidle_poll_state_init(drv); + count = 1; + enter_method = acpi_idle_lpi_enter_with_poll_state; + } else { + count = 0; + enter_method = acpi_idle_lpi_enter; + } + for (i = 0; i < pr->power.count && i < CPUIDLE_STATE_MAX; i++) { lpi = &pr->power.lpi_states[i]; - state = &drv->states[i]; + state = &drv->states[count]; snprintf(state->name, CPUIDLE_NAME_LEN, "LPI-%d", i); strscpy(state->desc, lpi->desc, CPUIDLE_DESC_LEN); state->exit_latency = lpi->wake_latency; @@ -1215,11 +1241,14 @@ static int acpi_processor_setup_lpi_states(struct acpi_processor *pr) state->flags |= arch_get_idle_state_flags(lpi->arch_flags); if (i != 0 && lpi->entry_method == ACPI_CSTATE_FFH) state->flags |= CPUIDLE_FLAG_RCU_IDLE; - state->enter = acpi_idle_lpi_enter; - 
drv->safe_state_index = i; + state->enter = enter_method; + drv->safe_state_index = count; + count++; + if (count == CPUIDLE_STATE_MAX) + break; } - drv->state_count = i; + drv->state_count = count; return 0; } diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig index cac5997dca505fe70352c0d4134229aca203a871..c1bebadf22bcf6fd2243b85e8b42dc670816f393 100644 --- a/drivers/cpuidle/Kconfig +++ b/drivers/cpuidle/Kconfig @@ -35,7 +35,6 @@ config CPU_IDLE_GOV_TEO config CPU_IDLE_GOV_HALTPOLL bool "Haltpoll governor (for virtualized systems)" - depends on KVM_GUEST help This governor implements haltpoll idle state selection, to be used in conjunction with the haltpoll cpuidle driver, allowing @@ -72,8 +71,8 @@ source "drivers/cpuidle/Kconfig.riscv" endmenu config HALTPOLL_CPUIDLE - tristate "Halt poll cpuidle driver" - depends on X86 && KVM_GUEST + tristate "Haltpoll cpuidle driver" + depends on ARCH_CPUIDLE_HALTPOLL && ARCH_HAS_OPTIMIZED_POLL select CPU_IDLE_GOV_HALTPOLL default y help diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile index d103342b7cfc2120d2981dc1f61be9029aa69c54..f29dfd1525b01b895bf439716d07fef84d3429fe 100644 --- a/drivers/cpuidle/Makefile +++ b/drivers/cpuidle/Makefile @@ -7,7 +7,7 @@ obj-y += cpuidle.o driver.o governor.o sysfs.o governors/ obj-$(CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED) += coupled.o obj-$(CONFIG_DT_IDLE_STATES) += dt_idle_states.o obj-$(CONFIG_DT_IDLE_GENPD) += dt_idle_genpd.o -obj-$(CONFIG_ARCH_HAS_CPU_RELAX) += poll_state.o +obj-$(CONFIG_ARCH_HAS_OPTIMIZED_POLL) += poll_state.o obj-$(CONFIG_HALTPOLL_CPUIDLE) += cpuidle-haltpoll.o ################################################################################## diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c index d8515d5c0853dc6c4caf05a91337b053f2653fa0..24f379d72b043820b0de1cef6522ae60583f6e7d 100644 --- a/drivers/cpuidle/cpuidle-haltpoll.c +++ b/drivers/cpuidle/cpuidle-haltpoll.c @@ -15,12 +15,18 @@ #include #include 
#include -#include #include -static bool force __read_mostly; -module_param(force, bool, 0444); +static bool force; MODULE_PARM_DESC(force, "Load unconditionally"); +static int enable_haltpoll_driver(const char *val, const struct kernel_param *kp); + +static const struct kernel_param_ops enable_haltpoll_ops = { + .set = enable_haltpoll_driver, + .get = param_get_bool, +}; +module_param_cb(force, &enable_haltpoll_ops, &force, 0644); + static struct cpuidle_device __percpu *haltpoll_cpuidle_devices; static enum cpuhp_state haltpoll_hp_state; @@ -93,22 +99,54 @@ static void haltpoll_uninit(void) haltpoll_cpuidle_devices = NULL; } -static bool haltpoll_want(void) +static int __init haltpoll_init(void) { - return kvm_para_has_hint(KVM_HINTS_REALTIME) || force; + int ret; + struct cpuidle_driver *drv = &haltpoll_driver; + + if (!arch_haltpoll_want(force)) + return -ENODEV; + + cpuidle_poll_state_init(drv); + + ret = cpuidle_register_driver(drv); + if (ret < 0) + return ret; + + haltpoll_cpuidle_devices = alloc_percpu(struct cpuidle_device); + if (haltpoll_cpuidle_devices == NULL) { + cpuidle_unregister_driver(drv); + return -ENOMEM; + } + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "cpuidle/haltpoll:online", + haltpoll_cpu_online, haltpoll_cpu_offline); + if (ret < 0) { + haltpoll_uninit(); + } else { + haltpoll_hp_state = ret; + ret = 0; + } + + return ret; } -static int __init haltpoll_init(void) +static void __exit haltpoll_exit(void) +{ + haltpoll_uninit(); +} + +#ifdef CONFIG_ARM64 +static int register_haltpoll_driver(void) { int ret; struct cpuidle_driver *drv = &haltpoll_driver; +#ifdef CONFIG_X86 /* Do not load haltpoll if idle= is passed */ if (boot_option_idle_override != IDLE_NO_OVERRIDE) return -ENODEV; - - if (!kvm_para_available() || !haltpoll_want()) - return -ENODEV; +#endif cpuidle_poll_state_init(drv); @@ -123,7 +161,7 @@ static int __init haltpoll_init(void) } ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "cpuidle/haltpoll:online", - 
haltpoll_cpu_online, haltpoll_cpu_offline); + haltpoll_cpu_online, haltpoll_cpu_offline); if (ret < 0) { haltpoll_uninit(); } else { @@ -134,10 +172,53 @@ static int __init haltpoll_init(void) return ret; } -static void __exit haltpoll_exit(void) +static void unregister_haltpoll_driver(void) { - haltpoll_uninit(); + if (haltpoll_hp_state) + cpuhp_remove_state(haltpoll_hp_state); + cpuidle_unregister_driver(&haltpoll_driver); + + free_percpu(haltpoll_cpuidle_devices); + haltpoll_cpuidle_devices = NULL; + +} + +static int enable_haltpoll_driver(const char *val, const struct kernel_param *kp) +{ + int ret; + bool do_enable; + + if (!val) + return 0; + + ret = strtobool(val, &do_enable); + + if (ret || force == do_enable) + return ret; + + if (do_enable) { + ret = register_haltpoll_driver(); + + if (!ret) { + pr_info("Enable haltpoll driver.\n"); + force = 1; + } else { + pr_err("Fail to enable haltpoll driver.\n"); + } + } else { + unregister_haltpoll_driver(); + force = 0; + pr_info("Unregister haltpoll driver.\n"); + } + + return ret; +} +#else +static int enable_haltpoll_driver(const char *val, const struct kernel_param *kp) +{ + return -1; } +#endif module_init(haltpoll_init); module_exit(haltpoll_exit); diff --git a/drivers/cpuidle/governors/haltpoll.c b/drivers/cpuidle/governors/haltpoll.c index 1dff3a52917de9a6b79d1cd9e301a24659b412f6..e7b1c602ed085f964f46d52b2cd09970a49d4573 100644 --- a/drivers/cpuidle/governors/haltpoll.c +++ b/drivers/cpuidle/governors/haltpoll.c @@ -18,7 +18,6 @@ #include #include #include -#include #include static unsigned int guest_halt_poll_ns __read_mostly = 200000; @@ -143,10 +142,7 @@ static struct cpuidle_governor haltpoll_governor = { static int __init init_haltpoll(void) { - if (kvm_para_available()) - return cpuidle_register_governor(&haltpoll_governor); - - return 0; + return cpuidle_register_governor(&haltpoll_governor); } postcore_initcall(init_haltpoll); diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c 
index 9b6d90a726019c61580671991539f9eaa95a8da2..0b42971393c9715bbe142cdfe43c6d27652de474 100644 --- a/drivers/cpuidle/poll_state.c +++ b/drivers/cpuidle/poll_state.c @@ -8,35 +8,24 @@ #include #include -#define POLL_IDLE_RELAX_COUNT 200 - static int __cpuidle poll_idle(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { - u64 time_start; - - time_start = local_clock_noinstr(); dev->poll_time_limit = false; raw_local_irq_enable(); if (!current_set_polling_and_test()) { - unsigned int loop_count = 0; - u64 limit; - - limit = cpuidle_poll_time(drv, dev); - - while (!need_resched()) { - cpu_relax(); - if (loop_count++ < POLL_IDLE_RELAX_COUNT) - continue; - - loop_count = 0; - if (local_clock_noinstr() - time_start > limit) { - dev->poll_time_limit = true; - break; - } - } + unsigned long flags; + u64 time_start = local_clock_noinstr(); + u64 limit = cpuidle_poll_time(drv, dev); + + flags = smp_cond_load_relaxed_timeout(¤t_thread_info()->flags, + VAL & _TIF_NEED_RESCHED, + local_clock_noinstr(), + time_start + limit); + + dev->poll_time_limit = !(flags & _TIF_NEED_RESCHED); } raw_local_irq_disable(); diff --git a/drivers/idle/Kconfig b/drivers/idle/Kconfig index 6707d2539fc48bf79146efa2d26a3717e299d6b2..6f9b1d48fedeac3d66ed99698d0b4f28e9a06cf2 100644 --- a/drivers/idle/Kconfig +++ b/drivers/idle/Kconfig @@ -4,6 +4,7 @@ config INTEL_IDLE depends on CPU_IDLE depends on X86 depends on CPU_SUP_INTEL + depends on ARCH_HAS_OPTIMIZED_POLL help Enable intel_idle, a cpuidle driver that includes knowledge of native Intel hardware idle features. The acpi_idle driver diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index 1985c22d90ca47f63d9816899ed4870fccbf3176..f5d6c8444d338f5b91a212149536f204c35f3644 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -275,6 +275,48 @@ do { \ }) #endif +#ifndef smp_cond_time_check_count +/* + * Limit how often smp_cond_load_relaxed_timeout() evaluates time_expr_ns. 
+ * This helps reduce the number of instructions executed while spin-waiting. + */ +#define smp_cond_time_check_count 200 +#endif + +/** + * smp_cond_load_relaxed_timeout() - (Spin) wait for cond with no ordering + * guarantees until a timeout expires. + * @ptr: pointer to the variable to wait on + * @cond: boolean expression to wait for + * @time_expr_ns: evaluates to the current time + * @time_limit_ns: compared against time_expr_ns + * + * Equivalent to using READ_ONCE() on the condition variable. + * + * Due to C lacking lambda expressions we load the value of *ptr into a + * pre-named variable @VAL to be used in @cond. + */ +#ifndef smp_cond_load_relaxed_timeout +#define smp_cond_load_relaxed_timeout(ptr, cond_expr, time_expr_ns, \ + time_limit_ns) ({ \ + typeof(ptr) __PTR = (ptr); \ + __unqual_scalar_typeof(*ptr) VAL; \ + unsigned int __count = 0; \ + for (;;) { \ + VAL = READ_ONCE(*__PTR); \ + if (cond_expr) \ + break; \ + cpu_relax(); \ + if (__count++ < smp_cond_time_check_count) \ + continue; \ + if ((time_expr_ns) >= time_limit_ns) \ + break; \ + __count = 0; \ + } \ + (typeof(*ptr))VAL; \ +}) +#endif + /* * pmem_wmb() ensures that all stores for which the modification * are written to persistent storage by preceding instructions have diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 3183aeb7f5b4238a2b88349ae4bf8c492475460f..7e7e58a17b074072e37582f80c9d46c2da73e17d 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -275,7 +275,7 @@ static inline void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev, } #endif -#if defined(CONFIG_CPU_IDLE) && defined(CONFIG_ARCH_HAS_CPU_RELAX) +#if defined(CONFIG_CPU_IDLE) && defined(CONFIG_ARCH_HAS_OPTIMIZED_POLL) void cpuidle_poll_state_init(struct cpuidle_driver *drv); #else static inline void cpuidle_poll_state_init(struct cpuidle_driver *drv) {} diff --git a/include/linux/cpuidle_haltpoll.h b/include/linux/cpuidle_haltpoll.h index 
d50c1e0411a2dda2376ff01e34f01075ebe7a245..68eb7a757120bcaf89a8861a9ec0559bfc625bf6 100644 --- a/include/linux/cpuidle_haltpoll.h +++ b/include/linux/cpuidle_haltpoll.h @@ -12,5 +12,10 @@ static inline void arch_haltpoll_enable(unsigned int cpu) static inline void arch_haltpoll_disable(unsigned int cpu) { } + +static inline bool arch_haltpoll_want(bool force) +{ + return false; +} #endif #endif