diff --git a/Documentation/arch/arm64/cpu-hotplug.rst b/Documentation/arch/arm64/cpu-hotplug.rst new file mode 100644 index 0000000000000000000000000000000000000000..76ba8d932c72264521cdbb4d3e6937525a1d6d87 --- /dev/null +++ b/Documentation/arch/arm64/cpu-hotplug.rst @@ -0,0 +1,79 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. _cpuhp_index: + +==================== +CPU Hotplug and ACPI +==================== + +CPU hotplug in the arm64 world is commonly used to describe the kernel taking +CPUs online/offline using PSCI. This document is about ACPI firmware allowing +CPUs that were not available during boot to be added to the system later. + +``possible`` and ``present`` refer to the state of the CPU as seen by linux. + + +CPU Hotplug on physical systems - CPUs not present at boot +---------------------------------------------------------- + +Physical systems need to mark a CPU that is ``possible`` but not ``present`` as +being ``present``. An example would be a dual socket machine, where the package +in one of the sockets can be replaced while the system is running. + +This is not supported. + +In the arm64 world CPUs are not a single device but a slice of the system. +There are no systems that support the physical addition (or removal) of CPUs +while the system is running, and ACPI is not able to sufficiently describe +them. + +e.g. New CPUs come with new caches, but the platform's cache toplogy is +described in a static table, the PPTT. How caches are shared between CPUs is +not discoverable, and must be described by firmware. + +e.g. The GIC redistributor for each CPU must be accessed by the driver during +boot to discover the system wide supported features. ACPI's MADT GICC +structures can describe a redistributor associated with a disabled CPU, but +can't describe whether the redistributor is accessible, only that it is not +'always on'. + +arm64's ACPI tables assume that everything described is ``present``. + + +CPU Hotplug on virtual systems - CPUs not enabled at boot +--------------------------------------------------------- + +Virtual systems have the advantage that all the properties the system will +ever have can be described at boot. There are no power-domain considerations +as such devices are emulated. + +CPU Hotplug on virtual systems is supported. It is distinct from physical +CPU Hotplug as all resources are described as ``present``, but CPUs may be +marked as disabled by firmware. Only the CPU's online/offline behaviour is +influenced by firmware. An example is where a virtual machine boots with a +single CPU, and additional CPUs are added once a cloud orchestrator deploys +the workload. + +For a virtual machine, the VMM (e.g. Qemu) plays the part of firmware. + +Virtual hotplug is implemented as a firmware policy affecting which CPUs can be +brought online. Firmware can enforce its policy via PSCI's return codes. e.g. +``DENIED``. + +The ACPI tables must describe all the resources of the virtual machine. CPUs +that firmware wishes to disable either from boot (or later) should not be +``enabled`` in the MADT GICC structures, but should have the ``online capable`` +bit set, to indicate they can be enabled later. The boot CPU must be marked as +``enabled``. The 'always on' GICR structure must be used to describe the +redistributors. + +CPUs described as ``online capable`` but not ``enabled`` can be set to enabled +by the DSDT's Processor object's _STA method. On virtual systems the _STA method +must always report the CPU as ``present``. Changes to the firmware policy can +be notified to the OS via device-check or eject-request. + +CPUs described as ``enabled`` in the static table, should not have their _STA +modified dynamically by firmware. Soft-restart features such as kexec will +re-read the static properties of the system from these static tables, and +may malfunction if these no longer describe the running system. Linux will +re-discover the dynamic properties of the system from the _STA method later +during boot. diff --git a/Documentation/arch/arm64/index.rst b/Documentation/arch/arm64/index.rst index d08e924204bf15a6528e4b20ff046e74e11c04aa..78544de0a8a9e8b833c1fca80439999247137244 100644 --- a/Documentation/arch/arm64/index.rst +++ b/Documentation/arch/arm64/index.rst @@ -13,6 +13,7 @@ ARM64 Architecture asymmetric-32bit booting cpu-feature-registers + cpu-hotplug elf_hwcaps hugetlbpage kdump diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 6792a1f83f2ad400c98efa360f91ae0e802b1c7c..bc9a6656fc0c55626284279e9bb6fea17e1ec323 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -119,6 +119,17 @@ static inline u32 get_acpi_id_for_cpu(unsigned int cpu) return acpi_cpu_get_madt_gicc(cpu)->uid; } +static inline int get_cpu_for_acpi_id(u32 uid) +{ + int cpu; + + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + if (uid == get_acpi_id_for_cpu(cpu)) + return cpu; + + return -EINVAL; +} + static inline void arch_fix_phys_package_id(int num, u32 slot) { } void __init acpi_init_cpus(void); int apei_claim_sea(struct pt_regs *regs); diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c index e51535a5f939a9f6fb6ad29839fdd6b722009f2f..0c036a9a3c338ae81e0e31626c54948f227e312d 100644 --- a/arch/arm64/kernel/acpi_numa.c +++ b/arch/arm64/kernel/acpi_numa.c @@ -34,17 +34,6 @@ int __init acpi_numa_get_nid(unsigned int cpu) return acpi_early_node_map[cpu]; } -static inline int get_cpu_for_acpi_id(u32 uid) -{ - int cpu; - - for (cpu = 0; cpu < nr_cpu_ids; cpu++) - if (uid == get_acpi_id_for_cpu(cpu)) - return cpu; - - return -EINVAL; -} - static int __init acpi_parse_gicc_pxm(union acpi_subtable_headers *header, const unsigned long end) { diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 29a8e444db836a70a17fecf2794f358a5f8f3003..fabd732d0a2dfee37074ef4ebb6ce5894871c8bd 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -40,7 +40,7 @@ static int cpu_psci_cpu_boot(unsigned int cpu) { phys_addr_t pa_secondary_entry = __pa_symbol(secondary_entry); int err = psci_ops.cpu_on(cpu_logical_map(cpu), pa_secondary_entry); - if (err) + if (err && err != -EPERM) pr_err("failed to boot CPU%d (%d)\n", cpu, err); return err; diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 6fcf73f1eeed172975b5fed1d04a6d18091218ee..5aea096675e768c6e434599ef73eedf806ae6ed2 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -125,7 +125,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) /* Now bring the CPU into our world */ ret = boot_secondary(cpu, idle); if (ret) { - pr_err("CPU%u: failed to boot: %d\n", cpu, ret); + if (ret != -EPERM) + pr_err("CPU%u: failed to boot: %d\n", cpu, ret); return ret; } diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 12f5fcb630a7fb9ec3398e3cd86a0bca2b634777..1b49ba42de5855581a946ec7809f8b29a4bc20b0 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -5,7 +5,7 @@ config LOONGARCH select ACPI select ACPI_GENERIC_GSI if ACPI select ACPI_MCFG if ACPI - select ACPI_HOTPLUG_CPU if ACPI_PROCESSOR && HOTPLUG_CPU + select ACPI_HOTPLUG_PRESENT_CPU if ACPI_PROCESSOR && HOTPLUG_CPU select ACPI_PPTT if ACPI select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI select ARCH_BINFMT_ELF_STATE diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 569484674f7e753a1f6d883faebc55b1245d077f..ec21b2e7aae31cbf286aa207a9d0f4da22888f07 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -59,7 +59,7 @@ CONFIG_ACPI_SPCR_TABLE=y CONFIG_ACPI_TAD=y CONFIG_ACPI_DOCK=y CONFIG_ACPI_IPMI=m -CONFIG_ACPI_HOTPLUG_CPU=y +CONFIG_ACPI_HOTPLUG_PRESENT_CPU=y CONFIG_ACPI_PCI_SLOT=y CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_STAT=y diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 23601a3a8352f511e8ec45ec8fca2de6e3e88aad..5b5990765908d9081039baafc33e616d2f5ca971 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -293,7 +293,7 @@ void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size) memblock_reserve(addr, size); } -#ifdef CONFIG_ACPI_HOTPLUG_CPU +#ifdef CONFIG_ACPI_HOTPLUG_PRESENT_CPU #include @@ -345,4 +345,4 @@ int acpi_unmap_cpu(int cpu) } EXPORT_SYMBOL(acpi_unmap_cpu); -#endif /* CONFIG_ACPI_HOTPLUG_CPU */ +#endif /* CONFIG_ACPI_HOTPLUG_PRESENT_CPU */ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 54c10fdcf5d92effdec68ae25109ea302d00596c..b41243f8c2413efb11f019efae3a5933cc2b2cd8 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -60,7 +60,8 @@ config X86 # select ACPI_LEGACY_TABLES_LOOKUP if ACPI select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI - select ACPI_HOTPLUG_CPU if ACPI_PROCESSOR && HOTPLUG_CPU + select ACPI_HOTPLUG_PRESENT_CPU if ACPI_PROCESSOR && HOTPLUG_CPU + select ACPI_HOTPLUG_IGNORE_OSC if ACPI && HOTPLUG_CPU select ARCH_32BIT_OFF_T if X86_32 select ARCH_CLOCKSOURCE_INIT select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index c55c0ef47a187eb961616ea6af6eac8e246b1fe0..8202bd2d957d93a29f665357e2b288fc800e3743 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -817,7 +817,7 @@ static void __init acpi_set_irq_model_ioapic(void) /* * ACPI based hotplug support for CPU */ -#ifdef CONFIG_ACPI_HOTPLUG_CPU +#ifdef CONFIG_ACPI_HOTPLUG_PRESENT_CPU #include static int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) @@ -866,7 +866,7 @@ int acpi_unmap_cpu(int cpu) return (0); } EXPORT_SYMBOL(acpi_unmap_cpu); -#endif /* CONFIG_ACPI_HOTPLUG_CPU */ +#endif /* CONFIG_ACPI_HOTPLUG_PRESENT_CPU */ int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base) { diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 8456d48ba702dbba92dce458444bcf92a7708ea2..c49978b4b11f16256817dd840034bb2dc369c185 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -305,11 +305,20 @@ config ACPI_IPMI To compile this driver as a module, choose M here: the module will be called as acpi_ipmi. -config ACPI_HOTPLUG_CPU +config ACPI_HOTPLUG_PRESENT_CPU bool depends on ACPI_PROCESSOR && HOTPLUG_CPU select ACPI_CONTAINER +config ACPI_HOTPLUG_IGNORE_OSC + bool + depends on ACPI_HOTPLUG_PRESENT_CPU + help + Ignore whether firmware acknowledged support for toggling the CPU + present bit in _STA. Some architectures predate the _OSC bits, so + firmware doesn't know to do this. + + config ACPI_PROCESSOR_AGGREGATOR tristate "Processor Aggregator" depends on ACPI_PROCESSOR @@ -399,7 +408,7 @@ config ACPI_PCI_SLOT config ACPI_CONTAINER bool "Container and Module Devices" - default (ACPI_HOTPLUG_MEMORY || ACPI_HOTPLUG_CPU) + default (ACPI_HOTPLUG_MEMORY || ACPI_HOTPLUG_PRESENT_CPU) help This driver supports ACPI Container and Module devices (IDs ACPI0004, PNP0A05, and PNP0A06). diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 4fe2ef54088c65234ace0f111a0ffe93f40f8fb0..5bb207a7a1dd917952b4e67d826785f1beb8d094 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -182,14 +182,30 @@ static void __init acpi_pcc_cpufreq_init(void) static void __init acpi_pcc_cpufreq_init(void) {} #endif /* CONFIG_X86 */ +static bool acpi_processor_hotplug_present_supported(void) +{ + if (!IS_ENABLED(CONFIG_ACPI_HOTPLUG_PRESENT_CPU)) + return false; + + /* x86 systems pre-date the _OSC bit */ + if (IS_ENABLED(CONFIG_ACPI_HOTPLUG_IGNORE_OSC)) + return true; + + return osc_sb_hotplug_present_support_acked; +} + /* Initialization */ -#ifdef CONFIG_ACPI_HOTPLUG_CPU -static int acpi_processor_hotadd_init(struct acpi_processor *pr) +static int acpi_processor_make_present(struct acpi_processor *pr) { unsigned long long sta; acpi_status status; int ret; + if (!acpi_processor_hotplug_present_supported()) { + pr_err_once("Changing CPU present bit is not supported\n"); + return -ENODEV; + } + if (invalid_phys_cpuid(pr->phys_id)) return -ENODEV; @@ -223,12 +239,32 @@ static int acpi_processor_hotadd_init(struct acpi_processor *pr) cpu_maps_update_done(); return ret; } -#else -static inline int acpi_processor_hotadd_init(struct acpi_processor *pr) + +static int acpi_processor_make_enabled(struct acpi_processor *pr) { - return -ENODEV; + unsigned long long sta; + acpi_status status; + bool present, enabled; + + if (!acpi_has_method(pr->handle, "_STA")) + return arch_register_cpu(pr->id); + + status = acpi_evaluate_integer(pr->handle, "_STA", NULL, &sta); + if (ACPI_FAILURE(status)) + return -ENODEV; + + present = sta & ACPI_STA_DEVICE_PRESENT; + enabled = sta & ACPI_STA_DEVICE_ENABLED; + + if (cpu_online(pr->id) && (!present || !enabled)) { + pr_err_once(FW_BUG "CPU %u is online, but described as not present or disabled!\n", pr->id); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); + } else if (!present || !enabled) { + return -ENODEV; + } + + return arch_register_cpu(pr->id); } -#endif /* CONFIG_ACPI_HOTPLUG_CPU */ static int acpi_processor_get_info(struct acpi_device *device) { @@ -314,6 +350,18 @@ static int acpi_processor_get_info(struct acpi_device *device) cpufreq_add_device("acpi-cpufreq"); } + /* + * Register CPUs that are present. get_cpu_device() is used to skip + * duplicate CPU descriptions from firmware. + */ + if (!invalid_logical_cpuid(pr->id) && cpu_present(pr->id) && + !get_cpu_device(pr->id)) { + int ret = acpi_processor_make_enabled(pr); + + if (ret) + return ret; + } + /* * Extra Processor objects may be enumerated on MP systems with * less than the max # of CPUs. They should be ignored _iff @@ -323,7 +371,7 @@ static int acpi_processor_get_info(struct acpi_device *device) * because cpuid <-> apicid mapping is persistent now. */ if (invalid_logical_cpuid(pr->id) || !cpu_present(pr->id)) { - int ret = acpi_processor_hotadd_init(pr); + int ret = acpi_processor_make_present(pr); if (ret) return ret; @@ -449,14 +497,15 @@ static int acpi_processor_add(struct acpi_device *device, return result; } -#ifdef CONFIG_ACPI_HOTPLUG_CPU /* Removal */ -static void acpi_processor_remove(struct acpi_device *device) +static void acpi_processor_make_not_present(struct acpi_device *device) { struct acpi_processor *pr; - if (!device || !acpi_driver_data(device)) + if (!IS_ENABLED(CONFIG_ACPI_HOTPLUG_PRESENT_CPU)) { + pr_err_once("Changing CPU present bit is not supported"); return; + } pr = acpi_driver_data(device); if (pr->id >= nr_cpu_ids) @@ -493,7 +542,32 @@ static void acpi_processor_remove(struct acpi_device *device) free_cpumask_var(pr->throttling.shared_cpu_map); kfree(pr); } -#endif /* CONFIG_ACPI_HOTPLUG_CPU */ + +static void acpi_processor_post_eject(struct acpi_device *device) +{ + struct acpi_processor *pr; + unsigned long long sta; + acpi_status status; + + if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) || !device) + return; + + pr = acpi_driver_data(device); + if (!pr || pr->id >= nr_cpu_ids || invalid_phys_cpuid(pr->phys_id)) + return; + + status = acpi_evaluate_integer(pr->handle, "_STA", NULL, &sta); + if (ACPI_FAILURE(status)) + return; + + if (cpu_present(pr->id) && !(sta & ACPI_STA_DEVICE_PRESENT)) { + acpi_processor_make_not_present(device); + return; + } + + if (cpu_present(pr->id) && !(sta & ACPI_STA_DEVICE_ENABLED)) + arch_unregister_cpu(pr->id); +} #ifdef CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC bool __init processor_physically_present(acpi_handle handle) @@ -618,17 +692,37 @@ static const struct acpi_device_id processor_device_ids[] = { static struct acpi_scan_handler processor_handler = { .ids = processor_device_ids, .attach = acpi_processor_add, -#ifdef CONFIG_ACPI_HOTPLUG_CPU - .detach = acpi_processor_remove, -#endif + .post_eject = acpi_processor_post_eject, .hotplug = { .enabled = true, }, }; +static acpi_status acpi_processor_container_walk(acpi_handle handle, + u32 lvl, + void *context, + void **rv) +{ + struct acpi_device *adev; + acpi_status status; + + adev = acpi_get_acpi_dev(handle); + if (!adev) + return AE_ERROR; + + status = acpi_processor_add(adev, &processor_device_ids[0]); + acpi_put_acpi_dev(adev); + + return status; +} + static int acpi_processor_container_attach(struct acpi_device *dev, const struct acpi_device_id *id) { + acpi_walk_namespace(ACPI_TYPE_PROCESSOR, dev->handle, + ACPI_UINT32_MAX, acpi_processor_container_walk, + NULL, NULL, NULL); + return 1; } @@ -769,6 +863,25 @@ void __init acpi_processor_init(void) acpi_pcc_cpufreq_init(); } +static int __init acpi_processor_register_missing_cpus(void) +{ + int cpu; + + if (acpi_disabled) + return 0; + + for_each_online_cpu(cpu) { + if (!get_cpu_device(cpu)) { + pr_err_once(FW_BUG "CPU %u has no ACPI namespace description!\n", cpu); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); + arch_register_cpu(cpu); + } + } + + return 0; +} +subsys_initcall_sync(acpi_processor_register_missing_cpus); + #ifdef CONFIG_ACPI_PROCESSOR_CSTATE /** * acpi_processor_claim_cst_control - Request _CST control from the platform. diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index a4aa53b7e2bb30789165145c4d1ccdea214809a3..b42f17bfbb0997081e3d9f291dfd625f2d1a268d 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -298,6 +298,13 @@ EXPORT_SYMBOL_GPL(osc_sb_native_usb4_support_confirmed); bool osc_sb_cppc2_support_acked; +/* + * ACPI 6.? Proposed Operating System Capabilities for modifying CPU + * present/enable. + */ +bool osc_sb_hotplug_enabled_support_acked; +bool osc_sb_hotplug_present_support_acked; + static u8 sb_uuid_str[] = "0811B06E-4A27-44F9-8D60-3CBBC22E7B48"; static void acpi_bus_osc_negotiate_platform_control(void) { @@ -346,6 +353,11 @@ static void acpi_bus_osc_negotiate_platform_control(void) if (!ghes_disable) capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_APEI_SUPPORT; + + capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_HOTPLUG_ENABLED_SUPPORT; + if (IS_ENABLED(CONFIG_ACPI_HOTPLUG_PRESENT_CPU)) + capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_HOTPLUG_PRESENT_SUPPORT; + if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle))) return; @@ -383,6 +395,10 @@ static void acpi_bus_osc_negotiate_platform_control(void) capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_NATIVE_USB4_SUPPORT; osc_cpc_flexible_adr_space_confirmed = capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_CPC_FLEXIBLE_ADR_SPACE; + osc_sb_hotplug_enabled_support_acked = + capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_HOTPLUG_ENABLED_SUPPORT; + osc_sb_hotplug_present_support_acked = + capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_HOTPLUG_PRESENT_SUPPORT; } kfree(context.ret.pointer); diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 3b4d048c49417303bf5c4451e94e6d9f3dd77e56..e3c80f3b3b573a77232bd89ec8cd59dd0812aa7d 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -313,7 +313,7 @@ int acpi_bus_init_power(struct acpi_device *device) return -EINVAL; device->power.state = ACPI_STATE_UNKNOWN; - if (!acpi_device_is_present(device)) { + if (!acpi_dev_ready_for_enumeration(device)) { device->flags.initialized = false; return -ENXIO; } diff --git a/drivers/acpi/device_sysfs.c b/drivers/acpi/device_sysfs.c index a34d8578b3da6cd159b1e10f035b9a7e96475d16..ec05e40e8a9692bebfdb4faab52bd996faedcf4e 100644 --- a/drivers/acpi/device_sysfs.c +++ b/drivers/acpi/device_sysfs.c @@ -141,7 +141,7 @@ static int create_pnp_modalias(const struct acpi_device *acpi_dev, char *modalia struct acpi_hardware_id *id; /* Avoid unnecessarily loading modules for non present devices. */ - if (!acpi_device_is_present(acpi_dev)) + if (!acpi_dev_ready_for_enumeration(acpi_dev)) return 0; /* diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 253afdcf3aa994f5f58486bff6d445381216cbde..ef5d0ae079038904cced550be1047d7ebfa1ce2f 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -106,7 +106,6 @@ int acpi_device_setup_files(struct acpi_device *dev); void acpi_device_remove_files(struct acpi_device *dev); void acpi_device_add_finalize(struct acpi_device *device); void acpi_free_pnp_ids(struct acpi_device_pnp *pnp); -bool acpi_device_is_present(const struct acpi_device *adev); bool acpi_device_is_battery(struct acpi_device *adev); bool acpi_device_is_first_physical_node(struct acpi_device *adev, const struct device *dev); diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index 99b4e33554355227e223a2f145859d6a2a8ad8b7..1562e980621692cbcb3cb0a376e6444048da9744 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -1419,7 +1419,7 @@ static bool acpi_fwnode_device_is_available(const struct fwnode_handle *fwnode) if (!is_acpi_device_node(fwnode)) return false; - return acpi_device_is_present(to_acpi_device_node(fwnode)); + return acpi_dev_ready_for_enumeration(to_acpi_device_node(fwnode)); } static const void * diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 083467e8141599e5bf9ea0f3aa769780531c5dbc..7b2f126c335caa29ff4b70ff4f8525272496a377 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -244,11 +244,85 @@ static int acpi_scan_try_to_offline(struct acpi_device *device) return 0; } +/** + * acpi_bus_trim_one() - Detach scan handlers and drivers from ACPI device + * objects. + * @adev: Root of the ACPI namespace scope to walk. + * @eject: Pointer to a bool that indicates if this was due to an + * eject-request. + * + * Must be called under acpi_scan_lock. + * If @eject points to true, clearing the device enumeration is deferred until + * acpi_bus_post_eject() is called. + */ +static int acpi_bus_trim_one(struct acpi_device *adev, void *eject) +{ + struct acpi_scan_handler *handler = adev->handler; + bool is_eject = *(bool *)eject; + + acpi_dev_for_each_child_reverse(adev, acpi_bus_trim_one, eject); + + adev->flags.match_driver = false; + if (handler) { + if (handler->detach) + handler->detach(adev); + } else { + device_release_driver(&adev->dev); + } + /* + * Most likely, the device is going away, so put it into D3cold before + * that. + */ + acpi_device_set_power(adev, ACPI_STATE_D3_COLD); + adev->flags.initialized = false; + + /* For eject this is deferred to acpi_bus_post_eject() */ + if (!is_eject) { + adev->handler = NULL; + acpi_device_clear_enumerated(adev); + } + + return 0; +} + +/** + * acpi_bus_trim - Detach scan handlers and drivers from ACPI device objects. + * @adev: Root of the ACPI namespace scope to walk. + * + * Must be called under acpi_scan_lock. + */ +void acpi_bus_trim(struct acpi_device *adev) +{ + bool eject = false; + + acpi_bus_trim_one(adev, &eject); +} +EXPORT_SYMBOL_GPL(acpi_bus_trim); + +static int acpi_bus_post_eject(struct acpi_device *adev, void *not_used) +{ + struct acpi_scan_handler *handler = adev->handler; + + acpi_dev_for_each_child_reverse(adev, acpi_bus_post_eject, NULL); + + if (handler) { + if (handler->post_eject) + handler->post_eject(adev); + + adev->handler = NULL; + } + + acpi_device_clear_enumerated(adev); + + return 0; +} + static int acpi_scan_hot_remove(struct acpi_device *device) { acpi_handle handle = device->handle; unsigned long long sta; acpi_status status; + bool eject = true; if (device->handler && device->handler->hotplug.demand_offline) { if (!acpi_scan_is_offline(device, true)) @@ -261,7 +335,7 @@ static int acpi_scan_hot_remove(struct acpi_device *device) acpi_handle_debug(handle, "Ejecting\n"); - acpi_bus_trim(device); + acpi_bus_trim_one(device, &eject); acpi_evaluate_lck(handle, 0); /* @@ -284,6 +358,8 @@ static int acpi_scan_hot_remove(struct acpi_device *device) } else if (sta & ACPI_STA_DEVICE_ENABLED) { acpi_handle_warn(handle, "Eject incomplete - status 0x%llx\n", sta); + } else { + acpi_bus_post_eject(device, NULL); } return 0; @@ -304,7 +380,7 @@ static int acpi_scan_device_check(struct acpi_device *adev) int error; acpi_bus_get_status(adev); - if (acpi_device_is_present(adev)) { + if (acpi_dev_ready_for_enumeration(adev)) { /* * This function is only called for device objects for which * matching scan handlers exist. The only situation in which @@ -338,7 +414,7 @@ static int acpi_scan_bus_check(struct acpi_device *adev, void *not_used) int error; acpi_bus_get_status(adev); - if (!acpi_device_is_present(adev)) { + if (!acpi_dev_ready_for_enumeration(adev)) { acpi_scan_device_not_enumerated(adev); return 0; } @@ -1913,11 +1989,6 @@ static bool acpi_device_should_be_hidden(acpi_handle handle) return true; } -bool acpi_device_is_present(const struct acpi_device *adev) -{ - return adev->status.present || adev->status.functional; -} - static bool acpi_scan_handler_matching(struct acpi_scan_handler *handler, const char *idstr, const struct acpi_device_id **matchid) @@ -2380,16 +2451,25 @@ EXPORT_SYMBOL_GPL(acpi_dev_clear_dependencies); * acpi_dev_ready_for_enumeration - Check if the ACPI device is ready for enumeration * @device: Pointer to the &struct acpi_device to check * - * Check if the device is present and has no unmet dependencies. + * Check if the device is functional or enabled and has no unmet dependencies. * - * Return true if the device is ready for enumeratino. Otherwise, return false. + * Return true if the device is ready for enumeration. Otherwise, return false. */ bool acpi_dev_ready_for_enumeration(const struct acpi_device *device) { if (device->flags.honor_deps && device->dep_unmet) return false; - return acpi_device_is_present(device); + /* + * ACPI 6.5's 6.3.7 "_STA (Device Status)" allows firmware to return + * (!present && functional) for certain types of devices that should be + * enumerated. Note that the enabled bit can't be sert until the present + * bit is set. + */ + if (device->status.present) + return device->status.enabled; + else + return device->status.functional; } EXPORT_SYMBOL_GPL(acpi_dev_ready_for_enumeration); @@ -2508,44 +2588,6 @@ int acpi_bus_scan(acpi_handle handle) } EXPORT_SYMBOL(acpi_bus_scan); -static int acpi_bus_trim_one(struct acpi_device *adev, void *not_used) -{ - struct acpi_scan_handler *handler = adev->handler; - - acpi_dev_for_each_child_reverse(adev, acpi_bus_trim_one, NULL); - - adev->flags.match_driver = false; - if (handler) { - if (handler->detach) - handler->detach(adev); - - adev->handler = NULL; - } else { - device_release_driver(&adev->dev); - } - /* - * Most likely, the device is going away, so put it into D3cold before - * that. - */ - acpi_device_set_power(adev, ACPI_STATE_D3_COLD); - adev->flags.initialized = false; - acpi_device_clear_enumerated(adev); - - return 0; -} - -/** - * acpi_bus_trim - Detach scan handlers and drivers from ACPI device objects. - * @adev: Root of the ACPI namespace scope to walk. - * - * Must be called under acpi_scan_lock. - */ -void acpi_bus_trim(struct acpi_device *adev) -{ - acpi_bus_trim_one(adev, NULL); -} -EXPORT_SYMBOL_GPL(acpi_bus_trim); - int acpi_bus_register_early_device(int type) { struct acpi_device *device = NULL; diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 47de0f140ba65e0b9cfbff8574a8adeec69e471d..a6e96a0a92b727c79576fea350e27b2c7bea11f4 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -95,6 +95,7 @@ void unregister_cpu(struct cpu *cpu) { int logical_cpu = cpu->dev.id; + set_cpu_enabled(logical_cpu, false); unregister_cpu_under_node(logical_cpu, cpu_to_node(logical_cpu)); device_unregister(&cpu->dev); @@ -273,6 +274,13 @@ static ssize_t print_cpus_offline(struct device *dev, } static DEVICE_ATTR(offline, 0444, print_cpus_offline, NULL); +static ssize_t print_cpus_enabled(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(cpu_enabled_mask)); +} +static DEVICE_ATTR(enabled, 0444, print_cpus_enabled, NULL); + static ssize_t print_cpus_isolated(struct device *dev, struct device_attribute *attr, char *buf) { @@ -413,6 +421,7 @@ int register_cpu(struct cpu *cpu, int num) register_cpu_under_node(num, cpu_to_node(num)); dev_pm_qos_expose_latency_limit(&cpu->dev, PM_QOS_RESUME_LATENCY_NO_CONSTRAINT); + set_cpu_enabled(num, true); return 0; } @@ -494,6 +503,7 @@ static struct attribute *cpu_root_attrs[] = { &cpu_attrs[2].attr.attr, &dev_attr_kernel_max.attr, &dev_attr_offline.attr, + &dev_attr_enabled.attr, &dev_attr_isolated.attr, #ifdef CONFIG_NO_HZ_FULL &dev_attr_nohz_full.attr, @@ -553,7 +563,11 @@ static void __init cpu_dev_register_generic(void) { int i, ret; - if (!IS_ENABLED(CONFIG_GENERIC_CPU_DEVICES)) + /* + * When ACPI is enabled, CPUs are registered via + * acpi_processor_get_info(). + */ + if (!IS_ENABLED(CONFIG_GENERIC_CPU_DEVICES) || !acpi_disabled) return; for_each_present_cpu(i) { diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 515cbb53dada19cf8539f50cc0fe43c987b7461c..d49d6e62f86927b5dfe895ba93c900dfc1e3a26c 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -2391,11 +2391,25 @@ gic_acpi_parse_madt_gicc(union acpi_subtable_headers *header, (struct acpi_madt_generic_interrupt *)header; u32 reg = readl_relaxed(acpi_data.dist_base + GICD_PIDR2) & GIC_PIDR2_ARCH_MASK; u32 size = reg == GIC_PIDR2_ARCH_GICv4 ? SZ_64K * 4 : SZ_64K * 2; + int cpu = get_cpu_for_acpi_id(gicc->uid); void __iomem *redist_base; if (!acpi_gicc_is_usable(gicc)) return 0; + /* + * Capable but disabled CPUs can be brought online later. What about + * the redistributor? ACPI doesn't want to say! + * Virtual hotplug systems can use the MADT's "always-on" GICR entries. + * Otherwise, prevent such CPUs from being brought online. + */ + if (!(gicc->flags & ACPI_MADT_ENABLED)) { + pr_warn_once("CPU %u's redistributor is inaccessible: this CPU can't be brought online\n", cpu); + set_cpu_present(cpu, false); + set_cpu_possible(cpu, false); + return 0; + } + redist_base = ioremap(gicc->gicr_base_address, size); if (!redist_base) return -ENOMEM; @@ -2441,21 +2455,15 @@ static int __init gic_acpi_match_gicc(union acpi_subtable_headers *header, /* * If GICC is enabled and has valid gicr base address, then it means - * GICR base is presented via GICC + * GICR base is presented via GICC. The redistributor is only known to + * be accessible if the GICC is marked as enabled. If this bit is not + * set, we'd need to add the redistributor at runtime, which isn't + * supported. */ - if (acpi_gicc_is_usable(gicc) && gicc->gicr_base_address) { + if (gicc->flags & ACPI_MADT_ENABLED && gicc->gicr_base_address) acpi_data.enabled_rdists++; - return 0; - } - /* - * It's perfectly valid firmware can pass disabled GICC entry, driver - * should not treat as errors, skip the entry instead of probe fail. - */ - if (!acpi_gicc_is_usable(gicc)) - return 0; - - return -ENODEV; + return 0; } static int __init gic_acpi_count_gicr_regions(void) diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 0b7eab0ef7d7f2de4c0fef6c5510da98111ca7a9..03f565245dfded760a819b12b33f326156b30ad6 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -127,6 +127,7 @@ struct acpi_scan_handler { bool (*match)(const char *idstr, const struct acpi_device_id **matchid); int (*attach)(struct acpi_device *dev, const struct acpi_device_id *id); void (*detach)(struct acpi_device *dev); + void (*post_eject)(struct acpi_device *dev); void (*bind)(struct device *phys_dev); void (*unbind)(struct device *phys_dev); struct acpi_hotplug_profile hotplug; diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index 8104c262bbae9b6d517841827b127ce22c584861..f805bc581003c5ac437a1f1a7a6c7bbffb48d8f2 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -1047,6 +1047,7 @@ struct acpi_madt_generic_interrupt { /* ACPI_MADT_ENABLED (1) Processor is usable if set */ #define ACPI_MADT_PERFORMANCE_IRQ_MODE (1<<1) /* 01: Performance Interrupt Mode */ #define ACPI_MADT_VGIC_IRQ_MODE (1<<2) /* 02: VGIC Maintenance Interrupt mode */ +#define ACPI_MADT_GICC_CPU_CAPABLE (1<<3) /* 03: CPU is online capable */ /* 12: Generic Distributor (ACPI 5.0 + ACPI 6.0 changes) */ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 29edd74072afd04e18ea7a272428d6037dd8d788..0c7223e2906d5843d7edf69efaacf1cc9d72bc0a 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -258,7 +258,7 @@ void acpi_table_print_madt_entry (struct acpi_subtable_header *madt); static inline bool acpi_gicc_is_usable(struct acpi_madt_generic_interrupt *gicc) { - return gicc->flags & ACPI_MADT_ENABLED; + return gicc->flags & (ACPI_MADT_ENABLED | ACPI_MADT_GICC_CPU_CAPABLE); } /* the following numa functions are architecture-dependent */ @@ -321,12 +321,10 @@ static inline int acpi_processor_evaluate_cst(acpi_handle handle, u32 cpu, } #endif -#ifdef CONFIG_ACPI_HOTPLUG_CPU /* Arch dependent functions for cpu hotplug support */ int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, int *pcpu); int acpi_unmap_cpu(int cpu); -#endif /* CONFIG_ACPI_HOTPLUG_CPU */ #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC int acpi_get_ioapic_id(acpi_handle handle, u32 gsi_base, u64 *phys_addr); @@ -581,12 +579,16 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); #define OSC_SB_NATIVE_USB4_SUPPORT 0x00040000 #define OSC_SB_PRM_SUPPORT 0x00200000 #define OSC_SB_FFH_OPR_SUPPORT 0x00400000 +#define OSC_SB_HOTPLUG_ENABLED_SUPPORT 0x00800000 +#define OSC_SB_HOTPLUG_PRESENT_SUPPORT 0x01000000 extern bool osc_sb_apei_support_acked; extern bool osc_pc_lpi_support_confirmed; extern bool osc_sb_native_usb4_support_confirmed; extern bool osc_sb_cppc2_support_acked; extern bool osc_cpc_flexible_adr_space_confirmed; +extern bool osc_sb_hotplug_enabled_support_acked; +extern bool osc_sb_hotplug_present_support_acked; /* USB4 Capabilities */ #define OSC_USB_USB3_TUNNELING 0x00000001 @@ -649,7 +651,7 @@ static inline u32 acpi_osc_ctx_get_cxl_control(struct acpi_osc_context *context) #define ACPI_GSB_ACCESS_ATTRIB_RAW_PROCESS 0x0000000F /* Enable _OST when all relevant hotplug operations are enabled */ -#if defined(CONFIG_ACPI_HOTPLUG_CPU) && \ +#if defined(CONFIG_ACPI_HOTPLUG_PRESENT_CPU) && \ defined(CONFIG_ACPI_HOTPLUG_MEMORY) && \ defined(CONFIG_ACPI_CONTAINER) #define ACPI_HOTPLUG_OST diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index f10fb87d49dbe38bf220857202c9a2ece8e9f31e..a29ee03f13ffb34dc5b7c98ef3a4976abf9dcfc8 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -92,6 +92,7 @@ static inline void set_nr_cpu_ids(unsigned int nr) * * cpu_possible_mask- has bit 'cpu' set iff cpu is populatable * cpu_present_mask - has bit 'cpu' set iff cpu is populated + * cpu_enabled_mask - has bit 'cpu' set iff cpu can be brought online * cpu_online_mask - has bit 'cpu' set iff cpu available to scheduler * cpu_active_mask - has bit 'cpu' set iff cpu available to migration * @@ -124,11 +125,13 @@ static inline void set_nr_cpu_ids(unsigned int nr) extern struct cpumask __cpu_possible_mask; extern struct cpumask __cpu_online_mask; +extern struct cpumask __cpu_enabled_mask; extern struct cpumask __cpu_present_mask; extern struct cpumask __cpu_active_mask; extern struct cpumask __cpu_dying_mask; #define cpu_possible_mask ((const struct cpumask *)&__cpu_possible_mask) #define cpu_online_mask ((const struct cpumask *)&__cpu_online_mask) +#define cpu_enabled_mask ((const struct cpumask *)&__cpu_enabled_mask) #define cpu_present_mask ((const struct cpumask *)&__cpu_present_mask) #define cpu_active_mask ((const struct cpumask *)&__cpu_active_mask) #define cpu_dying_mask ((const struct cpumask *)&__cpu_dying_mask) @@ -973,6 +976,7 @@ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS); #else #define for_each_possible_cpu(cpu) for_each_cpu((cpu), cpu_possible_mask) #define for_each_online_cpu(cpu) for_each_cpu((cpu), cpu_online_mask) +#define for_each_enabled_cpu(cpu) for_each_cpu((cpu), cpu_enabled_mask) #define for_each_present_cpu(cpu) for_each_cpu((cpu), cpu_present_mask) #endif @@ -995,6 +999,15 @@ set_cpu_possible(unsigned int cpu, bool possible) cpumask_clear_cpu(cpu, &__cpu_possible_mask); } +static inline void +set_cpu_enabled(unsigned int cpu, bool can_be_onlined) +{ + if (can_be_onlined) + cpumask_set_cpu(cpu, &__cpu_enabled_mask); + else + cpumask_clear_cpu(cpu, &__cpu_enabled_mask); +} + static inline void set_cpu_present(unsigned int cpu, bool present) { @@ -1074,6 +1087,7 @@ static __always_inline unsigned int num_online_cpus(void) return raw_atomic_read(&__num_online_cpus); } #define num_possible_cpus() cpumask_weight(cpu_possible_mask) +#define num_enabled_cpus() cpumask_weight(cpu_enabled_mask) #define num_present_cpus() cpumask_weight(cpu_present_mask) #define num_active_cpus() cpumask_weight(cpu_active_mask) @@ -1082,6 +1096,11 @@ static inline bool cpu_online(unsigned int cpu) return cpumask_test_cpu(cpu, cpu_online_mask); } +static inline bool cpu_enabled(unsigned int cpu) +{ + return cpumask_test_cpu(cpu, cpu_enabled_mask); +} + static inline bool cpu_possible(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_possible_mask); @@ -1106,6 +1125,7 @@ static inline bool cpu_dying(unsigned int cpu) #define num_online_cpus() 1U #define num_possible_cpus() 1U +#define num_enabled_cpus() 1U #define num_present_cpus() 1U #define num_active_cpus() 1U @@ -1119,6 +1139,11 @@ static inline bool cpu_possible(unsigned int cpu) return cpu == 0; } +static inline bool cpu_enabled(unsigned int cpu) +{ + return cpu == 0; +} + static inline bool cpu_present(unsigned int cpu) { return cpu == 0; diff --git a/kernel/cpu.c b/kernel/cpu.c index 72e0f5380bf68121d826b1191bc05d2b41e3d774..75c4e69c06cfdda175373b951ff5ee6a2b3b059c 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -3118,6 +3118,9 @@ EXPORT_SYMBOL(__cpu_possible_mask); struct cpumask __cpu_online_mask __read_mostly; EXPORT_SYMBOL(__cpu_online_mask); +struct cpumask __cpu_enabled_mask __read_mostly; +EXPORT_SYMBOL(__cpu_enabled_mask); + struct cpumask __cpu_present_mask __read_mostly; EXPORT_SYMBOL(__cpu_present_mask);