diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 57d65656ab61d04523459ce0f97526d4c28a9e5d..16af021966df487b02a9b9b0d9ac260cb15092bd 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -150,6 +150,10 @@ * Processor MMIO stale data * vulnerabilities. */ +#define ARCH_CAP_MCU_ENUM BIT(16) /* + * Indicates the presence of microcode update + * feature enumeration and status information. + */ #define ARCH_CAP_FB_CLEAR BIT(17) /* * VERW clears CPU fill buffer * even on MDS_NO CPUs. @@ -853,6 +857,10 @@ #define MSR_IA32_APICBASE_BASE (0xfffff<<12) #define MSR_IA32_UCODE_WRITE 0x00000079 + +#define MSR_IA32_MCU_ENUMERATION 0x0000007b +#define MCU_STAGING BIT(4) + #define MSR_IA32_UCODE_REV 0x0000008b /* Intel SGX Launch Enclave Public Key Hash MSRs */ @@ -1159,6 +1167,8 @@ #define VMX_BASIC_MEM_TYPE_WB 6LLU #define VMX_BASIC_INOUT 0x0040000000000000LLU +#define MSR_IA32_MCU_STAGING_MBOX_ADDR 0x000007a5 + /* Resctrl MSRs: */ /* - Intel: */ #define MSR_IA32_L3_QOS_CFG 0xc81 diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 3235ba1e5b06d77f4f40962ef44888e62d30988c..1928381ebe7405c176c6c26139da0402bc35b59a 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -113,6 +113,8 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu); #define topology_ppin(cpu) (cpu_data(cpu).ppin) extern unsigned int __max_die_per_package; +extern struct cpumask __cpu_primary_thread_mask; +#define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask) #ifdef CONFIG_SMP #define topology_cluster_id(cpu) (per_cpu(cpu_l2c_id, cpu)) @@ -142,9 +144,6 @@ int topology_update_package_map(unsigned int apicid, unsigned int cpu); int topology_update_die_map(unsigned int dieid, unsigned int cpu); int topology_phys_to_logical_pkg(unsigned int pkg); -extern struct cpumask __cpu_primary_thread_mask; -#define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask) - /** * topology_is_primary_thread - Check whether CPU is the primary SMT thread * @cpu: CPU to check diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index f94af7776cb95666eb5688a901d443bf39e3578f..64e8f5321c8cc8e002487448936af6e8d1b81632 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -573,6 +573,17 @@ static int load_late_stop_cpus(bool is_safe) pr_err("You should switch to early loading, if possible.\n"); } + /* + * Pre-load the microcode image into a staging device. This + * process is preemptible and does not require stopping CPUs. + * Successful staging simplifies the subsequent late-loading + * process, reducing rendezvous time. + * + * Even if the transfer fails, the update will proceed as usual. + */ + if (microcode_ops->use_staging) + microcode_ops->stage_microcode(); + atomic_set(&late_cpus_in, num_online_cpus()); atomic_set(&offline_in_nmi, 0); loops_per_usec = loops_per_jiffy / (TICK_NSEC / 1000); diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 973784e337a44059091fad806712cacdb044862d..f27312b4bbb14fb3c87af46b221c80df22205c2d 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -13,12 +13,15 @@ #define pr_fmt(fmt) "microcode: " fmt #include #include +#include #include #include #include +#include #include #include #include +#include #include #include @@ -33,6 +36,38 @@ static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin"; #define UCODE_BSP_LOADED ((struct microcode_intel *)0x1UL) +/* Defines for the microcode staging mailbox interface */ +#define MBOX_REG_NUM 4 +#define MBOX_REG_SIZE sizeof(u32) + +#define MBOX_CONTROL_OFFSET 0x0 +#define MBOX_STATUS_OFFSET 0x4 +#define MBOX_WRDATA_OFFSET 0x8 +#define MBOX_RDDATA_OFFSET 0xc + +#define MASK_MBOX_CTRL_ABORT BIT(0) +#define MASK_MBOX_CTRL_GO BIT(31) + +#define MASK_MBOX_STATUS_ERROR BIT(2) +#define MASK_MBOX_STATUS_READY BIT(31) + +#define MASK_MBOX_RESP_SUCCESS BIT(0) +#define MASK_MBOX_RESP_PROGRESS BIT(1) +#define MASK_MBOX_RESP_ERROR BIT(2) + +#define MBOX_CMD_LOAD 0x3 +#define MBOX_OBJ_STAGING 0xb +#define MBOX_HEADER(size) ((PCI_VENDOR_ID_INTEL) | \ + (MBOX_OBJ_STAGING << 16) | \ + ((u64)((size) / sizeof(u32)) << 32)) + +/* The size of each mailbox header */ +#define MBOX_HEADER_SIZE sizeof(u64) +/* The size of staging hardware response */ +#define MBOX_RESPONSE_SIZE sizeof(u64) + +#define MBOX_XACTION_TIMEOUT_MS (10 * MSEC_PER_SEC) + /* Current microcode patch used in early patching on the APs. */ static struct microcode_intel *ucode_patch_va __read_mostly; static struct microcode_intel *ucode_patch_late __read_mostly; @@ -54,6 +89,23 @@ struct extended_sigtable { struct extended_signature sigs[]; }; +/** + * struct staging_state - Track the current staging process state + * + * @mmio_base: MMIO base address for staging + * @ucode_len: Total size of the microcode image + * @chunk_size: Size of each data piece + * @bytes_sent: Total bytes transmitted so far + * @offset: Current offset in the microcode image + */ +struct staging_state { + void __iomem *mmio_base; + unsigned int ucode_len; + unsigned int chunk_size; + unsigned int bytes_sent; + unsigned int offset; +}; + #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) #define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) #define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) @@ -299,6 +351,298 @@ static __init struct microcode_intel *scan_microcode(void *data, size_t size, return size ? NULL : patch; } +static inline u32 read_mbox_dword(void __iomem *mmio_base) +{ + u32 dword = readl(mmio_base + MBOX_RDDATA_OFFSET); + + /* Acknowledge read completion to the staging hardware */ + writel(0, mmio_base + MBOX_RDDATA_OFFSET); + return dword; +} + +static inline void write_mbox_dword(void __iomem *mmio_base, u32 dword) +{ + writel(dword, mmio_base + MBOX_WRDATA_OFFSET); +} + +static inline u64 read_mbox_header(void __iomem *mmio_base) +{ + u32 high, low; + + low = read_mbox_dword(mmio_base); + high = read_mbox_dword(mmio_base); + + return ((u64)high << 32) | low; +} + +static inline void write_mbox_header(void __iomem *mmio_base, u64 value) +{ + write_mbox_dword(mmio_base, value); + write_mbox_dword(mmio_base, value >> 32); +} + +static void write_mbox_data(void __iomem *mmio_base, u32 *chunk, unsigned int chunk_bytes) +{ + int i; + + /* + * The MMIO space is mapped as Uncached (UC). Each write arrives + * at the device as an individual transaction in program order. + * The device can then reassemble the sequence accordingly. + */ + for (i = 0; i < chunk_bytes / sizeof(u32); i++) + write_mbox_dword(mmio_base, chunk[i]); +} + +/* + * Prepare for a new microcode transfer: reset hardware and record the + * image size. + */ +static void init_stage(struct staging_state *ss) +{ + ss->ucode_len = get_totalsize(&ucode_patch_late->hdr); + + /* + * Abort any ongoing process, effectively resetting the device. + * Unlike regular mailbox data processing requests, this + * operation does not require a status check. + */ + writel(MASK_MBOX_CTRL_ABORT, ss->mmio_base + MBOX_CONTROL_OFFSET); +} + +/* + * Update the chunk size and decide whether another chunk can be sent. + * This accounts for remaining data and retry limits. + */ +static bool can_send_next_chunk(struct staging_state *ss, int *err) +{ + /* A page size or remaining bytes if this is the final chunk */ + ss->chunk_size = min(PAGE_SIZE, ss->ucode_len - ss->offset); + + /* + * Each microcode image is divided into chunks, each at most + * one page size. A 10-chunk image would typically require 10 + * transactions. + * + * However, the hardware managing the mailbox has limited + * resources and may not cache the entire image, potentially + * requesting the same chunk multiple times. + * + * To tolerate this behavior, allow up to twice the expected + * number of transactions (i.e., a 10-chunk image can take up to + * 20 attempts). + * + * If the number of attempts exceeds this limit, treat it as + * exceeding the maximum allowed transfer size. + */ + if (ss->bytes_sent + ss->chunk_size > ss->ucode_len * 2) { + *err = -EMSGSIZE; + return false; + } + + *err = 0; + return true; +} + +/* + * The hardware indicates completion by returning a sentinel end offset. + */ +static inline bool is_end_offset(u32 offset) +{ + return offset == UINT_MAX; +} + +/* + * Determine whether staging is complete: either the hardware signaled + * the end offset, or no more transactions are permitted (retry limit + * reached). + */ +static inline bool staging_is_complete(struct staging_state *ss, int *err) +{ + return is_end_offset(ss->offset) || !can_send_next_chunk(ss, err); +} + +/* + * Wait for the hardware to complete a transaction. + * Return 0 on success, or an error code on failure. + */ +static int wait_for_transaction(struct staging_state *ss) +{ + u32 timeout, status; + + /* Allow time for hardware to complete the operation: */ + for (timeout = 0; timeout < MBOX_XACTION_TIMEOUT_MS; timeout++) { + msleep(1); + + status = readl(ss->mmio_base + MBOX_STATUS_OFFSET); + /* Break out early if the hardware is ready: */ + if (status & MASK_MBOX_STATUS_READY) + break; + } + + /* Check for explicit error response */ + if (status & MASK_MBOX_STATUS_ERROR) + return -EIO; + + /* + * Hardware has neither responded to the action nor signaled any + * error. Treat this as a timeout. + */ + if (!(status & MASK_MBOX_STATUS_READY)) + return -ETIMEDOUT; + + return 0; +} + +/* + * Transmit a chunk of the microcode image to the hardware. + * Return 0 on success, or an error code on failure. + */ +static int send_data_chunk(struct staging_state *ss, void *ucode_ptr) +{ + u32 *src_chunk = ucode_ptr + ss->offset; + u16 mbox_size; + + /* + * Write a 'request' mailbox object in this order: + * 1. Mailbox header includes total size + * 2. Command header specifies the load operation + * 3. Data section contains a microcode chunk + * + * Thus, the mailbox size is two headers plus the chunk size. + */ + mbox_size = MBOX_HEADER_SIZE * 2 + ss->chunk_size; + write_mbox_header(ss->mmio_base, MBOX_HEADER(mbox_size)); + write_mbox_header(ss->mmio_base, MBOX_CMD_LOAD); + write_mbox_data(ss->mmio_base, src_chunk, ss->chunk_size); + ss->bytes_sent += ss->chunk_size; + + /* Notify the hardware that the mailbox is ready for processing. */ + writel(MASK_MBOX_CTRL_GO, ss->mmio_base + MBOX_CONTROL_OFFSET); + + return wait_for_transaction(ss); +} + +/* + * Retrieve the next offset from the hardware response. + * Return 0 on success, or an error code on failure. + */ +static int fetch_next_offset(struct staging_state *ss) +{ + const u64 expected_header = MBOX_HEADER(MBOX_HEADER_SIZE + MBOX_RESPONSE_SIZE); + u32 offset, status; + u64 header; + + /* + * The 'response' mailbox returns three fields, in order: + * 1. Header + * 2. Next offset in the microcode image + * 3. Status flags + */ + header = read_mbox_header(ss->mmio_base); + offset = read_mbox_dword(ss->mmio_base); + status = read_mbox_dword(ss->mmio_base); + + /* All valid responses must start with the expected header. */ + if (header != expected_header) { + pr_err_once("staging: invalid response header (0x%llx)\n", header); + return -EBADR; + } + + /* + * Verify the offset: If not at the end marker, it must not + * exceed the microcode image length. + */ + if (!is_end_offset(offset) && offset > ss->ucode_len) { + pr_err_once("staging: invalid offset (%u) past the image end (%u)\n", + offset, ss->ucode_len); + return -EINVAL; + } + + /* Hardware may report errors explicitly in the status field */ + if (status & MASK_MBOX_RESP_ERROR) + return -EPROTO; + + ss->offset = offset; + return 0; +} + +/* + * Handle the staging process using the mailbox MMIO interface. The + * microcode image is transferred in chunks until completion. + * Return 0 on success or an error code on failure. + */ +static int do_stage(u64 mmio_pa) +{ + struct staging_state ss = {}; + int err; + + ss.mmio_base = ioremap(mmio_pa, MBOX_REG_NUM * MBOX_REG_SIZE); + if (WARN_ON_ONCE(!ss.mmio_base)) + return -EADDRNOTAVAIL; + + init_stage(&ss); + + /* Perform the staging process while within the retry limit */ + while (!staging_is_complete(&ss, &err)) { + /* Send a chunk of microcode each time: */ + err = send_data_chunk(&ss, ucode_patch_late); + if (err) + break; + /* + * Then, ask the hardware which piece of the image it + * needs next. The same piece may be sent more than once. + */ + err = fetch_next_offset(&ss); + if (err) + break; + } + + iounmap(ss.mmio_base); + + return err; +} + +static void stage_microcode(void) +{ + unsigned int pkg_id = UINT_MAX; + int cpu, err; + u64 mmio_pa; + + if (!IS_ALIGNED(get_totalsize(&ucode_patch_late->hdr), sizeof(u32))) { + pr_err("Microcode image 32-bit misaligned (0x%x), staging failed.\n", + get_totalsize(&ucode_patch_late->hdr)); + return; + } + + lockdep_assert_cpus_held(); + + /* + * The MMIO address is unique per package, and all the SMT + * primary threads are online here. Find each MMIO space by + * their package IDs to avoid duplicate staging. + */ + for_each_cpu(cpu, cpu_primary_thread_mask) { + if (topology_logical_package_id(cpu) == pkg_id) + continue; + + pkg_id = topology_logical_package_id(cpu); + + err = rdmsrl_on_cpu(cpu, MSR_IA32_MCU_STAGING_MBOX_ADDR, &mmio_pa); + if (WARN_ON_ONCE(err)) + return; + + err = do_stage(mmio_pa); + if (err) { + pr_err("Error: staging failed (%d) for CPU%d at package %u.\n", + err, cpu, pkg_id); + return; + } + } + + pr_info("Staging of patch revision 0x%x succeeded.\n", ucode_patch_late->hdr.rev); +} + static enum ucode_state __apply_microcode(struct ucode_cpu_info *uci, struct microcode_intel *mc, u32 *cur_rev) @@ -634,6 +978,7 @@ static struct microcode_ops microcode_intel_ops = { .collect_cpu_info = collect_cpu_info, .apply_microcode = apply_microcode_late, .finalize_late_load = finalize_late_load, + .stage_microcode = stage_microcode, .use_nmi = IS_ENABLED(CONFIG_X86_64), }; @@ -645,6 +990,18 @@ static __init void calc_llc_size_per_core(struct cpuinfo_x86 *c) llc_size_per_core = (unsigned int)llc_size; } +static __init bool staging_available(void) +{ + u64 val; + + val = x86_read_arch_cap_msr(); + if (!(val & ARCH_CAP_MCU_ENUM)) + return false; + + rdmsrl(MSR_IA32_MCU_ENUMERATION, val); + return !!(val & MCU_STAGING); +} + struct microcode_ops * __init init_intel_microcode(void) { struct cpuinfo_x86 *c = &boot_cpu_data; @@ -655,6 +1012,11 @@ struct microcode_ops * __init init_intel_microcode(void) return NULL; } + if (staging_available()) { + microcode_intel_ops.use_staging = true; + pr_info("Enabled staging feature.\n"); + } + calc_llc_size_per_core(c); return µcode_intel_ops; diff --git a/arch/x86/kernel/cpu/microcode/internal.h b/arch/x86/kernel/cpu/microcode/internal.h index 2ab3102c65de6c40de997d47415a5df90d2b9f20..20d880e8e0d92d9c48dbb69c3e866ce5c73dc862 100644 --- a/arch/x86/kernel/cpu/microcode/internal.h +++ b/arch/x86/kernel/cpu/microcode/internal.h @@ -31,10 +31,12 @@ struct microcode_ops { * See also the "Synchronization" section in microcode_core.c. */ enum ucode_state (*apply_microcode)(int cpu); + void (*stage_microcode)(void); int (*collect_cpu_info)(int cpu, struct cpu_signature *csig); void (*finalize_late_load)(int result); unsigned int nmi_safe : 1, - use_nmi : 1; + use_nmi : 1, + use_staging : 1; }; struct early_load_data { diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 0270925fe013bcf9bbe76e589c93a35a6ddbfde5..c3177d97a76ebf04f23500efd923bc05557d757e 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -28,6 +28,9 @@ unsigned int __max_die_per_package __read_mostly = 1; EXPORT_SYMBOL(__max_die_per_package); +/* CPUs which are the primary SMT threads */ +struct cpumask __cpu_primary_thread_mask __read_mostly; + #ifdef CONFIG_SMP /* * Check if given CPUID extended topology "leaf" is implemented diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 11a7757552a94af161f85fa0b8750cbb2971c0c0..39d3056b09d210622586e3f813d3618d6ebbd617 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -105,9 +105,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_die_map); DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info); -/* CPUs which are the primary SMT threads */ -struct cpumask __cpu_primary_thread_mask __read_mostly; - /* Representing CPUs for which sibling maps can be computed */ static cpumask_var_t cpu_sibling_setup_mask;