diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 6cb84c19a3bd9aa6edd3e9449084193c76233410..497ff1901d9d37e7f0b14924f63151f08556d77f 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1276,11 +1276,6 @@ static void kill_me_maybe(struct callback_head *cb) return; } - /* - * -EHWPOISON from memory_failure() means that it already sent SIGBUS - * to the current process with the proper error info, so no need to - * send SIGBUS here again. - */ if (ret == -EHWPOISON) return; diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 6535b444ad7b9a3212ddaeab1ee43ac73455d44c..e638fa4b74263ac2e34ed6d43c2663e1076eb29f 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -99,6 +99,20 @@ static inline bool is_hest_type_generic_v2(struct ghes *ghes) return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2; } +/* + * A platform may describe one error source for the handling of synchronous + * errors (e.g. MCE or SEA), or for handling asynchronous errors (e.g. SCI + * or External Interrupt). On x86, the HEST notifications are always + * asynchronous, so only SEA on ARM is delivered as a synchronous + * notification. + */ +static inline bool is_hest_sync_notify(struct ghes *ghes) +{ + u8 notify_type = ghes->generic->notify.type; + + return notify_type == ACPI_HEST_NOTIFY_SEA; +} + /* * This driver isn't really modular, however for the time being, * continuing to use module_param is the easiest way to remain @@ -427,28 +441,41 @@ static void ghes_clear_estatus(struct ghes *ghes, } /* - * Called as task_work before returning to user-space. - * Ensure any queued work has been done before we return to the context that - * triggered the notification. + * struct ghes_task_work - for synchronous RAS event + * + * @twork: callback_head for task work + * @pfn: page frame number of corrupted page + * @flags: work control flags + * + * Structure to pass task work to be handled before + * returning to user-space via task_work_add(). */ -static void ghes_kick_task_work(struct callback_head *head) +struct ghes_task_work { + struct callback_head twork; + u64 pfn; + int flags; +}; + +static void memory_failure_cb(struct callback_head *twork) { - struct acpi_hest_generic_status *estatus; - struct ghes_estatus_node *estatus_node; - u32 node_len; + struct ghes_task_work *twcb = container_of(twork, struct ghes_task_work, twork); + int ret; - estatus_node = container_of(head, struct ghes_estatus_node, task_work); - if (IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE)) - memory_failure_queue_kick(estatus_node->task_work_cpu); + ret = memory_failure(twcb->pfn, twcb->flags); + gen_pool_free(ghes_estatus_pool, (unsigned long)twcb, sizeof(*twcb)); - estatus = GHES_ESTATUS_FROM_NODE(estatus_node); - node_len = GHES_ESTATUS_NODE_LEN(cper_estatus_len(estatus)); - gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, node_len); + if (!ret || ret == -EHWPOISON || ret == -EOPNOTSUPP) + return; + + pr_err("%#llx: Sending SIGBUS to %s:%d due to hardware memory corruption\n", + twcb->pfn, current->comm, task_pid_nr(current)); + force_sig(SIGBUS); } static bool ghes_do_memory_failure(u64 physical_addr, int flags) { unsigned long pfn; + struct ghes_task_work *twcb; if (!IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE)) return false; @@ -461,12 +488,24 @@ static bool ghes_do_memory_failure(u64 physical_addr, int flags) return false; } + if (flags == MF_ACTION_REQUIRED && current->mm) { + twcb = (void *)gen_pool_alloc(ghes_estatus_pool, sizeof(*twcb)); + if (!twcb) + return false; + + twcb->pfn = pfn; + twcb->flags = flags; + init_task_work(&twcb->twork, memory_failure_cb); + task_work_add(current, &twcb->twork, TWA_RESUME); + return true; + } + memory_failure_queue(pfn, flags); return true; } static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, - int sev) + int sev, bool sync) { int flags = -1; int sec_sev = ghes_severity(gdata->error_severity); @@ -480,7 +519,7 @@ static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED)) flags = MF_SOFT_OFFLINE; if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE) - flags = 0; + flags = sync ? MF_ACTION_REQUIRED : 0; if (flags != -1) return ghes_do_memory_failure(mem_err->physical_addr, flags); @@ -488,9 +527,11 @@ static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, return false; } -static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, int sev) +static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, + int sev, bool sync) { struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata); + int flags = sync ? MF_ACTION_REQUIRED : 0; bool queued = false; int sec_sev, i; char *p; @@ -514,7 +555,7 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, int s * and don't filter out 'corrected' error here. */ if (is_cache && has_pa) { - queued = ghes_do_memory_failure(err_info->physical_fault_addr, 0); + queued = ghes_do_memory_failure(err_info->physical_fault_addr, flags); p += err_info->length; continue; } @@ -626,7 +667,7 @@ static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata, schedule_work(&entry->work); } -static bool ghes_do_proc(struct ghes *ghes, +static void ghes_do_proc(struct ghes *ghes, const struct acpi_hest_generic_status *estatus) { int sev, sec_sev; @@ -635,6 +676,7 @@ static bool ghes_do_proc(struct ghes *ghes, const guid_t *fru_id = &guid_null; char *fru_text = ""; bool queued = false; + bool sync = is_hest_sync_notify(ghes); sev = ghes_severity(estatus->error_severity); apei_estatus_for_each_section(estatus, gdata) { @@ -652,13 +694,13 @@ static bool ghes_do_proc(struct ghes *ghes, ghes_edac_report_mem_error(sev, mem_err); arch_apei_report_mem_error(sev, mem_err); - queued = ghes_handle_memory_failure(gdata, sev); + queued = ghes_handle_memory_failure(gdata, sev, sync); } else if (guid_equal(sec_type, &CPER_SEC_PCIE)) { ghes_handle_aer(gdata); } else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { - queued = ghes_handle_arm_hw_error(gdata, sev); + queued = ghes_handle_arm_hw_error(gdata, sev, sync); #ifdef CONFIG_ACPI_APEI_GHES_TS_CORE } else if (guid_equal(sec_type, &CPER_SEC_TS_CORE)) { @@ -681,7 +723,15 @@ static bool ghes_do_proc(struct ghes *ghes, #endif } - return queued; + /* + * If no memory failure work is queued for abnormal synchronous + * errors, do a force kill. + */ + if (sync && !queued) { + pr_err(HW_ERR GHES_PFX "%s:%d: hardware memory corruption (SIGBUS)\n", + current->comm, task_pid_nr(current)); + force_sig(SIGBUS); + } } static void __ghes_print_estatus(const char *pfx, @@ -977,9 +1027,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work) struct ghes_estatus_node *estatus_node; struct acpi_hest_generic *generic; struct acpi_hest_generic_status *estatus; - bool task_work_pending; u32 len, node_len; - int ret; llnode = llist_del_all(&ghes_estatus_llist); /* @@ -994,25 +1042,16 @@ static void ghes_proc_in_irq(struct irq_work *irq_work) estatus = GHES_ESTATUS_FROM_NODE(estatus_node); len = cper_estatus_len(estatus); node_len = GHES_ESTATUS_NODE_LEN(len); - task_work_pending = ghes_do_proc(estatus_node->ghes, estatus); + + ghes_do_proc(estatus_node->ghes, estatus); + if (!ghes_estatus_cached(estatus)) { generic = estatus_node->generic; if (ghes_print_estatus(NULL, generic, estatus)) ghes_estatus_cache_add(generic, estatus); } - - if (task_work_pending && current->mm) { - estatus_node->task_work.func = ghes_kick_task_work; - estatus_node->task_work_cpu = smp_processor_id(); - ret = task_work_add(current, &estatus_node->task_work, - TWA_RESUME); - if (ret) - estatus_node->task_work.func = NULL; - } - - if (!estatus_node->task_work.func) - gen_pool_free(ghes_estatus_pool, - (unsigned long)estatus_node, node_len); + gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, + node_len); llnode = next; } @@ -1073,7 +1112,6 @@ static int ghes_in_nmi_queue_one_entry(struct ghes *ghes, estatus_node->ghes = ghes; estatus_node->generic = ghes->generic; - estatus_node->task_work.func = NULL; estatus = GHES_ESTATUS_FROM_NODE(estatus_node); if (__ghes_read_estatus(estatus, buf_paddr, fixmap_idx, len)) { diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index 544d92789b3416f6124c156c67cc8125f12e941b..ac8125c28c5af1eab74523edb69b03f4c4078a4e 100644 --- a/include/acpi/ghes.h +++ b/include/acpi/ghes.h @@ -33,9 +33,6 @@ struct ghes_estatus_node { struct llist_node llnode; struct acpi_hest_generic *generic; struct ghes *ghes; - - int task_work_cpu; - struct callback_head task_work; }; struct ghes_estatus_cache { diff --git a/include/linux/mm.h b/include/linux/mm.h index 00bc6978391b74afbabb1e3905e48c3515eeac73..d9d8e68c21096dc00fee69f105f8f6d4e961d529 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3172,7 +3172,6 @@ enum mf_flags { }; extern int memory_failure(unsigned long pfn, int flags); extern void memory_failure_queue(unsigned long pfn, int flags); -extern void memory_failure_queue_kick(int cpu); extern int unpoison_memory(unsigned long pfn); extern int sysctl_memory_failure_early_kill; extern int sysctl_memory_failure_recovery; diff --git a/mm/memory-failure.c b/mm/memory-failure.c index a5ab0b2f213b3a3165f4b6d2e1230c8714337c14..3944859bb58730bab8b43ab00f6e026d93c45987 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1524,6 +1524,12 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags, * * Must run in process context (e.g. a work queue) with interrupts * enabled and no spinlocks hold. + * Return: + * 0 - success, + * -ENXIO - memory not managed by the kernel + * -EHWPOISON - the page was already poisoned, potentially + * kill process, + * other negative values - failure. */ int memory_failure(unsigned long pfn, int flags) { @@ -1792,19 +1798,6 @@ static void memory_failure_work_func(struct work_struct *work) } } -/* - * Process memory_failure work queued on the specified CPU. - * Used to avoid return-to-userspace racing with the memory_failure workqueue. - */ -void memory_failure_queue_kick(int cpu) -{ - struct memory_failure_cpu *mf_cpu; - - mf_cpu = &per_cpu(memory_failure_cpu, cpu); - cancel_work_sync(&mf_cpu->work); - memory_failure_work_func(&mf_cpu->work); -} - static int __init memory_failure_init(void) { struct memory_failure_cpu *mf_cpu;