From 5561b26a4a8ae70e188e44f380ee06a5687117d1 Mon Sep 17 00:00:00 2001
From: Jiaqi Yan
Date: Tue, 28 Nov 2023 20:51:42 +0800
Subject: [PATCH 1/7] mm/hwpoison: introduce copy_mc_highpage

mainline inclusion
from mainline-v6.4-rc1
commit 6efc7afb5cc98488410d44695685d003d832534d
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8K5CO
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=6efc7afb5cc98488410d44695685d003d832534d

--------------------------------

Similar to how copy_mc_user_highpage is implemented for
copy_user_highpage on #MC supported architectures, introduce the #MC
handled version of copy_highpage.

This helper has immediate usage when khugepaged wants to copy
file-backed memory pages and tolerate #MC.

Link: https://lkml.kernel.org/r/20230329151121.949896-3-jiaqiyan@google.com
Signed-off-by: Jiaqi Yan
Reviewed-by: Yang Shi
Cc: David Stevens
Cc: Hugh Dickins
Cc: Kefeng Wang
Cc: "Kirill A. Shutemov"
Cc: Miaohe Lin
Cc: Naoya Horiguchi
Cc: Oscar Salvador
Cc: Tong Tiangen
Cc: Tony Luck
Signed-off-by: Andrew Morton
(wupeng: backport copy_mc_highpage for this patch)
Signed-off-by: Ma Wupeng
---
 include/linux/highmem.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index cc5fe6c620ad..366198ebba71 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -396,4 +396,32 @@ static inline void memcpy_to_page(struct page *page, size_t offset,
 	kunmap_atomic(to);
 }
 
+#ifdef copy_mc_to_kernel
+/*
+ * If architecture supports machine check exception handling, define the
+ * #MC versions of copy_user_highpage and copy_highpage. They copy a memory
+ * page with #MC in source page (@from) handled, and return the number
+ * of bytes not copied if there was a #MC, otherwise 0 for success.
+ */
+static inline int copy_mc_highpage(struct page *to, struct page *from)
+{
+	char *vfrom, *vto;
+	int ret;
+
+	vfrom = kmap_atomic(from);
+	vto = kmap_atomic(to);
+	ret = copy_mc_to_kernel(vto, vfrom, PAGE_SIZE);
+	kunmap_atomic(vto);
+	kunmap_atomic(vfrom);
+
+	return ret;
+}
+#else
+static inline int copy_mc_highpage(struct page *to, struct page *from)
+{
+	copy_highpage(to, from);
+	return 0;
+}
+#endif
+
 #endif /* _LINUX_HIGHMEM_H */
-- 
Gitee
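
Note: the error contract above is that copy_mc_highpage() returns 0 on
success, or the number of bytes left uncopied when a #MC in the source
page was handled. A minimal caller-side sketch of honoring that contract
(replicate_page() and its error policy are illustrative, not part of the
series):

    #include <linux/highmem.h>

    /* Hypothetical caller: duplicate @src into @dst, tolerating #MC. */
    static int replicate_page(struct page *dst, struct page *src)
    {
        int residue = copy_mc_highpage(dst, src);

        if (residue) {
            /* A #MC fired; 'residue' bytes were never copied. */
            pr_warn("page copy aborted, %d bytes uncopied\n", residue);
            return -EHWPOISON;
        }
        return 0; /* full PAGE_SIZE copied */
    }
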
From c049bcc11bdd84ccfb179e6f772a75796ccdaf18 Mon Sep 17 00:00:00 2001
From: Ma Wupeng
Date: Tue, 28 Nov 2023 20:51:43 +0800
Subject: [PATCH 2/7] mm: introduce copy_mc_highpages

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8K5CO

--------------------------------

Commit 6efc7afb5cc9 ("mm/hwpoison: introduce copy_mc_highpage") brought
machine-check-safe support for single-page copies via copy_mc_highpage;
huge page copies, however, need a multi-page variant. Introduce
copy_mc_highpages to support machine-check-safe huge page copies.

Signed-off-by: Ma Wupeng
---
 include/linux/highmem.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 366198ebba71..f86f8898941b 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -416,12 +416,33 @@ static inline int copy_mc_highpage(struct page *to, struct page *from)
 
 	return ret;
 }
+
+static inline int copy_mc_highpages(struct page *to, struct page *from, int nr_pages)
+{
+	int ret = 0;
+	int i;
+
+	for (i = 0; i < nr_pages; i++) {
+		cond_resched();
+		ret = copy_mc_highpage(to + i, from + i);
+		if (ret)
+			return ret;
+	}
+
+	return ret;
+}
 #else
 static inline int copy_mc_highpage(struct page *to, struct page *from)
 {
 	copy_highpage(to, from);
 	return 0;
 }
+
+static inline int copy_mc_highpages(struct page *to, struct page *from, int nr_pages)
+{
+	copy_highpages(to, from, nr_pages);
+	return 0;
+}
 #endif
 
 #endif /* _LINUX_HIGHMEM_H */
-- 
Gitee
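
Note: for compound pages the new helper simply iterates and stops at the
first failing subpage. A sketch of the intended use on a THP
(replicate_thp() is hypothetical, not part of the series):

    #include <linux/highmem.h>
    #include <linux/huge_mm.h>

    /* Hypothetical helper: machine-check-safe copy of a THP. */
    static int replicate_thp(struct page *dst, struct page *src)
    {
        /* 0 on success, or the residue from the subpage that hit a #MC. */
        return copy_mc_highpages(dst, src, thp_nr_pages(src));
    }
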
From 42b0cea083080fa21d11dd40516e969d0c5b47cc Mon Sep 17 00:00:00 2001
From: Ma Wupeng
Date: Tue, 28 Nov 2023 20:51:44 +0800
Subject: [PATCH 3/7] mm: arm64: Add migrate_page_copy_mc()

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8K5CO

--------------------------------

During page migration, the page is copied in kernel space. If the source
page contains an uncorrectable error (UCE), the copy triggers a kernel
panic. To solve this, catch the error machine-check safely by replacing
copy_page with copy_mc_to_kernel. SIGBUS is sent to the user task if a
UCE is consumed on this path, avoiding the kernel panic.

Add a new parameter to copy_huge_page to support machine-check-safe
copy. If mc is set, copy_mc_highpages is called instead of
copy_highpages during the memory copy.

Signed-off-by: Ma Wupeng
---
 mm/migrate.c | 63 ++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 51 insertions(+), 12 deletions(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index 646918708922..7f036a63473b 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -548,26 +548,35 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
  * arithmetic will work across the entire page.  We need something more
  * specialized.
  */
-static void __copy_gigantic_page(struct page *dst, struct page *src,
-				int nr_pages)
+static int __copy_gigantic_page(struct page *dst, struct page *src,
+				int nr_pages, bool mc)
 {
-	int i;
+	int i, ret = 0;
 	struct page *dst_base = dst;
 	struct page *src_base = src;
 
 	for (i = 0; i < nr_pages; ) {
 		cond_resched();
-		copy_highpage(dst, src);
+		if (!mc) {
+			copy_highpage(dst, src);
+		} else {
+			ret = copy_mc_highpage(dst, src);
+			if (ret)
+				return ret;
+		}
 
 		i++;
 		dst = mem_map_next(dst, dst_base, i);
 		src = mem_map_next(src, src_base, i);
 	}
+
+	return ret;
 }
 
-static void copy_huge_page(struct page *dst, struct page *src)
+static int copy_huge_page(struct page *dst, struct page *src, bool mc)
 {
 	int nr_pages;
+	int ret = 0;
 
 	if (PageHuge(src)) {
 		/* hugetlbfs page */
@@ -575,8 +584,8 @@ static void copy_huge_page(struct page *dst, struct page *src)
 		nr_pages = pages_per_huge_page(h);
 
 		if (unlikely(nr_pages > MAX_ORDER_NR_PAGES)) {
-			__copy_gigantic_page(dst, src, nr_pages);
-			return;
+			ret = __copy_gigantic_page(dst, src, nr_pages, mc);
+			return ret;
 		}
 	} else {
 		/* thp page */
@@ -584,7 +593,12 @@ static void copy_huge_page(struct page *dst, struct page *src)
 		nr_pages = thp_nr_pages(src);
 	}
 
-	copy_highpages(dst, src, nr_pages);
+	if (!mc)
+		copy_highpages(dst, src, nr_pages);
+	else
+		ret = copy_mc_highpages(dst, src, nr_pages);
+
+	return ret;
 }
 
 /*
@@ -666,7 +680,7 @@ EXPORT_SYMBOL(migrate_page_states);
 void migrate_page_copy(struct page *newpage, struct page *page)
 {
 	if (PageHuge(page) || PageTransHuge(page))
-		copy_huge_page(newpage, page);
+		copy_huge_page(newpage, page, false);
 	else
 		copy_highpage(newpage, page);
 
@@ -674,6 +688,23 @@ void migrate_page_copy(struct page *newpage, struct page *page)
 }
 EXPORT_SYMBOL(migrate_page_copy);
 
+int migrate_page_copy_mc(struct page *newpage, struct page *page)
+{
+	int rc;
+
+	if (PageHuge(page) || PageTransHuge(page))
+		rc = copy_huge_page(newpage, page, true);
+	else
+		rc = copy_mc_highpage(newpage, page);
+
+	if (rc)
+		return rc;
+
+	migrate_page_states(newpage, page);
+
+	return 0;
+}
+
 /************************************************************
  *                    Migration functions
  ***********************************************************/
@@ -691,10 +722,18 @@ int migrate_page_extra(struct address_space *mapping,
 	if (rc != MIGRATEPAGE_SUCCESS)
 		return rc;
 
-	if (mode != MIGRATE_SYNC_NO_COPY)
-		migrate_page_copy(newpage, page);
-	else
+	if (mode != MIGRATE_SYNC_NO_COPY) {
+		if (IS_ENABLED(CONFIG_ARCH_HAS_COPY_MC) &&
+		    current->flags & PF_MCS) {
+			rc = migrate_page_copy_mc(newpage, page);
+			if (rc)
+				return -EFAULT;
+		} else {
+			migrate_page_copy(newpage, page);
+		}
+	} else {
 		migrate_page_states(newpage, page);
+	}
 
 	return MIGRATEPAGE_SUCCESS;
 }
-- 
Gitee
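
Note: migrate_page_extra() only takes the machine-check-safe branch when
the task has opted in via PF_MCS (renamed from PF_COREDUMP_MCS by the
next patch). A sketch of that opt-in bracket, assuming this tree's
migrate_page_extra() signature (migrate_one_page_mc() is hypothetical):

    #include <linux/migrate.h>
    #include <linux/sched.h>

    /* Hypothetical wrapper: migrate one page, tolerating #MC in the copy. */
    static int migrate_one_page_mc(struct address_space *mapping,
                                   struct page *newpage, struct page *page)
    {
        int rc;

        current->flags |= PF_MCS;
        rc = migrate_page_extra(mapping, newpage, page, MIGRATE_SYNC, 0);
        current->flags &= ~PF_MCS;

        return rc; /* -EFAULT if a UCE was consumed during the copy */
    }
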
From 7e611632f2e11dc0cd74792cbf1d265e59c83622 Mon Sep 17 00:00:00 2001
From: Ma Wupeng
Date: Tue, 28 Nov 2023 20:51:45 +0800
Subject: [PATCH 4/7] mm: Update PF_COREDUMP_MCS to PF_MCS

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8K5CO

--------------------------------

Rename PF_COREDUMP_MCS to PF_MCS to indicate machine-check-safe support
for specific functions, not just coredump.

Signed-off-by: Ma Wupeng
---
 fs/coredump.c         | 4 ++--
 include/linux/sched.h | 2 +-
 lib/iov_iter.c        | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/coredump.c b/fs/coredump.c
index 6c0d0a42fda9..535c3fdc1598 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -907,9 +907,9 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start,
 		if (page) {
 			void *kaddr = kmap(page);
 
-			current->flags |= PF_COREDUMP_MCS;
+			current->flags |= PF_MCS;
 			stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
-			current->flags &= ~PF_COREDUMP_MCS;
+			current->flags &= ~PF_MCS;
 			kunmap(page);
 			put_page(page);
 		} else {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d39427f8044d..5652309e99ea 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1648,7 +1648,7 @@ extern struct pid *cad_pid;
 #define PF_KTHREAD		0x00200000	/* I am a kernel thread */
 #define PF_RANDOMIZE		0x00400000	/* Randomize virtual address space */
 #define PF_SWAPWRITE		0x00800000	/* Allowed to write to swap */
-#define PF_COREDUMP_MCS		0x01000000	/* Task coredump support machine check safe */
+#define PF_MCS			0x01000000	/* Machine check safe support for specific functions (e.g. coredump) for this task */
 #define PF_NO_SETAFFINITY	0x04000000	/* Userland is not allowed to meddle with cpus_mask */
 #define PF_MCE_EARLY		0x08000000	/* Early kill for mce process policy */
 #define PF_MEMALLOC_NOCMA	0x10000000	/* All allocation request will have _GFP_MOVABLE cleared */
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 0a4b7aa47097..ce8c225237f5 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -752,7 +752,7 @@ EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
 
 static void *memcpy_iter(void *to, const void *from, __kernel_size_t size)
 {
-	if (IS_ENABLED(CONFIG_ARCH_HAS_COPY_MC) && current->flags & PF_COREDUMP_MCS)
+	if (IS_ENABLED(CONFIG_ARCH_HAS_COPY_MC) && current->flags & PF_MCS)
 		return (void *)copy_mc_to_kernel(to, from, size);
 	else
 		return memcpy(to, from, size);
-- 
Gitee

From 38cea2a0507330969c60f301ca2b45e57f5b9c9d Mon Sep 17 00:00:00 2001
From: Ma Wupeng
Date: Tue, 28 Nov 2023 20:51:46 +0800
Subject: [PATCH 5/7] mm/hwpoison: Export symbol soft_offline_page

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8K5CO

--------------------------------

Export symbol soft_offline_page so that other modules can use it.

soft_offline_page is used to isolate pages. During isolation, migration
is issued if the page is mapped by a user task, and the page is finally
removed from the buddy system.

Signed-off-by: Ma Wupeng
---
 mm/memory-failure.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 406895b98422..b87ba13fd56b 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -2184,3 +2184,4 @@ int soft_offline_page(unsigned long pfn, int flags)
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(soft_offline_page);
-- 
Gitee
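
Note: with the export in place, an out-of-tree module can isolate a
suspect pfn directly; a minimal sketch (isolate_suspect_pfn() is
hypothetical, not part of the series):

    #include <linux/mm.h>

    /* Hypothetical module helper: soft-offline one pfn. */
    static int isolate_suspect_pfn(unsigned long pfn)
    {
        /* flags == 0: the caller holds no extra page reference. */
        return soft_offline_page(pfn, 0);
    }
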
From 286afc8ead5f7abcb8e70b3ddb227ec1920d9baf Mon Sep 17 00:00:00 2001
From: Ma Wupeng
Date: Tue, 28 Nov 2023 20:51:47 +0800
Subject: [PATCH 6/7] mm: page_eject: Introduce page ejection

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8K5CO

--------------------------------

Page ejection is achieved via soft_offline_page and unpoison_memory. A
linked list is maintained to log the pfns offlined by this module; only
pfns present in the list are allowed to go back online.

The following sysfs interfaces are introduced to online/offline a page
via this module. Since an internal linked list records all offlined
pages, remove_page is introduced to drop a page from that list:

- /sys/kernel/page_eject/online_page
- /sys/kernel/page_eject/offline_page
- /sys/kernel/page_eject/remove_page

Signed-off-by: Ma Wupeng
---
 drivers/ras/Kconfig      |  10 ++
 drivers/ras/Makefile     |   1 +
 drivers/ras/page_eject.c | 247 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 258 insertions(+)
 create mode 100644 drivers/ras/page_eject.c

diff --git a/drivers/ras/Kconfig b/drivers/ras/Kconfig
index c2a236f2e846..0a98685b5075 100644
--- a/drivers/ras/Kconfig
+++ b/drivers/ras/Kconfig
@@ -29,6 +29,16 @@ menuconfig RAS
 	  so have ideal availability, but may be unreliable, with frequent
 	  data corruption.
 
+config PAGE_EJECT
+	tristate "page eject"
+	default m
+	depends on MEMORY_FAILURE
+	help
+	  Used to eject pages, which is achieved by soft_offline_page and
+	  unpoison_memory. A linked list is maintained to log the pfns
+	  which are offlined by this module. Only the pfns present in the
+	  list are allowed to go online.
+
 if RAS
 
 source "arch/x86/ras/Kconfig"
diff --git a/drivers/ras/Makefile b/drivers/ras/Makefile
index 6f0404f50107..ba551a2403c3 100644
--- a/drivers/ras/Makefile
+++ b/drivers/ras/Makefile
@@ -2,3 +2,4 @@
 obj-$(CONFIG_RAS)	+= ras.o
 obj-$(CONFIG_DEBUG_FS)	+= debugfs.o
 obj-$(CONFIG_RAS_CEC)	+= cec.o
+obj-$(CONFIG_PAGE_EJECT)	+= page_eject.o
diff --git a/drivers/ras/page_eject.c b/drivers/ras/page_eject.c
new file mode 100644
index 000000000000..be5f58ab1518
--- /dev/null
+++ b/drivers/ras/page_eject.c
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+ */
+
+#define pr_fmt(fmt) "page eject: " fmt
+
+#include <linux/memory_hotplug.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+static struct list_head eject_page_list = LIST_HEAD_INIT(eject_page_list);
+static DEFINE_MUTEX(eject_page_mutex);
+static struct kobject *eject_page_kobj;
+
+struct ejected_pfn {
+	struct list_head list;
+	unsigned long pfn;
+};
+
+static struct ejected_pfn *page_eject_remove_pfn_locked(unsigned long pfn)
+{
+	struct ejected_pfn *item, *next, *found = NULL;
+
+	mutex_lock(&eject_page_mutex);
+	list_for_each_entry_safe(item, next, &eject_page_list, list) {
+		if (pfn == item->pfn) {
+			found = item;
+			list_del(&item->list);
+			break;
+		}
+	}
+	mutex_unlock(&eject_page_mutex);
+
+	return found;
+}
+
+static void page_eject_add_pfn_locked(struct ejected_pfn *item)
+{
+	mutex_lock(&eject_page_mutex);
+	list_add_tail(&item->list, &eject_page_list);
+	mutex_unlock(&eject_page_mutex);
+}
+
+static void page_eject_clear_list_locked(void)
+{
+	struct ejected_pfn *item, *next;
+
+	mutex_lock(&eject_page_mutex);
+	list_for_each_entry_safe(item, next, &eject_page_list, list) {
+		list_del(&item->list);
+		kfree(item);
+	}
+	mutex_unlock(&eject_page_mutex);
+}
+
+static int page_eject_offline_page(unsigned long pfn)
+{
+	struct ejected_pfn *item;
+	struct page *page;
+	int ret;
+
+	page = pfn_to_online_page(pfn);
+	if (!page)
+		return -EINVAL;
+
+	if (PageHWPoison(page)) {
+		pr_err("page failed to be offlined, page is already offlined, pfn: %#lx\n", pfn);
+		return -EINVAL;
+	}
+
+	/*
+	 * If soft_offline_page() returns 0 because the page is already
+	 * hwpoisoned, this pfn is still added to the list; the entry is
+	 * then removed during online since the page is poisoned.
+	 */
+	ret = soft_offline_page(pfn, 0);
+	if (ret) {
+		pr_err("page failed to be offlined, soft_offline_page failed(%d), pfn=%#lx\n",
+		       ret, pfn);
+		return ret;
+	}
+
+	item = kzalloc(sizeof(struct ejected_pfn), GFP_KERNEL);
+	if (!item)
+		return -ENOMEM;
+
+	item->pfn = pfn;
+
+	page_eject_add_pfn_locked(item);
+
+	return 0;
+}
+
+static int page_eject_online_page(unsigned long pfn)
+{
+	struct ejected_pfn *item;
+	struct page *page;
+	int ret;
+
+	page = pfn_to_online_page(pfn);
+	if (!page)
+		return -EINVAL;
+
+	item = page_eject_remove_pfn_locked(pfn);
+	if (!item) {
+		pr_err("page failed to be onlined, pfn: %#lx\n", pfn);
+		return -EINVAL;
+	}
+
+	ret = unpoison_memory(pfn);
+	if (!ret) {
+		kfree(item);
+		return ret;
+	}
+
+	/* re-add pfn to list if unpoison failed */
+	page_eject_add_pfn_locked(item);
+	pr_err("page failed to be onlined, unpoison_memory error(%d), pfn: %#lx\n",
+	       ret, pfn);
+	return ret;
+}
+
+static int page_eject_remove_page(unsigned long pfn)
+{
+	struct ejected_pfn *item;
+
+	item = page_eject_remove_pfn_locked(pfn);
+	if (!item) {
+		pr_info("page failed to be removed, pfn: %#lx\n", pfn);
+		return -EINVAL;
+	}
+
+	kfree(item);
+
+	return 0;
+}
+
+static ssize_t offline_store(struct kobject *kobj, struct kobj_attribute *attr,
+			     const char *buf, size_t count)
+{
+	u64 paddr;
+	int res;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (kstrtoull(buf, 16, &paddr))
+		return -EINVAL;
+
+	res = page_eject_offline_page(paddr >> PAGE_SHIFT);
+	if (res)
+		return res;
+
+	return count;
+}
+
+static ssize_t online_store(struct kobject *kobj, struct kobj_attribute *attr,
+			    const char *buf, size_t count)
+{
+	u64 paddr;
+	int res;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (kstrtoull(buf, 16, &paddr))
+		return -EINVAL;
+
+	res = page_eject_online_page(paddr >> PAGE_SHIFT);
+	if (res)
+		return res;
+
+	return count;
+}
+
+static ssize_t remove_store(struct kobject *kobj, struct kobj_attribute *attr,
+			    const char *buf, size_t count)
+{
+	u64 paddr;
+	int res;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (kstrtoull(buf, 16, &paddr))
+		return -EINVAL;
+
+	res = page_eject_remove_page(paddr >> PAGE_SHIFT);
+	if (res)
+		return res;
+
+	return count;
+}
+
+static struct kobj_attribute online_attr =
+	__ATTR(online_page, 0200, NULL, online_store);
+static struct kobj_attribute offline_attr =
+	__ATTR(offline_page, 0200, NULL, offline_store);
+static struct kobj_attribute remove_attr =
+	__ATTR(remove_page, 0200, NULL, remove_store);
+
+static struct attribute *eject_page_attrs[] = {
+	&offline_attr.attr,
+	&online_attr.attr,
+	&remove_attr.attr,
+	NULL,
+};
+
+static struct attribute_group eject_page_attr_group = {
+	.attrs = eject_page_attrs,
+};
+
+static int __init page_eject_init(void)
+{
+	int ret;
+
+	eject_page_kobj = kobject_create_and_add("page_eject", kernel_kobj);
+	if (!eject_page_kobj)
+		return -EINVAL;
+
+	ret = sysfs_create_group(eject_page_kobj, &eject_page_attr_group);
+	if (ret) {
+		kobject_put(eject_page_kobj);
+		return ret;
+	}
+
+	pr_info("init page eject succeeded\n");
+	return 0;
+}
+
+static void __exit page_eject_exit(void)
+{
+	page_eject_clear_list_locked();
+
+	kobject_put(eject_page_kobj);
+
+	pr_info("exit page eject succeeded\n");
+}
+
+module_init(page_eject_init);
+module_exit(page_eject_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ma Wupeng");
+MODULE_DESCRIPTION("page eject");
-- 
Gitee
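
Note: each attribute takes a physical address in hex (parsed with
kstrtoull(..., 16, ...)) and requires CAP_SYS_ADMIN. A userspace sketch
driving offline_page (the address is a made-up example):

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
        const char *attr = "/sys/kernel/page_eject/offline_page";
        const char *paddr = "0x12340000"; /* hypothetical physical address */
        int fd = open(attr, O_WRONLY);

        if (fd < 0) {
            perror("open"); /* module not loaded, or not root */
            return 1;
        }
        if (write(fd, paddr, strlen(paddr)) < 0)
            perror("write"); /* e.g. EINVAL if the offline failed */
        close(fd);
        return 0;
    }
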
From be79bb17e412e11e57ccf5b8e9a3073382caca6d Mon Sep 17 00:00:00 2001
From: Ma Wupeng
Date: Tue, 28 Nov 2023 20:51:48 +0800
Subject: [PATCH 7/7] mm: page_eject: Add mc support during offline page

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8K5CO

--------------------------------

When a page is offlined by page ejection, it is copied in kernel space
during migration. Set PF_MCS in the task flags around the copy so that
a UCE consumed there is handled machine-check safe.

Signed-off-by: Ma Wupeng
---
 drivers/ras/page_eject.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/ras/page_eject.c b/drivers/ras/page_eject.c
index be5f58ab1518..62a5dbd0fa0a 100644
--- a/drivers/ras/page_eject.c
+++ b/drivers/ras/page_eject.c
@@ -68,12 +68,14 @@ static int page_eject_offline_page(unsigned long pfn)
 		return -EINVAL;
 	}
 
+	current->flags |= PF_MCS;
 	/*
 	 * If soft_offline_page() returns 0 because the page is already
 	 * hwpoisoned, this pfn is still added to the list; the entry is
 	 * then removed during online since the page is poisoned.
 	 */
 	ret = soft_offline_page(pfn, 0);
+	current->flags &= ~PF_MCS;
 	if (ret) {
 		pr_err("page failed to be offlined, soft_offline_page failed(%d), pfn=%#lx\n",
 		       ret, pfn);
-- 
Gitee
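
Note: the PF_MCS bracket above clears the flag unconditionally; if a
caller could already be running with PF_MCS set, a save-and-restore
variant is safer. A hypothetical sketch (mcs_soft_offline() is not part
of the series):

    #include <linux/mm.h>
    #include <linux/sched.h>

    /* Hypothetical nesting-safe variant of the PF_MCS bracket. */
    static int mcs_soft_offline(unsigned long pfn)
    {
        unsigned int old_mcs = current->flags & PF_MCS;
        int ret;

        current->flags |= PF_MCS;
        ret = soft_offline_page(pfn, 0);
        if (!old_mcs)
            current->flags &= ~PF_MCS; /* restore the caller's state */

        return ret;
    }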