diff --git a/Documentation/ABI/testing/sysfs-memory-page-offline b/Documentation/ABI/testing/sysfs-memory-page-offline
index e14703f12fdf38dbd26454b2a1f5039083e1a6c9..93285bbadc9e5fab2eb2816d56f29a30637a3c35 100644
--- a/Documentation/ABI/testing/sysfs-memory-page-offline
+++ b/Documentation/ABI/testing/sysfs-memory-page-offline
@@ -20,6 +20,9 @@ Description:
 		number, or a error when the offlining failed.  Reading
 		the file is not allowed.
 
+		Soft-offline can be controlled via sysctl, see:
+		Documentation/admin-guide/sysctl/vm.rst
+
 What:		/sys/devices/system/memory/hard_offline_page
 Date:		Sep 2009
 KernelVersion:	2.6.33
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 1fed918bb1e5ee539fe4eedd1a89f15b6d8c670e..0baeb9112e63a3ef69bddac8f52f032e45b70c16 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -267,6 +267,61 @@ static inline void copy_highpage(struct page *to, struct page *from)
 	kunmap_atomic(vfrom);
 }
 
+#ifdef CONFIG_UCE_KERNEL_RECOVERY
+/* Return -EFAULT if there was a #MC during copy, otherwise 0 for success. */
+static inline int copy_mc_highpage(struct page *to, struct page *from)
+{
+	char *vfrom, *vto;
+	int ret;
+
+	vfrom = kmap_atomic(from);
+	vto = kmap_atomic(to);
+	ret = copy_page_cow(vto, vfrom);
+	kunmap_atomic(vto);
+	kunmap_atomic(vfrom);
+
+	return ret;
+}
+
+/* Return -EFAULT if there was a #MC during copy, otherwise 0 for success. */
+static inline int copy_mc_highpages(struct page *to, struct page *from,
+				    int nr_pages)
+{
+	int ret = 0;
+	int i;
+
+	for (i = 0; i < nr_pages; i++) {
+		cond_resched();
+		ret = copy_mc_highpage(to + i, from + i);
+		if (ret)
+			return -EFAULT;
+	}
+
+	return ret;
+}
+#else
+static inline int copy_mc_highpage(struct page *to, struct page *from)
+{
+	copy_highpage(to, from);
+
+	return 0;
+}
+
+/* Without CONFIG_UCE_KERNEL_RECOVERY a #MC cannot be trapped; always 0. */
+static inline int copy_mc_highpages(struct page *to, struct page *from,
+				    int nr_pages)
+{
+	int i;
+
+	for (i = 0; i < nr_pages; i++) {
+		cond_resched();
+		(void)copy_mc_highpage(to + i, from + i);
+	}
+
+	return 0;
+}
+#endif
+
 #endif
 
 #endif /* _LINUX_HIGHMEM_H */
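Note on the helpers above: with CONFIG_UCE_KERNEL_RECOVERY, copy_page_cow() (an arch-provided primitive on this tree) reports a machine-check abort, and copy_mc_highpages() turns any such abort into -EFAULT; without the option the helpers degenerate into plain copies that always return 0. A minimal caller-side sketch of how the contract is meant to be consumed — copy_pages_checked() is a hypothetical name, not part of this patch:

	/* Hypothetical sketch: only a 0 return means all nr_pages were
	 * copied; on -EFAULT a #MC fired mid-copy and the destination
	 * pages are garbage and must be discarded by the caller. */
	static int copy_pages_checked(struct page *dst, struct page *src,
				      int nr_pages)
	{
		int ret = copy_mc_highpages(dst, src, nr_pages);

		if (ret)	/* hardware fault during the copy */
			return ret;
		return 0;
	}
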
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 67e299374ac8aa1d7f1e2c23de79034500a1f3fa..0274a82144e4e7367d459a17cb7e37d809e87b59 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2873,6 +2873,7 @@ extern int get_hwpoison_page(struct page *page);
 #define put_hwpoison_page(page)	put_page(page)
 extern int sysctl_memory_failure_early_kill;
 extern int sysctl_memory_failure_recovery;
+extern int sysctl_enable_soft_offline;
 extern void shake_page(struct page *p, int access);
 extern atomic_long_t num_poisoned_pages __read_mostly;
 extern int soft_offline_page(struct page *page, int flags);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 0d1f07dc7b4468595ddcc65432ddf8c43dea0e50..88f92eff7bf2a65906942e1fe7329e8b0b1f6509 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1824,6 +1824,15 @@ static struct ctl_table vm_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
+	{
+		.procname	= "enable_soft_offline",
+		.data		= &sysctl_enable_soft_offline,
+		.maxlen		= sizeof(sysctl_enable_soft_offline),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &three,
+	},
 #endif
 	{
 		.procname	= "user_reserve_kbytes",
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 28bd5d6ed1bf9e3d9bc0a299aa60b5d34f517ef9..7fbc9c214da970a7a3ad87a54e208c9964164094 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -63,10 +63,15 @@
 #include "internal.h"
 #include "ras/ras_event.h"
 
+#define SOFT_OFFLINE_ENABLED		BIT(0)
+#define SOFT_OFFLINE_SKIP_HUGETLB	BIT(1)
+
 int sysctl_memory_failure_early_kill __read_mostly = 0;
 
 int sysctl_memory_failure_recovery __read_mostly = 1;
 
+int sysctl_enable_soft_offline __read_mostly = SOFT_OFFLINE_ENABLED;
+
 atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0);
 
 static bool page_handle_poison(struct page *page, bool hugepage_or_freepage, bool release)
@@ -1996,7 +2001,9 @@ static int soft_offline_free_page(struct page *page)
  * @page: page to offline
  * @flags: flags. Same as memory_failure().
  *
- * Returns 0 on success, otherwise negated errno.
+ * Returns 0 on success,
+ * -EOPNOTSUPP when soft offline is disabled via /proc/sys/vm/enable_soft_offline,
+ * otherwise a negated errno (< 0).
  *
  * Soft offline a page, by migration or invalidation,
  * without killing anything. This is for the case when
@@ -2027,6 +2034,22 @@ int soft_offline_page(struct page *page, int flags)
 		return -EIO;
 	}
 
+	if (!(sysctl_enable_soft_offline & SOFT_OFFLINE_ENABLED)) {
+		pr_info_once("soft offline: disabled by /proc/sys/vm/enable_soft_offline\n");
+		if (flags & MF_COUNT_INCREASED)
+			put_page(page);
+		return -EOPNOTSUPP;
+	}
+
+	if (sysctl_enable_soft_offline & SOFT_OFFLINE_SKIP_HUGETLB) {
+		if (PageHuge(page)) {
+			pr_info_once("soft offline: disabled for HugeTLB pages by /proc/sys/vm/enable_soft_offline\n");
+			if (flags & MF_COUNT_INCREASED)
+				put_page(page);
+			return -EOPNOTSUPP;
+		}
+	}
+
 	if (PageHWPoison(page)) {
 		pr_info("soft offline: %#lx page already poisoned\n", pfn);
 		if (flags & MF_COUNT_INCREASED)
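For clarity, vm.enable_soft_offline is a two-bit mask, which is why extra2 is capped at three: bit 0 (SOFT_OFFLINE_ENABLED) must be set for soft offline to run at all, and bit 1 (SOFT_OFFLINE_SKIP_HUGETLB) additionally exempts HugeTLB pages. So 1 is the default behaviour, 3 skips HugeTLB, and 0 or 2 disable soft offline entirely. A userspace sketch (assumes CONFIG_MEMORY_FAILURE, CAP_SYS_ADMIN, and the behaviour introduced by this patch) that exercises the new -EOPNOTSUPP path through madvise(MADV_SOFT_OFFLINE):

	#include <errno.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		long psz = sysconf(_SC_PAGESIZE);
		void *p = mmap(NULL, psz, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

		if (p == MAP_FAILED)
			return 1;
		memset(p, 0xaa, psz);	/* fault the page in first */

		/* With vm.enable_soft_offline = 0 this should fail with
		 * EOPNOTSUPP; with the default of 1 it should succeed. */
		if (madvise(p, psz, MADV_SOFT_OFFLINE))
			fprintf(stderr, "soft offline: %s\n", strerror(errno));
		else
			puts("page soft-offlined");
		return 0;
	}
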
diff --git a/mm/migrate.c b/mm/migrate.c
index f8c379a0b9b925f6c87dbe4a8e0cf9ca58e5585e..62e584841fa7732e73f5a3a5880ec85a61152c32 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -47,6 +47,7 @@
 #include
 #include
 #include
+#include
 
 #include
@@ -640,24 +641,33 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
  * arithmetic will work across the entire page.  We need something more
  * specialized.
  */
-static void __copy_gigantic_page(struct page *dst, struct page *src,
-				int nr_pages)
+static int __copy_gigantic_page(struct page *dst, struct page *src,
+				int nr_pages, bool mc)
 {
-	int i;
+	int i, ret = 0;
 	struct page *dst_base = dst;
 	struct page *src_base = src;
 
 	for (i = 0; i < nr_pages; ) {
 		cond_resched();
-		copy_highpage(dst, src);
+
+		if (mc) {
+			ret = copy_mc_highpage(dst, src);
+			if (ret)
+				return -EFAULT;
+		} else {
+			copy_highpage(dst, src);
+		}
 
 		i++;
 		dst = mem_map_next(dst, dst_base, i);
 		src = mem_map_next(src, src_base, i);
 	}
+
+	return ret;
 }
 
-static void copy_huge_page(struct page *dst, struct page *src)
+static int __copy_huge_page(struct page *dst, struct page *src, bool mc)
 {
 	int i;
 	int nr_pages;
@@ -667,20 +677,32 @@ static void copy_huge_page(struct page *dst, struct page *src)
 		struct hstate *h = page_hstate(src);
 
 		nr_pages = pages_per_huge_page(h);
-		if (unlikely(nr_pages > MAX_ORDER_NR_PAGES)) {
-			__copy_gigantic_page(dst, src, nr_pages);
-			return;
-		}
+		if (unlikely(nr_pages > MAX_ORDER_NR_PAGES))
+			return __copy_gigantic_page(dst, src, nr_pages, mc);
 	} else {
 		/* thp page */
 		BUG_ON(!PageTransHuge(src));
 		nr_pages = hpage_nr_pages(src);
 	}
 
+	if (mc)
+		return copy_mc_highpages(dst, src, nr_pages);
+
 	for (i = 0; i < nr_pages; i++) {
 		cond_resched();
 		copy_highpage(dst + i, src + i);
 	}
+
+	return 0;
+}
+
+static int copy_huge_page(struct page *dst, struct page *src)
+{
+	return __copy_huge_page(dst, src, false);
+}
+
+static int copy_mc_huge_page(struct page *dst, struct page *src)
+{
+	return __copy_huge_page(dst, src, true);
 }
 
 /*
@@ -756,6 +778,38 @@ void migrate_page_copy(struct page *newpage, struct page *page)
 }
 EXPORT_SYMBOL(migrate_page_copy);
 
+static int migrate_page_copy_mc(struct page *newpage, struct page *page)
+{
+	int rc;
+
+	if (PageHuge(page) || PageTransHuge(page))
+		rc = copy_mc_huge_page(newpage, page);
+	else
+		rc = copy_mc_highpage(newpage, page);
+
+	return rc;
+}
+
+static int migrate_page_mc_extra(struct address_space *mapping,
+				 struct page *newpage, struct page *page,
+				 enum migrate_mode mode, int extra_count)
+{
+	int rc;
+
+	rc = migrate_page_copy_mc(newpage, page);
+	if (rc)
+		return rc;
+
+	rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode,
+				       extra_count);
+	if (rc != MIGRATEPAGE_SUCCESS)
+		return rc;
+
+	migrate_page_states(newpage, page);
+
+	return rc;
+}
+
 /************************************************************
  *                          Migration functions
  ***********************************************************/
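A design note on the ordering above: unlike migrate_page(), which moves the mapping first and copies afterwards, migrate_page_mc_extra() copies before calling migrate_page_move_mapping(), presumably so that a #MC taken during the copy aborts the migration while the mapping still refers to the source page, leaving no half-migrated state to roll back. Annotated excerpt of that failure path (comments added, code as in the patch):

	rc = migrate_page_copy_mc(newpage, page);  /* may trap a #MC */
	if (rc)		/* -EFAULT: bail out; mapping is untouched */
		return rc;
	/* only now is the mapping switched over - the commit point */
	rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode,
				       extra_count);
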
@@ -774,6 +828,13 @@ int migrate_page(struct address_space *mapping,
 
 	BUG_ON(PageWriteback(page));	/* Writeback must be complete */
 
+#ifdef CONFIG_UCE_KERNEL_RECOVERY
+	if (IS_ENABLED(CONFIG_ARM64) &&
+	    is_cow_kernel_recovery_enable() &&
+	    (mode != MIGRATE_SYNC_NO_COPY))
+		return migrate_page_mc_extra(mapping, newpage, page, mode, 0);
+#endif
+
 	rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);
 
 	if (rc != MIGRATEPAGE_SUCCESS)
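The MIGRATE_SYNC_NO_COPY exclusion is deliberate: in that mode the caller copies the page contents itself later (e.g. for device memory), so the eager copy done by migrate_page_mc_extra() must not run. Any mapping wired to the generic callback picks up the MC-safe path automatically once the config and runtime switches above are on; an illustrative sketch (example_aops is hypothetical, not part of this patch):

	static const struct address_space_operations example_aops = {
		.migratepage	= migrate_page,	/* generic path, MC-aware here */
	};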