From a29486b771748358ac43c1be67d0e3b3a88bc8af Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Mon, 29 Aug 2022 16:04:49 +0800 Subject: [PATCH 01/16] anolis: mm: prezero: introduce PAGE_PREZERO config ANBZ: #2100 This introduces PAGE_PREZERO feature to speed up __GFP_ZERO page allocation, by enabling per-node kernel threads to clear (zero) buddy pages asynchronously in advance to build a pool of pre-zeroed pages on each NUMA node. Furthermore, the work of page clear can be offloaded and accelerated with accelerators like Intel DSA. User may configure the DMA device on each NUMA node before enabling this feature. Signed-off-by: Xiaochen Shen Signed-off-by: Xu Yu Reviewed-by: Gang Deng Link: https://gitee.com/anolis/cloud-kernel/pulls/702 --- mm/Kconfig | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/mm/Kconfig b/mm/Kconfig index 7671747114d3..c5312bc1c35d 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -945,4 +945,17 @@ config FAST_COPY_MM unserviceable duration. Note that it won't speed up child's return from fork(2). +config PAGE_PREZERO + bool "Clear page asynchronously" + depends on !INIT_ON_ALLOC_DEFAULT_ON + default n + help + This feature enables per-node kernel threads to clear (zero) buddy + pages asynchronously in advance to build a pool of pre-zeroed pages + on each NUMA node, and speeds up __GFP_ZERO page allocation. + + Furthermore, the work of page clear can be offloaded and accelerated + with accelerators like Intel DSA. User may configure the DMA device + on each NUMA node before enabling this feature. + endmenu -- Gitee From 4fff5df66e0fd4f943139f62939a562065ab412a Mon Sep 17 00:00:00 2001 From: Xu Yu Date: Mon, 29 Aug 2022 16:08:26 +0800 Subject: [PATCH 02/16] anolis: mm: prezero: introduce PG_zeroed flag ANBZ: #2100 This introduces PG_zeroed flag, which reuses PG_dirty flag and is only valid for buddy pages, to indicate whether a _buddy_ page is pre-zeroed or not. Signed-off-by: Xu Yu Reviewed-by: Xiaochen Shen Reviewed-by: Gang Deng Link: https://gitee.com/anolis/cloud-kernel/pulls/702 --- include/linux/page-flags.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 64e4ac9a2a2f..4798742b5343 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -180,6 +180,10 @@ enum pageflags { /* Only valid for buddy pages. Used to track pages that are reported */ PG_reported = PG_uptodate, +#ifdef CONFIG_PAGE_PREZERO + /* Only valid for buddy pages. Used to track pages that are zeroed */ + PG_zeroed = PG_dirty, +#endif }; #ifndef __GENERATING_BOUNDS_H @@ -317,6 +321,9 @@ static inline void SetPage##uname(struct page *page) { } #define CLEARPAGEFLAG_NOOP(uname) \ static inline void ClearPage##uname(struct page *page) { } +#define __SETPAGEFLAG_NOOP(uname) \ +static inline void __SetPage##uname(struct page *page) { } + #define __CLEARPAGEFLAG_NOOP(uname) \ static inline void __ClearPage##uname(struct page *page) { } @@ -458,6 +465,14 @@ PAGEFLAG(Idle, idle, PF_ANY) */ __PAGEFLAG(Reported, reported, PF_NO_COMPOUND) +#ifdef CONFIG_PAGE_PREZERO +__PAGEFLAG(Zeroed, zeroed, PF_NO_COMPOUND) +#else +TESTPAGEFLAG_FALSE(Zeroed) +__SETPAGEFLAG_NOOP(Zeroed) +__CLEARPAGEFLAG_NOOP(Zeroed) +#endif + #ifdef CONFIG_DUPTEXT /* PageDup() is used to track page that has NUMA replicas. 
*/ PAGEFLAG(Dup, dup, PF_HEAD) -- Gitee From 8ba27c07922944aa84255445523d46dc8d908323 Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Mon, 7 Mar 2022 11:39:32 -0800 Subject: [PATCH 03/16] anolis: mm: prezero: introduce PAGE_ZEROED bit ANBZ: #2100 This uses the bit above highest-possible page order (MAX_ORDER - 1) of page->private, to _temporarily_ indicate whether the page is pre-zeroed in the page allocation path. Specifically, this bit is set in __rmqueue_smallest(), and cleared in prep_new_page() or free_pcppages_bulk(). Setting this bit anywhere else is a bug. [ Based on Dave Hansen's original patch ] [ Xiaochen Shen: break original patch into two patches ] [ Xu Yu: rename BUDDY_ZEROED to PAGE_ZEROED ] Signed-off-by: Xiaochen Shen Signed-off-by: Xu Yu Reviewed-by: Gang Deng Link: https://gitee.com/anolis/cloud-kernel/pulls/702 --- mm/internal.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/mm/internal.h b/mm/internal.h index 404a44e457e9..daad3493bd67 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -270,6 +270,38 @@ int find_suitable_fallback(struct free_area *area, unsigned int order, #endif +/* + * Reuse the bit above highest-possible page order (MAX_ORDER - 1) of + * page->private, to _temporarily_ indicate that the page is pre-zeroed. + * + * This bit is only used for pages newly allocated from buddy, neither + * buddy pages nor lru pages, etc., in the page allocation path. + * + * Specifically, this bit is set in __rmqueue_smallest(), and cleared in + * prep_new_page() or free_pcppages_bulk(). Setting this bit anywhere else + * is a bug. + */ +#ifdef CONFIG_PAGE_PREZERO +#define PAGE_ZEROED (1UL << (ilog2(MAX_ORDER - 1) + 1)) +#else +#define PAGE_ZEROED 0 +#endif + +static inline bool page_zeroed(struct page *page) +{ + return page_private(page) & PAGE_ZEROED; +} + +static inline void set_page_zeroed(struct page *page) +{ + page->private |= PAGE_ZEROED; +} + +static inline void clear_page_zeroed(struct page *page) +{ + page->private &= ~PAGE_ZEROED; +} + /* * This function returns the order of a free page in the buddy system. In * general, page_zone(page)->lock must be held by the caller to prevent the -- Gitee From c0d07b8c7036df816a749707aa15b393f0259812 Mon Sep 17 00:00:00 2001 From: Xu Yu Date: Fri, 2 Sep 2022 16:52:58 +0800 Subject: [PATCH 04/16] anolis: mm: prezero: clear page->private when free pages ANBZ: #2100 This clears page->private in free_pages_prepare(), to stop stashing page->private for both pcp and buddy pages. Signed-off-by: Xu Yu Reviewed-by: Xiaochen Shen Reviewed-by: Gang Deng Link: https://gitee.com/anolis/cloud-kernel/pulls/702 --- mm/page_alloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5f2fcaf55cb9..a5cf431fa034 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1284,6 +1284,7 @@ static __always_inline bool free_pages_prepare(struct page *page, } if (PageMappingFlags(page)) page->mapping = NULL; + set_page_private(page, 0); if (memcg_kmem_enabled() && PageKmemcg(page)) __memcg_kmem_uncharge_page(page, order); if (check_free) -- Gitee From 87d5041c424b2eab99c7e226a170fb5cefb5a234 Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Thu, 1 Sep 2022 15:44:47 +0800 Subject: [PATCH 05/16] anolis: mm: prezero: clear page->private out of post_alloc_hook ANBZ: #2100 This makes split_map_pages clear page->private by itself. On the other hand, for pages not allocated from freelist, e.g., kfence_alloc_page() and compaction_capture(), clear page->private explicitly. 
This then eliminates clear of page->private in post_alloc_hook(). The basis for doing this is that page->private is cleared except PAGE_ZEROED bit when reach post_alloc_hook(). If this basis is not true, then this is a bug. [ Based on Dave Hansen's original patch ] Signed-off-by: Xiaochen Shen Co-developed-by: Xu Yu Signed-off-by: Xu Yu Reviewed-by: Gang Deng Link: https://gitee.com/anolis/cloud-kernel/pulls/702 --- mm/compaction.c | 1 + mm/page_alloc.c | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/mm/compaction.c b/mm/compaction.c index c6d55f1b627d..4b3fb6874808 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -97,6 +97,7 @@ static void split_map_pages(struct list_head *list) order = page_private(page); nr_pages = 1 << order; + set_page_private(page, 0); post_alloc_hook(page, order, __GFP_MOVABLE); if (order) split_page(page, order); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a5cf431fa034..842945d703d4 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2316,7 +2316,7 @@ static bool check_new_pages(struct page *page, unsigned int order) inline void post_alloc_hook(struct page *page, unsigned int order, gfp_t gfp_flags) { - set_page_private(page, 0); + WARN_ON_ONCE(page_private(page) & ~PAGE_ZEROED); set_page_refcounted(page); arch_alloc_page(page, order); @@ -4197,8 +4197,10 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, count_vm_event(COMPACTSTALL); /* Prep a captured page if available */ - if (page) + if (page) { + set_page_private(page, 0); prep_new_page(page, order, gfp_mask, alloc_flags); + } /* Try get a page from the freelist if available */ if (!page) @@ -5022,6 +5024,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid, page = kfence_alloc_page(order, preferred_nid, gfp_mask); if (unlikely(page)) { + set_page_private(page, 0); prep_new_page(page, 0, gfp_mask, alloc_mask); goto out; } -- Gitee From 6e0e95ffa4f7737cae27e5b2243765319474ad9d Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Tue, 30 Aug 2022 00:52:05 +0800 Subject: [PATCH 06/16] anolis: mm: prezero: manage pre-zeroed pages ANBZ: #2100 This manages pre-zeroed pages, which are either in buddy or newly allocated, with PG_zeroed flag and PAGE_ZEROED bit, respectively. The zeroed state of page is zapped when leaving buddy. To transfer the zeroed state, save (PG_zeroed flag) and restore (PAGE_ZEROED bit of page->private) is performed in __rmqueue_smallest. The restored zeroed state can be used to skip page clear in kernel_init_free_pages() later. 
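An illustrative summary of the intended lifecycle, matching the code below: a free buddy page carries PG_zeroed; in __rmqueue_smallest(), del_page_from_free_list() zaps that flag, the state is restored as the PAGE_ZEROED bit in page->private, and expand() puts zeroed remainders back on the free list tail with PG_zeroed; prep_new_page() lets kernel_init_free_pages() skip the clear when PAGE_ZEROED is set and then drops the bit; free_pcppages_bulk() drops the bit for pcp pages that are freed without ever being prepped.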
Signed-off-by: Xiaochen Shen Co-developed-by: Xu Yu Signed-off-by: Xu Yu Reviewed-by: Gang Deng Link: https://gitee.com/anolis/cloud-kernel/pulls/702 --- include/linux/prezero.h | 23 +++++++++++++++++++ mm/page_alloc.c | 49 ++++++++++++++++++++++++++++++++++++++--- 2 files changed, 69 insertions(+), 3 deletions(-) create mode 100644 include/linux/prezero.h diff --git a/include/linux/prezero.h b/include/linux/prezero.h new file mode 100644 index 000000000000..2dca9719a9be --- /dev/null +++ b/include/linux/prezero.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_PREZERO_H +#define _LINUX_PREZERO_H + +#include +#include + +#ifdef CONFIG_PAGE_PREZERO +DECLARE_STATIC_KEY_FALSE(prezero_enabled_key); + +static inline bool prezero_enabled(void) +{ + return static_branch_unlikely(&prezero_enabled_key); +} + +#else +static inline bool prezero_enabled(void) +{ + return false; +} +#endif /* CONFIG_KZEROPAGED */ + +#endif /* _LINUX_PREZERO_H */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 842945d703d4..e5d78e511600 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -74,6 +74,7 @@ #include #include #include +#include #include #include @@ -943,6 +944,9 @@ static inline void del_page_from_free_list(struct page *page, struct zone *zone, if (page_reported(page)) __ClearPageReported(page); + /* clear pre-zeroed state */ + __ClearPageZeroed(page); + list_del(&page->lru); __ClearPageBuddy(page); set_page_private(page, 0); @@ -1041,6 +1045,10 @@ static inline void __free_one_page(struct page *page, goto done_merging; if (!page_is_buddy(page, buddy, order)) goto done_merging; + + /* Clear PG_zeroed when merging. */ + __ClearPageZeroed(page); + /* * Our buddy is free or it is CONFIG_DEBUG_PAGEALLOC guard page, * merge with it and move up one order. @@ -1218,6 +1226,13 @@ static void kernel_init_free_pages(struct page *page, int numpages) { int i; + /* + * Skip clear if page is pre-zeroed. + * But force clear if !prezero_enabled(). + */ + if (prezero_enabled() && page_zeroed(page)) + return; + /* s390's use of memset() could override KASAN redzones. */ kasan_disable_current(); for (i = 0; i < numpages; i++) @@ -1427,6 +1442,13 @@ static void free_pcppages_bulk(struct zone *zone, int count, list_del(&page->lru); pcp->count--; + /* + * PAGE_ZEROED bit may be set for pcp pages, see + * comments in __rmqueue_smallest(). Clear this bit + * if any. + */ + clear_page_zeroed(page); + if (bulkfree_pcp_prepare(page)) continue; @@ -2209,7 +2231,7 @@ void __init init_cma_reserved_pageblock(struct page *page) * -- nyc */ static inline void expand(struct zone *zone, struct page *page, - int low, int high, int migratetype) + int low, int high, int migratetype, bool zeroed) { unsigned long size = 1 << high; @@ -2227,8 +2249,14 @@ static inline void expand(struct zone *zone, struct page *page, if (set_page_guard(zone, &page[size], high, migratetype)) continue; - add_to_free_list(&page[size], zone, high, migratetype); set_buddy_order(&page[size], high); + if (zeroed) { + add_to_free_list_tail(&page[size], zone, high, + migratetype); + __SetPageZeroed(&page[size]); + } else { + add_to_free_list(&page[size], zone, high, migratetype); + } } } @@ -2335,6 +2363,9 @@ static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags if (!free_pages_prezeroed() && want_init_on_alloc(gfp_flags)) kernel_init_free_pages(page, 1 << order); + /* Clear pre-zeroed state (PAGE_ZEROED bit) if any. 
*/ + clear_page_zeroed(page); + if (order && (gfp_flags & __GFP_COMP)) prep_compound_page(page, order); @@ -2364,13 +2395,25 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, /* Find a page of the appropriate size in the preferred list */ for (current_order = order; current_order < MAX_ORDER; ++current_order) { + bool zeroed; area = &(zone->free_area[current_order]); page = get_page_from_free_area(area, migratetype); if (!page) continue; + + /* Stash this away before del_page_from_free_list() zaps it */ + zeroed = PageZeroed(page); + del_page_from_free_list(page, zone, current_order); - expand(zone, page, order, current_order, migratetype); + expand(zone, page, order, current_order, migratetype, zeroed); set_pcppage_migratetype(page, migratetype); + /* + * NOTE This is a hack. The pre-zeroed state was zapped + * above and restored here, and should finally be cleared + * in prep_new_page() or free_pcppages_bulk(). + */ + if (zeroed) + set_page_zeroed(page); return page; } -- Gitee From cf31888656fe1e93ca055beddb59c2b5d7ee1ee9 Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Tue, 30 Aug 2022 15:09:06 +0800 Subject: [PATCH 07/16] anolis: mm: prezero: add pre-zeroed page statistics ANBZ: #2100 This adds pre-zeroed page statistics in multiple system interfaces as follows. /proc/vmstat - nr_zeroed_pages - prezero_alloc - prezero_alloc_pages /proc/meminfo - MemZeroed /proc/zoneinfo - zeroed - nr_zeroed_pages /proc/zerobuddyinfo - zeroed version of /proc/buddyinfo Signed-off-by: Xiaochen Shen Signed-off-by: Xu Yu Reviewed-by: Gang Deng Link: https://gitee.com/anolis/cloud-kernel/pulls/702 --- drivers/base/node.c | 7 ++++++ fs/proc/meminfo.c | 4 ++++ include/linux/mmzone.h | 6 +++++ include/linux/vm_event_item.h | 4 ++++ mm/page_alloc.c | 31 ++++++++++++++++++++++-- mm/vmstat.c | 44 +++++++++++++++++++++++++++++++++++ 6 files changed, 94 insertions(+), 2 deletions(-) diff --git a/drivers/base/node.c b/drivers/base/node.c index 7f35dc2cea51..47a022f945ad 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -439,6 +439,9 @@ static ssize_t node_read_meminfo(struct device *dev, #endif #ifdef CONFIG_DUPTEXT "Node %d DupText: %8lu kB\n" +#endif +#ifdef CONFIG_PAGE_PREZERO + "Node %d MemZeroed: %8lu kB\n" #endif , nid, K(node_page_state(pgdat, NR_FILE_DIRTY)), @@ -476,6 +479,10 @@ static ssize_t node_read_meminfo(struct device *dev, #ifdef CONFIG_DUPTEXT , nid, K(node_page_state(pgdat, NR_DUPTEXT)) +#endif +#ifdef CONFIG_PAGE_PREZERO + , + nid, K(sum_zone_node_page_state(nid, NR_ZEROED_PAGES)) #endif ); len += hugetlb_report_node_meminfo(buf, len, nid); diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index ade24eb04950..5e1a6be82aeb 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -173,6 +173,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v) show_val_kb(m, "DupText: ", global_node_page_state(NR_DUPTEXT)); #endif +#ifdef CONFIG_PAGE_PREZERO + show_val_kb(m, "MemZeroed: ", + global_zone_page_state(NR_ZEROED_PAGES)); +#endif hugetlb_report_meminfo(m); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 23cfe3287d07..8d797fffb4a5 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -98,6 +98,9 @@ extern int page_group_by_mobility_disabled; struct free_area { struct list_head free_list[MIGRATE_TYPES]; unsigned long nr_free; +#ifdef CONFIG_PAGE_PREZERO + unsigned long nr_zeroed; /* Pre-zeroed pages */ +#endif }; static inline struct page *get_page_from_free_area(struct free_area *area, @@ -161,6 +164,9 @@ enum 
zone_stat_item { NR_ZSPAGES, /* allocated in zsmalloc */ #endif NR_FREE_CMA_PAGES, +#ifdef CONFIG_PAGE_PREZERO + NR_ZEROED_PAGES, /* Pre-zeroed pages */ +#endif NR_VM_ZONE_STAT_ITEMS }; enum node_stat_item { diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 18e75974d4e3..ee02644d933a 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -120,6 +120,10 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #ifdef CONFIG_SWAP SWAP_RA, SWAP_RA_HIT, +#endif +#ifdef CONFIG_PAGE_PREZERO + PREZERO_ALLOC, + PREZERO_ALLOC_PAGES, #endif NR_VM_EVENT_ITEMS }; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e5d78e511600..dd49bedfc33f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -944,8 +944,14 @@ static inline void del_page_from_free_list(struct page *page, struct zone *zone, if (page_reported(page)) __ClearPageReported(page); +#ifdef CONFIG_PAGE_PREZERO /* clear pre-zeroed state */ - __ClearPageZeroed(page); + if (PageZeroed(page)) { + __ClearPageZeroed(page); + zone->free_area[order].nr_zeroed--; + __mod_zone_page_state(zone, NR_ZEROED_PAGES, -(1 << order)); + } +#endif list_del(&page->lru); __ClearPageBuddy(page); @@ -1090,6 +1096,13 @@ static inline void __free_one_page(struct page *page, done_merging: set_buddy_order(page, order); +#ifdef CONFIG_PAGE_PREZERO + if (PageZeroed(page)) { + zone->free_area[order].nr_zeroed++; + __mod_zone_page_state(zone, NR_ZEROED_PAGES, 1 << order); + } +#endif + if (fpi_flags & FPI_TO_TAIL) to_tail = true; else if (is_shuffle_order(order)) @@ -1226,12 +1239,17 @@ static void kernel_init_free_pages(struct page *page, int numpages) { int i; +#ifdef CONFIG_PAGE_PREZERO /* * Skip clear if page is pre-zeroed. * But force clear if !prezero_enabled(). */ - if (prezero_enabled() && page_zeroed(page)) + if (prezero_enabled() && page_zeroed(page)) { + count_vm_event(PREZERO_ALLOC); + __count_vm_events(PREZERO_ALLOC_PAGES, numpages); return; + } +#endif /* s390's use of memset() could override KASAN redzones. 
*/ kasan_disable_current(); @@ -2250,13 +2268,19 @@ static inline void expand(struct zone *zone, struct page *page, continue; set_buddy_order(&page[size], high); +#ifdef CONFIG_PAGE_PREZERO if (zeroed) { add_to_free_list_tail(&page[size], zone, high, migratetype); __SetPageZeroed(&page[size]); + zone->free_area[high].nr_zeroed++; + __mod_zone_page_state(zone, NR_ZEROED_PAGES, 1 << high); } else { add_to_free_list(&page[size], zone, high, migratetype); } +#else + add_to_free_list(&page[size], zone, high, migratetype); +#endif } } @@ -6325,6 +6349,9 @@ static void __meminit zone_init_free_lists(struct zone *zone) for_each_migratetype_order(order, t) { INIT_LIST_HEAD(&zone->free_area[order].free_list[t]); zone->free_area[order].nr_free = 0; +#ifdef CONFIG_PAGE_PREZERO + zone->free_area[order].nr_zeroed = 0; +#endif } } diff --git a/mm/vmstat.c b/mm/vmstat.c index 498ea2e06c0f..4793b355ff91 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1163,6 +1163,9 @@ const char * const vmstat_text[] = { "nr_zspages", #endif "nr_free_cma", +#ifdef CONFIG_PAGE_PREZERO + "nr_zeroed_pages", +#endif /* enum numa_stat_item counters */ #ifdef CONFIG_NUMA @@ -1353,6 +1356,10 @@ const char * const vmstat_text[] = { "swap_ra", "swap_ra_hit", #endif +#ifdef CONFIG_PAGE_PREZERO + "prezero_alloc", + "prezero_alloc_pages", +#endif #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */ }; #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */ @@ -1600,6 +1607,34 @@ static const struct seq_operations pagetypeinfo_op = { .show = pagetypeinfo_show, }; +#ifdef CONFIG_PAGE_PREZERO +static void zerobuddy_show_print(struct seq_file *m, pg_data_t *pgdat, + struct zone *zone) +{ + int order; + + seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name); + for (order = 0; order < MAX_ORDER; ++order) + seq_printf(m, "%6lu ", zone->free_area[order].nr_zeroed); + seq_putc(m, '\n'); +} + +static int zerobuddy_show(struct seq_file *m, void *arg) +{ + pg_data_t *pgdat = (pg_data_t *)arg; + + walk_zones_in_node(m, pgdat, true, false, zerobuddy_show_print); + return 0; +} + +static const struct seq_operations zerobuddy_op = { + .start = frag_start, + .next = frag_next, + .stop = frag_stop, + .show = zerobuddy_show, +}; +#endif + static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone) { int zid; @@ -1628,6 +1663,9 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, } seq_printf(m, "\n pages free %lu" +#ifdef CONFIG_PAGE_PREZERO + "\n zeroed %lu" +#endif "\n min %lu" "\n low %lu" "\n high %lu" @@ -1636,6 +1674,9 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, "\n managed %lu" "\n cma %lu", zone_page_state(zone, NR_FREE_PAGES), +#ifdef CONFIG_PAGE_PREZERO + zone_page_state(zone, NR_ZEROED_PAGES), +#endif min_wmark_pages(zone), low_wmark_pages(zone), high_wmark_pages(zone), @@ -2049,6 +2090,9 @@ void __init init_mm_internals(void) proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op); proc_create_seq("vmstat", 0444, NULL, &vmstat_op); proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op); +#ifdef CONFIG_PAGE_PREZERO + proc_create_seq("zerobuddyinfo", 0444, NULL, &zerobuddy_op); +#endif #endif } -- Gitee From 04941b6ec5b6f9ba2698e9b8433e47003517c61b Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Wed, 31 Aug 2022 13:51:42 +0800 Subject: [PATCH 08/16] anolis: mm: prezero: introduce kprezerod ANBZ: #2100 This adds per-node kernel thread named "kprezerod" to periodically clear buddy pages asynchronously in background. 
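In outline (a summary of prezero_do_work() and prezero_one_page() below): each kprezerod thread wakes up periodically, walks the orders from min_order up to MAX_ORDER - 1 in its node's normal zone, and skips an order once pre-zeroed pages already account for max_percent of its free pages; otherwise it isolates non-zeroed MIGRATE_MOVABLE buddy pages, clears them outside the zone lock, and puts them back as pre-zeroed, bounded by batch_pages pages per order per pass. Only ZONE_NORMAL and MIGRATE_MOVABLE are handled for now.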
Multiple sysfs interfaces are introduced to adjust the aggressiveness of kprezerod, as follows. - /sys/kernel/mm/prezero/enabled - /sys/kernel/mm/prezero/min_order - /sys/kernel/mm/prezero/max_percent - /sys/kernel/mm/prezero/batch_pages - /sys/kernel/mm/prezero/sleep_msecs Signed-off-by: Xiaochen Shen Co-developed-by: Xu Yu Signed-off-by: Xu Yu Reviewed-by: Gang Deng Link: https://gitee.com/anolis/cloud-kernel/pulls/702 --- include/linux/prezero.h | 32 +++- mm/Makefile | 1 + mm/prezero.c | 321 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 352 insertions(+), 2 deletions(-) create mode 100644 mm/prezero.c diff --git a/include/linux/prezero.h b/include/linux/prezero.h index 2dca9719a9be..365ecef6dc36 100644 --- a/include/linux/prezero.h +++ b/include/linux/prezero.h @@ -3,21 +3,49 @@ #define _LINUX_PREZERO_H #include -#include + +enum prezero_flag { + PREZERO_BUDDY_FLAG, + PREZERO_PCP_FLAG, + PREZERO_MAX_FLAG, +}; #ifdef CONFIG_PAGE_PREZERO DECLARE_STATIC_KEY_FALSE(prezero_enabled_key); +extern unsigned long prezero_enabled_flag; static inline bool prezero_enabled(void) { return static_branch_unlikely(&prezero_enabled_key); } +static inline bool prezero_buddy_enabled(void) +{ + return prezero_enabled() && + (prezero_enabled_flag & (1 << PREZERO_BUDDY_FLAG)); +} + +static inline bool prezero_pcp_enabled(void) +{ + return prezero_enabled() && + (prezero_enabled_flag & (1 << PREZERO_PCP_FLAG)); +} + #else static inline bool prezero_enabled(void) { return false; } -#endif /* CONFIG_KZEROPAGED */ + +static inline bool prezero_buddy_enabled(void) +{ + return false; +} + +static inline bool prezero_pcp_enabled(void) +{ + return false; +} +#endif /* CONFIG_PAGE_PREZERO */ #endif /* _LINUX_PREZERO_H */ diff --git a/mm/Makefile b/mm/Makefile index 42080afa8cd0..ab7463c31cbc 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -132,3 +132,4 @@ obj-$(CONFIG_PAGE_REPORTING) += page_reporting.o obj-y += unevictable.o obj-$(CONFIG_DUPTEXT) += page_dup.o obj-$(CONFIG_FAST_COPY_MM) += fast_copy_mm.o +obj-$(CONFIG_PAGE_PREZERO) += prezero.o diff --git a/mm/prezero.c b/mm/prezero.c new file mode 100644 index 000000000000..c91af11c31e6 --- /dev/null +++ b/mm/prezero.c @@ -0,0 +1,321 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include + +#include +#include "internal.h" + +DEFINE_STATIC_KEY_FALSE(prezero_enabled_key); +unsigned long prezero_enabled_flag; +static unsigned int prezero_min_order = 9; +static unsigned int prezero_max_percent = 50; +static unsigned int prezero_batch_pages = 4096; +static unsigned int prezero_sleep_msecs = 1000; +static struct task_struct *prezero_kthread[MAX_NUMNODES]; +static wait_queue_head_t kprezerod_wait[MAX_NUMNODES]; +static unsigned long kprezerod_sleep_expire[MAX_NUMNODES]; + +static void my_clear_page(struct page *page, unsigned int order) +{ + int i, numpages = 1 << order; + + for (i = 0; i < numpages; i++) + clear_highpage(page + i); +} + +static int prezero_one_page(struct zone *zone, unsigned int order, int mtype) +{ + struct free_area *area = &zone->free_area[order]; + struct list_head *list = &area->free_list[mtype]; + struct page *page_to_zero = NULL, *page, *next; + int err = -ENOMEM; + + /* + * Perform early check, if free area is empty there is + * nothing to process so we can skip this free_list. 
+ */ + if (list_empty(list)) + return err; + + /* Isolate a non-zeroed page */ + spin_lock_irq(&zone->lock); + list_for_each_entry_safe(page, next, list, lru) { + /* We are going to skip over the pre-zeroed pages. */ + if (PageZeroed(page)) + continue; + + if (__isolate_free_page(page, order)) + page_to_zero = page; + else + next = page; + + /* + * Make the next page in the free list the new head + * of the free list before we release the zone lock. + */ + if (&next->lru != list && !list_is_first(&next->lru, list)) + list_rotate_to_front(&next->lru, list); + + break; + } + spin_unlock_irq(&zone->lock); + + /* Failed to isolate non-zeroed page */ + if (!page_to_zero) + return err; + + /* Clear the page */ + my_clear_page(page, order); + __SetPageZeroed(page); + + /* Putback the pre-zeroed page */ + spin_lock_irq(&zone->lock); + mtype = get_pageblock_migratetype(page); + __putback_isolated_page(page, order, mtype); + spin_unlock_irq(&zone->lock); + + return err; +} + +static void prezero_do_work(pg_data_t *pgdat) +{ + struct zone *zone = &pgdat->node_zones[ZONE_NORMAL]; + /* NOTE only MIGRATE_MOVABLE is supported currently */ + int mtype = MIGRATE_MOVABLE; + unsigned int order; + unsigned long nr_free, nr_zeroed; + unsigned int nr_done; + + for (order = prezero_min_order; order < MAX_ORDER; order++) { + /* + * Use data_race to avoid KCSAN warning since access + * to nr_free and nr_zeroed is lockless here. + * + * Since only MIGRATE_MOVABLE is supported at present, + * to set prezero_max_percent too high could prevent + * kprezerod from early bailing out. + */ + nr_free = data_race(zone->free_area[order].nr_free); + /* Ditto. */ + nr_zeroed = data_race(zone->free_area[order].nr_zeroed); + + if (nr_zeroed >= nr_free * prezero_max_percent / 100) + continue; + + nr_done = 0; + while (nr_done < prezero_batch_pages) { + if (prezero_one_page(zone, order, mtype) < 0) + break; + nr_done += 1 << order; + } + } +} + +static bool kprezerod_should_wakeup(int nid) +{ + return kthread_should_stop() || + time_after_eq(jiffies, kprezerod_sleep_expire[nid]); +} + +static int prezero(void *data) +{ + pg_data_t *pgdat = (pg_data_t *)data; + int nid = pgdat->node_id; + + set_freezable(); + + while (!kthread_should_stop()) { + unsigned long sleep_jiffies = + msecs_to_jiffies(prezero_sleep_msecs); + + kprezerod_sleep_expire[nid] = jiffies + sleep_jiffies; + if (wait_event_freezable_timeout(kprezerod_wait[nid], + kprezerod_should_wakeup(nid), + sleep_jiffies)) + prezero_do_work(pgdat); + } + + return 0; +} + +static void __start_stop_kprezerod(int nid) +{ + if (prezero_enabled()) { + if (!prezero_kthread[nid]) + prezero_kthread[nid] = kthread_run(prezero, + NODE_DATA(nid), "kprezerod%d", nid); + if (IS_ERR(prezero_kthread[nid])) { + pr_err("failed to run kprezerod on node %d\n", nid); + prezero_kthread[nid] = NULL; + } + } else if (prezero_kthread[nid]) { + kthread_stop(prezero_kthread[nid]); + prezero_kthread[nid] = NULL; + } +} + +static void start_stop_kprezerod(void) +{ + int nid; + + for_each_node_state(nid, N_MEMORY) + __start_stop_kprezerod(nid); +} + +static int __init setup_prezero(char *str) +{ + unsigned long val; + int err; + + if (!str) + return 0; + + err = kstrtoul(str, 0, &val); + if (err < 0 || val > (1UL << PREZERO_MAX_FLAG) - 1) + return 0; + + prezero_enabled_flag = val; + + return 1; +} +__setup("prezero=", setup_prezero); + +#ifdef CONFIG_SYSFS +static ssize_t prezero_show_enabled(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", 
prezero_enabled_flag); +} +static ssize_t prezero_store_enabled(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + static DEFINE_MUTEX(mutex); + unsigned long val; + int err; + ssize_t ret = count; + + mutex_lock(&mutex); + + err = kstrtoul(buf, 0, &val); + if (err < 0 || val > (1UL << PREZERO_MAX_FLAG) - 1) { + ret = -EINVAL; + goto out; + } + + prezero_enabled_flag = val; + + if (prezero_enabled_flag) + static_branch_enable(&prezero_enabled_key); + else + static_branch_disable(&prezero_enabled_key); + + start_stop_kprezerod(); + +out: + mutex_unlock(&mutex); + return ret; +} +static struct kobj_attribute prezero_attr_enabled = + __ATTR(enabled, 0644, prezero_show_enabled, + prezero_store_enabled); + +#define PREZERO_SYSFS_ATTR(name, field, min_val, max_val, store_cb) \ +static ssize_t prezero_show_##name(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf) \ +{ \ + return sprintf(buf, "%u\n", field); \ +} \ +static ssize_t prezero_store_##name(struct kobject *kobj, \ + struct kobj_attribute *attr, const char *buf, size_t count) \ +{ \ + unsigned long val; \ + int ret; \ + \ + ret = kstrtoul(buf, 0, &val); \ + if (ret || val < min_val || val > max_val) \ + return -EINVAL; \ + \ + field = val; \ + store_cb(); \ + return count; \ +} \ +static struct kobj_attribute prezero_attr_##name = \ + __ATTR(name, 0644, prezero_show_##name, prezero_store_##name) + +static void dummy_store_cb(void) +{ +} + +static void prezero_sleep_msecs_store_cb(void) +{ + int nid; + + for_each_node_state(nid, N_MEMORY) { + kprezerod_sleep_expire[nid] = 0; + wake_up_interruptible(&kprezerod_wait[nid]); + } +} + +PREZERO_SYSFS_ATTR(min_order, prezero_min_order, 0, MAX_ORDER - 1, + dummy_store_cb); +PREZERO_SYSFS_ATTR(max_percent, prezero_max_percent, 0, 100, + dummy_store_cb); +PREZERO_SYSFS_ATTR(batch_pages, prezero_batch_pages, 0, UINT_MAX, + dummy_store_cb); +PREZERO_SYSFS_ATTR(sleep_msecs, prezero_sleep_msecs, 0, UINT_MAX, + prezero_sleep_msecs_store_cb); + +static struct attribute *prezero_attrs[] = { + &prezero_attr_enabled.attr, + &prezero_attr_min_order.attr, + &prezero_attr_max_percent.attr, + &prezero_attr_batch_pages.attr, + &prezero_attr_sleep_msecs.attr, + NULL, +}; + +static struct attribute_group prezero_attr_group = { + .attrs = prezero_attrs, + .name = "prezero", +}; + +static int __init prezero_sysfs_init(void) +{ + int err; + + err = sysfs_create_group(mm_kobj, &prezero_attr_group); + if (err) + pr_err("failed to register prezero group\n"); + + return err; +} +#else +static inline int __init prezero_sysfs_init(void) +{ + return 0; +} +#endif /* CONFIG_SYSFS */ + +static int __init prezero_init(void) +{ + int ret; + int nid; + + ret = prezero_sysfs_init(); + if (ret < 0) + return ret; + + for_each_node_state(nid, N_MEMORY) { + init_waitqueue_head(&kprezerod_wait[nid]); + __start_stop_kprezerod(nid); + } + + return 0; +} +module_init(prezero_init); -- Gitee From 5dda2bd7a026dec39a5958e58b365ae206921995 Mon Sep 17 00:00:00 2001 From: Xu Yu Date: Wed, 7 Sep 2022 18:43:47 +0800 Subject: [PATCH 09/16] anolis: mm: prezero: set PG_zeroed after free to buddy ANBZ: #2100 To set PG_zeroed before page is free to buddy will trigger VM_BUG_ON_PAGE in __free_one_page(), as follows. page dumped because: VM_BUG_ON_PAGE(page->flags & (((1UL << 29) - 1) & ~(1UL << PG_hwpoison))) ------------[ cut here ]------------ kernel BUG at mm/page_alloc.c:1031! This is reasonable because PG_zeroed is only valid for buddy pages. We should set PG_zeroed after page is free to buddy. 
When free (putback) a zeroed page, if page was not comingled with another page we can consider the page to be zeroed since the page hasn't been modified, otherwise we will need to discard the zeroed state of this page. Signed-off-by: Xu Yu Reviewed-by: Gang Deng Link: https://gitee.com/anolis/cloud-kernel/pulls/702 --- mm/page_alloc.c | 7 ------- mm/prezero.c | 13 ++++++++++++- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index dd49bedfc33f..54feb812659b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1096,13 +1096,6 @@ static inline void __free_one_page(struct page *page, done_merging: set_buddy_order(page, order); -#ifdef CONFIG_PAGE_PREZERO - if (PageZeroed(page)) { - zone->free_area[order].nr_zeroed++; - __mod_zone_page_state(zone, NR_ZEROED_PAGES, 1 << order); - } -#endif - if (fpi_flags & FPI_TO_TAIL) to_tail = true; else if (is_shuffle_order(order)) diff --git a/mm/prezero.c b/mm/prezero.c index c91af11c31e6..02d9c353fd5e 100644 --- a/mm/prezero.c +++ b/mm/prezero.c @@ -72,12 +72,23 @@ static int prezero_one_page(struct zone *zone, unsigned int order, int mtype) /* Clear the page */ my_clear_page(page, order); - __SetPageZeroed(page); /* Putback the pre-zeroed page */ spin_lock_irq(&zone->lock); mtype = get_pageblock_migratetype(page); __putback_isolated_page(page, order, mtype); + + /* + * If page was not comingled with another page we can consider + * the page to be zeroed since the page hasn't been modified, + * otherwise we will need to discard the zeroed state of this page. + */ + if (PageBuddy(page) && buddy_order(page) == order) { + __SetPageZeroed(page); + zone->free_area[order].nr_zeroed++; + __mod_zone_page_state(zone, NR_ZEROED_PAGES, 1 << order); + } + spin_unlock_irq(&zone->lock); return err; -- Gitee From 5dc85c0e97bc970534ceb19de0cb8db0049b426b Mon Sep 17 00:00:00 2001 From: Xu Yu Date: Fri, 2 Sep 2022 12:37:23 +0800 Subject: [PATCH 10/16] anolis: mm: prezero: pass gfp parameter to rmqueue_bulk ANBZ: #2100 This passes the gfp parameter to __rmqueue_pcplist(), and then rmqueue_bulk(). No functional changes. 
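The flags are not consumed here yet; a later patch in this series checks __GFP_ZERO in rmqueue_bulk() so that per-cpu list refills can prefer pre-zeroed pages.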
Signed-off-by: Xu Yu Reviewed-by: Xiaochen Shen Reviewed-by: Gang Deng Link: https://gitee.com/anolis/cloud-kernel/pulls/702 --- mm/page_alloc.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 54feb812659b..afb550a33b2b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2991,7 +2991,8 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype, */ static int rmqueue_bulk(struct zone *zone, unsigned int order, unsigned long count, struct list_head *list, - int migratetype, unsigned int alloc_flags) + gfp_t gfp_flags, int migratetype, + unsigned int alloc_flags) { int i, alloced = 0; @@ -3484,17 +3485,16 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z) } /* Remove page from the per-cpu list, caller must protect the list */ -static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype, - unsigned int alloc_flags, - struct per_cpu_pages *pcp, - struct list_head *list) +static struct page *__rmqueue_pcplist(struct zone *zone, gfp_t gfp_flags, + int migratetype, unsigned int alloc_flags, + struct per_cpu_pages *pcp, struct list_head *list) { struct page *page; do { if (list_empty(list)) { pcp->count += rmqueue_bulk(zone, 0, - pcp->batch, list, + pcp->batch, list, gfp_flags, migratetype, alloc_flags); if (unlikely(list_empty(list))) return NULL; @@ -3521,7 +3521,8 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone, local_irq_save(flags); pcp = &this_cpu_ptr(zone->pageset)->pcp; list = &pcp->lists[migratetype]; - page = __rmqueue_pcplist(zone, migratetype, alloc_flags, pcp, list); + page = __rmqueue_pcplist(zone, gfp_flags, migratetype, alloc_flags, + pcp, list); if (page) { __count_zid_vm_events(PGALLOC, page_zonenum(page), 1); zone_statistics(preferred_zone, zone); -- Gitee From ab26cc0bf714bf9615c65b2dc51f979215f0e51a Mon Sep 17 00:00:00 2001 From: Xu Yu Date: Wed, 31 Aug 2022 22:37:23 +0800 Subject: [PATCH 11/16] anolis: mm: prezero: allocate page from tail for __GFP_ZERO ANBZ: #2100 This allocates page from tail of free_list for page allocation with __GFP_ZERO flag, when the prezero feature is enabled. 
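Pre-zeroed pages are kept toward the tail of the free lists (for example, expand() adds zeroed remainders with add_to_free_list_tail()), so taking a __GFP_ZERO allocation from the tail makes a hit on a pre-zeroed page most likely, while other allocations keep coming from the head.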
Signed-off-by: Xu Yu Reviewed-by: Xiaochen Shen Reviewed-by: Gang Deng Link: https://gitee.com/anolis/cloud-kernel/pulls/702 --- include/linux/mmzone.h | 4 ++++ mm/page_alloc.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 8d797fffb4a5..7e7e46f2136a 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -605,6 +605,10 @@ struct zone { bool contiguous; +#ifdef CONFIG_PAGE_PREZERO + bool alloc_zero; +#endif + ZONE_PADDING(_pad3_) /* Zone statistics */ atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index afb550a33b2b..f96d84154ce7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2409,12 +2409,25 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, unsigned int current_order; struct free_area *area; struct page *page; + struct list_head __maybe_unused *list; /* Find a page of the appropriate size in the preferred list */ for (current_order = order; current_order < MAX_ORDER; ++current_order) { bool zeroed; area = &(zone->free_area[current_order]); +#ifdef CONFIG_PAGE_PREZERO + list = &area->free_list[migratetype]; + + if (unlikely(list_empty(list))) + page = NULL; + else if (zone->alloc_zero) + page = list_last_entry(list, struct page, lru); + else + page = list_first_entry(list, struct page, lru); +#else page = get_page_from_free_area(area, migratetype); +#endif + if (!page) continue; @@ -2997,6 +3010,9 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, int i, alloced = 0; spin_lock(&zone->lock); +#ifdef CONFIG_PAGE_PREZERO + zone->alloc_zero = prezero_pcp_enabled() && (gfp_flags & __GFP_ZERO); +#endif for (i = 0; i < count; ++i) { struct page *page = __rmqueue(zone, order, migratetype, alloc_flags); @@ -3030,6 +3046,9 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, * pages added to the pcp list. */ __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); +#ifdef CONFIG_PAGE_PREZERO + zone->alloc_zero = false; +#endif spin_unlock(&zone->lock); return alloced; } @@ -3563,6 +3582,10 @@ struct page *rmqueue(struct zone *preferred_zone, WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1)); spin_lock_irqsave(&zone->lock, flags); +#ifdef CONFIG_PAGE_PREZERO + zone->alloc_zero = prezero_buddy_enabled() && (gfp_flags & __GFP_ZERO); +#endif + do { page = NULL; /* @@ -3579,6 +3602,11 @@ struct page *rmqueue(struct zone *preferred_zone, if (!page) page = __rmqueue(zone, order, migratetype, alloc_flags); } while (page && check_new_pages(page, order)); + +#ifdef CONFIG_PAGE_PREZERO + zone->alloc_zero = false; +#endif + spin_unlock(&zone->lock); if (!page) goto failed; -- Gitee From 52b15044f8e57c4e4c5978c797dc77c68ef63619 Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Wed, 31 Aug 2022 22:51:50 +0800 Subject: [PATCH 12/16] anolis: mm: prezero: make anon THP use __GFP_ZERO ANBZ: #2100 This makes THP use __GFP_ZERO flag when allocating pages, when the prezero feature is enabled. 
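With __GFP_ZERO the huge page arrives already zeroed, either taken from the pre-zeroed pool or cleared by the page allocator, so __do_huge_pmd_anonymous_page() can skip its explicit clear_huge_page() call; that is what the new zeroed argument conveys.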
[ Based on Dave Hansen's original patch ] Signed-off-by: Xiaochen Shen Signed-off-by: Xu Yu Reviewed-by: Gang Deng Link: https://gitee.com/anolis/cloud-kernel/pulls/702 --- mm/huge_memory.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 904773bb34f9..1edf983f84d3 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -719,7 +720,7 @@ unsigned long hugetext_get_unmapped_area(struct file *filp, unsigned long addr, #endif /* CONFIG_HUGETEXT */ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf, - struct page *page, gfp_t gfp) + struct page *page, gfp_t gfp, bool zeroed) { struct vm_area_struct *vma = vmf->vma; pgtable_t pgtable; @@ -742,7 +743,9 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf, goto release; } - clear_huge_page(page, vmf->address, HPAGE_PMD_NR); + if (!zeroed) + clear_huge_page(page, vmf->address, HPAGE_PMD_NR); + /* * The memory barrier inside __SetPageUptodate makes sure that * clear_huge_page writes become visible before the set_pmd_at() @@ -854,6 +857,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf) gfp_t gfp; struct page *page; unsigned long haddr = vmf->address & HPAGE_PMD_MASK; + bool zeroed = false; if (!transhuge_vma_suitable(vma, haddr)) return VM_FAULT_FALLBACK; @@ -900,13 +904,19 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf) return ret; } gfp = alloc_hugepage_direct_gfpmask(vma); + + if (prezero_enabled()) { + gfp |= __GFP_ZERO; + zeroed = true; + } + page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER); if (unlikely(!page)) { count_vm_event(THP_FAULT_FALLBACK); return VM_FAULT_FALLBACK; } prep_transhuge_page(page); - return __do_huge_pmd_anonymous_page(vmf, page, gfp); + return __do_huge_pmd_anonymous_page(vmf, page, gfp, zeroed); } static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, -- Gitee From 77b5c2191220214059422cdb2ac367aafb59282a Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 6 Sep 2022 13:04:14 +0800 Subject: [PATCH 13/16] anolis: dmaengine: idxd: add memset operation ANBZ: #2100 This adds memset preparation function to support DSA MEMFILL operation. The dmaengine API provides an int type, but it is really an 8bit pattern. DSA supports a 64bit pattern, and therefore the 8bit pattern will be replicated to 64bits. 
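As a worked example of the replication below: value = 0xA5 yields pattern = 0xA5A5 after the first shift-or, 0xA5A5A5A5 after the second, and 0xA5A5A5A5A5A5A5A5 after the third; value = 0 keeps the all-zero fill pattern, which is the case the page clear engine added later in this series uses.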
[xuyu: optimize idxd_dma_prep_memset] Signed-off-by: Dave Jiang Co-developed-by: Xiaochen Shen Signed-off-by: Xiaochen Shen Signed-off-by: Kun(llfl) Signed-off-by: Xu Yu Reviewed-by: Gang Deng Link: https://gitee.com/anolis/cloud-kernel/pulls/702 --- drivers/dma/idxd/dma.c | 46 ++++++++++++++++++++++++++++++++++++ drivers/dma/idxd/registers.h | 1 + 2 files changed, 47 insertions(+) diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index 04511a5be03e..7239cc51bb7c 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -272,6 +272,47 @@ idxd_dma_prep_memcpy_sg(struct dma_chan *chan, return &desc->txd; } +static struct dma_async_tx_descriptor * +idxd_dma_prep_memset(struct dma_chan *c, dma_addr_t dma_dest, int value, + size_t len, unsigned long flags) +{ + struct idxd_wq *wq = to_idxd_wq(c); + u32 desc_flags; + struct idxd_desc *desc; + u64 pattern = 0; + + if (wq->state != IDXD_WQ_ENABLED) + return NULL; + + if (len > wq->max_xfer_bytes) + return NULL; + + op_flag_setup(flags, &desc_flags); + desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK); + if (IS_ERR(desc)) + return NULL; + + /* + * The dmaengine API provides an int 'value', but it is really an 8bit + * pattern. DSA supports a 64bit pattern, and therefore the 8bit pattern + * will be replicated to 64bits. + */ + if (value) { + pattern = value & 0xff; + pattern |= pattern << 8; + pattern |= pattern << 16; + pattern |= pattern << 32; + } + + idxd_prep_desc_common(wq, desc->hw, DSA_OPCODE_MEMFILL, + pattern, dma_dest, len, desc->compl_dma, + desc_flags); + + desc->txd.flags = flags; + + return &desc->txd; +} + static int idxd_dma_alloc_chan_resources(struct dma_chan *chan) { struct idxd_wq *wq = to_idxd_wq(chan); @@ -435,6 +476,11 @@ int idxd_register_dma_device(struct idxd_device *idxd) dma->device_prep_dma_memcpy_sg = idxd_dma_prep_memcpy_sg; } + if (idxd->hw.opcap.bits[0] & IDXD_OPCAP_MEMFILL) { + dma_cap_set(DMA_MEMSET, dma->cap_mask); + dma->device_prep_dma_memset = idxd_dma_prep_memset; + } + dma->device_tx_status = idxd_dma_tx_status; dma->device_issue_pending = idxd_dma_issue_pending; dma->device_alloc_chan_resources = idxd_dma_alloc_chan_resources; diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 982cf9915341..acee6cadd80e 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -92,6 +92,7 @@ union engine_cap_reg { #define IDXD_OPCAP_NOOP 0x0001 #define IDXD_OPCAP_BATCH 0x0002 #define IDXD_OPCAP_MEMMOVE 0x0008 +#define IDXD_OPCAP_MEMFILL BIT(DSA_OPCODE_MEMFILL) struct opcap { u64 bits[4]; }; -- Gitee From f6549d53936f54ba03ae2cbc7a88dfdfa8b5664c Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Tue, 6 Sep 2022 13:04:15 +0800 Subject: [PATCH 14/16] anolis: dmaengine: add a new DMA flag for direct data writes to be nontemporal ANBZ: #2100 This adds DMA prep flag DMA_PREP_NONTEMPORAL for dma operation preparation. With this flag set, direct data writes explicitly to memory instead of CPU cache if hardware supports it (e.g., Intel DSA device). 
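For example, the page clear engine added later in this series passes DMA_PREP_NONTEMPORAL when its hw_flag_cc knob is 0; the idxd driver then leaves IDXD_OP_FLAG_CC unset, so DSA MEMFILL writes go directly to memory instead of allocating into the CPU cache.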
Signed-off-by: Xiaochen Shen Reviewed-by: Dave Jiang Signed-off-by: Kun(llfl) Signed-off-by: Xu Yu Reviewed-by: Gang Deng Link: https://gitee.com/anolis/cloud-kernel/pulls/702 --- drivers/dma/idxd/dma.c | 14 +++++++++++++- include/linux/dmaengine.h | 3 +++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index 7239cc51bb7c..25e3cca9ce85 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -73,13 +73,25 @@ void idxd_dma_complete_txd(struct idxd_desc *desc, idxd_free_desc(desc->wq, desc); } -static void op_flag_setup(unsigned long flags, u32 *desc_flags) +static inline void op_control_flag_setup(unsigned long flags, u32 *desc_flags) { *desc_flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR; if (flags & DMA_PREP_INTERRUPT) *desc_flags |= IDXD_OP_FLAG_RCI; } +static inline void op_mem_flag_setup(unsigned long flags, u32 *desc_flags) +{ + if (!(flags & DMA_PREP_NONTEMPORAL)) + *desc_flags |= IDXD_OP_FLAG_CC; +} + +static inline void op_flag_setup(unsigned long flags, u32 *desc_flags) +{ + op_control_flag_setup(flags, desc_flags); + op_mem_flag_setup(flags, desc_flags); +} + static inline void set_completion_address(struct idxd_desc *desc, u64 *compl_addr) { diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 2f400f83040c..5bd995a479dc 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -189,6 +189,8 @@ struct dma_interleaved_template { * transaction is marked with DMA_PREP_REPEAT will cause the new transaction * to never be processed and stay in the issued queue forever. The flag is * ignored if the previous transaction is not a repeated transaction. + * @DMA_PREP_NONTEMPORAL - tell the driver that the transaction shall + * direct data writes to memory instead of CPU cache if hardware supports. */ enum dma_ctrl_flags { DMA_PREP_INTERRUPT = (1 << 0), @@ -201,6 +203,7 @@ enum dma_ctrl_flags { DMA_PREP_CMD = (1 << 7), DMA_PREP_REPEAT = (1 << 8), DMA_PREP_LOAD_EOT = (1 << 9), + DMA_PREP_NONTEMPORAL = (1 << 10), }; /** -- Gitee From 14dc21ee126a13b5571e14aa950826b4d378fddd Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Wed, 7 Sep 2022 00:36:32 +0800 Subject: [PATCH 15/16] anolis: mm: prezero: add page clear engine with DMA device hardware offloading ANBZ: #2100 Page clear engine allows to use a DMA device through the dmaengine API to clear (zero) kernel memory asynchronously. It initially requests a DMA channel with DMA_MEMSET capability on each NUMA node and uses the DMA device to clear high order pages. The preference is to request the DMA channel from local NUMA node. If it is not available, try again to request the DMA channel from any NUMA node. DMA engine APIs are called to prepare and submit DMA descriptors, and to check completion status. The dst_addr of descriptor is filled with the DMA mapped address of the page to be cleared. In addition, interrupt (async callback) mode for DMA completion of hardware page clear engine for lower CPU utilization is supported. User may configure the DMA device on each NUMA node before enabling this feature. After the DMA device is configured, user can set up page clear engine with sysfs interfaces as follows. 
- /sys/kernel/mm/prezero/page_clear_engine/hw_enabled - /sys/kernel/mm/prezero/page_clear_engine/hw_flag_cc - /sys/kernel/mm/prezero/page_clear_engine/hw_polling Signed-off-by: Xiaochen Shen Signed-off-by: Kun(llfl) Signed-off-by: Xu Yu Reviewed-by: Gang Deng Link: https://gitee.com/anolis/cloud-kernel/pulls/702 --- include/linux/vm_event_item.h | 2 + mm/prezero.c | 319 +++++++++++++++++++++++++++++++++- mm/vmstat.c | 2 + 3 files changed, 318 insertions(+), 5 deletions(-) diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index ee02644d933a..7706d404aa0e 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -124,6 +124,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #ifdef CONFIG_PAGE_PREZERO PREZERO_ALLOC, PREZERO_ALLOC_PAGES, + PREZERO_HW_CLEAR, + PREZERO_HW_CLEAR_PAGES, #endif NR_VM_EVENT_ITEMS }; diff --git a/mm/prezero.c b/mm/prezero.c index 02d9c353fd5e..efe3b24a16ca 100644 --- a/mm/prezero.c +++ b/mm/prezero.c @@ -7,6 +7,8 @@ #include #include #include +#include +#include #include #include "internal.h" @@ -21,10 +23,26 @@ static struct task_struct *prezero_kthread[MAX_NUMNODES]; static wait_queue_head_t kprezerod_wait[MAX_NUMNODES]; static unsigned long kprezerod_sleep_expire[MAX_NUMNODES]; -static void my_clear_page(struct page *page, unsigned int order) +static DEFINE_STATIC_KEY_FALSE(prezero_hw_enabled_key); +static bool prezero_hw_flag_cc; +static bool prezero_hw_polling; +static inline bool prezero_hw_enabled(void) +{ + return static_branch_unlikely(&prezero_hw_enabled_key); +} +static int clear_page_hw(struct page *page, int order, int node); + +static void my_clear_page(struct page *page, unsigned int order, int node) { int i, numpages = 1 << order; + if (prezero_hw_enabled() && + !clear_page_hw(page, order, node)) { + count_vm_event(PREZERO_HW_CLEAR); + __count_vm_events(PREZERO_HW_CLEAR_PAGES, numpages); + return; + } + for (i = 0; i < numpages; i++) clear_highpage(page + i); } @@ -71,7 +89,7 @@ static int prezero_one_page(struct zone *zone, unsigned int order, int mtype) return err; /* Clear the page */ - my_clear_page(page, order); + my_clear_page(page, order, zone_to_nid(zone)); /* Putback the pre-zeroed page */ spin_lock_irq(&zone->lock); @@ -179,6 +197,211 @@ static void start_stop_kprezerod(void) __start_stop_kprezerod(nid); } +/* + * Page clear engine support - hardware offloading for page clear. + * + * Page clear engine allows to use a DMA device through the dmaengine API + * to clear (zero) page asynchronously. + * + * User may configure the DMA device on each NUMA node before enabling this + * feature. + */ +#define DMA_TIMEOUT 5000 +static DEFINE_MUTEX(nodedata_mutex); +static struct nodedata { + struct dma_chan *dma_chan; +} *nodedata; + +static void dma_completion_callback(void *arg) +{ + struct completion *done = arg; + + complete(done); +} + +/* + * DMA engine APIs are called to prepare and submit DMA descriptors, and to + * check completion status. The dest_addr of descriptor is filled with the DMA + * mapped address of the page to be cleared. 
+ */ +static int clear_page_hw(struct page *page, int order, int node) +{ + struct dma_chan *dma_chan = NULL; + struct device *dev; + struct dma_async_tx_descriptor *tx = NULL; + dma_addr_t dst_dma; + dma_cookie_t cookie; + enum dma_status status; + unsigned long dma_flags = 0; + bool hw_flag_cc = prezero_hw_flag_cc; + bool hw_polling = prezero_hw_polling; + int ret = 0; + DECLARE_COMPLETION_ONSTACK(done); + + mutex_lock(&nodedata_mutex); + /* Page clear engine is already disabled */ + if (!nodedata) { + ret = -ENODEV; + goto err_nodedata; + } + + dma_chan = nodedata[node].dma_chan; + dev = dma_chan->device->dev; + + /* DMA map page */ + dst_dma = dma_map_page(dev, page, 0, PAGE_SIZE << order, + DMA_FROM_DEVICE); + ret = dma_mapping_error(dev, dst_dma); + if (ret) + goto err_nodedata; + + if (!hw_flag_cc) + dma_flags |= DMA_PREP_NONTEMPORAL; + + if (!hw_polling) + dma_flags |= DMA_PREP_INTERRUPT; + + /* Prep DMA memset */ + tx = dmaengine_prep_dma_memset(dma_chan, dst_dma, 0, + PAGE_SIZE << order, dma_flags); + if (!tx) { + pr_info("Failed to prep DMA memset on node %d\n", node); + ret = -EIO; + goto err_prep; + } + + if (!hw_polling) { + tx->callback = dma_completion_callback; + tx->callback_param = &done; + } + + /* Submit DMA descriptor */ + cookie = dmaengine_submit(tx); + if (dma_submit_error(cookie)) { + pr_info("Failed to submit DMA descriptor on node %d\n", node); + ret = -EIO; + goto err_prep; + } + + if (hw_polling) { + /* Check DMA completion status with polling */ + status = dma_sync_wait(dma_chan, cookie); + if (status != DMA_COMPLETE) { + pr_info("Failed to poll DMA completion status on node %d\n", node); + ret = -EIO; + } + } else { + dma_async_issue_pending(dma_chan); + if (!wait_for_completion_timeout(&done, + msecs_to_jiffies(DMA_TIMEOUT))) { + ret = -EIO; + goto err_prep; + } + status = dma_async_is_tx_complete(dma_chan, cookie); + if (status != DMA_COMPLETE) { + pr_info("Failed to check DMA completion status on node %d\n", node); + ret = -EIO; + } + } + +err_prep: + dma_unmap_page(dev, dst_dma, PAGE_SIZE << order, DMA_FROM_DEVICE); +err_nodedata: + mutex_unlock(&nodedata_mutex); + return ret; +} + +static bool engine_filter_fn(struct dma_chan *chan, void *node) +{ + return dev_to_node(&chan->dev->device) == (int)(unsigned long)node; +} + +/* + * It initially requests a DMA channel with DMA_MEMSET capability on each NUMA + * node and uses the DMA device to clear high order pages. + * + * The preference is to request the DMA channel from local NUMA node. If it is + * not available, try again to request the DMA channel from any NUMA node. 
+ */ +static int get_dma_chan(int node) +{ + dma_cap_mask_t mask; + + /* Request DMA channel by mask */ + dma_cap_zero(mask); + dma_cap_set(DMA_MEMSET, mask); + + /* Prefer to request DMA channel from local NUMA node if available */ + nodedata[node].dma_chan = dma_request_channel(mask, engine_filter_fn, + (void *)(unsigned long)node); + if (!nodedata[node].dma_chan) { + /* Try again to request the DMA channel from any NUMA node */ + nodedata[node].dma_chan = dma_request_chan_by_mask(&mask); + if (IS_ERR(nodedata[node].dma_chan)) { + pr_info("Failed to request DMA channel on node %d\n", node); + nodedata[node].dma_chan = NULL; + return -ENODEV; + } + } + + return 0; +} + +static int init_page_clear_engine(void) +{ + int node, num_nodes; + int ret; + + /* Page clear engine is already enabled */ + if (nodedata) + return 0; + + num_nodes = num_online_nodes(); + nodedata = kcalloc(num_nodes, sizeof(*nodedata), GFP_KERNEL); + if (!nodedata) + return -ENOMEM; + + for_each_online_node(node) { + ret = get_dma_chan(node); + if (ret) + goto fail; + } + + pr_info("Hardware page clear engine is enabled\n"); + return 0; + +fail: + for (node = 0; node < num_nodes; node++) { + if (nodedata[node].dma_chan) + dma_release_channel(nodedata[node].dma_chan); + } + + kfree(nodedata); + nodedata = NULL; + + return ret; +} + +static void exit_page_clear_engine(void) +{ + int node; + + /* Page clear engine is already disabled */ + if (!nodedata) + return; + + mutex_lock(&nodedata_mutex); + for_each_online_node(node) { + dma_release_channel(nodedata[node].dma_chan); + } + + kfree(nodedata); + nodedata = NULL; + mutex_unlock(&nodedata_mutex); + + pr_info("Hardware page clear engine is disabled\n"); +} + static int __init setup_prezero(char *str) { unsigned long val; @@ -293,17 +516,103 @@ static struct attribute *prezero_attrs[] = { static struct attribute_group prezero_attr_group = { .attrs = prezero_attrs, - .name = "prezero", +}; + +static ssize_t prezero_show_hw_enabled(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", prezero_hw_enabled()); +} +static ssize_t prezero_store_hw_enabled(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + static DEFINE_MUTEX(mutex); + unsigned long val; + int err; + ssize_t ret = count; + + mutex_lock(&mutex); + + err = kstrtoul(buf, 0, &val); + if (err < 0 || val > 1) { + ret = -EINVAL; + goto out; + } + + if (val) { + if (!prezero_hw_enabled()) { + err = init_page_clear_engine(); + if (!err) + static_branch_enable(&prezero_hw_enabled_key); + else + ret = err; + } + } else { + if (prezero_hw_enabled()) { + static_branch_disable(&prezero_hw_enabled_key); + exit_page_clear_engine(); + } + } + +out: + mutex_unlock(&mutex); + return ret; +} +static struct kobj_attribute prezero_attr_hw_enabled = + __ATTR(hw_enabled, 0644, prezero_show_hw_enabled, + prezero_store_hw_enabled); + +PREZERO_SYSFS_ATTR(hw_flag_cc, prezero_hw_flag_cc, 0, 1, dummy_store_cb); +PREZERO_SYSFS_ATTR(hw_polling, prezero_hw_polling, 0, 1, dummy_store_cb); + +static struct attribute *page_clear_engine_attrs[] = { + &prezero_attr_hw_enabled.attr, + &prezero_attr_hw_flag_cc.attr, + &prezero_attr_hw_polling.attr, + NULL, +}; + +static struct attribute_group page_clear_engine_attr_group = { + .attrs = page_clear_engine_attrs, + .name = "page_clear_engine", }; static int __init prezero_sysfs_init(void) { + struct kobject *prezero_kobj; int err; - err = sysfs_create_group(mm_kobj, &prezero_attr_group); - if (err) + /* + * err = 
sysfs_create_group(mm_kobj, &prezero_attr_group); + * if (err) + * pr_err("failed to register prezero group\n"); + */ + + + prezero_kobj = kobject_create_and_add("prezero", mm_kobj); + if (unlikely(!prezero_kobj)) { + pr_err("failed to create prezero kobject\n"); + return -ENOMEM; + } + + err = sysfs_create_group(prezero_kobj, &prezero_attr_group); + if (err) { pr_err("failed to register prezero group\n"); + goto delete_obj; + } + + err = sysfs_create_group(prezero_kobj, &page_clear_engine_attr_group); + if (err) { + pr_err("failed to register page_clear_engine group\n"); + goto remove_prezero_group; + } + + return 0; +remove_prezero_group: + sysfs_remove_group(prezero_kobj, &prezero_attr_group); +delete_obj: + kobject_put(prezero_kobj); return err; } #else diff --git a/mm/vmstat.c b/mm/vmstat.c index 4793b355ff91..0b6d3aa29ddf 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1359,6 +1359,8 @@ const char * const vmstat_text[] = { #ifdef CONFIG_PAGE_PREZERO "prezero_alloc", "prezero_alloc_pages", + "prezero_hw_clear", + "prezero_hw_clear_pages", #endif #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */ }; -- Gitee From 085d90d385cf26131a923f7ffabe9eeafd930e10 Mon Sep 17 00:00:00 2001 From: Xu Yu Date: Wed, 7 Sep 2022 14:41:48 +0800 Subject: [PATCH 16/16] anolis: configs: enable CONFIG_PAGE_PREZERO on x86 ANBZ: #2100 This enables CONFIG_PAGE_PREZERO on x86 by default. When there is support or requirement of DMA device hardware offloading on arm64, we will enable this config on arm64 as well. Signed-off-by: Xu Yu Reviewed-by: Gang Deng Link: https://gitee.com/anolis/cloud-kernel/pulls/702 --- arch/arm64/configs/anolis-debug_defconfig | 1 + arch/arm64/configs/anolis_defconfig | 1 + arch/x86/configs/anolis-debug_defconfig | 1 + arch/x86/configs/anolis_defconfig | 1 + 4 files changed, 4 insertions(+) diff --git a/arch/arm64/configs/anolis-debug_defconfig b/arch/arm64/configs/anolis-debug_defconfig index 2d548d6f3794..52a91a051c50 100644 --- a/arch/arm64/configs/anolis-debug_defconfig +++ b/arch/arm64/configs/anolis-debug_defconfig @@ -1051,6 +1051,7 @@ CONFIG_DAMON_DBGFS=y # end of Data Access Monitoring CONFIG_FAST_COPY_MM=y +# CONFIG_PAGE_PREZERO is not set # end of Memory Management options CONFIG_NET=y diff --git a/arch/arm64/configs/anolis_defconfig b/arch/arm64/configs/anolis_defconfig index 7474f1853999..0e31f05062f8 100644 --- a/arch/arm64/configs/anolis_defconfig +++ b/arch/arm64/configs/anolis_defconfig @@ -1070,6 +1070,7 @@ CONFIG_DAMON_DBGFS=y # end of Data Access Monitoring CONFIG_FAST_COPY_MM=y +# CONFIG_PAGE_PREZERO is not set # end of Memory Management options CONFIG_NET=y diff --git a/arch/x86/configs/anolis-debug_defconfig b/arch/x86/configs/anolis-debug_defconfig index 2b30caf97b5b..d20981d39318 100644 --- a/arch/x86/configs/anolis-debug_defconfig +++ b/arch/x86/configs/anolis-debug_defconfig @@ -1066,6 +1066,7 @@ CONFIG_DAMON_DBGFS=y # end of Data Access Monitoring CONFIG_FAST_COPY_MM=y +CONFIG_PAGE_PREZERO=y # end of Memory Management options CONFIG_NET=y diff --git a/arch/x86/configs/anolis_defconfig b/arch/x86/configs/anolis_defconfig index 55fd36474288..041718400cb2 100644 --- a/arch/x86/configs/anolis_defconfig +++ b/arch/x86/configs/anolis_defconfig @@ -1065,6 +1065,7 @@ CONFIG_DAMON_DBGFS=y # end of Data Access Monitoring CONFIG_FAST_COPY_MM=y +CONFIG_PAGE_PREZERO=y # end of Memory Management options CONFIG_NET=y -- Gitee