diff --git a/include/linux/memblock.h b/include/linux/memblock.h index b6b6cd1d91d16d062daa0c993008350e76330c27..40cb3d246557a20e4dc6638c4d033f4081a51134 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -400,6 +400,10 @@ static inline int memblock_get_region_node(const struct memblock_region *r) /* Flags for memblock allocation APIs */ #define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0) #define MEMBLOCK_ALLOC_ACCESSIBLE 0 +/* + * MEMBLOCK_ALLOC_NOLEAKTRACE avoids kmemleak tracing. It implies + * MEMBLOCK_ALLOC_ACCESSIBLE + */ #define MEMBLOCK_ALLOC_NOLEAKTRACE 1 /* We are using top down, so it is safe to use 0 here */ diff --git a/include/linux/swap.h b/include/linux/swap.h index bea0c0f1f640552e399ae08e48da79046e413176..14d4bd7f42627200d5f881cce457a8ff406bd98c 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -235,6 +235,7 @@ enum { }; #define SWAP_CLUSTER_MAX 32UL +#define SWAP_CLUSTER_MAX_SKIPPED (SWAP_CLUSTER_MAX << 10) #define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX /* Bit flag in swap_map */ diff --git a/mm/compaction.c b/mm/compaction.c index e2735752c374923662afa6f2a366ad6365ec8896..2e2caa4f86c34c11e1b60c48b35f57b53c374755 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -2491,7 +2491,8 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order, */ static enum compact_result compaction_suit_allocation_order(struct zone *zone, unsigned int order, - int highest_zoneidx, unsigned int alloc_flags) + int highest_zoneidx, unsigned int alloc_flags, + bool async) { unsigned long watermark; @@ -2500,6 +2501,23 @@ compaction_suit_allocation_order(struct zone *zone, unsigned int order, alloc_flags)) return COMPACT_SUCCESS; + /* + * For unmovable allocations (without ALLOC_CMA), check if there is enough + * free memory in the non-CMA pageblocks. Otherwise compaction could form + * the high-order page in CMA pageblocks, which would not help the + * allocation to succeed. 
However, limit the check to costly order async + * compaction (such as opportunistic THP attempts) because there is the + * possibility that compaction would migrate pages from non-CMA to CMA + * pageblock. + */ + if (order > PAGE_ALLOC_COSTLY_ORDER && async && + !(alloc_flags & ALLOC_CMA)) { + watermark = low_wmark_pages(zone) + compact_gap(order); + if (!__zone_watermark_ok(zone, 0, watermark, highest_zoneidx, + 0, zone_page_state(zone, NR_FREE_PAGES))) + return COMPACT_SKIPPED; + } + if (!compaction_suitable(zone, order, highest_zoneidx)) return COMPACT_SKIPPED; @@ -2535,7 +2553,8 @@ compact_zone(struct compact_control *cc, struct capture_control *capc) if (!is_via_compact_memory(cc->order)) { ret = compaction_suit_allocation_order(cc->zone, cc->order, cc->highest_zoneidx, - cc->alloc_flags); + cc->alloc_flags, + cc->mode == MIGRATE_ASYNC); if (ret != COMPACT_CONTINUE) return ret; } @@ -3046,7 +3065,8 @@ static bool kcompactd_node_suitable(pg_data_t *pgdat) ret = compaction_suit_allocation_order(zone, pgdat->kcompactd_max_order, - highest_zoneidx, ALLOC_WMARK_MIN); + highest_zoneidx, ALLOC_WMARK_MIN, + false); if (ret == COMPACT_CONTINUE) return true; } @@ -3087,7 +3107,8 @@ static void kcompactd_do_work(pg_data_t *pgdat) continue; ret = compaction_suit_allocation_order(zone, - cc.order, zoneid, ALLOC_WMARK_MIN); + cc.order, zoneid, ALLOC_WMARK_MIN, + false); if (ret != COMPACT_CONTINUE) continue; diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 7d210dd7438117a2e3078b7eef146abfccc870d3..f3b45e46195f2984cd161bc8c942a2482df94508 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1603,7 +1603,7 @@ static void kmemleak_scan(void) unsigned long phys = object->pointer; if (PHYS_PFN(phys) < min_low_pfn || - PHYS_PFN(phys + object->size) >= max_low_pfn) + PHYS_PFN(phys + object->size) > max_low_pfn) __paint_it(object, KMEMLEAK_BLACK); } diff --git a/mm/mm_init.c b/mm/mm_init.c index 0a3c20a003187665758beece2b57fcf5d0ee779a..6677aaa5972d4e97fe5604d64d73dab3903fe7c6 
100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1642,13 +1642,17 @@ void __init *memmap_alloc(phys_addr_t size, phys_addr_t align, { void *ptr; + /* + * Kmemleak will explicitly scan mem_map by traversing all valid + * `struct page`, so memblock does not need to be added to the scan list. + */ if (exact_nid) ptr = memblock_alloc_exact_nid_raw(size, align, min_addr, - MEMBLOCK_ALLOC_ACCESSIBLE, + MEMBLOCK_ALLOC_NOLEAKTRACE, nid); else ptr = memblock_alloc_try_nid_raw(size, align, min_addr, - MEMBLOCK_ALLOC_ACCESSIBLE, + MEMBLOCK_ALLOC_NOLEAKTRACE, nid); if (ptr && size > 0) diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 2628fc02be08b96c2bb802d8be7c0585f2de2668..ab5a51779f2b61b833cff4c844b5dcf1efc27179 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -31,6 +31,8 @@ #include #include +#include "internal.h" + /* * Allocate a block of memory to be used to back the virtual memory map * or to back the page tables that are used to create the mapping. @@ -42,8 +44,7 @@ static void * __ref __earlyonly_bootmem_alloc(int node, unsigned long align, unsigned long goal) { - return memblock_alloc_try_nid_raw(size, align, goal, - MEMBLOCK_ALLOC_ACCESSIBLE, node); + return memmap_alloc(size, align, goal, node, false); } void * __meminit vmemmap_alloc_block(unsigned long size, int node) diff --git a/mm/vmscan.c b/mm/vmscan.c index 8e605361b714cf9f21ecee1ccbcef4d663abad0d..35f78952b278e46bc56213d699626b95fcd61325 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2332,6 +2332,7 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan, unsigned long nr_skipped[MAX_NR_ZONES] = { 0, }; unsigned long skipped = 0; unsigned long scan, total_scan, nr_pages; + unsigned long max_nr_skipped = 0; LIST_HEAD(folios_skipped); total_scan = 0; @@ -2346,9 +2347,12 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan, nr_pages = folio_nr_pages(folio); total_scan += nr_pages; - if (folio_zonenum(folio) > sc->reclaim_idx) { + /* Using max_nr_skipped to
prevent hard LOCKUP */ + if (max_nr_skipped < SWAP_CLUSTER_MAX_SKIPPED && + (folio_zonenum(folio) > sc->reclaim_idx)) { nr_skipped[folio_zonenum(folio)] += nr_pages; move_to = &folios_skipped; + max_nr_skipped++; goto move; }