diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst index 45fe1813edfbbff227f9d8f5afde245050363214..2ad3bf320c55f539f375c5537c4b83823c6f626a 100644 --- a/Documentation/admin-guide/sysctl/vm.rst +++ b/Documentation/admin-guide/sysctl/vm.rst @@ -1035,6 +1035,7 @@ This is value OR'ed together of 1 Zone reclaim on 2 Zone reclaim writes dirty pages out 4 Zone reclaim swaps pages +8 Zone reclaim wakes up kswapd = =================================== zone_reclaim_mode is disabled by default. For file servers or workloads diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c index 0e615ed2363581ef2cb9e5d83fccf29d660a67f7..f1a86af00d25dc1c748699be739fdf9ff321c6a2 100644 --- a/drivers/base/arch_numa.c +++ b/drivers/base/arch_numa.c @@ -190,8 +190,27 @@ void __init setup_per_cpu_areas(void) } #endif -#ifdef CONFIG_ARCH_CUSTOM_NUMA_DISTANCE #define DISTANCE_MAX (1 << DISTANCE_BITS) +static int __init node_reclaim_distance_setup(char *str) +{ + int val; + + if (kstrtoint(str, 0, &val)) + return -EINVAL; + + if (val < LOCAL_DISTANCE || val >= DISTANCE_MAX) + return -EINVAL; + + if (val != RECLAIM_DISTANCE) { + node_reclaim_distance = val; + pr_info("Force set node_reclaim_distance to %d\n", val); + } + + return 0; +} +early_param("node_reclaim_distance", node_reclaim_distance_setup); + +#ifdef CONFIG_ARCH_CUSTOM_NUMA_DISTANCE static void get_numa_distance_info(int *numa_levels, int *max_distance) { DECLARE_BITMAP(distance_map, DISTANCE_MAX); @@ -222,25 +241,6 @@ static void get_numa_distance_info(int *numa_levels, int *max_distance) *max_distance = max; } -static int __init node_reclaim_distance_setup(char *str) -{ - int val; - - if (kstrtoint(str, 0, &val)) - return -EINVAL; - - if (val < LOCAL_DISTANCE || val >= DISTANCE_MAX) - return -EINVAL; - - if (val != RECLAIM_DISTANCE) { - node_reclaim_distance = val; - pr_info("Force set node_reclaim_distance to %d\n", val); - } - - return 0; -} -early_param("node_reclaim_distance", 
node_reclaim_distance_setup); - static void __init node_reclaim_distance_adjust(void) { unsigned int model = read_cpuid_id() & MIDR_CPU_MODEL_MASK; diff --git a/include/linux/swap.h b/include/linux/swap.h index 0b7ebe9b3e2c0da686f9b595fb8597e8d777e9fc..f11b96cec85184c9dc9515b597a290a5e5c90b23 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -450,7 +450,8 @@ extern int sysctl_min_slab_ratio; static inline bool node_reclaim_enabled(void) { /* Is any node_reclaim_mode bit set? */ - return node_reclaim_mode & (RECLAIM_ZONE|RECLAIM_WRITE|RECLAIM_UNMAP); + return node_reclaim_mode & + (RECLAIM_ZONE | RECLAIM_WRITE | RECLAIM_UNMAP | RECLAIM_KSWAPD); } void check_move_unevictable_folios(struct folio_batch *fbatch); diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h index 1f9bb10d1a473f553f328d5b5a6747c687b7931f..d7c2d453a69115c4dd57ad9219691432b0744aed 100644 --- a/include/uapi/linux/mempolicy.h +++ b/include/uapi/linux/mempolicy.h @@ -72,5 +72,6 @@ enum { #define RECLAIM_ZONE (1<<0) /* Run shrink_inactive_list on the zone */ #define RECLAIM_WRITE (1<<1) /* Writeout pages during reclaim */ #define RECLAIM_UNMAP (1<<2) /* Unmap pages during reclaim */ +#define RECLAIM_KSWAPD (1<<3) /* Wake up kswapd during reclaim */ #endif /* _UAPI_LINUX_MEMPOLICY_H */ diff --git a/mm/internal.h b/mm/internal.h index 71e6f523175d954ebd36bc1e234f188878238d40..d9963389cbffa67c305347359a5a9753f3f8d2a2 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1077,11 +1077,12 @@ static inline void mminit_verify_zonelist(void) #define NODE_RECLAIM_SUCCESS 1 #ifdef CONFIG_NUMA -extern int node_reclaim(struct pglist_data *, gfp_t, unsigned int); +int node_reclaim(struct pglist_data *pgdat, gfp_t mask, unsigned int order, + int alloc_flags, struct zone *zone); extern int find_next_best_node(int node, nodemask_t *used_node_mask); #else static inline int node_reclaim(struct pglist_data *pgdat, gfp_t mask, - unsigned int order) + unsigned int order, int alloc_flags, 
struct zone *zone) { return NODE_RECLAIM_NOSCAN; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8f2083fc544760a345253bfd03c7faa83990eff7..71ff9181f4a59b79332f218b816711804e6fc45b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3501,7 +3501,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, !zone_allows_reclaim(ac->preferred_zoneref->zone, zone)) continue; - ret = node_reclaim(zone->zone_pgdat, gfp_mask, order); + ret = node_reclaim(zone->zone_pgdat, gfp_mask, order, + alloc_flags, zone); switch (ret) { case NODE_RECLAIM_NOSCAN: /* did not scan */ diff --git a/mm/vmscan.c b/mm/vmscan.c index 2cecc9a173aa8af0ded7f362bdbe2853b82e2ef2..9e4aedbfd85e58a99a750fcd1d941639260b0468 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -7512,7 +7512,8 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in return sc.nr_reclaimed >= nr_pages; } -int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order) +int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order, + int alloc_flags, struct zone *zone) { int ret; @@ -7549,6 +7550,10 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order) if (test_and_set_bit(PGDAT_RECLAIM_LOCKED, &pgdat->flags)) return NODE_RECLAIM_NOSCAN; + if ((node_reclaim_mode & RECLAIM_KSWAPD) && + (alloc_flags & ALLOC_KSWAPD)) + wakeup_kswapd(zone, gfp_mask, order, gfp_zone(gfp_mask)); + ret = __node_reclaim(pgdat, gfp_mask, order); clear_bit(PGDAT_RECLAIM_LOCKED, &pgdat->flags);