diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 386e1eaac1fa47eedc270b008f1a4edf7527fc06..24049f0124a578ce2aacd8dc8d31d083cd4cafa6 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -155,6 +155,11 @@ static inline int folio_lru_refs(struct folio *folio) return ((flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF) + workingset; } +static inline void folio_clear_lru_refs(struct folio *folio) +{ + set_mask_bits(&folio->flags, LRU_REFS_MASK | LRU_REFS_FLAGS, 0); +} + static inline int folio_lru_gen(struct folio *folio) { unsigned long flags = READ_ONCE(folio->flags); @@ -224,6 +229,7 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, { unsigned long seq; unsigned long flags; + unsigned long mask; int gen = folio_lru_gen(folio); int type = folio_is_file_lru(folio); int zone = folio_zonenum(folio); @@ -259,7 +265,14 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, gen = lru_gen_from_seq(seq); flags = (gen + 1UL) << LRU_GEN_PGOFF; /* see the comment on MIN_NR_GENS about PG_active */ - set_mask_bits(&folio->flags, LRU_GEN_MASK | BIT(PG_active), flags); + mask = LRU_GEN_MASK; + /* + * Don't clear PG_workingset here because it can affect PSI accounting + * if the activation is due to workingset refault. + */ + if (folio_test_active(folio)) + mask |= LRU_REFS_MASK | BIT(PG_referenced) | BIT(PG_active); + set_mask_bits(&folio->flags, mask, flags); lru_gen_update_size(lruvec, folio, -1, gen); /* for folio_rotate_reclaimable() */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 3cee238de7c8f459f697c0b92f887baf8efcdfa6..412167d6fb4137fb697ff23a66815132409ac5ab 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -404,6 +404,8 @@ enum { NR_LRU_GEN_CAPS }; +#define LRU_REFS_FLAGS (BIT(PG_referenced) | BIT(PG_workingset)) + #define MIN_LRU_BATCH BITS_PER_LONG #define MAX_LRU_BATCH (MIN_LRU_BATCH * 64) diff --git a/mm/Kconfig b/mm/Kconfig index 782c43f08e8fead36161cdcb73920a55addcaab9..1bf38f9f72db0556d8e3f6a604bcb702d71b9e7c 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -190,7 +190,7 @@ config Z3FOLD config ZSMALLOC tristate - prompt "N:1 compression allocator (zsmalloc)" if ZSWAP + prompt "N:1 compression allocator (zsmalloc)" if (ZSWAP || ZRAM) depends on MMU help zsmalloc is a slab-based memory allocator designed to store diff --git a/mm/migrate.c b/mm/migrate.c index 05538e2edd1b7284b874e1ab66704f87268b8f63..557c496c3f922529feae701357b93755c4802b38 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1137,7 +1137,7 @@ static int migrate_folio_unmap(new_folio_t get_new_folio, int rc = -EAGAIN; int old_page_state = 0; struct anon_vma *anon_vma = NULL; - bool is_lru = !__folio_test_movable(src); + bool is_lru = data_race(!__folio_test_movable(src)); bool locked = false; bool dst_locked = false; diff --git a/mm/page_io.c b/mm/page_io.c index f2b3c4eed6886383572595ec6b3f81793a25ec2f..1887cd2093e3dc40d09319f1d54798443aed873b 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -381,7 +381,12 @@ void __swap_writepage(struct page *page, struct writeback_control *wbc) */ if (data_race(sis->flags & SWP_FS_OPS)) swap_writepage_fs(page, wbc); - else if (sis->flags & SWP_SYNCHRONOUS_IO) + /* + * ->flags can be updated non-atomicially (scan_swap_map_slots), + * but that will never affect SWP_SYNCHRONOUS_IO, so the data_race + * is safe. + */ + else if (data_race(sis->flags & SWP_SYNCHRONOUS_IO)) swap_writepage_bdev_sync(page, wbc, sis); else swap_writepage_bdev_async(page, wbc, sis); diff --git a/mm/shmem.c b/mm/shmem.c index f9e48c353a9d739ee4454285a0917f42c59f7423..63e261f7fe8724c36c6f6022191b136495bbff2f 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2805,7 +2805,10 @@ static struct inode *__shmem_get_inode(struct mnt_idmap *idmap, cache_no_acl(inode); if (sbinfo->noswap) mapping_set_unevictable(inode->i_mapping); - mapping_set_large_folios(inode->i_mapping); + + /* Don't consider 'deny' for emergencies and 'force' for testing */ + if (sbinfo->huge) + mapping_set_large_folios(inode->i_mapping); switch (mode & S_IFMT) { default: diff --git a/mm/vmscan.c b/mm/vmscan.c index 10acf5b339a7fe758bee724d584ea1abbca931dc..8e605361b714cf9f21ecee1ccbcef4d663abad0d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3284,8 +3284,6 @@ static bool should_clear_pmd_young(void) * shorthand helpers ******************************************************************************/ -#define LRU_REFS_FLAGS (BIT(PG_referenced) | BIT(PG_workingset)) - #define DEFINE_MAX_SEQ(lruvec) \ unsigned long max_seq = READ_ONCE((lruvec)->lrugen.max_seq) @@ -4787,8 +4785,10 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) old_gen = folio_lru_gen(folio); if (old_gen < 0) folio_set_referenced(folio); - else if (old_gen != new_gen) + else if (old_gen != new_gen) { + folio_clear_lru_refs(folio); folio_activate(folio); + } } arch_leave_lazy_mmu_mode(); @@ -4953,6 +4953,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c int tier_idx) { bool success; + bool dirty, writeback; int gen = folio_lru_gen(folio); int type = folio_is_file_lru(folio); int zone = folio_zonenum(folio); @@ -5007,9 +5008,17 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c return true; } + dirty = folio_test_dirty(folio); + writeback = folio_test_writeback(folio); + if (type == LRU_GEN_FILE && dirty) { + sc->nr.file_taken += delta; + if (!writeback) + sc->nr.unqueued_dirty += delta; + } + /* waiting for writeback */ - if (folio_test_locked(folio) || folio_test_writeback(folio) || - (type == LRU_GEN_FILE && folio_test_dirty(folio))) { + if (folio_test_locked(folio) || writeback || + (type == LRU_GEN_FILE && dirty)) { gen = folio_inc_gen(lruvec, folio, true); list_move(&folio->lru, &lrugen->folios[gen][type][zone]); return true; @@ -5040,7 +5049,7 @@ static bool isolate_folio(struct lruvec *lruvec, struct folio *folio, struct sca /* see the comment on MAX_NR_TIERS */ if (!folio_test_referenced(folio)) - set_mask_bits(&folio->flags, LRU_REFS_MASK | LRU_REFS_FLAGS, 0); + folio_clear_lru_refs(folio); /* for shrink_folio_list() */ folio_clear_reclaim(folio); @@ -5125,7 +5134,8 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc, trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, MAX_LRU_BATCH, scanned, skipped, isolated, type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON); - + if (type == LRU_GEN_FILE) + sc->nr.file_taken += isolated; /* * There might not be eligible folios due to reclaim_idx. Check the * remaining to prevent livelock if it's not making progress. @@ -5254,6 +5264,7 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap return scanned; retry: reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false); + sc->nr.unqueued_dirty += stat.nr_unqueued_dirty; sc->nr_reclaimed += reclaimed; trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id, scanned, reclaimed, &stat, sc->priority, @@ -5467,6 +5478,13 @@ static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) cond_resched(); } + /* + * If too many file cache in the coldest generation can't be evicted + * due to being dirty, wake up the flusher. + */ + if (sc->nr.unqueued_dirty && sc->nr.unqueued_dirty == sc->nr.file_taken) + wakeup_flusher_threads(WB_REASON_VMSCAN); + /* whether this lruvec should be rotated */ return nr_to_scan < 0; } @@ -6588,6 +6606,7 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) bool reclaimable = false; if (lru_gen_enabled() && root_reclaim(sc)) { + memset(&sc->nr, 0, sizeof(sc->nr)); lru_gen_shrink_node(pgdat, sc); return; }