diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 78d94f2bc3e8e037905a930dfedd43c4f94677ef..b8b47b1f36111759b7ec42c045f1ea83951c9924 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -887,14 +887,6 @@ static inline void mem_cgroup_uncharge(struct folio *folio)
 	__mem_cgroup_uncharge(folio);
 }
 
-void __mem_cgroup_uncharge_list(struct list_head *page_list);
-static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
-{
-	if (mem_cgroup_disabled())
-		return;
-	__mem_cgroup_uncharge_list(page_list);
-}
-
 void __mem_cgroup_uncharge_folios(struct folio_batch *folios);
 static inline void mem_cgroup_uncharge_folios(struct folio_batch *folios)
 {
@@ -1521,10 +1513,6 @@ static inline void mem_cgroup_uncharge(struct folio *folio)
 {
 }
 
-static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
-{
-}
-
 static inline void mem_cgroup_uncharge_folios(struct folio_batch *folios)
 {
 }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1a7c95f58cf83b71ab692b3191331e571c32c13f..0c95b119f959c50e380d2e5eeaa58224cd9228d0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -231,7 +231,6 @@ int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *,
 /* test whether an address (unsigned long or pointer) is aligned to PAGE_SIZE */
 #define PAGE_ALIGNED(addr)	IS_ALIGNED((unsigned long)(addr), PAGE_SIZE)
 
-#define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
 static inline struct folio *lru_to_folio(struct list_head *head)
 {
 	return list_entry((head)->prev, struct folio, lru);
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 0cf9dafde89b531bc7edca5a9922e729b48042c9..b06ad53e81090f8edb336b129ea704c0d7690666 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -475,9 +475,9 @@ static inline unsigned long total_swapcache_pages(void)
 	return global_node_page_state(NR_SWAPCACHE);
 }
 
-extern void free_swap_cache(struct page *page);
-extern void free_page_and_swap_cache(struct page *);
-extern void free_pages_and_swap_cache(struct encoded_page **, int);
+void free_swap_cache(struct folio *folio);
+void free_page_and_swap_cache(struct page *);
+void free_pages_and_swap_cache(struct encoded_page **, int);
 /* linux/mm/swapfile.c */
 extern atomic_long_t nr_swap_pages;
 extern long total_swap_pages;
@@ -561,7 +561,7 @@ static inline void free_swap_and_cache_nr(swp_entry_t entry, int nr)
 {
 }
 
-static inline void free_swap_cache(struct page *page)
+static inline void free_swap_cache(struct folio *folio)
 {
 }
 
diff --git a/mm/internal.h b/mm/internal.h
index 1166c56be066a55ee338b896675482541b8fb12b..88fc0e5c782faf8440d16c9845af1d847c3bf28f 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -709,7 +709,6 @@ extern int user_min_free_kbytes;
 
 void free_unref_page(struct page *page, unsigned int order);
 void free_unref_folios(struct folio_batch *fbatch);
-void free_unref_page_list(struct list_head *list);
 
 extern void zone_pcp_reset(struct zone *zone);
 extern void zone_pcp_disable(struct zone *zone);
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 6a921c393e319fb1a988e376942a640f4151d00d..cdec406d199d449fb5666d2ac53843c95209e037 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -709,9 +709,7 @@ static void __collapse_huge_page_copy_succeeded(pte_t *pte,
 						spinlock_t *ptl,
 						struct list_head *compound_pagelist)
 {
-	struct folio *src_folio;
-	struct page *src_page;
-	struct page *tmp;
+	struct folio *src, *tmp;
 	pte_t *_pte;
 	pte_t pteval;
 
@@ -730,10 +728,11 @@ static void __collapse_huge_page_copy_succeeded(pte_t *pte,
 				ksm_might_unmap_zero_page(vma->vm_mm, pteval);
 			}
 		} else {
-			src_page = pte_page(pteval);
-			src_folio = page_folio(src_page);
-			if (!folio_test_large(src_folio))
-				release_pte_folio(src_folio);
+			struct page *src_page = pte_page(pteval);
+
+			src = page_folio(src_page);
+			if (!folio_test_large(src))
+				release_pte_folio(src);
 			/*
 			 * ptl mostly unnecessary, but preempt has to
 			 * be disabled to update the per-cpu stats
@@ -741,20 +740,19 @@ static void __collapse_huge_page_copy_succeeded(pte_t *pte,
 			 */
 			spin_lock(ptl);
 			ptep_clear(vma->vm_mm, address, _pte);
-			folio_remove_rmap_pte(src_folio, src_page, vma);
+			folio_remove_rmap_pte(src, src_page, vma);
 			spin_unlock(ptl);
 			free_page_and_swap_cache(src_page);
 		}
 	}
 
-	list_for_each_entry_safe(src_page, tmp, compound_pagelist, lru) {
-		list_del(&src_page->lru);
-		mod_node_page_state(page_pgdat(src_page),
-				NR_ISOLATED_ANON + page_is_file_lru(src_page),
-				-compound_nr(src_page));
-		unlock_page(src_page);
-		free_swap_cache(src_page);
-		putback_lru_page(src_page);
+	list_for_each_entry_safe(src, tmp, compound_pagelist, lru) {
+		list_del(&src->lru);
+		node_stat_sub_folio(src, NR_ISOLATED_ANON +
+				folio_is_file_lru(src));
+		folio_unlock(src);
+		free_swap_cache(src);
+		folio_putback_lru(src);
 	}
 }
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 348bf46cba9daae3461269cccd4ba993eaaa1eef..18bd3d386d4a550d06bf4cfb301449fafb84bbff 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -9337,25 +9337,6 @@ void __mem_cgroup_uncharge(struct folio *folio)
 	uncharge_batch(&ug);
 }
 
-/**
- * __mem_cgroup_uncharge_list - uncharge a list of page
- * @page_list: list of pages to uncharge
- *
- * Uncharge a list of pages previously charged with
- * __mem_cgroup_charge().
- */
-void __mem_cgroup_uncharge_list(struct list_head *page_list)
-{
-	struct uncharge_gather ug;
-	struct folio *folio;
-
-	uncharge_gather_clear(&ug);
-	list_for_each_entry(folio, page_list, lru)
-		uncharge_folio(folio, &ug);
-	if (ug.memcg)
-		uncharge_batch(&ug);
-}
-
 void __mem_cgroup_uncharge_folios(struct folio_batch *folios)
 {
 	struct uncharge_gather ug;
diff --git a/mm/memory.c b/mm/memory.c
index a3765e76a5968e576e40ccb0bc085a347953dbf6..f699a09e0a53da68fbdf724676a802243ea90e22 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3839,7 +3839,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 		folio_put(new_folio);
 	if (old_folio) {
 		if (page_copied)
-			free_swap_cache(&old_folio->page);
+			free_swap_cache(old_folio);
 		folio_put(old_folio);
 	}
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index da5584dc2aa70531349fa1792b0dc7364195047a..53f2f68697ca68e259c1e96771cb2627c98d7ade 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2870,24 +2870,6 @@ void free_unref_folios(struct folio_batch *folios)
 	folio_batch_reinit(folios);
 }
 
-void free_unref_page_list(struct list_head *list)
-{
-	struct folio_batch fbatch;
-
-	folio_batch_init(&fbatch);
-	while (!list_empty(list)) {
-		struct folio *folio = list_first_entry(list, struct folio, lru);
-
-		list_del(&folio->lru);
-		if (folio_batch_add(&fbatch, folio) > 0)
-			continue;
-		free_unref_folios(&fbatch);
-	}
-
-	if (fbatch.nr)
-		free_unref_folios(&fbatch);
-}
-
 /*
  * split_page takes a non-compound higher-order page, and splits it into
  * n (1<<order) sub-pages: page[0..n]
diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1115,7 +1115,7 @@ static __always_inline unsigned int __folio_add_rmap(struct folio *folio,
 		int *nr_pmdmapped)
 {
 	atomic_t *mapped = &folio->_nr_pages_mapped;
-	int first, nr = 0;
+	int first = 0, nr = 0;
 
 	__folio_rmap_sanity_checks(folio, page, nr_pages, level);
 
@@ -1127,13 +1127,12 @@
 		}
 
 		do {
-			first = atomic_inc_and_test(&page->_mapcount);
-			if (first) {
-				first = atomic_inc_return_relaxed(mapped);
-				if (first < ENTIRELY_MAPPED)
-					nr++;
-			}
+			first += atomic_inc_and_test(&page->_mapcount);
 		} while (page++, --nr_pages > 0);
+
+		if (first &&
+		    atomic_add_return_relaxed(first, mapped) < ENTIRELY_MAPPED)
+			nr = first;
 		break;
 	case RMAP_LEVEL_PMD:
 		first = atomic_inc_and_test(&folio->_entire_mapcount);
@@ -1465,7 +1464,7 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
 {
 	atomic_t *mapped = &folio->_nr_pages_mapped;
 	pg_data_t *pgdat = folio_pgdat(folio);
-	int last, nr = 0, nr_pmdmapped = 0;
+	int last = 0, nr = 0, nr_pmdmapped = 0;
 	bool partially_mapped = false;
 	enum node_stat_item idx;
 
@@ -1479,14 +1478,13 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
 		}
 
 		do {
-			last = atomic_add_negative(-1, &page->_mapcount);
-			if (last) {
-				last = atomic_dec_return_relaxed(mapped);
-				if (last < ENTIRELY_MAPPED)
-					nr++;
-			}
+			last += atomic_add_negative(-1, &page->_mapcount);
 		} while (page++, --nr_pages > 0);
 
+		if (last &&
+		    atomic_sub_return_relaxed(last, mapped) < ENTIRELY_MAPPED)
+			nr = last;
+
 		partially_mapped = nr && atomic_read(mapped);
 		break;
 	case RMAP_LEVEL_PMD:
diff --git a/mm/swap.c b/mm/swap.c
index b555a5a35381ee3a5fdfb9ce3c7dec8ec2de5e73..2bbd02ed8c00ccfb2063866d2d22b88055928ea4 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -1010,15 +1010,17 @@ void folios_put_refs(struct folio_batch *folios, unsigned int *refs)
 		if (!folio_ref_sub_and_test(folio, nr_refs))
 			continue;
 
-		if (folio_test_large(folio)) {
+		/* hugetlb has its own memcg */
+		if (folio_test_hugetlb(folio)) {
 			if (lruvec) {
 				unlock_page_lruvec_irqrestore(lruvec, flags);
 				lruvec = NULL;
 			}
-			__folio_put_large(folio);
+			free_huge_folio(folio);
 			continue;
 		}
 
+		folio_unqueue_deferred_split(folio);
 		__page_cache_release(folio, &lruvec, &flags);
 
 		if (j != i)
diff --git a/mm/swap_state.c b/mm/swap_state.c
index df9a7630740ace5f637309340001fb5c0013d003..81390d0ddbbf984dcdf1b1d97b79486d2815fded 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -15,6 +15,7 @@
 #include <linux/swapops.h>
 #include <linux/init.h>
 #include <linux/pagemap.h>
+#include <linux/pagevec.h>
 #include <linux/backing-dev.h>
 #include <linux/blkdev.h>
 #include <linux/migrate.h>
@@ -282,10 +283,8 @@ void clear_shadow_from_swap_cache(int type, unsigned long begin,
  * folio_free_swap() _with_ the lock.
  * 					- Marcelo
  */
-void free_swap_cache(struct page *page)
+void free_swap_cache(struct folio *folio)
 {
-	struct folio *folio = page_folio(page);
-
 	if (folio_test_swapcache(folio) && !folio_mapped(folio) &&
 	    folio_trylock(folio)) {
 		folio_free_swap(folio);
@@ -299,9 +298,11 @@ void free_swap_cache(struct page *page)
  */
 void free_page_and_swap_cache(struct page *page)
 {
-	free_swap_cache(page);
+	struct folio *folio = page_folio(page);
+
+	free_swap_cache(folio);
 	if (!is_huge_zero_page(page))
-		put_page(page);
+		folio_put(folio);
 }
 
 /*
@@ -310,21 +311,25 @@ void free_page_and_swap_cache(struct page *page)
  */
 void free_pages_and_swap_cache(struct encoded_page **pages, int nr)
 {
+	struct folio_batch folios;
+	unsigned int refs[PAGEVEC_SIZE];
+
 	lru_add_drain();
+	folio_batch_init(&folios);
 	for (int i = 0; i < nr; i++) {
-		struct page *page = encoded_page_ptr(pages[i]);
+		struct folio *folio = page_folio(encoded_page_ptr(pages[i]));
 
-		/*
-		 * Skip over the "nr_pages" entry. It's sufficient to call
-		 * free_swap_cache() only once per folio.
-		 */
+		free_swap_cache(folio);
+		refs[folios.nr] = 1;
 		if (unlikely(encoded_page_flags(pages[i]) &
 			     ENCODED_PAGE_BIT_NR_PAGES_NEXT))
-			i++;
+			refs[folios.nr] = encoded_nr_pages(pages[++i]);
 
-		free_swap_cache(page);
+		if (folio_batch_add(&folios, folio) == 0)
+			folios_put_refs(&folios, refs);
 	}
-	release_pages(pages, nr);
+	if (folios.nr)
+		folios_put_refs(&folios, refs);
 }
 
 static inline bool swap_use_vma_readahead(void)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7215a899af1d1cbf36f8285eb4f8d69766079518..5a80ce16ef076bdc31d1c08b3f3043c73460d05f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1071,8 +1071,8 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
 		struct pglist_data *pgdat, struct scan_control *sc,
 		struct reclaim_stat *stat, bool ignore_references)
 {
+	struct folio_batch free_folios;
 	LIST_HEAD(ret_folios);
-	LIST_HEAD(free_folios);
 	LIST_HEAD(demote_folios);
 	unsigned int nr_reclaimed = 0;
 	unsigned int pgactivate = 0;
@@ -1083,6 +1083,7 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
 
 	target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
 
+	folio_batch_init(&free_folios);
 	memset(stat, 0, sizeof(*stat));
 	cond_resched();
 	do_demote_pass = can_demote(pgdat->node_id, sc);
@@ -1510,14 +1511,12 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
 		 */
 		nr_reclaimed += nr_pages;
 
-		/*
-		 * Is there need to periodically free_folio_list? It would
-		 * appear not as the counts should be low
-		 */
-		if (unlikely(folio_test_large(folio)))
-			destroy_large_folio(folio);
-		else
-			list_add(&folio->lru, &free_folios);
+		folio_unqueue_deferred_split(folio);
+		if (folio_batch_add(&free_folios, folio) == 0) {
+			mem_cgroup_uncharge_folios(&free_folios);
+			try_to_unmap_flush();
+			free_unref_folios(&free_folios);
+		}
 		continue;
 
 activate_locked_split:
@@ -1581,9 +1580,9 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
 
 	pgactivate = stat->nr_activate[0] + stat->nr_activate[1];
 
-	mem_cgroup_uncharge_list(&free_folios);
+	mem_cgroup_uncharge_folios(&free_folios);
 	try_to_unmap_flush();
-	free_unref_page_list(&free_folios);
+	free_unref_folios(&free_folios);
 
 	list_splice(&ret_folios, folio_list);
 	count_vm_events(PGACTIVATE, pgactivate);
@@ -1862,7 +1861,6 @@ static int too_many_isolated(struct pglist_data *pgdat, int file,
 
 /*
  * move_folios_to_lru() moves folios from private @list to appropriate LRU list.
- * On return, @list is reused as a list of folios to be freed by the caller.
  *
 * Returns the number of pages moved to the given lruvec.
 */
@@ -1870,8 +1868,9 @@ static unsigned int move_folios_to_lru(struct lruvec *lruvec,
 		struct list_head *list)
 {
 	int nr_pages, nr_moved = 0;
-	LIST_HEAD(folios_to_free);
+	struct folio_batch free_folios;
 
+	folio_batch_init(&free_folios);
 	while (!list_empty(list)) {
 		struct folio *folio = lru_to_folio(list);
 
@@ -1900,12 +1899,13 @@ static unsigned int move_folios_to_lru(struct lruvec *lruvec,
 		if (unlikely(folio_put_testzero(folio))) {
 			__folio_clear_lru_flags(folio);
 
-			if (unlikely(folio_test_large(folio))) {
+			folio_unqueue_deferred_split(folio);
+			if (folio_batch_add(&free_folios, folio) == 0) {
 				spin_unlock_irq(&lruvec->lru_lock);
-				destroy_large_folio(folio);
+				mem_cgroup_uncharge_folios(&free_folios);
+				free_unref_folios(&free_folios);
 				spin_lock_irq(&lruvec->lru_lock);
-			} else
-				list_add(&folio->lru, &folios_to_free);
+			}
 
 			continue;
 		}
@@ -1922,10 +1922,12 @@ static unsigned int move_folios_to_lru(struct lruvec *lruvec,
 			workingset_age_nonresident(lruvec, nr_pages);
 	}
 
-	/*
-	 * To save our caller's stack, now use input list for pages to free.
-	 */
-	list_splice(&folios_to_free, list);
+	if (free_folios.nr) {
+		spin_unlock_irq(&lruvec->lru_lock);
+		mem_cgroup_uncharge_folios(&free_folios);
+		free_unref_folios(&free_folios);
+		spin_lock_irq(&lruvec->lru_lock);
+	}
 
 	return nr_moved;
 }
@@ -2004,8 +2006,6 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
 	spin_unlock_irq(&lruvec->lru_lock);
 
 	lru_note_cost(lruvec, file, stat.nr_pageout, nr_scanned - nr_reclaimed);
-	mem_cgroup_uncharge_list(&folio_list);
-	free_unref_page_list(&folio_list);
 
 	/*
 	 * If dirty folios are scanned that are not queued for IO, it
@@ -2146,8 +2146,6 @@ static void shrink_active_list(unsigned long nr_to_scan,
 
 	nr_activate = move_folios_to_lru(lruvec, &l_active);
 	nr_deactivate = move_folios_to_lru(lruvec, &l_inactive);
-	/* Keep all free folios in l_active list */
-	list_splice(&l_inactive, &l_active);
 
 	__count_vm_events(PGDEACTIVATE, nr_deactivate);
 	__count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, nr_deactivate);
@@ -2157,8 +2155,6 @@ static void shrink_active_list(unsigned long nr_to_scan,
 
 	if (nr_rotated)
 		lru_note_cost(lruvec, file, 0, nr_rotated);
-	mem_cgroup_uncharge_list(&l_active);
-	free_unref_page_list(&l_active);
 	trace_mm_vmscan_lru_shrink_active(pgdat->node_id, nr_taken, nr_activate,
 			nr_deactivate, nr_rotated, sc->priority, file);
 }
@@ -4674,10 +4670,6 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swappiness)
 
 	spin_unlock_irq(&lruvec->lru_lock);
 
-	mem_cgroup_uncharge_list(&list);
-	free_unref_page_list(&list);
-
-	INIT_LIST_HEAD(&list);
 	list_splice_init(&clean, &list);
 
 	if (!list_empty(&list)) {
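
The recurring pattern in this series is the replacement of open-coded lists of folios with a fixed-size folio_batch that is flushed whenever folio_batch_add() reports no space left, plus a final flush of any partial batch. The sketch below is a minimal, self-contained userspace model of that flush-when-full idiom, not kernel code: item_batch, batch_add() and batch_flush() are hypothetical stand-ins for folio_batch, folio_batch_add() and free_unref_folios().

/*
 * Userspace model of the flush-when-full batching idiom used above.
 * The names item_batch, batch_add() and batch_flush() are hypothetical
 * stand-ins; only the control flow mirrors the patch.
 */
#include <stdio.h>

#define BATCH_SIZE 31		/* PAGEVEC_SIZE is 31 in the kernel */

struct item_batch {
	unsigned int nr;
	int items[BATCH_SIZE];
};

/* Stand-in for free_unref_folios(): process and drop everything gathered. */
static void batch_flush(struct item_batch *b)
{
	for (unsigned int i = 0; i < b->nr; i++)
		printf("freeing item %d\n", b->items[i]);
	b->nr = 0;
}

/* Like folio_batch_add(): add one entry, return the space left afterwards. */
static unsigned int batch_add(struct item_batch *b, int item)
{
	b->items[b->nr++] = item;
	return BATCH_SIZE - b->nr;
}

int main(void)
{
	struct item_batch batch = { .nr = 0 };

	for (int i = 0; i < 100; i++) {
		/* Flush as soon as the batch fills, as the patch does. */
		if (batch_add(&batch, i) == 0)
			batch_flush(&batch);
	}
	if (batch.nr)		/* final partial batch */
		batch_flush(&batch);
	return 0;
}

Keeping the batch on the stack and flushing it eagerly is what lets the patch drop the "reuse the caller's list for pages to free" convention: callers such as shrink_active_list() and evict_folios() no longer receive a list of dead folios back, so their uncharge/free calls disappear.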