diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst index 3c5cc91696eb181c75c9682238a6c0697b6375a8..9c9d938cfae4163504fb95933fc0caded13edc40 100644 --- a/Documentation/admin-guide/mm/transhuge.rst +++ b/Documentation/admin-guide/mm/transhuge.rst @@ -600,6 +600,20 @@ anon_fault_fallback_charge instead falls back to using huge pages with lower orders or small pages even though the allocation was successful. +swpin + is incremented every time a huge page is swapped in from a non-zswap + swap device in one piece. + +swpin_fallback + is incremented if swapin fails to allocate or charge a huge page + and instead falls back to using huge pages with lower orders or + small pages. + +swpin_fallback_charge + is incremented if swapin fails to charge a huge page and instead + falls back to using huge pages with lower orders or small pages + even though the allocation was successful. + swpout is incremented every time a huge page is swapped out in one piece without splitting. diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 02910e742bb1cc927fd1147f999bdf7434f4a0e3..cfe42c43b55b679b02b35f362acee6f28d1d145c 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -124,6 +124,9 @@ enum mthp_stat_item { MTHP_STAT_ANON_FAULT_ALLOC, MTHP_STAT_ANON_FAULT_FALLBACK, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE, + MTHP_STAT_SWPIN, + MTHP_STAT_SWPIN_FALLBACK, + MTHP_STAT_SWPIN_FALLBACK_CHARGE, MTHP_STAT_SWPOUT, MTHP_STAT_SWPOUT_FALLBACK, MTHP_STAT_SHMEM_ALLOC, diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index abe236201e68fdf2e8cc029499962e6684f5ee85..f6a1494ad9ac9999808236f0805a5f5a48877be5 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -820,7 +820,8 @@ int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, gfp_t gfp, int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm, gfp_t gfp, swp_entry_t entry); -void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry); + +void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry, unsigned int nr_pages); void __mem_cgroup_uncharge(struct folio *folio); @@ -1419,7 +1420,7 @@ static inline int mem_cgroup_swapin_charge_folio(struct folio *folio, return 0; } -static inline void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry) +static inline void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry, unsigned int nr) { } diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 412167d6fb4137fb697ff23a66815132409ac5ab..ebaa85d5ac700fc7105907fe5877881bfb5dad8c 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -22,6 +22,7 @@ #include #include #include +#include #include /* Free memory management - zoned buddy allocator. */ diff --git a/include/linux/swap.h b/include/linux/swap.h index aa2261b5c772ca91e38c95714484eb985beea690..0b7ebe9b3e2c0da686f9b595fb8597e8d777e9fc 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -511,13 +511,12 @@ extern sector_t swapdev_block(int, pgoff_t); extern int __swap_count(swp_entry_t entry); extern int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry); extern int swp_swapcount(swp_entry_t entry); -extern struct swap_info_struct *page_swap_info(struct page *); -extern struct swap_info_struct *swp_swap_info(swp_entry_t entry); +struct swap_info_struct *swp_swap_info(swp_entry_t entry); struct backing_dev_info; extern int init_swap_address_space(unsigned int type, unsigned long nr_pages); extern void exit_swap_address_space(unsigned int type); extern struct swap_info_struct *get_swap_device(swp_entry_t entry); -sector_t swap_page_sector(struct page *page); +sector_t swap_folio_sector(struct folio *folio); static inline void put_swap_device(struct swap_info_struct *si) { diff --git a/include/linux/zswap.h b/include/linux/zswap.h index 2a60ce39cfde19ac90cebb9b16aaba8b6a99c4ab..f647ee38598a692a8c19876dafcf1b8eae07bc8d 100644 --- a/include/linux/zswap.h +++ b/include/linux/zswap.h @@ -15,6 +15,7 @@ bool zswap_load(struct folio *folio); void zswap_invalidate(int type, pgoff_t offset); void zswap_swapon(int type); void zswap_swapoff(int type); +bool zswap_never_enabled(void); #else @@ -32,6 +33,11 @@ static inline void zswap_invalidate(int type, pgoff_t offset) {} static inline void zswap_swapon(int type) {} static inline void zswap_swapoff(int type) {} +static inline bool zswap_never_enabled(void) +{ + return true; +} + #endif #endif /* _LINUX_ZSWAP_H */ diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 4511d26fc28dfa610473a9cf58f677cbf734fa08..f22a0dc3fb1307b4de8b67b02b421b57752740b9 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -754,6 +754,9 @@ static struct kobj_attribute _name##_attr = __ATTR_RO(_name) DEFINE_MTHP_STAT_ATTR(anon_fault_alloc, MTHP_STAT_ANON_FAULT_ALLOC); DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK); DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE); +DEFINE_MTHP_STAT_ATTR(swpin, MTHP_STAT_SWPIN); +DEFINE_MTHP_STAT_ATTR(swpin_fallback, MTHP_STAT_SWPIN_FALLBACK); +DEFINE_MTHP_STAT_ATTR(swpin_fallback_charge, MTHP_STAT_SWPIN_FALLBACK_CHARGE); DEFINE_MTHP_STAT_ATTR(swpout, MTHP_STAT_SWPOUT); DEFINE_MTHP_STAT_ATTR(swpout_fallback, MTHP_STAT_SWPOUT_FALLBACK); #ifdef CONFIG_SHMEM @@ -771,6 +774,9 @@ static struct attribute *anon_stats_attrs[] = { &anon_fault_fallback_attr.attr, &anon_fault_fallback_charge_attr.attr, #ifndef CONFIG_SHMEM + &swpin_attr.attr, + &swpin_fallback_attr.attr, + &swpin_fallback_charge_attr.attr, &swpout_attr.attr, &swpout_fallback_attr.attr, #endif @@ -800,6 +806,9 @@ static struct attribute_group file_stats_attr_grp = { static struct attribute *any_stats_attrs[] = { #ifdef CONFIG_SHMEM + &swpin_attr.attr, + &swpin_fallback_attr.attr, + &swpin_fallback_charge_attr.attr, &swpout_attr.attr, &swpout_fallback_attr.attr, #endif diff --git a/mm/madvise.c b/mm/madvise.c index 4fd5eb465bada646f9773a8aeafb564e5a5e57ad..0ad321350cfe000d01bb3521de3a28825dd63c6a 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -203,7 +203,7 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start, for (addr = start; addr < end; addr += PAGE_SIZE) { pte_t pte; swp_entry_t entry; - struct page *page; + struct folio *folio; if (!ptep++) { ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); @@ -221,10 +221,10 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start, pte_unmap_unlock(ptep, ptl); ptep = NULL; - page = read_swap_cache_async(entry, GFP_HIGHUSER_MOVABLE, + folio = read_swap_cache_async(entry, GFP_HIGHUSER_MOVABLE, vma, addr, &splug); - if (page) - put_page(page); + if (folio) + folio_put(folio); } if (ptep) @@ -246,17 +246,17 @@ static void shmem_swapin_range(struct vm_area_struct *vma, { XA_STATE(xas, &mapping->i_pages, linear_page_index(vma, start)); pgoff_t end_index = linear_page_index(vma, end) - 1; - struct page *page; + struct folio *folio; struct swap_iocb *splug = NULL; rcu_read_lock(); - xas_for_each(&xas, page, end_index) { + xas_for_each(&xas, folio, end_index) { unsigned long addr; swp_entry_t entry; - if (!xa_is_value(page)) + if (!xa_is_value(folio)) continue; - entry = radix_to_swp_entry(page); + entry = radix_to_swp_entry(folio); /* There might be swapin error entries in shmem mapping. */ if (non_swap_entry(entry)) continue; @@ -266,10 +266,10 @@ static void shmem_swapin_range(struct vm_area_struct *vma, xas_pause(&xas); rcu_read_unlock(); - page = read_swap_cache_async(entry, mapping_gfp_mask(mapping), + folio = read_swap_cache_async(entry, mapping_gfp_mask(mapping), vma, addr, &splug); - if (page) - put_page(page); + if (folio) + folio_put(folio); rcu_read_lock(); } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 023d85aabfe80e74875d1c83182e7249fce5371e..00b04c0ce172c2921a216d47dd3f930397a9fec9 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -8499,14 +8499,15 @@ int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm, /* * mem_cgroup_swapin_uncharge_swap - uncharge swap slot - * @entry: swap entry for which the page is charged + * @entry: the first swap entry for which the pages are charged + * @nr_pages: number of pages which will be uncharged * * Call this function after successfully adding the charged page to swapcache. * * Note: This function assumes the page for which swap slot is being uncharged * is order 0 page. */ -void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry) +void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry, unsigned int nr_pages) { /* * Cgroup1's unified memory+swap counter has been charged with the @@ -8526,7 +8527,7 @@ void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry) * let's not wait for it. The page already received a * memory+swap charge, drop the swap entry duplicate. */ - mem_cgroup_uncharge_swap(entry, 1); + mem_cgroup_uncharge_swap(entry, nr_pages); } } diff --git a/mm/memory.c b/mm/memory.c index e8daa5a3d3691eb72d77cca5e3c2e87549ffc652..54f5b1044051c21b97e39f957a4ff4bbb1aacd92 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3965,6 +3965,189 @@ static vm_fault_t handle_pte_marker(struct vm_fault *vmf) static DECLARE_WAIT_QUEUE_HEAD(swapcache_wq); +static struct folio *__alloc_swap_folio(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct folio *folio; + swp_entry_t entry; + + folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, + vmf->address, false); + if (!folio) + return NULL; + + entry = pte_to_swp_entry(vmf->orig_pte); + if (mem_cgroup_swapin_charge_folio(folio, vma->vm_mm, + GFP_KERNEL, entry)) { + folio_put(folio); + return NULL; + } + + return folio; +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline int non_swapcache_batch(swp_entry_t entry, int max_nr) +{ + struct swap_info_struct *si = swp_swap_info(entry); + pgoff_t offset = swp_offset(entry); + int i; + + /* + * While allocating a large folio and doing swap_read_folio, which is + * the case the being faulted pte doesn't have swapcache. We need to + * ensure all PTEs have no cache as well, otherwise, we might go to + * swap devices while the content is in swapcache. + */ + for (i = 0; i < max_nr; i++) { + if ((si->swap_map[offset + i] & SWAP_HAS_CACHE)) + return i; + } + + return i; +} + +/* + * Check if the PTEs within a range are contiguous swap entries + * and have consistent swapcache, zeromap. + */ +static bool can_swapin_thp(struct vm_fault *vmf, pte_t *ptep, int nr_pages) +{ + unsigned long addr; + swp_entry_t entry; + int idx; + pte_t pte; + + addr = ALIGN_DOWN(vmf->address, nr_pages * PAGE_SIZE); + idx = (vmf->address - addr) / PAGE_SIZE; + pte = ptep_get(ptep); + + if (!pte_same(pte, pte_move_swp_offset(vmf->orig_pte, -idx))) + return false; + entry = pte_to_swp_entry(pte); + if (swap_pte_batch(ptep, nr_pages, pte) != nr_pages) + return false; + + /* + * swap_read_folio() can't handle the case a large folio is hybridly + * from different backends. And they are likely corner cases. Similar + * things might be added once zswap support large folios. + */ + if (unlikely(non_swapcache_batch(entry, nr_pages) != nr_pages)) + return false; + + return true; +} + +static inline unsigned long thp_swap_suitable_orders(pgoff_t swp_offset, + unsigned long addr, + unsigned long orders) +{ + int order, nr; + + order = highest_order(orders); + + /* + * To swap in a THP with nr pages, we require that its first swap_offset + * is aligned with that number, as it was when the THP was swapped out. + * This helps filter out most invalid entries. + */ + while (orders) { + nr = 1 << order; + if ((addr >> PAGE_SHIFT) % nr == swp_offset % nr) + break; + order = next_order(&orders, order); + } + + return orders; +} + +static struct folio *alloc_swap_folio(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + unsigned long orders; + struct folio *folio; + unsigned long addr; + swp_entry_t entry; + spinlock_t *ptl; + pte_t *pte; + gfp_t gfp; + int order; + + /* + * If uffd is active for the vma we need per-page fault fidelity to + * maintain the uffd semantics. + */ + if (unlikely(userfaultfd_armed(vma))) + goto fallback; + + /* + * A large swapped out folio could be partially or fully in zswap. We + * lack handling for such cases, so fallback to swapping in order-0 + * folio. + */ + if (!zswap_never_enabled()) + goto fallback; + + entry = pte_to_swp_entry(vmf->orig_pte); + /* + * Get a list of all the (large) orders below PMD_ORDER that are enabled + * and suitable for swapping THP. + */ + orders = thp_vma_allowable_orders(vma, vma->vm_flags, + TVA_IN_PF | TVA_ENFORCE_SYSFS, BIT(PMD_ORDER) - 1); + orders = thp_vma_suitable_orders(vma, vmf->address, orders); + orders = thp_swap_suitable_orders(swp_offset(entry), + vmf->address, orders); + + if (!orders) + goto fallback; + + pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, + vmf->address & PMD_MASK, &ptl); + if (unlikely(!pte)) + goto fallback; + + /* + * For do_swap_page, find the highest order where the aligned range is + * completely swap entries with contiguous swap offsets. + */ + order = highest_order(orders); + while (orders) { + addr = ALIGN_DOWN(vmf->address, PAGE_SIZE << order); + if (can_swapin_thp(vmf, pte + pte_index(addr), 1 << order)) + break; + order = next_order(&orders, order); + } + + pte_unmap_unlock(pte, ptl); + + /* Try allocating the highest of the remaining orders. */ + gfp = vma_thp_gfp_mask(vma); + while (orders) { + addr = ALIGN_DOWN(vmf->address, PAGE_SIZE << order); + folio = vma_alloc_folio(gfp, order, vma, addr, true); + if (folio) { + if (!mem_cgroup_swapin_charge_folio(folio, vma->vm_mm, + gfp, entry)) + return folio; + count_mthp_stat(order, MTHP_STAT_SWPIN_FALLBACK_CHARGE); + folio_put(folio); + } + count_mthp_stat(order, MTHP_STAT_SWPIN_FALLBACK); + order = next_order(&orders, order); + } + +fallback: + return __alloc_swap_folio(vmf); +} +#else /* !CONFIG_TRANSPARENT_HUGEPAGE */ +static struct folio *alloc_swap_folio(struct vm_fault *vmf) +{ + return __alloc_swap_folio(vmf); +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + /* * We enter with non-exclusive mmap_lock (to exclude vma changes, * but allow concurrent faults), and pte mapped but not yet locked. @@ -4058,37 +4241,37 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) if (!folio) { if (data_race(si->flags & SWP_SYNCHRONOUS_IO) && __swap_count(entry) == 1) { - /* - * Prevent parallel swapin from proceeding with - * the cache flag. Otherwise, another thread may - * finish swapin first, free the entry, and swapout - * reusing the same entry. It's undetectable as - * pte_same() returns true due to entry reuse. - */ - if (swapcache_prepare(entry, 1)) { - /* Relax a bit to prevent rapid repeated page faults */ - add_wait_queue(&swapcache_wq, &wait); - schedule_timeout_uninterruptible(1); - remove_wait_queue(&swapcache_wq, &wait); - goto out; - } - need_clear_cache = true; - /* skip swapcache */ - folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, - vma, vmf->address, false); + folio = alloc_swap_folio(vmf); page = &folio->page; if (folio) { __folio_set_locked(folio); __folio_set_swapbacked(folio); - if (mem_cgroup_swapin_charge_folio(folio, - vma->vm_mm, GFP_KERNEL, - entry)) { - ret = VM_FAULT_OOM; + nr_pages = folio_nr_pages(folio); + if (folio_test_large(folio)) + entry.val = ALIGN_DOWN(entry.val, nr_pages); + /* + * Prevent parallel swapin from proceeding with + * the cache flag. Otherwise, another thread + * may finish swapin first, free the entry, and + * swapout reusing the same entry. It's + * undetectable as pte_same() returns true due + * to entry reuse. + */ + if (swapcache_prepare(entry, nr_pages)) { + /* + * Relax a bit to prevent rapid + * repeated page faults. + */ + add_wait_queue(&swapcache_wq, &wait); + schedule_timeout_uninterruptible(1); + remove_wait_queue(&swapcache_wq, &wait); goto out_page; } - mem_cgroup_swapin_uncharge_swap(entry); + need_clear_cache = true; + + mem_cgroup_swapin_uncharge_swap(entry, nr_pages); shadow = get_shadow_from_swap_cache(entry); if (shadow) @@ -4096,9 +4279,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) folio_add_lru(folio); - /* To provide entry to swap_readpage() */ + /* To provide entry to swap_read_folio() */ folio->swap = entry; - swap_readpage(page, true, NULL); + swap_read_folio(folio, true, NULL); folio->private = NULL; } } else { @@ -4195,6 +4378,24 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) goto out_nomap; } + /* allocated large folios for SWP_SYNCHRONOUS_IO */ + if (folio_test_large(folio) && !folio_test_swapcache(folio)) { + unsigned long nr = folio_nr_pages(folio); + unsigned long folio_start = ALIGN_DOWN(vmf->address, nr * PAGE_SIZE); + unsigned long idx = (vmf->address - folio_start) / PAGE_SIZE; + pte_t *folio_ptep = vmf->pte - idx; + pte_t folio_pte = ptep_get(folio_ptep); + + if (!pte_same(folio_pte, pte_move_swp_offset(vmf->orig_pte, -idx)) || + swap_pte_batch(folio_ptep, nr, folio_pte) != nr) + goto out_nomap; + + page_idx = idx; + address = folio_start; + ptep = folio_ptep; + goto check_folio; + } + nr_pages = 1; page_idx = 0; address = vmf->address; @@ -4327,11 +4528,12 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) folio_add_lru_vma(folio, vma); } else if (!folio_test_anon(folio)) { /* - * We currently only expect small !anon folios, which are either - * fully exclusive or fully shared. If we ever get large folios - * here, we have to be careful. + * We currently only expect small !anon folios which are either + * fully exclusive or fully shared, or new allocated large + * folios which are fully exclusive. If we ever get large + * folios within swapcache here, we have to be careful. */ - VM_WARN_ON_ONCE(folio_test_large(folio)); + VM_WARN_ON_ONCE(folio_test_large(folio) && folio_test_swapcache(folio)); VM_WARN_ON_FOLIO(!folio_test_locked(folio), folio); folio_add_new_anon_rmap(folio, vma, address, rmap_flags); } else { @@ -4374,7 +4576,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) out: /* Clear the swap cache pin for direct swapin after PTL unlock */ if (need_clear_cache) { - swapcache_clear(si, entry, 1); + swapcache_clear(si, entry, nr_pages); if (waitqueue_active(&swapcache_wq)) wake_up(&swapcache_wq); } @@ -4393,7 +4595,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) folio_put(swapcache); } if (need_clear_cache) { - swapcache_clear(si, entry, 1); + swapcache_clear(si, entry, nr_pages); if (waitqueue_active(&swapcache_wq)) wake_up(&swapcache_wq); } diff --git a/mm/page_io.c b/mm/page_io.c index 1887cd2093e3dc40d09319f1d54798443aed873b..839a3b0a90f057e29c39c5f53ad812c96b6e450d 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -201,7 +201,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc) folio_end_writeback(folio); return 0; } - __swap_writepage(&folio->page, wbc); + __swap_writepage(folio, wbc); return 0; } @@ -289,16 +289,16 @@ static void sio_write_complete(struct kiocb *iocb, long ret) mempool_free(sio, sio_pool); } -static void swap_writepage_fs(struct page *page, struct writeback_control *wbc) +static void swap_writepage_fs(struct folio *folio, struct writeback_control *wbc) { struct swap_iocb *sio = NULL; - struct swap_info_struct *sis = page_swap_info(page); + struct swap_info_struct *sis = swp_swap_info(folio->swap); struct file *swap_file = sis->swap_file; - loff_t pos = page_file_offset(page); + loff_t pos = folio_file_pos(folio); - count_swpout_vm_event(page_folio(page)); - set_page_writeback(page); - unlock_page(page); + count_swpout_vm_event(folio); + folio_start_writeback(folio); + folio_unlock(folio); if (wbc->swap_plug) sio = *wbc->swap_plug; if (sio) { @@ -316,8 +316,8 @@ static void swap_writepage_fs(struct page *page, struct writeback_control *wbc) sio->pages = 0; sio->len = 0; } - bvec_set_page(&sio->bvec[sio->pages], page, thp_size(page), 0); - sio->len += thp_size(page); + bvec_set_folio(&sio->bvec[sio->pages], folio, folio_size(folio), 0); + sio->len += folio_size(folio); sio->pages += 1; if (sio->pages == ARRAY_SIZE(sio->bvec) || !wbc->swap_plug) { swap_write_unplug(sio); @@ -327,17 +327,16 @@ static void swap_writepage_fs(struct page *page, struct writeback_control *wbc) *wbc->swap_plug = sio; } -static void swap_writepage_bdev_sync(struct page *page, +static void swap_writepage_bdev_sync(struct folio *folio, struct writeback_control *wbc, struct swap_info_struct *sis) { struct bio_vec bv; struct bio bio; - struct folio *folio = page_folio(page); bio_init(&bio, sis->bdev, &bv, 1, REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc)); - bio.bi_iter.bi_sector = swap_page_sector(page); - __bio_add_page(&bio, page, thp_size(page), 0); + bio.bi_iter.bi_sector = swap_folio_sector(folio); + bio_add_folio_nofail(&bio, folio, folio_size(folio), 0); bio_associate_blkg_from_page(&bio, folio); count_swpout_vm_event(folio); @@ -349,18 +348,17 @@ static void swap_writepage_bdev_sync(struct page *page, __end_swap_bio_write(&bio); } -static void swap_writepage_bdev_async(struct page *page, +static void swap_writepage_bdev_async(struct folio *folio, struct writeback_control *wbc, struct swap_info_struct *sis) { struct bio *bio; - struct folio *folio = page_folio(page); bio = bio_alloc(sis->bdev, 1, REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc), GFP_NOIO); - bio->bi_iter.bi_sector = swap_page_sector(page); + bio->bi_iter.bi_sector = swap_folio_sector(folio); bio->bi_end_io = end_swap_bio_write; - __bio_add_page(bio, page, thp_size(page), 0); + bio_add_folio_nofail(bio, folio, folio_size(folio), 0); bio_associate_blkg_from_page(bio, folio); count_swpout_vm_event(folio); @@ -369,27 +367,27 @@ static void swap_writepage_bdev_async(struct page *page, submit_bio(bio); } -void __swap_writepage(struct page *page, struct writeback_control *wbc) +void __swap_writepage(struct folio *folio, struct writeback_control *wbc) { - struct swap_info_struct *sis = page_swap_info(page); + struct swap_info_struct *sis = swp_swap_info(folio->swap); - VM_BUG_ON_PAGE(!PageSwapCache(page), page); + VM_BUG_ON_FOLIO(!folio_test_swapcache(folio), folio); /* * ->flags can be updated non-atomicially (scan_swap_map_slots), * but that will never affect SWP_FS_OPS, so the data_race * is safe. */ if (data_race(sis->flags & SWP_FS_OPS)) - swap_writepage_fs(page, wbc); + swap_writepage_fs(folio, wbc); /* * ->flags can be updated non-atomicially (scan_swap_map_slots), * but that will never affect SWP_SYNCHRONOUS_IO, so the data_race * is safe. */ else if (data_race(sis->flags & SWP_SYNCHRONOUS_IO)) - swap_writepage_bdev_sync(page, wbc, sis); + swap_writepage_bdev_sync(folio, wbc, sis); else - swap_writepage_bdev_async(page, wbc, sis); + swap_writepage_bdev_async(folio, wbc, sis); } void swap_write_unplug(struct swap_iocb *sio) @@ -413,6 +411,7 @@ static void sio_read_complete(struct kiocb *iocb, long ret) for (p = 0; p < sio->pages; p++) { struct folio *folio = page_folio(sio->bvec[p].bv_page); + count_mthp_stat(folio_order(folio), MTHP_STAT_SWPIN); folio_mark_uptodate(folio); folio_unlock(folio); } @@ -428,12 +427,11 @@ static void sio_read_complete(struct kiocb *iocb, long ret) mempool_free(sio, sio_pool); } -static void swap_readpage_fs(struct page *page, - struct swap_iocb **plug) +static void swap_read_folio_fs(struct folio *folio, struct swap_iocb **plug) { - struct swap_info_struct *sis = page_swap_info(page); + struct swap_info_struct *sis = swp_swap_info(folio->swap); struct swap_iocb *sio = NULL; - loff_t pos = page_file_offset(page); + loff_t pos = folio_file_pos(folio); if (plug) sio = *plug; @@ -452,8 +450,8 @@ static void swap_readpage_fs(struct page *page, sio->pages = 0; sio->len = 0; } - bvec_set_page(&sio->bvec[sio->pages], page, thp_size(page), 0); - sio->len += thp_size(page); + bvec_set_folio(&sio->bvec[sio->pages], folio, folio_size(folio), 0); + sio->len += folio_size(folio); sio->pages += 1; if (sio->pages == ARRAY_SIZE(sio->bvec) || !plug) { swap_read_unplug(sio); @@ -463,43 +461,45 @@ static void swap_readpage_fs(struct page *page, *plug = sio; } -static void swap_readpage_bdev_sync(struct page *page, +static void swap_read_folio_bdev_sync(struct folio *folio, struct swap_info_struct *sis) { struct bio_vec bv; struct bio bio; bio_init(&bio, sis->bdev, &bv, 1, REQ_OP_READ); - bio.bi_iter.bi_sector = swap_page_sector(page); - __bio_add_page(&bio, page, thp_size(page), 0); + bio.bi_iter.bi_sector = swap_folio_sector(folio); + bio_add_folio_nofail(&bio, folio, folio_size(folio), 0); /* * Keep this task valid during swap readpage because the oom killer may * attempt to access it in the page fault retry time check. */ get_task_struct(current); - count_vm_event(PSWPIN); + count_mthp_stat(folio_order(folio), MTHP_STAT_SWPIN); + count_vm_events(PSWPIN, folio_nr_pages(folio)); submit_bio_wait(&bio); __end_swap_bio_read(&bio); put_task_struct(current); } -static void swap_readpage_bdev_async(struct page *page, +static void swap_read_folio_bdev_async(struct folio *folio, struct swap_info_struct *sis) { struct bio *bio; bio = bio_alloc(sis->bdev, 1, REQ_OP_READ, GFP_KERNEL); - bio->bi_iter.bi_sector = swap_page_sector(page); + bio->bi_iter.bi_sector = swap_folio_sector(folio); bio->bi_end_io = end_swap_bio_read; - __bio_add_page(bio, page, thp_size(page), 0); - count_vm_event(PSWPIN); + bio_add_folio_nofail(bio, folio, folio_size(folio), 0); + count_mthp_stat(folio_order(folio), MTHP_STAT_SWPIN); + count_vm_events(PSWPIN, folio_nr_pages(folio)); submit_bio(bio); } -void swap_readpage(struct page *page, bool synchronous, struct swap_iocb **plug) +void swap_read_folio(struct folio *folio, bool synchronous, + struct swap_iocb **plug) { - struct folio *folio = page_folio(page); - struct swap_info_struct *sis = page_swap_info(page); + struct swap_info_struct *sis = swp_swap_info(folio->swap); bool workingset = folio_test_workingset(folio); unsigned long pflags; bool in_thrashing; @@ -524,11 +524,11 @@ void swap_readpage(struct page *page, bool synchronous, struct swap_iocb **plug) folio_mark_uptodate(folio); folio_unlock(folio); } else if (data_race(sis->flags & SWP_FS_OPS)) { - swap_readpage_fs(page, plug); + swap_read_folio_fs(folio, plug); } else if (synchronous || (sis->flags & SWP_SYNCHRONOUS_IO)) { - swap_readpage_bdev_sync(page, sis); + swap_read_folio_bdev_sync(folio, sis); } else { - swap_readpage_bdev_async(page, sis); + swap_read_folio_bdev_async(folio, sis); } if (workingset) { diff --git a/mm/shmem.c b/mm/shmem.c index 3f5ba0827ecfaf5ccb33552d9559564d04e369e3..6fdc7144ca91b8325e56135b789a88fb7548c20e 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1719,18 +1719,16 @@ static struct folio *shmem_swapin(swp_entry_t swap, gfp_t gfp, struct shmem_inode_info *info, pgoff_t index) { struct vm_area_struct pvma; - struct page *page; + struct folio *folio; struct vm_fault vmf = { .vma = &pvma, }; shmem_pseudo_vma_init(&pvma, info, index); - page = swap_cluster_readahead(swap, gfp, &vmf); + folio = swap_cluster_readahead(swap, gfp, &vmf); shmem_pseudo_vma_destroy(&pvma); - if (!page) - return NULL; - return page_folio(page); + return folio; } /* diff --git a/mm/swap.h b/mm/swap.h index 500f99202776627be33e61be98d30931872e7c2b..af4dc6921b904558c86b0c406ef3d858ef6b654b 100644 --- a/mm/swap.h +++ b/mm/swap.h @@ -8,7 +8,8 @@ /* linux/mm/page_io.c */ int sio_pool_init(void); struct swap_iocb; -void swap_readpage(struct page *page, bool do_poll, struct swap_iocb **plug); +void swap_read_folio(struct folio *folio, bool do_poll, + struct swap_iocb **plug); void __swap_read_unplug(struct swap_iocb *plug); static inline void swap_read_unplug(struct swap_iocb *plug) { @@ -17,7 +18,7 @@ static inline void swap_read_unplug(struct swap_iocb *plug) } void swap_write_unplug(struct swap_iocb *sio); int swap_writepage(struct page *page, struct writeback_control *wbc); -void __swap_writepage(struct page *page, struct writeback_control *wbc); +void __swap_writepage(struct folio *folio, struct writeback_control *wbc); /* linux/mm/swap_state.c */ /* One swap address space for each 64M swap space */ @@ -43,27 +44,25 @@ struct folio *swap_cache_get_folio(swp_entry_t entry, struct vm_area_struct *vma, unsigned long addr); struct folio *filemap_get_incore_folio(struct address_space *mapping, pgoff_t index); - -struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, - struct vm_area_struct *vma, - unsigned long addr, - struct swap_iocb **plug); -struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, +struct folio *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, + struct vm_area_struct *vma, unsigned long addr, + struct swap_iocb **plug); +struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, struct vm_area_struct *vma, unsigned long addr, bool *new_page_allocated); -struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t flag, +struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t flag, struct vm_fault *vmf); struct page *swapin_readahead(swp_entry_t entry, gfp_t flag, struct vm_fault *vmf); static inline unsigned int folio_swap_flags(struct folio *folio) { - return page_swap_info(&folio->page)->flags; + return swp_swap_info(folio->swap)->flags; } #else /* CONFIG_SWAP */ struct swap_iocb; -static inline void swap_readpage(struct page *page, bool do_poll, +static inline void swap_read_folio(struct folio *folio, bool do_poll, struct swap_iocb **plug) { } @@ -80,7 +79,7 @@ static inline void show_swap_cache_info(void) { } -static inline struct page *swap_cluster_readahead(swp_entry_t entry, +static inline struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask, struct vm_fault *vmf) { return NULL; diff --git a/mm/swap_state.c b/mm/swap_state.c index bd2bc1d4cd868c0896f214f0160a43d8620c01e1..75c149a830c67edda3159621fe478298c7fb32fa 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -426,13 +426,12 @@ struct folio *filemap_get_incore_folio(struct address_space *mapping, return folio; } -struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, +struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, struct vm_area_struct *vma, unsigned long addr, bool *new_page_allocated) { struct swap_info_struct *si; struct folio *folio; - struct page *page; void *shadow = NULL; *new_page_allocated = false; @@ -449,10 +448,8 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, */ folio = filemap_get_folio(swap_address_space(entry), swp_offset(entry)); - if (!IS_ERR(folio)) { - page = folio_file_page(folio, swp_offset(entry)); - goto got_page; - } + if (!IS_ERR(folio)) + goto got_folio; /* * Just skip read ahead for unused swap slot. @@ -466,7 +463,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, goto fail_put_swap; /* - * Get a new page to read into from swap. Allocate it now, + * Get a new folio to read into from swap. Allocate it now, * before marking swap_map SWAP_HAS_CACHE, when -EEXIST will * cause any racers to loop around until we add it to cache. */ @@ -490,13 +487,13 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, * stumble across a swap_map entry whose SWAP_HAS_CACHE * has not yet been cleared. Or race against another * __read_swap_cache_async(), which has set SWAP_HAS_CACHE - * in swap_map, but not yet added its page to swap cache. + * in swap_map, but not yet added its folio to swap cache. */ schedule_timeout_uninterruptible(1); } /* - * The swap entry is ours to swap in. Prepare the new page. + * The swap entry is ours to swap in. Prepare the new folio. */ __folio_set_locked(folio); @@ -509,7 +506,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, if (add_to_swap_cache(folio, entry, gfp_mask & GFP_RECLAIM_MASK, &shadow)) goto fail_unlock; - mem_cgroup_swapin_uncharge_swap(entry); + mem_cgroup_swapin_uncharge_swap(entry, 1); if (shadow) workingset_refault(folio, shadow); @@ -517,10 +514,9 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, /* Caller will initiate read into locked folio */ folio_add_lru(folio); *new_page_allocated = true; - page = &folio->page; -got_page: +got_folio: put_swap_device(si); - return page; + return folio; fail_unlock: put_swap_folio(folio, entry); @@ -538,21 +534,21 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, * the swap entry is no longer in use. * * get/put_swap_device() aren't needed to call this function, because - * __read_swap_cache_async() call them and swap_readpage() holds the + * __read_swap_cache_async() call them and swap_read_folio() holds the * swap cache folio lock. */ -struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, - struct vm_area_struct *vma, - unsigned long addr, struct swap_iocb **plug) +struct folio *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, + struct vm_area_struct *vma, unsigned long addr, + struct swap_iocb **plug) { bool page_was_allocated; - struct page *retpage = __read_swap_cache_async(entry, gfp_mask, + struct folio *folio = __read_swap_cache_async(entry, gfp_mask, vma, addr, &page_was_allocated); if (page_was_allocated) - swap_readpage(retpage, false, plug); + swap_read_folio(folio, false, plug); - return retpage; + return folio; } static unsigned int __swapin_nr_pages(unsigned long prev_offset, @@ -622,7 +618,7 @@ static unsigned long swapin_nr_pages(unsigned long offset) * @gfp_mask: memory allocation flags * @vmf: fault information * - * Returns the struct page for entry and addr, after queueing swapin. + * Returns the struct folio for entry and addr, after queueing swapin. * * Primitive swap readahead code. We simply read an aligned block of * (1 << page_cluster) entries in the swap area. This method is chosen @@ -634,10 +630,10 @@ static unsigned long swapin_nr_pages(unsigned long offset) * * Caller must hold read mmap_lock if vmf->vma is not NULL. */ -struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask, +struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask, struct vm_fault *vmf) { - struct page *page; + struct folio *folio; unsigned long entry_offset = swp_offset(entry); unsigned long offset = entry_offset; unsigned long start_offset, end_offset; @@ -664,19 +660,19 @@ struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask, blk_start_plug(&plug); for (offset = start_offset; offset <= end_offset ; offset++) { /* Ok, do the async read-ahead now */ - page = __read_swap_cache_async( + folio = __read_swap_cache_async( swp_entry(swp_type(entry), offset), gfp_mask, vma, addr, &page_allocated); - if (!page) + if (!folio) continue; if (page_allocated) { - swap_readpage(page, false, &splug); + swap_read_folio(folio, false, &splug); if (offset != entry_offset) { - SetPageReadahead(page); + folio_set_readahead(folio); count_vm_event(SWAP_RA); } } - put_page(page); + folio_put(folio); } blk_finish_plug(&plug); swap_read_unplug(splug); @@ -786,7 +782,7 @@ static void swap_ra_info(struct vm_fault *vmf, * @gfp_mask: memory allocation flags * @vmf: fault information * - * Returns the struct page for entry and addr, after queueing swapin. + * Returns the struct folio for entry and addr, after queueing swapin. * * Primitive swap readahead code. We simply read in a few pages whose * virtual addresses are around the fault address in the same vma. @@ -794,13 +790,13 @@ static void swap_ra_info(struct vm_fault *vmf, * Caller must hold read mmap_lock if vmf->vma is not NULL. * */ -static struct page *swap_vma_readahead(swp_entry_t fentry, gfp_t gfp_mask, +static struct folio *swap_vma_readahead(swp_entry_t fentry, gfp_t gfp_mask, struct vm_fault *vmf) { struct blk_plug plug; struct swap_iocb *splug = NULL; struct vm_area_struct *vma = vmf->vma; - struct page *page; + struct folio *folio; pte_t *pte = NULL, pentry; unsigned long addr; swp_entry_t entry; @@ -831,18 +827,18 @@ static struct page *swap_vma_readahead(swp_entry_t fentry, gfp_t gfp_mask, continue; pte_unmap(pte); pte = NULL; - page = __read_swap_cache_async(entry, gfp_mask, vma, + folio = __read_swap_cache_async(entry, gfp_mask, vma, addr, &page_allocated); - if (!page) + if (!folio) continue; if (page_allocated) { - swap_readpage(page, false, &splug); + swap_read_folio(folio, false, &splug); if (i != ra_info.offset) { - SetPageReadahead(page); + folio_set_readahead(folio); count_vm_event(SWAP_RA); } } - put_page(page); + folio_put(folio); } if (pte) pte_unmap(pte); @@ -870,9 +866,15 @@ static struct page *swap_vma_readahead(swp_entry_t fentry, gfp_t gfp_mask, struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask, struct vm_fault *vmf) { - return swap_use_vma_readahead() ? + struct folio *folio; + + folio = swap_use_vma_readahead() ? swap_vma_readahead(entry, gfp_mask, vmf) : swap_cluster_readahead(entry, gfp_mask, vmf); + + if (!folio) + return NULL; + return folio_file_page(folio, swp_offset(entry)); } #ifdef CONFIG_SYSFS diff --git a/mm/swapfile.c b/mm/swapfile.c index 3af5b6ebb2412735e386f293eae789188a0350e6..5e77c6417ec66d71e8f9015ae83605ae186b197b 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -232,14 +232,14 @@ offset_to_swap_extent(struct swap_info_struct *sis, unsigned long offset) BUG(); } -sector_t swap_page_sector(struct page *page) +sector_t swap_folio_sector(struct folio *folio) { - struct swap_info_struct *sis = page_swap_info(page); + struct swap_info_struct *sis = swp_swap_info(folio->swap); struct swap_extent *se; sector_t sector; pgoff_t offset; - offset = __page_file_index(page); + offset = swp_offset(folio->swap); se = offset_to_swap_extent(sis, offset); sector = se->start_block + (offset - se->start_page); return sector << (PAGE_SHIFT - 9); @@ -2423,7 +2423,7 @@ EXPORT_SYMBOL_GPL(add_swap_extent); /* * A `swap extent' is a simple thing which maps a contiguous range of pages * onto a contiguous range of disk blocks. A rbtree of swap extents is - * built at swapon time and is then used at swap_writepage/swap_readpage + * built at swapon time and is then used at swap_writepage/swap_read_folio * time for locating where on disk a page belongs. * * If the swapfile is an S_ISBLK block device, a single extent is installed. @@ -3595,18 +3595,12 @@ struct swap_info_struct *swp_swap_info(swp_entry_t entry) return swap_type_to_swap_info(swp_type(entry)); } -struct swap_info_struct *page_swap_info(struct page *page) -{ - swp_entry_t entry = page_swap_entry(page); - return swp_swap_info(entry); -} - /* * out-of-line methods to avoid include hell. */ struct address_space *swapcache_mapping(struct folio *folio) { - return page_swap_info(&folio->page)->swap_file->f_mapping; + return swp_swap_info(folio->swap)->swap_file->f_mapping; } EXPORT_SYMBOL_GPL(swapcache_mapping); diff --git a/mm/zswap.c b/mm/zswap.c index 69681b9173fdcbf9f90967e21b5159760a16c601..491487925192998a1eb9fbcbfe8316f4ae4b7c0f 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -84,6 +84,7 @@ static bool zswap_pool_reached_full; static int zswap_setup(void); /* Enable/disable zswap */ +static DEFINE_STATIC_KEY_MAYBE(CONFIG_ZSWAP_DEFAULT_ON, zswap_ever_enabled); static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON); static int zswap_enabled_param_set(const char *, const struct kernel_param *); @@ -144,6 +145,11 @@ module_param_named(exclusive_loads, zswap_exclusive_loads_enabled, bool, 0644); /* Number of zpools in zswap_pool (empirically determined for scalability) */ #define ZSWAP_NR_ZPOOLS 32 +bool zswap_never_enabled(void) +{ + return !static_branch_maybe(CONFIG_ZSWAP_DEFAULT_ON, &zswap_ever_enabled); +} + /********************************* * data structures **********************************/ @@ -1041,14 +1047,14 @@ static int zswap_enabled_param_set(const char *val, * writeback code **********************************/ /* - * Attempts to free an entry by adding a page to the swap cache, - * decompressing the entry data into the page, and issuing a - * bio write to write the page back to the swap device. + * Attempts to free an entry by adding a folio to the swap cache, + * decompressing the entry data into the folio, and issuing a + * bio write to write the folio back to the swap device. * - * This can be thought of as a "resumed writeback" of the page + * This can be thought of as a "resumed writeback" of the folio * to the swap device. We are basically resuming the same swap * writeback path that was intercepted with the zswap_store() - * in the first place. After the page has been decompressed into + * in the first place. After the folio has been decompressed into * the swap cache, the compressed version stored by zswap can be * freed. */ @@ -1056,11 +1062,11 @@ static int zswap_writeback_entry(struct zswap_entry *entry, struct zswap_tree *tree) { swp_entry_t swpentry = entry->swpentry; - struct page *page; + struct folio *folio; struct scatterlist input, output; struct crypto_acomp_ctx *acomp_ctx; struct zpool *pool = zswap_find_zpool(entry); - bool page_was_allocated; + bool folio_was_allocated; u8 *src, *tmp = NULL; unsigned int dlen; int ret; @@ -1074,34 +1080,34 @@ static int zswap_writeback_entry(struct zswap_entry *entry, return -ENOMEM; } - /* try to allocate swap cache page */ - page = __read_swap_cache_async(swpentry, GFP_KERNEL, NULL, 0, - &page_was_allocated); - if (!page) { + /* try to allocate swap cache folio */ + folio = __read_swap_cache_async(swpentry, GFP_KERNEL, NULL, 0, + &folio_was_allocated); + if (!folio) { ret = -ENOMEM; goto fail; } - /* Found an existing page, we raced with load/swapin */ - if (!page_was_allocated) { - put_page(page); + /* Found an existing folio, we raced with load/swapin */ + if (!folio_was_allocated) { + folio_put(folio); ret = -EEXIST; goto fail; } /* - * Page is locked, and the swapcache is now secured against + * folio is locked, and the swapcache is now secured against * concurrent swapping to and from the slot. Verify that the * swap entry hasn't been invalidated and recycled behind our * backs (our zswap_entry reference doesn't prevent that), to - * avoid overwriting a new swap page with old compressed data. + * avoid overwriting a new swap folio with old compressed data. */ spin_lock(&tree->lock); if (zswap_rb_search(&tree->rbroot, swp_offset(entry->swpentry)) != entry) { spin_unlock(&tree->lock); - delete_from_swap_cache(page_folio(page)); - unlock_page(page); - put_page(page); + delete_from_swap_cache(folio); + folio_unlock(folio); + folio_put(folio); ret = -ENOMEM; goto fail; } @@ -1121,7 +1127,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry, mutex_lock(acomp_ctx->mutex); sg_init_one(&input, src, entry->length); sg_init_table(&output, 1); - sg_set_page(&output, page, PAGE_SIZE, 0); + sg_set_page(&output, &folio->page, PAGE_SIZE, 0); acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen); ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait); dlen = acomp_ctx->req->dlen; @@ -1135,15 +1141,15 @@ static int zswap_writeback_entry(struct zswap_entry *entry, BUG_ON(ret); BUG_ON(dlen != PAGE_SIZE); - /* page is up to date */ - SetPageUptodate(page); + /* folio is up to date */ + folio_mark_uptodate(folio); /* move it to the tail of the inactive list after end_writeback */ - SetPageReclaim(page); + folio_set_reclaim(folio); /* start writeback */ - __swap_writepage(page, &wbc); - put_page(page); + __swap_writepage(folio, &wbc); + folio_put(folio); zswap_written_back_pages++; return ret; @@ -1294,7 +1300,7 @@ bool zswap_store(struct folio *folio) dst = acomp_ctx->dstmem; sg_init_table(&input, 1); - sg_set_page(&input, page, PAGE_SIZE, 0); + sg_set_page(&input, &folio->page, PAGE_SIZE, 0); /* zswap_dstmem is of size (PAGE_SIZE * 2). Reflect same in sg_list */ sg_init_one(&output, dst, PAGE_SIZE * 2); @@ -1410,6 +1416,9 @@ bool zswap_load(struct folio *folio) VM_WARN_ON_ONCE(!folio_test_locked(folio)); + if (zswap_never_enabled()) + return false; + /* find */ spin_lock(&tree->lock); entry = zswap_entry_find_get(&tree->rbroot, offset); @@ -1611,6 +1620,7 @@ static int zswap_setup(void) zpool_get_type(pool->zpools[0])); list_add(&pool->list, &zswap_pools); zswap_has_pool = true; + static_branch_enable(&zswap_ever_enabled); } else { pr_err("pool creation failed\n"); zswap_enabled = false;