diff --git a/include/linux/swap.h b/include/linux/swap.h index cebabb6db07cbd1ad1f8806b3f8f3c893a59193c..c92fa9a823a77e37d637c50afe600870b78e1f7e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -468,6 +468,7 @@ extern void __delete_from_swap_cache(struct page *page, extern void delete_from_swap_cache(struct page *); extern void clear_shadow_from_swap_cache(int type, unsigned long begin, unsigned long end); +void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry); extern void free_page_and_swap_cache(struct page *); extern void free_pages_and_swap_cache(struct page **, int); extern struct page *lookup_swap_cache(swp_entry_t entry, @@ -610,6 +611,10 @@ static inline int swap_writepage(struct page *p, struct writeback_control *wbc) return 0; } +static inline void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry) +{ +} + static inline struct page *lookup_swap_cache(swp_entry_t swp, struct vm_area_struct *vma, unsigned long addr) diff --git a/mm/memory.c b/mm/memory.c index 8e4068ef034b03926507b46ab6221811c6707d1a..68e92af0bfa54159c1a6c7157e49ffe97e76e4b3 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3364,6 +3364,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct page *page = NULL, *swapcache; + struct swap_info_struct *si = NULL; + bool need_clear_cache = false; swp_entry_t entry; pte_t pte; int locked; @@ -3429,10 +3431,24 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) swapcache = page; if (!page) { - struct swap_info_struct *si = swp_swap_info(entry); + si = swp_swap_info(entry); if (data_race(si->flags & SWP_SYNCHRONOUS_IO) && __swap_count(entry) == 1) { + /* + * Prevent parallel swapin from proceeding with + * the cache flag. Otherwise, another thread may + * finish swapin first, free the entry, and swapout + * reusing the same entry. It's undetectable as + * pte_same() returns true due to entry reuse. + */ + if (swapcache_prepare(entry)) { + /* Relax a bit to prevent rapid repeated page faults */ + schedule_timeout_uninterruptible(1); + goto out; + } + need_clear_cache = true; + /* skip swapcache */ page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address); @@ -3601,6 +3617,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) unlock: pte_unmap_unlock(vmf->pte, vmf->ptl); out: + /* Clear the swap cache pin for direct swapin after PTL unlock */ + if (need_clear_cache) + swapcache_clear(si, entry); return ret; out_nomap: pte_unmap_unlock(vmf->pte, vmf->ptl); @@ -3612,6 +3631,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) unlock_page(swapcache); put_page(swapcache); } + if (need_clear_cache) + swapcache_clear(si, entry); return ret; } diff --git a/mm/swapfile.c b/mm/swapfile.c index 9fe12f0b92058760582d28cb3f3f41f1b607a327..3d7b26249f62040d9cdcee2ce2566c1fee3f6c19 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3561,6 +3561,19 @@ int swapcache_prepare(swp_entry_t entry) return __swap_duplicate(entry, SWAP_HAS_CACHE); } +void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry) +{ + struct swap_cluster_info *ci; + unsigned long offset = swp_offset(entry); + unsigned char usage; + + ci = lock_cluster_or_swap_info(si, offset); + usage = __swap_entry_free_locked(si, offset, SWAP_HAS_CACHE); + unlock_cluster_or_swap_info(si, ci); + if (!usage) + free_swap_slot(entry); +} + struct swap_info_struct *swp_swap_info(swp_entry_t entry) { return swap_type_to_swap_info(swp_type(entry));