diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index d232de7cdc569f88adb5e2fe5d978fcaa68b2cc2..6c2754d7bfed629d75b4248abc41a43980d263d0 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -174,7 +174,7 @@ extern void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol); /* Check if a vma is migratable */ extern bool vma_migratable(struct vm_area_struct *vma); -extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long); +int mpol_misplaced(struct folio *, struct vm_area_struct *, unsigned long); extern void mpol_put_task_policy(struct task_struct *); static inline bool mpol_is_preferred_many(struct mempolicy *pol) @@ -278,7 +278,8 @@ static inline int mpol_parse_str(char *str, struct mempolicy **mpol) } #endif -static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma, +static inline int mpol_misplaced(struct folio *folio, + struct vm_area_struct *vma, unsigned long address) { return -1; /* no node preference */ diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 711dd9412561f6291f73e35b6a8f0c75fa7f1b0b..2ce13e8a309bdcc6b9a05448471e62cdf62aae88 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -142,10 +142,10 @@ const struct movable_operations *page_movable_ops(struct page *page) } #ifdef CONFIG_NUMA_BALANCING -int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, +int migrate_misplaced_folio(struct folio *folio, struct vm_area_struct *vma, int node); #else -static inline int migrate_misplaced_page(struct page *page, +static inline int migrate_misplaced_folio(struct folio *folio, struct vm_area_struct *vma, int node) { return -EAGAIN; /* can't migrate now */ diff --git a/include/linux/mm.h b/include/linux/mm.h index bf5d0b1b16f4341e8851d9bdfef30df631f62226..8cf86b56aba5234cf4f55967437d14d39f24e6ef 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1337,7 +1337,6 @@ void set_pte_range(struct vm_fault *vmf, struct folio *folio, struct page *page, unsigned int nr, unsigned long addr); vm_fault_t finish_fault(struct vm_fault *vmf); -vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf); #endif /* @@ -1686,26 +1685,26 @@ static inline bool __cpupid_match_pid(pid_t task_pid, int cpupid) #define cpupid_match_pid(task, cpupid) __cpupid_match_pid(task->pid, cpupid) #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS -static inline int page_cpupid_xchg_last(struct page *page, int cpupid) +static inline int folio_xchg_last_cpupid(struct folio *folio, int cpupid) { - return xchg(&page->_last_cpupid, cpupid & LAST_CPUPID_MASK); + return xchg(&folio->_last_cpupid, cpupid & LAST_CPUPID_MASK); } -static inline int page_cpupid_last(struct page *page) +static inline int folio_last_cpupid(struct folio *folio) { - return page->_last_cpupid; + return folio->_last_cpupid; } static inline void page_cpupid_reset_last(struct page *page) { page->_last_cpupid = -1 & LAST_CPUPID_MASK; } #else -static inline int page_cpupid_last(struct page *page) +static inline int folio_last_cpupid(struct folio *folio) { - return (page->flags >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK; + return (folio->flags >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK; } -extern int page_cpupid_xchg_last(struct page *page, int cpupid); +int folio_xchg_last_cpupid(struct folio *folio, int cpupid); static inline void page_cpupid_reset_last(struct page *page) { @@ -1713,11 +1712,12 @@ static inline void page_cpupid_reset_last(struct page *page) } #endif /* LAST_CPUPID_NOT_IN_PAGE_FLAGS */ -static inline int xchg_page_access_time(struct page *page, int time) +static inline int folio_xchg_access_time(struct folio *folio, int time) { int last_time; - last_time = page_cpupid_xchg_last(page, time >> PAGE_ACCESS_TIME_BUCKETS); + last_time = folio_xchg_last_cpupid(folio, + time >> PAGE_ACCESS_TIME_BUCKETS); return last_time << PAGE_ACCESS_TIME_BUCKETS; } @@ -1731,19 +1731,19 @@ static inline void vma_set_access_pid_bit(struct vm_area_struct *vma) } } #else /* !CONFIG_NUMA_BALANCING */ -static inline int page_cpupid_xchg_last(struct page *page, int cpupid) +static inline int folio_xchg_last_cpupid(struct folio *folio, int cpupid) { - return page_to_nid(page); /* XXX */ + return folio_nid(folio); /* XXX */ } -static inline int xchg_page_access_time(struct page *page, int time) +static inline int folio_xchg_access_time(struct folio *folio, int time) { return 0; } -static inline int page_cpupid_last(struct page *page) +static inline int folio_last_cpupid(struct folio *folio) { - return page_to_nid(page); /* XXX */ + return folio_nid(folio); /* XXX */ } static inline int cpupid_to_nid(int cpupid) @@ -2327,6 +2327,8 @@ struct folio *vm_normal_folio(struct vm_area_struct *vma, unsigned long addr, pte_t pte); struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte); +struct folio *vm_normal_folio_pmd(struct vm_area_struct *vma, + unsigned long addr, pmd_t pmd); struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t pmd); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 36c5b43999e608a84fda4f34c07e0df3e7884ac3..582aa5e44a5a7d04c9cc1198f45d65d68ef52218 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -199,6 +199,10 @@ struct page { not kmapped, ie. highmem) */ #endif /* WANT_PAGE_VIRTUAL */ +#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS + int _last_cpupid; +#endif + #ifdef CONFIG_KMSAN /* * KMSAN metadata for this page: @@ -210,10 +214,6 @@ struct page { struct page *kmsan_shadow; struct page *kmsan_origin; #endif - -#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS - int _last_cpupid; -#endif } _struct_page_alignment; /* @@ -272,6 +272,8 @@ typedef struct { * @_refcount: Do not access this member directly. Use folio_ref_count() * to find how many references there are to this folio. * @memcg_data: Memory Control Group data. + * @virtual: Virtual address in the kernel direct map. + * @_last_cpupid: IDs of last CPU and last process that accessed the folio. * @_entire_mapcount: Do not use directly, call folio_entire_mapcount(). * @_nr_pages_mapped: Do not use directly, call folio_mapcount(). * @_pincount: Do not use directly, call folio_maybe_dma_pinned(). @@ -317,6 +319,12 @@ struct folio { atomic_t _refcount; #ifdef CONFIG_MEMCG unsigned long memcg_data; +#endif +#if defined(WANT_PAGE_VIRTUAL) + void *virtual; +#endif +#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS + int _last_cpupid; #endif /* private: the union with struct page is transitional */ }; @@ -373,6 +381,12 @@ FOLIO_MATCH(_refcount, _refcount); #ifdef CONFIG_MEMCG FOLIO_MATCH(memcg_data, memcg_data); #endif +#if defined(WANT_PAGE_VIRTUAL) +FOLIO_MATCH(virtual, virtual); +#endif +#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS +FOLIO_MATCH(_last_cpupid, _last_cpupid); +#endif #undef FOLIO_MATCH #define FOLIO_MATCH(pg, fl) \ static_assert(offsetof(struct folio, fl) == \ diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h index 3988762efe15c0e5a80602e2c9acb6a5820a740e..06a9d35650f07da2dc19aeda9f927153d6f7853d 100644 --- a/include/linux/sched/numa_balancing.h +++ b/include/linux/sched/numa_balancing.h @@ -20,8 +20,8 @@ extern void task_numa_fault(int last_node, int node, int pages, int flags); extern pid_t task_numa_group_id(struct task_struct *p); extern void set_numabalancing_state(bool enabled); extern void task_numa_free(struct task_struct *p, bool final); -extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page, - int src_nid, int dst_cpu); +bool should_numa_migrate_memory(struct task_struct *p, struct folio *folio, + int src_nid, int dst_cpu); #else static inline void task_numa_fault(int last_node, int node, int pages, int flags) @@ -38,7 +38,7 @@ static inline void task_numa_free(struct task_struct *p, bool final) { } static inline bool should_numa_migrate_memory(struct task_struct *p, - struct page *page, int src_nid, int dst_cpu) + struct folio *folio, int src_nid, int dst_cpu) { return true; } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 1795f6fe991f3c0f9c23123f224660dd0aed2d10..2430c88e04284f130a4c2b04c544baa026d4e489 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1774,12 +1774,12 @@ static bool pgdat_free_space_enough(struct pglist_data *pgdat) * The smaller the hint page fault latency, the higher the possibility * for the page to be hot. */ -static int numa_hint_fault_latency(struct page *page) +static int numa_hint_fault_latency(struct folio *folio) { int last_time, time; time = jiffies_to_msecs(jiffies); - last_time = xchg_page_access_time(page, time); + last_time = folio_xchg_access_time(folio, time); return (time - last_time) & PAGE_ACCESS_TIME_MASK; } @@ -1836,7 +1836,7 @@ static void numa_promotion_adjust_threshold(struct pglist_data *pgdat, } } -bool should_numa_migrate_memory(struct task_struct *p, struct page * page, +bool should_numa_migrate_memory(struct task_struct *p, struct folio *folio, int src_nid, int dst_cpu) { struct numa_group *ng = deref_curr_numa_group(p); @@ -1866,16 +1866,16 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page, numa_promotion_adjust_threshold(pgdat, rate_limit, def_th); th = pgdat->nbp_threshold ? : def_th; - latency = numa_hint_fault_latency(page); + latency = numa_hint_fault_latency(folio); if (latency >= th) return false; return !numa_promotion_rate_limit(pgdat, rate_limit, - thp_nr_pages(page)); + folio_nr_pages(folio)); } this_cpupid = cpu_pid_to_cpupid(dst_cpu, current->pid); - last_cpupid = page_cpupid_xchg_last(page, this_cpupid); + last_cpupid = folio_xchg_last_cpupid(folio, this_cpupid); if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) && !node_is_toptier(src_nid) && !cpupid_valid(last_cpupid)) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 064fbd90822b4991cfe642ac6dd8ded6ec9d2d39..981198300316740e4b2ad0e0d1c14e6c11dcdc5e 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1490,9 +1490,9 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) struct vm_area_struct *vma = vmf->vma; pmd_t oldpmd = vmf->orig_pmd; pmd_t pmd; - struct page *page; + struct folio *folio; unsigned long haddr = vmf->address & HPAGE_PMD_MASK; - int page_nid = NUMA_NO_NODE; + int nid = NUMA_NO_NODE; int target_nid, last_cpupid = (-1 & LAST_CPUPID_MASK); bool migrated = false, writable = false; int flags = 0; @@ -1514,36 +1514,34 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) can_change_pmd_writable(vma, vmf->address, pmd)) writable = true; - page = vm_normal_page_pmd(vma, haddr, pmd); - if (!page) + folio = vm_normal_folio_pmd(vma, haddr, pmd); + if (!folio) goto out_map; /* See similar comment in do_numa_page for explanation */ if (!writable) flags |= TNF_NO_GROUP; - page_nid = page_to_nid(page); + nid = folio_nid(folio); /* * For memory tiering mode, cpupid of slow memory page is used * to record page access time. So use default value. */ - if (node_is_toptier(page_nid)) - last_cpupid = page_cpupid_last(page); - target_nid = numa_migrate_prep(page, vma, haddr, page_nid, - &flags); - + if (node_is_toptier(nid)) + last_cpupid = folio_last_cpupid(folio); + target_nid = numa_migrate_prep(folio, vma, haddr, nid, &flags); if (target_nid == NUMA_NO_NODE) { - put_page(page); + folio_put(folio); goto out_map; } spin_unlock(vmf->ptl); writable = false; - migrated = migrate_misplaced_page(page, vma, target_nid); + migrated = migrate_misplaced_folio(folio, vma, target_nid); if (migrated) { flags |= TNF_MIGRATED; - page_nid = target_nid; + nid = target_nid; } else { flags |= TNF_MIGRATE_FAIL; vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); @@ -1555,9 +1553,8 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) } out: - if (page_nid != NUMA_NO_NODE) - task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, - flags); + if (nid != NUMA_NO_NODE) + task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags); return 0; @@ -1825,7 +1822,7 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION if (is_swap_pmd(*pmd)) { swp_entry_t entry = pmd_to_swp_entry(*pmd); - struct page *page = pfn_swap_entry_to_page(entry); + struct folio *folio = page_folio(pfn_swap_entry_to_page(entry)); pmd_t newpmd; VM_BUG_ON(!is_pmd_migration_entry(*pmd)); @@ -1834,7 +1831,7 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, * A protection check is difficult so * just be safe and disable write */ - if (PageAnon(page)) + if (folio_test_anon(folio)) entry = make_readable_exclusive_migration_entry(swp_offset(entry)); else entry = make_readable_migration_entry(swp_offset(entry)); @@ -1856,7 +1853,7 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, #endif if (prot_numa) { - struct page *page; + struct folio *folio; bool toptier; /* * Avoid trapping faults against the zero page. The read-only @@ -1869,8 +1866,8 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, if (pmd_protnone(*pmd)) goto unlock; - page = pmd_page(*pmd); - toptier = node_is_toptier(page_to_nid(page)); + folio = page_folio(pmd_page(*pmd)); + toptier = node_is_toptier(folio_nid(folio)); /* * Skip scanning top tier node if normal numa * balancing is disabled @@ -1881,7 +1878,8 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING && !toptier) - xchg_page_access_time(page, jiffies_to_msecs(jiffies)); + folio_xchg_access_time(folio, + jiffies_to_msecs(jiffies)); } /* * In case prot_numa, we are under mmap_read_lock(mm). It's critical @@ -2483,7 +2481,7 @@ static void __split_huge_page_tail(struct folio *folio, int tail, if (page_is_idle(head)) set_page_idle(page_tail); - page_cpupid_xchg_last(page_tail, page_cpupid_last(head)); + folio_xchg_last_cpupid(new_folio, folio_last_cpupid(folio)); /* * always add to the tail because some iterators expect new diff --git a/mm/internal.h b/mm/internal.h index 30cf724ddbce3399999d6a9e9816fa133e9c5c4b..e233b15c379727e5c101ca2667e553386a410b4d 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -930,7 +930,7 @@ void vunmap_range_noflush(unsigned long start, unsigned long end); void __vunmap_range_noflush(unsigned long start, unsigned long end); -int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, +int numa_migrate_prep(struct folio *folio, struct vm_area_struct *vma, unsigned long addr, int page_nid, int *flags); void free_zone_device_page(struct page *page); diff --git a/mm/memory.c b/mm/memory.c index 517221f013035345f611a2a595dd31f0fbbe53b1..f4667bfae83bdeb3765e6024bc5e20d782486ba9 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -691,6 +691,16 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, out: return pfn_to_page(pfn); } + +struct folio *vm_normal_folio_pmd(struct vm_area_struct *vma, + unsigned long addr, pmd_t pmd) +{ + struct page *page = vm_normal_page_pmd(vma, addr, pmd); + + if (page) + return page_folio(page); + return NULL; +} #endif static void restore_exclusive_pte(struct vm_area_struct *vma, @@ -3006,23 +3016,24 @@ static vm_fault_t fault_dirty_shared_page(struct vm_fault *vmf) * case, all we need to do here is to mark the page as writable and update * any related book-keeping. */ -static inline void wp_page_reuse(struct vm_fault *vmf) +static inline void wp_page_reuse(struct vm_fault *vmf, struct folio *folio) __releases(vmf->ptl) { struct vm_area_struct *vma = vmf->vma; - struct page *page = vmf->page; pte_t entry; VM_BUG_ON(!(vmf->flags & FAULT_FLAG_WRITE)); - VM_BUG_ON(page && PageAnon(page) && !PageAnonExclusive(page)); - /* - * Clear the pages cpupid information as the existing - * information potentially belongs to a now completely - * unrelated process. - */ - if (page) - page_cpupid_xchg_last(page, (1 << LAST_CPUPID_SHIFT) - 1); + if (folio) { + VM_BUG_ON(folio_test_anon(folio) && + !PageAnonExclusive(vmf->page)); + /* + * Clear the folio's cpupid information as the existing + * information potentially belongs to a now completely + * unrelated process. + */ + folio_xchg_last_cpupid(folio, (1 << LAST_CPUPID_SHIFT) - 1); + } flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); entry = pte_mkyoung(vmf->orig_pte); @@ -3215,6 +3226,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) * writeable once the page is prepared * * @vmf: structure describing the fault + * @folio: the folio of vmf->page * * This function handles all that is needed to finish a write page fault in a * shared mapping due to PTE being read-only once the mapped page is prepared. @@ -3226,7 +3238,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) * Return: %0 on success, %VM_FAULT_NOPAGE when PTE got changed before * we acquired PTE lock. */ -vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf) +static vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf, struct folio *folio) { WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED)); vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address, @@ -3242,7 +3254,7 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf) pte_unmap_unlock(vmf->pte, vmf->ptl); return VM_FAULT_NOPAGE; } - wp_page_reuse(vmf); + wp_page_reuse(vmf, folio); return 0; } @@ -3267,9 +3279,9 @@ static vm_fault_t wp_pfn_shared(struct vm_fault *vmf) ret = vma->vm_ops->pfn_mkwrite(vmf); if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)) return ret; - return finish_mkwrite_fault(vmf); + return finish_mkwrite_fault(vmf, NULL); } - wp_page_reuse(vmf); + wp_page_reuse(vmf, NULL); return 0; } @@ -3297,14 +3309,14 @@ static vm_fault_t wp_page_shared(struct vm_fault *vmf, struct folio *folio) folio_put(folio); return tmp; } - tmp = finish_mkwrite_fault(vmf); + tmp = finish_mkwrite_fault(vmf, folio); if (unlikely(tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) { folio_unlock(folio); folio_put(folio); return tmp; } } else { - wp_page_reuse(vmf); + wp_page_reuse(vmf, folio); folio_lock(folio); } ret |= fault_dirty_shared_page(vmf); @@ -3428,7 +3440,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) pte_unmap_unlock(vmf->pte, vmf->ptl); return 0; } - wp_page_reuse(vmf); + wp_page_reuse(vmf, folio); return 0; } copy: @@ -4719,10 +4731,10 @@ static vm_fault_t do_fault(struct vm_fault *vmf) return ret; } -int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, +int numa_migrate_prep(struct folio *folio, struct vm_area_struct *vma, unsigned long addr, int page_nid, int *flags) { - get_page(page); + folio_get(folio); /* Record the current PID acceesing VMA */ vma_set_access_pid_bit(vma); @@ -4733,14 +4745,14 @@ int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, *flags |= TNF_FAULT_LOCAL; } - return mpol_misplaced(page, vma, addr); + return mpol_misplaced(folio, vma, addr); } static vm_fault_t do_numa_page(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; - struct page *page = NULL; - int page_nid = NUMA_NO_NODE; + struct folio *folio = NULL; + int nid = NUMA_NO_NODE; bool writable = false; int last_cpupid; int target_nid; @@ -4771,12 +4783,12 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) can_change_pte_writable(vma, vmf->address, pte)) writable = true; - page = vm_normal_page(vma, vmf->address, pte); - if (!page || is_zone_device_page(page)) + folio = vm_normal_folio(vma, vmf->address, pte); + if (!folio || folio_is_zone_device(folio)) goto out_map; /* TODO: handle PTE-mapped THP */ - if (PageCompound(page)) + if (folio_test_large(folio)) goto out_map; /* @@ -4791,34 +4803,33 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) flags |= TNF_NO_GROUP; /* - * Flag if the page is shared between multiple address spaces. This + * Flag if the folio is shared between multiple address spaces. This * is later used when determining whether to group tasks together */ - if (page_mapcount(page) > 1 && (vma->vm_flags & VM_SHARED)) + if (folio_estimated_sharers(folio) > 1 && (vma->vm_flags & VM_SHARED)) flags |= TNF_SHARED; - page_nid = page_to_nid(page); + nid = folio_nid(folio); /* * For memory tiering mode, cpupid of slow memory page is used * to record page access time. So use default value. */ if ((sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) && - !node_is_toptier(page_nid)) + !node_is_toptier(nid)) last_cpupid = (-1 & LAST_CPUPID_MASK); else - last_cpupid = page_cpupid_last(page); - target_nid = numa_migrate_prep(page, vma, vmf->address, page_nid, - &flags); + last_cpupid = folio_last_cpupid(folio); + target_nid = numa_migrate_prep(folio, vma, vmf->address, nid, &flags); if (target_nid == NUMA_NO_NODE) { - put_page(page); + folio_put(folio); goto out_map; } pte_unmap_unlock(vmf->pte, vmf->ptl); writable = false; /* Migrate to the requested node */ - if (migrate_misplaced_page(page, vma, target_nid)) { - page_nid = target_nid; + if (migrate_misplaced_folio(folio, vma, target_nid)) { + nid = target_nid; flags |= TNF_MIGRATED; } else { flags |= TNF_MIGRATE_FAIL; @@ -4834,8 +4845,8 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) } out: - if (page_nid != NUMA_NO_NODE) - task_numa_fault(last_cpupid, page_nid, 1, flags); + if (nid != NUMA_NO_NODE) + task_numa_fault(last_cpupid, nid, 1, flags); return 0; out_map: /* diff --git a/mm/mempolicy.c b/mm/mempolicy.c index e52e3a0b8f2e6c22b807851a6247cee4510ef559..4b8447f8175bf99e9d03bfe464ca2b7bb107ecd8 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2570,24 +2570,25 @@ static void sp_free(struct sp_node *n) } /** - * mpol_misplaced - check whether current page node is valid in policy + * mpol_misplaced - check whether current folio node is valid in policy * - * @page: page to be checked - * @vma: vm area where page mapped - * @addr: virtual address where page mapped + * @folio: folio to be checked + * @vma: vm area where folio mapped + * @addr: virtual address in @vma for shared policy lookup and interleave policy * - * Lookup current policy node id for vma,addr and "compare to" page's + * Lookup current policy node id for vma,addr and "compare to" folio's * node id. Policy determination "mimics" alloc_page_vma(). * Called from fault path where we know the vma and faulting address. * * Return: NUMA_NO_NODE if the page is in a node that is valid for this - * policy, or a suitable node ID to allocate a replacement page from. + * policy, or a suitable node ID to allocate a replacement folio from. */ -int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long addr) +int mpol_misplaced(struct folio *folio, struct vm_area_struct *vma, + unsigned long addr) { struct mempolicy *pol; struct zoneref *z; - int curnid = page_to_nid(page); + int curnid = folio_nid(folio); unsigned long pgoff; int thiscpu = raw_smp_processor_id(); int thisnid = cpu_to_node(thiscpu); @@ -2643,11 +2644,12 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long BUG(); } - /* Migrate the page towards the node whose CPU is referencing it */ + /* Migrate the folio towards the node whose CPU is referencing it */ if (pol->flags & MPOL_F_MORON) { polnid = thisnid; - if (!should_numa_migrate_memory(current, page, curnid, thiscpu)) + if (!should_numa_migrate_memory(current, folio, curnid, + thiscpu)) goto out; } diff --git a/mm/migrate.c b/mm/migrate.c index 06086dc9da288f96debedb4582d9467a02563322..5aab4994c4b592e2f40178684f1d3ee256647b55 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -157,8 +157,8 @@ void putback_movable_pages(struct list_head *l) list_del(&folio->lru); /* * We isolated non-lru movable folio so here we can use - * __PageMovable because LRU folio's mapping cannot have - * PAGE_MAPPING_MOVABLE. + * __folio_test_movable because LRU folio's mapping cannot + * have PAGE_MAPPING_MOVABLE. */ if (unlikely(__folio_test_movable(folio))) { VM_BUG_ON_FOLIO(!folio_test_isolated(folio), folio); @@ -588,20 +588,20 @@ void folio_migrate_flags(struct folio *newfolio, struct folio *folio) * Copy NUMA information to the new page, to prevent over-eager * future migrations of this same page. */ - cpupid = page_cpupid_xchg_last(&folio->page, -1); + cpupid = folio_xchg_last_cpupid(folio, -1); /* * For memory tiering mode, when migrate between slow and fast * memory node, reset cpupid, because that is used to record * page access time in slow memory node. */ if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) { - bool f_toptier = node_is_toptier(page_to_nid(&folio->page)); - bool t_toptier = node_is_toptier(page_to_nid(&newfolio->page)); + bool f_toptier = node_is_toptier(folio_nid(folio)); + bool t_toptier = node_is_toptier(folio_nid(newfolio)); if (f_toptier != t_toptier) cpupid = -1; } - page_cpupid_xchg_last(&newfolio->page, cpupid); + folio_xchg_last_cpupid(newfolio, cpupid); folio_migrate_ksm(newfolio, folio); /* @@ -946,7 +946,7 @@ static int move_to_new_folio(struct folio *dst, struct folio *src, enum migrate_mode mode) { int rc = -EAGAIN; - bool is_lru = !__PageMovable(&src->page); + bool is_lru = !__folio_test_movable(src); VM_BUG_ON_FOLIO(!folio_test_locked(src), src); VM_BUG_ON_FOLIO(!folio_test_locked(dst), dst); @@ -993,7 +993,7 @@ static int move_to_new_folio(struct folio *dst, struct folio *src, * src is freed; but stats require that PageAnon be left as PageAnon. */ if (rc == MIGRATEPAGE_SUCCESS) { - if (__PageMovable(&src->page)) { + if (__folio_test_movable(src)) { VM_BUG_ON_FOLIO(!folio_test_isolated(src), src); /* @@ -1085,7 +1085,7 @@ static void migrate_folio_done(struct folio *src, /* * Compaction can migrate also non-LRU pages which are * not accounted to NR_ISOLATED_*. They can be recognized - * as __PageMovable + * as __folio_test_movable */ if (likely(!__folio_test_movable(src))) mod_node_page_state(folio_pgdat(src), NR_ISOLATED_ANON + @@ -1106,7 +1106,7 @@ static int migrate_folio_unmap(new_folio_t get_new_folio, int rc = -EAGAIN; int page_was_mapped = 0; struct anon_vma *anon_vma = NULL; - bool is_lru = !__PageMovable(&src->page); + bool is_lru = !__folio_test_movable(src); bool locked = false; bool dst_locked = false; @@ -1264,7 +1264,7 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private, int rc; int page_was_mapped = 0; struct anon_vma *anon_vma = NULL; - bool is_lru = !__PageMovable(&src->page); + bool is_lru = !__folio_test_movable(src); struct list_head *prev; __migrate_folio_extract(dst, &page_was_mapped, &anon_vma); @@ -2060,8 +2060,8 @@ static int add_page_for_migration(struct mm_struct *mm, const void __user *p, struct vm_area_struct *vma; unsigned long addr; struct page *page; + struct folio *folio; int err; - bool isolated; mmap_read_lock(mm); addr = (unsigned long)untagged_addr_remote(mm, p); @@ -2082,45 +2082,38 @@ static int add_page_for_migration(struct mm_struct *mm, const void __user *p, if (!page) goto out; - if (is_zone_device_page(page)) - goto out_putpage; + folio = page_folio(page); + if (folio_is_zone_device(folio)) + goto out_putfolio; err = 0; - if (page_to_nid(page) == node) - goto out_putpage; + if (folio_nid(folio) == node) + goto out_putfolio; err = -EACCES; if (page_mapcount(page) > 1 && !migrate_all) - goto out_putpage; + goto out_putfolio; - if (PageHuge(page)) { - if (PageHead(page)) { - isolated = isolate_hugetlb(page_folio(page), pagelist); - err = isolated ? 1 : -EBUSY; - } + err = -EBUSY; + if (folio_test_hugetlb(folio)) { + if (isolate_hugetlb(folio, pagelist)) + err = 1; } else { - struct page *head; - - head = compound_head(page); - isolated = isolate_lru_page(head); - if (!isolated) { - err = -EBUSY; - goto out_putpage; - } + if (!folio_isolate_lru(folio)) + goto out_putfolio; err = 1; - list_add_tail(&head->lru, pagelist); - mod_node_page_state(page_pgdat(head), - NR_ISOLATED_ANON + page_is_file_lru(head), - thp_nr_pages(head)); + list_add_tail(&folio->lru, pagelist); + node_stat_mod_folio(folio, + NR_ISOLATED_ANON + folio_is_file_lru(folio), + folio_nr_pages(folio)); } -out_putpage: +out_putfolio: /* - * Either remove the duplicate refcount from - * isolate_lru_page() or drop the page ref if it was - * not isolated. + * Either remove the duplicate refcount from folio_isolate_lru() + * or drop the folio ref if it was not isolated. */ - put_page(page); + folio_put(folio); out: mmap_read_unlock(mm); return err; @@ -2491,16 +2484,9 @@ static struct folio *alloc_misplaced_dst_folio(struct folio *src, return __folio_alloc_node(gfp, order, nid); } -static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page) +static int numamigrate_isolate_folio(pg_data_t *pgdat, struct folio *folio) { - int nr_pages = thp_nr_pages(page); - int order = compound_order(page); - - VM_BUG_ON_PAGE(order && !PageTransHuge(page), page); - - /* Do not migrate THP mapped by multiple processes */ - if (PageTransHuge(page) && total_mapcount(page) > 1) - return 0; + int nr_pages = folio_nr_pages(folio); /* Avoid migrating to a node that is nearly full */ if (!migrate_balanced_pgdat(pgdat, nr_pages)) { @@ -2512,75 +2498,79 @@ static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page) if (managed_zone(pgdat->node_zones + z)) break; } - wakeup_kswapd(pgdat->node_zones + z, 0, order, ZONE_MOVABLE); + wakeup_kswapd(pgdat->node_zones + z, 0, + folio_order(folio), ZONE_MOVABLE); return 0; } - if (!isolate_lru_page(page)) + if (!folio_isolate_lru(folio)) return 0; - mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + page_is_file_lru(page), + node_stat_mod_folio(folio, NR_ISOLATED_ANON + folio_is_file_lru(folio), nr_pages); /* - * Isolating the page has taken another reference, so the - * caller's reference can be safely dropped without the page + * Isolating the folio has taken another reference, so the + * caller's reference can be safely dropped without the folio * disappearing underneath us during migration. */ - put_page(page); + folio_put(folio); return 1; } /* - * Attempt to migrate a misplaced page to the specified destination + * Attempt to migrate a misplaced folio to the specified destination * node. Caller is expected to have an elevated reference count on - * the page that will be dropped by this function before returning. + * the folio that will be dropped by this function before returning. */ -int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, - int node) +int migrate_misplaced_folio(struct folio *folio, struct vm_area_struct *vma, + int node) { pg_data_t *pgdat = NODE_DATA(node); int isolated; int nr_remaining; unsigned int nr_succeeded; LIST_HEAD(migratepages); - int nr_pages = thp_nr_pages(page); + int nr_pages = folio_nr_pages(folio); /* - * Don't migrate file pages that are mapped in multiple processes + * Don't migrate file folios that are mapped in multiple processes * with execute permissions as they are probably shared libraries. + * To check if the folio is shared, ideally we want to make sure + * every page is mapped to the same process. Doing that is very + * expensive, so check the estimated mapcount of the folio instead. */ - if (page_mapcount(page) != 1 && page_is_file_lru(page) && + if (folio_estimated_sharers(folio) != 1 && folio_is_file_lru(folio) && (vma->vm_flags & VM_EXEC)) goto out; /* - * Also do not migrate dirty pages as not all filesystems can move - * dirty pages in MIGRATE_ASYNC mode which is a waste of cycles. + * Also do not migrate dirty folios as not all filesystems can move + * dirty folios in MIGRATE_ASYNC mode which is a waste of cycles. */ - if (page_is_file_lru(page) && PageDirty(page)) + if (folio_is_file_lru(folio) && folio_test_dirty(folio)) goto out; - isolated = numamigrate_isolate_page(pgdat, page); + isolated = numamigrate_isolate_folio(pgdat, folio); if (!isolated) goto out; - list_add(&page->lru, &migratepages); + list_add(&folio->lru, &migratepages); nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_folio, NULL, node, MIGRATE_ASYNC, MR_NUMA_MISPLACED, &nr_succeeded); if (nr_remaining) { if (!list_empty(&migratepages)) { - list_del(&page->lru); - mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + - page_is_file_lru(page), -nr_pages); - putback_lru_page(page); + list_del(&folio->lru); + node_stat_mod_folio(folio, NR_ISOLATED_ANON + + folio_is_file_lru(folio), -nr_pages); + folio_putback_lru(folio); } isolated = 0; } if (nr_succeeded) { count_vm_numa_events(NUMA_PAGE_MIGRATE, nr_succeeded); - if (!node_is_toptier(page_to_nid(page)) && node_is_toptier(node)) + if (!node_is_toptier(folio_nid(folio)) && node_is_toptier(node)) mod_node_page_state(pgdat, PGPROMOTE_SUCCESS, nr_succeeded); } @@ -2588,7 +2578,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, return isolated; out: - put_page(page); + folio_put(folio); return 0; } #endif /* CONFIG_NUMA_BALANCING */ diff --git a/mm/mmzone.c b/mm/mmzone.c index 68e1511be12de6052b91eed6c86287042dd69d73..b594d3f268fe6e1fe2e6b8b9144ec44194416fbd 100644 --- a/mm/mmzone.c +++ b/mm/mmzone.c @@ -93,19 +93,19 @@ void lruvec_init(struct lruvec *lruvec) } #if defined(CONFIG_NUMA_BALANCING) && !defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS) -int page_cpupid_xchg_last(struct page *page, int cpupid) +int folio_xchg_last_cpupid(struct folio *folio, int cpupid) { unsigned long old_flags, flags; int last_cpupid; - old_flags = READ_ONCE(page->flags); + old_flags = READ_ONCE(folio->flags); do { flags = old_flags; last_cpupid = (flags >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK; flags &= ~(LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT); flags |= (cpupid & LAST_CPUPID_MASK) << LAST_CPUPID_PGSHIFT; - } while (unlikely(!try_cmpxchg(&page->flags, &old_flags, flags))); + } while (unlikely(!try_cmpxchg(&folio->flags, &old_flags, flags))); return last_cpupid; } diff --git a/mm/mprotect.c b/mm/mprotect.c index b94fbb45d5c71f1d7a75828e780afd97616be135..b51f90eae9fb0858952400b726c152a40a0f1081 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -114,7 +114,7 @@ static long change_pte_range(struct mmu_gather *tlb, * pages. See similar comment in change_huge_pmd. */ if (prot_numa) { - struct page *page; + struct folio *folio; int nid; bool toptier; @@ -122,13 +122,14 @@ static long change_pte_range(struct mmu_gather *tlb, if (pte_protnone(oldpte)) continue; - page = vm_normal_page(vma, addr, oldpte); - if (!page || is_zone_device_page(page) || PageKsm(page)) + folio = vm_normal_folio(vma, addr, oldpte); + if (!folio || folio_is_zone_device(folio) || + folio_test_ksm(folio)) continue; /* Also skip shared copy-on-write pages */ if (is_cow_mapping(vma->vm_flags) && - page_count(page) != 1) + folio_ref_count(folio) != 1) continue; /* @@ -136,14 +137,15 @@ static long change_pte_range(struct mmu_gather *tlb, * it cannot move them all from MIGRATE_ASYNC * context. */ - if (page_is_file_lru(page) && PageDirty(page)) + if (folio_is_file_lru(folio) && + folio_test_dirty(folio)) continue; /* * Don't mess with PTEs if page is already on the node * a single-threaded process is running on. */ - nid = page_to_nid(page); + nid = folio_nid(folio); if (target_node == nid) continue; toptier = node_is_toptier(nid); @@ -157,7 +159,7 @@ static long change_pte_range(struct mmu_gather *tlb, continue; if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING && !toptier) - xchg_page_access_time(page, + folio_xchg_access_time(folio, jiffies_to_msecs(jiffies)); }