diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h index 6e2ef1d4b0028d36bb907739d76ee25230bfa913..9277524e84ebe73eb2c93db7bc18104aeb4022fb 100644 --- a/include/trace/events/huge_memory.h +++ b/include/trace/events/huge_memory.h @@ -174,10 +174,10 @@ TRACE_EVENT(mm_collapse_huge_page_swapin, TRACE_EVENT(mm_khugepaged_scan_file, - TP_PROTO(struct mm_struct *mm, struct page *page, struct file *file, + TP_PROTO(struct mm_struct *mm, struct folio *folio, struct file *file, int present, int swap, int result), - TP_ARGS(mm, page, file, present, swap, result), + TP_ARGS(mm, folio, file, present, swap, result), TP_STRUCT__entry( __field(struct mm_struct *, mm) @@ -190,7 +190,7 @@ TRACE_EVENT(mm_khugepaged_scan_file, TP_fast_assign( __entry->mm = mm; - __entry->pfn = page ? page_to_pfn(page) : -1; + __entry->pfn = folio ? folio_pfn(folio) : -1; __assign_str(filename, file->f_path.dentry->d_iname); __entry->present = present; __entry->swap = swap; @@ -207,10 +207,10 @@ TRACE_EVENT(mm_khugepaged_scan_file, ); TRACE_EVENT(mm_khugepaged_collapse_file, - TP_PROTO(struct mm_struct *mm, struct page *hpage, pgoff_t index, - bool is_shmem, unsigned long addr, struct file *file, + TP_PROTO(struct mm_struct *mm, struct folio *new_folio, pgoff_t index, + unsigned long addr, bool is_shmem, struct file *file, int nr, int result), - TP_ARGS(mm, hpage, index, addr, is_shmem, file, nr, result), + TP_ARGS(mm, new_folio, index, addr, is_shmem, file, nr, result), TP_STRUCT__entry( __field(struct mm_struct *, mm) __field(unsigned long, hpfn) @@ -224,7 +224,7 @@ TRACE_EVENT(mm_khugepaged_collapse_file, TP_fast_assign( __entry->mm = mm; - __entry->hpfn = hpage ? page_to_pfn(hpage) : -1; + __entry->hpfn = new_folio ? folio_pfn(new_folio) : -1; __entry->index = index; __entry->addr = addr; __entry->is_shmem = is_shmem; @@ -233,7 +233,7 @@ TRACE_EVENT(mm_khugepaged_collapse_file, __entry->result = result; ), - TP_printk("mm=%p, hpage_pfn=0x%lx, index=%ld, addr=%ld, is_shmem=%d, filename=%s, nr=%d, result=%s", + TP_printk("mm=%p, hpage_pfn=0x%lx, index=%ld, addr=%lx, is_shmem=%d, filename=%s, nr=%d, result=%s", __entry->mm, __entry->hpfn, __entry->index, diff --git a/mm/khugepaged.c b/mm/khugepaged.c index c6379a2d55bcea13f4892b2466ce15fa44872346..7f87636c530e747c72b8f29466fb281a0362ddf3 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -555,12 +555,14 @@ static void release_pte_pages(pte_t *pte, pte_t *_pte, static bool is_refcount_suitable(struct folio *folio) { - int expected_refcount; + int expected_refcount = folio_mapcount(folio); - expected_refcount = folio_mapcount(folio); - if (folio_test_swapcache(folio)) + if (!folio_test_anon(folio) || folio_test_swapcache(folio)) expected_refcount += folio_nr_pages(folio); + if (folio_test_private(folio)) + expected_refcount++; + return folio_ref_count(folio) == expected_refcount; } @@ -794,7 +796,7 @@ static void __collapse_huge_page_copy_failed(pte_t *pte, * Returns SCAN_SUCCEED if copying succeeds, otherwise returns SCAN_COPY_MC. * * @pte: starting of the PTEs to copy from - * @page: the new hugepage to copy contents to + * @folio: the new hugepage to copy contents to * @pmd: pointer to the new hugepage's PMD * @orig_pmd: the original raw pages' PMD * @vma: the original raw pages' virtual memory area @@ -802,33 +804,29 @@ static void __collapse_huge_page_copy_failed(pte_t *pte, * @ptl: lock on raw pages' PTEs * @compound_pagelist: list that stores compound pages */ -static int __collapse_huge_page_copy(pte_t *pte, - struct page *page, - pmd_t *pmd, - pmd_t orig_pmd, - struct vm_area_struct *vma, - unsigned long address, - spinlock_t *ptl, - struct list_head *compound_pagelist) -{ - struct page *src_page; - pte_t *_pte; - pte_t pteval; - unsigned long _address; +static int __collapse_huge_page_copy(pte_t *pte, struct folio *folio, + pmd_t *pmd, pmd_t orig_pmd, struct vm_area_struct *vma, + unsigned long address, spinlock_t *ptl, + struct list_head *compound_pagelist) +{ + unsigned int i; int result = SCAN_SUCCEED; /* * Copying pages' contents is subject to memory poison at any iteration. */ - for (_pte = pte, _address = address; _pte < pte + HPAGE_PMD_NR; - _pte++, page++, _address += PAGE_SIZE) { - pteval = ptep_get(_pte); + for (i = 0; i < HPAGE_PMD_NR; i++) { + pte_t pteval = ptep_get(pte + i); + struct page *page = folio_page(folio, i); + unsigned long src_addr = address + i * PAGE_SIZE; + struct page *src_page; + if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) { - clear_user_highpage(page, _address); + clear_user_highpage(page, src_addr); continue; } src_page = pte_page(pteval); - if (copy_mc_user_highpage(page, src_page, _address, vma)) { + if (copy_mc_user_highpage(page, src_page, src_addr, vma)) { result = SCAN_COPY_MC; break; } @@ -919,20 +917,6 @@ static int hpage_collapse_find_target_node(struct collapse_control *cc) } #endif -static bool hpage_collapse_alloc_folio(struct folio **folio, gfp_t gfp, int node, - nodemask_t *nmask) -{ - *folio = __folio_alloc(gfp, HPAGE_PMD_ORDER, node, nmask); - - if (unlikely(!*folio)) { - count_vm_event(THP_COLLAPSE_ALLOC_FAILED); - return false; - } - - count_vm_event(THP_COLLAPSE_ALLOC); - return true; -} - /* * If mmap_lock temporarily dropped, revalidate vma * before taking mmap_lock. @@ -1087,7 +1071,7 @@ static int __collapse_huge_page_swapin(struct mm_struct *mm, return result; } -static int alloc_charge_hpage(struct page **hpage, struct mm_struct *mm, +static int alloc_charge_folio(struct folio **foliop, struct mm_struct *mm, struct collapse_control *cc) { gfp_t gfp = (cc->is_khugepaged ? alloc_hugepage_khugepaged_gfpmask() : @@ -1098,20 +1082,23 @@ static int alloc_charge_hpage(struct page **hpage, struct mm_struct *mm, if (cc->reliable) gfp |= GFP_RELIABLE; - if (!hpage_collapse_alloc_folio(&folio, gfp, node, &cc->alloc_nmask)) { - *hpage = NULL; + folio = __folio_alloc(gfp, HPAGE_PMD_ORDER, node, &cc->alloc_nmask); + if (!folio) { + *foliop = NULL; + count_vm_event(THP_COLLAPSE_ALLOC_FAILED); return SCAN_ALLOC_HUGE_PAGE_FAIL; } + count_vm_event(THP_COLLAPSE_ALLOC); if (unlikely(mem_cgroup_charge(folio, mm, gfp))) { folio_put(folio); - *hpage = NULL; + *foliop = NULL; return SCAN_CGROUP_CHARGE_FAIL; } count_memcg_folio_events(folio, THP_COLLAPSE_ALLOC, 1); - *hpage = folio_page(folio, 0); + *foliop = folio; return SCAN_SUCCEED; } @@ -1124,7 +1111,6 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, pte_t *pte; pgtable_t pgtable; struct folio *folio; - struct page *hpage; spinlock_t *pmd_ptl, *pte_ptl; int result = SCAN_FAIL; struct vm_area_struct *vma; @@ -1140,7 +1126,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, */ mmap_read_unlock(mm); - result = alloc_charge_hpage(&hpage, mm, cc); + result = alloc_charge_folio(&folio, mm, cc); if (result != SCAN_SUCCEED) goto out_nolock; @@ -1236,14 +1222,13 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, */ anon_vma_unlock_write(vma->anon_vma); - result = __collapse_huge_page_copy(pte, hpage, pmd, _pmd, + result = __collapse_huge_page_copy(pte, folio, pmd, _pmd, vma, address, pte_ptl, &compound_pagelist); pte_unmap(pte); if (unlikely(result != SCAN_SUCCEED)) goto out_up_write; - folio = page_folio(hpage); /* * The smp_wmb() inside __folio_mark_uptodate() ensures the * copy_huge_page writes become visible before the set_pmd_at() @@ -1252,12 +1237,12 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, __folio_mark_uptodate(folio); pgtable = pmd_pgtable(_pmd); - _pmd = mk_huge_pmd(hpage, vma->vm_page_prot); + _pmd = mk_huge_pmd(&folio->page, vma->vm_page_prot); _pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma); spin_lock(pmd_ptl); BUG_ON(!pmd_none(*pmd)); - add_reliable_page_counter(hpage, vma->vm_mm, HPAGE_PMD_NR); + add_reliable_folio_counter(folio, vma->vm_mm, HPAGE_PMD_NR); folio_add_new_anon_rmap(folio, vma, address, RMAP_EXCLUSIVE); folio_add_lru_vma(folio, vma); pgtable_trans_huge_deposit(mm, pmd, pgtable); @@ -1265,14 +1250,14 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, update_mmu_cache_pmd(vma, address, pmd); spin_unlock(pmd_ptl); - hpage = NULL; + folio = NULL; result = SCAN_SUCCEED; out_up_write: mmap_write_unlock(mm); out_nolock: - if (hpage) - put_page(hpage); + if (folio) + folio_put(folio); trace_mm_collapse_huge_page(mm, result == SCAN_SUCCEED, result); return result; } @@ -1835,29 +1820,26 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, struct collapse_control *cc) { struct address_space *mapping = file->f_mapping; - struct page *hpage; - struct page *page; - struct page *tmp; - struct folio *folio; + struct page *dst; + struct folio *folio, *tmp, *new_folio; pgoff_t index = 0, end = start + HPAGE_PMD_NR; LIST_HEAD(pagelist); XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER); int nr_none = 0, result = SCAN_SUCCEED; bool is_shmem = shmem_file(file); - int nr = 0; VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem); VM_BUG_ON(start & (HPAGE_PMD_NR - 1)); - result = alloc_charge_hpage(&hpage, mm, cc); + result = alloc_charge_folio(&new_folio, mm, cc); if (result != SCAN_SUCCEED) goto out; - __SetPageLocked(hpage); + __folio_set_locked(new_folio); if (is_shmem) - __SetPageSwapBacked(hpage); - hpage->index = start; - hpage->mapping = mapping; + __folio_set_swapbacked(new_folio); + new_folio->index = start; + new_folio->mapping = mapping; /* * Ensure we have slots for all the pages in the range. This is @@ -1875,13 +1857,13 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, } } while (1); - for (index = start; index < end; index++) { + for (index = start; index < end;) { xas_set(&xas, index); - page = xas_load(&xas); + folio = xas_load(&xas); VM_BUG_ON(index != xas.xa_index); if (is_shmem) { - if (!page) { + if (!folio) { /* * Stop if extent has been truncated or * hole-punched, and is now completely @@ -1894,10 +1876,11 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, } } nr_none++; + index++; continue; } - if (xa_is_value(page) || !PageUptodate(page)) { + if (xa_is_value(folio) || !folio_test_uptodate(folio)) { xas_unlock_irq(&xas); /* swap in or instantiate fallocated page */ if (shmem_get_folio(mapping->host, index, 0, @@ -1907,28 +1890,27 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, } /* drain lru cache to help isolate_lru_page() */ lru_add_drain(); - page = folio_file_page(folio, index); - } else if (trylock_page(page)) { - get_page(page); + } else if (folio_trylock(folio)) { + folio_get(folio); xas_unlock_irq(&xas); } else { result = SCAN_PAGE_LOCK; goto xa_locked; } } else { /* !is_shmem */ - if (!page || xa_is_value(page)) { + if (!folio || xa_is_value(folio)) { xas_unlock_irq(&xas); page_cache_sync_readahead(mapping, &file->f_ra, file, index, end - index); /* drain lru cache to help isolate_lru_page() */ lru_add_drain(); - page = find_lock_page(mapping, index); - if (unlikely(page == NULL)) { + folio = filemap_lock_folio(mapping, index); + if (IS_ERR(folio)) { result = SCAN_FAIL; goto xa_unlocked; } - } else if (PageDirty(page)) { + } else if (folio_test_dirty(folio)) { /* * khugepaged only works on read-only fd, * so this page is dirty because it hasn't @@ -1946,12 +1928,12 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, filemap_flush(mapping); result = SCAN_FAIL; goto xa_unlocked; - } else if (PageWriteback(page)) { + } else if (folio_test_writeback(folio)) { xas_unlock_irq(&xas); result = SCAN_FAIL; goto xa_unlocked; - } else if (trylock_page(page)) { - get_page(page); + } else if (folio_trylock(folio)) { + folio_get(folio); xas_unlock_irq(&xas); } else { result = SCAN_PAGE_LOCK; @@ -1960,35 +1942,29 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, } /* - * The page must be locked, so we can drop the i_pages lock + * The folio must be locked, so we can drop the i_pages lock * without racing with truncate. */ - VM_BUG_ON_PAGE(!PageLocked(page), page); + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); - /* make sure the page is up to date */ - if (unlikely(!PageUptodate(page))) { + /* make sure the folio is up to date */ + if (unlikely(!folio_test_uptodate(folio))) { result = SCAN_FAIL; goto out_unlock; } /* * If file was truncated then extended, or hole-punched, before - * we locked the first page, then a THP might be there already. + * we locked the first folio, then a THP might be there already. * This will be discovered on the first iteration. */ - if (PageTransCompound(page)) { - struct page *head = compound_head(page); - - result = compound_order(head) == HPAGE_PMD_ORDER && - head->index == start - /* Maybe PMD-mapped */ - ? SCAN_PTE_MAPPED_HUGEPAGE - : SCAN_PAGE_COMPOUND; + if (folio_order(folio) == HPAGE_PMD_ORDER && + folio->index == start) { + /* Maybe PMD-mapped */ + result = SCAN_PTE_MAPPED_HUGEPAGE; goto out_unlock; } - folio = page_folio(page); - if (folio_mapping(folio) != mapping) { result = SCAN_TRUNCATED; goto out_unlock; @@ -1998,7 +1974,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, folio_test_writeback(folio))) { /* * khugepaged only works on read-only fd, so this - * page is dirty because it hasn't been flushed + * folio is dirty because it hasn't been flushed * since first write. */ result = SCAN_FAIL; @@ -2022,33 +1998,35 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, xas_lock_irq(&xas); - VM_BUG_ON_PAGE(page != xa_load(xas.xa, index), page); + VM_BUG_ON_FOLIO(folio != xa_load(xas.xa, index), folio); /* - * We control three references to the page: + * We control 2 + nr_pages references to the folio: * - we hold a pin on it; - * - one reference from page cache; - * - one from isolate_lru_page; - * If those are the only references, then any new usage of the - * page will have to fetch it from the page cache. That requires - * locking the page to handle truncate, so any new usage will be - * blocked until we unlock page after collapse/during rollback. + * - nr_pages reference from page cache; + * - one from lru_isolate_folio; + * If those are the only references, then any new usage + * of the folio will have to fetch it from the page + * cache. That requires locking the folio to handle + * truncate, so any new usage will be blocked until we + * unlock folio after collapse/during rollback. */ - if (page_count(page) != 3) { + if (folio_ref_count(folio) != 2 + folio_nr_pages(folio)) { result = SCAN_PAGE_COUNT; xas_unlock_irq(&xas); - putback_lru_page(page); + folio_putback_lru(folio); goto out_unlock; } /* - * Accumulate the pages that are being collapsed. + * Accumulate the folios that are being collapsed. */ - list_add_tail(&page->lru, &pagelist); + list_add_tail(&folio->lru, &pagelist); + index += folio_nr_pages(folio); continue; out_unlock: - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); goto xa_unlocked; } @@ -2087,23 +2065,32 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, } /* - * The old pages are locked, so they won't change anymore. + * The old folios are locked, so they won't change anymore. */ index = start; - list_for_each_entry(page, &pagelist, lru) { - while (index < page->index) { - clear_highpage(hpage + (index % HPAGE_PMD_NR)); + dst = folio_page(new_folio, 0); + list_for_each_entry(folio, &pagelist, lru) { + int i, nr_pages = folio_nr_pages(folio); + + while (index < folio->index) { + clear_highpage(dst); index++; + dst++; } - if (copy_mc_highpage(hpage + (page->index % HPAGE_PMD_NR), page)) { - result = SCAN_COPY_MC; - goto rollback; + + for (i = 0; i < nr_pages; i++) { + if (copy_mc_highpage(dst, folio_page(folio, i))) { + result = SCAN_COPY_MC; + goto rollback; + } + index++; + dst++; } - index++; } while (index < end) { - clear_highpage(hpage + (index % HPAGE_PMD_NR)); + clear_highpage(dst); index++; + dst++; } if (nr_none) { @@ -2131,16 +2118,17 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, } /* - * If userspace observed a missing page in a VMA with a MODE_MISSING - * userfaultfd, then it might expect a UFFD_EVENT_PAGEFAULT for that - * page. If so, we need to roll back to avoid suppressing such an - * event. Since wp/minor userfaultfds don't give userspace any - * guarantees that the kernel doesn't fill a missing page with a zero - * page, so they don't matter here. + * If userspace observed a missing page in a VMA with + * a MODE_MISSING userfaultfd, then it might expect a + * UFFD_EVENT_PAGEFAULT for that page. If so, we need to + * roll back to avoid suppressing such an event. Since + * wp/minor userfaultfds don't give userspace any + * guarantees that the kernel doesn't fill a missing + * page with a zero page, so they don't matter here. * - * Any userfaultfds registered after this point will not be able to - * observe any missing pages due to the previously inserted retry - * entries. + * Any userfaultfds registered after this point will + * not be able to observe any missing pages due to the + * previously inserted retry entries. */ vma_interval_tree_foreach(vma, &mapping->i_mmap, start, end) { if (userfaultfd_missing(vma)) { @@ -2165,33 +2153,32 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, xas_lock_irq(&xas); } - folio = page_folio(hpage); - nr = folio_nr_pages(folio); if (is_shmem) - __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, nr); + __lruvec_stat_mod_folio(new_folio, NR_SHMEM_THPS, HPAGE_PMD_NR); else - __lruvec_stat_mod_folio(folio, NR_FILE_THPS, nr); + __lruvec_stat_mod_folio(new_folio, NR_FILE_THPS, HPAGE_PMD_NR); if (nr_none) { - __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr_none); + __lruvec_stat_mod_folio(new_folio, NR_FILE_PAGES, nr_none); /* nr_none is always 0 for non-shmem. */ - __lruvec_stat_mod_folio(folio, NR_SHMEM, nr_none); + __lruvec_stat_mod_folio(new_folio, NR_SHMEM, nr_none); } /* - * Mark hpage as uptodate before inserting it into the page cache so - * that it isn't mistaken for an fallocated but unwritten page. + * Mark new_folio as uptodate before inserting it into the + * page cache so that it isn't mistaken for an fallocated but + * unwritten page. */ - folio_mark_uptodate(folio); - folio_ref_add(folio, HPAGE_PMD_NR - 1); + folio_mark_uptodate(new_folio); + folio_ref_add(new_folio, HPAGE_PMD_NR - 1); if (is_shmem) - folio_mark_dirty(folio); - folio_add_lru(folio); + folio_mark_dirty(new_folio); + folio_add_lru(new_folio); /* Join all the small entries into a single multi-index entry. */ xas_set_order(&xas, start, HPAGE_PMD_ORDER); - xas_store(&xas, folio); + xas_store(&xas, new_folio); WARN_ON_ONCE(xas_error(&xas)); xas_unlock_irq(&xas); @@ -2202,18 +2189,18 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, retract_page_tables(mapping, start); if (cc && !cc->is_khugepaged) result = SCAN_PTE_MAPPED_HUGEPAGE; - folio_unlock(folio); + folio_unlock(new_folio); /* - * The collapse has succeeded, so free the old pages. + * The collapse has succeeded, so free the old folios. */ - list_for_each_entry_safe(page, tmp, &pagelist, lru) { - list_del(&page->lru); - page->mapping = NULL; - ClearPageActive(page); - ClearPageUnevictable(page); - unlock_page(page); - folio_put_refs(page_folio(page), 3); + list_for_each_entry_safe(folio, tmp, &pagelist, lru) { + list_del(&folio->lru); + folio->mapping = NULL; + folio_clear_active(folio); + folio_clear_unevictable(folio); + folio_unlock(folio); + folio_put_refs(folio, 2 + folio_nr_pages(folio)); } goto out; @@ -2227,11 +2214,11 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, shmem_uncharge(mapping->host, nr_none); } - list_for_each_entry_safe(page, tmp, &pagelist, lru) { - list_del(&page->lru); - unlock_page(page); - putback_lru_page(page); - put_page(page); + list_for_each_entry_safe(folio, tmp, &pagelist, lru) { + list_del(&folio->lru); + folio_unlock(folio); + folio_putback_lru(folio); + folio_put(folio); } /* * Undo the updates of filemap_nr_thps_inc for non-SHMEM @@ -2247,13 +2234,13 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, smp_mb(); } - hpage->mapping = NULL; + new_folio->mapping = NULL; - unlock_page(hpage); - put_page(hpage); + folio_unlock(new_folio); + folio_put(new_folio); out: VM_BUG_ON(!list_empty(&pagelist)); - trace_mm_khugepaged_collapse_file(mm, hpage, index, is_shmem, addr, file, nr, result); + trace_mm_khugepaged_collapse_file(mm, new_folio, index, addr, is_shmem, file, HPAGE_PMD_NR, result); return result; } @@ -2261,7 +2248,7 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, struct file *file, pgoff_t start, struct collapse_control *cc) { - struct page *page = NULL; + struct folio *folio = NULL; struct address_space *mapping = file->f_mapping; XA_STATE(xas, &mapping->i_pages, start); int present, swap; @@ -2274,12 +2261,12 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, nodes_clear(cc->alloc_nmask); cc->reliable = false; rcu_read_lock(); - xas_for_each(&xas, page, start + HPAGE_PMD_NR - 1) { - if (xas_retry(&xas, page)) + xas_for_each(&xas, folio, start + HPAGE_PMD_NR - 1) { + if (xas_retry(&xas, folio)) continue; - if (xa_is_value(page)) { - ++swap; + if (xa_is_value(folio)) { + swap += 1 << xas_get_order(&xas); if (cc->is_khugepaged && swap > khugepaged_max_ptes_swap) { result = SCAN_EXCEED_SWAP_PTE; @@ -2289,18 +2276,10 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, continue; } - /* - * TODO: khugepaged should compact smaller compound pages - * into a PMD sized page - */ - if (PageTransCompound(page)) { - struct page *head = compound_head(page); - - result = compound_order(head) == HPAGE_PMD_ORDER && - head->index == start - /* Maybe PMD-mapped */ - ? SCAN_PTE_MAPPED_HUGEPAGE - : SCAN_PAGE_COMPOUND; + if (folio_order(folio) == HPAGE_PMD_ORDER && + folio->index == start) { + /* Maybe PMD-mapped */ + result = SCAN_PTE_MAPPED_HUGEPAGE; /* * For SCAN_PTE_MAPPED_HUGEPAGE, further processing * by the caller won't touch the page cache, and so @@ -2310,43 +2289,43 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, break; } - node = page_to_nid(page); + node = folio_nid(folio); if (hpage_collapse_scan_abort(node, cc)) { result = SCAN_SCAN_ABORT; break; } cc->node_load[node]++; - if (!PageLRU(page)) { + if (!folio_test_lru(folio)) { result = SCAN_PAGE_LRU; break; } - if (page_count(page) != - 1 + page_mapcount(page) + page_has_private(page)) { + if (!is_refcount_suitable(folio)) { result = SCAN_PAGE_COUNT; break; } - if (page_from_dynamic_pool(page)) { + if (page_from_dynamic_pool(&folio->page)) { result = SCAN_FAIL; break; } /* - * We probably should check if the page is referenced here, but - * nobody would transfer pte_young() to PageReferenced() for us. - * And rmap walk here is just too costly... + * We probably should check if the folio is referenced + * here, but nobody would transfer pte_young() to + * folio_test_referenced() for us. And rmap walk here + * is just too costly... */ - present++; + present += folio_nr_pages(folio); if (need_resched()) { xas_pause(&xas); cond_resched_rcu(); } - if (page_reliable(page)) + if (folio_reliable(folio)) cc->reliable = true; } rcu_read_unlock(); @@ -2361,7 +2340,7 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, } } - trace_mm_khugepaged_scan_file(mm, page, file, present, swap, result); + trace_mm_khugepaged_scan_file(mm, folio, file, present, swap, result); return result; } #else diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c index 829320a519e723127ad6c0f0928486d95a581024..56d4480e8d3cbfc4c20af9c48026e2c34f0fa39e 100644 --- a/tools/testing/selftests/mm/khugepaged.c +++ b/tools/testing/selftests/mm/khugepaged.c @@ -1095,7 +1095,7 @@ static void usage(void) fprintf(stderr, "\n\tSupported Options:\n"); fprintf(stderr, "\t\t-h: This help message.\n"); fprintf(stderr, "\t\t-s: mTHP size, expressed as page order.\n"); - fprintf(stderr, "\t\t Defaults to 0. Use this size for anon allocations.\n"); + fprintf(stderr, "\t\t Defaults to 0. Use this size for anon or shmem allocations.\n"); exit(1); } @@ -1209,6 +1209,8 @@ int main(int argc, char **argv) default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8; default_settings.hugepages[hpage_pmd_order].enabled = THP_INHERIT; default_settings.hugepages[anon_order].enabled = THP_ALWAYS; + default_settings.shmem_hugepages[hpage_pmd_order].enabled = SHMEM_INHERIT; + default_settings.shmem_hugepages[anon_order].enabled = SHMEM_ALWAYS; save_settings(); thp_push_settings(&default_settings); diff --git a/tools/testing/selftests/mm/thp_settings.c b/tools/testing/selftests/mm/thp_settings.c index a4163438108ec50fc849d314d651122d6662b2b4..577eaab6266fd94528b445959b0fa8d08cd840ac 100644 --- a/tools/testing/selftests/mm/thp_settings.c +++ b/tools/testing/selftests/mm/thp_settings.c @@ -33,10 +33,11 @@ static const char * const thp_defrag_strings[] = { }; static const char * const shmem_enabled_strings[] = { + "never", "always", "within_size", "advise", - "never", + "inherit", "deny", "force", NULL @@ -200,6 +201,7 @@ void thp_write_num(const char *name, unsigned long num) void thp_read_settings(struct thp_settings *settings) { unsigned long orders = thp_supported_orders(); + unsigned long shmem_orders = thp_shmem_supported_orders(); char path[PATH_MAX]; int i; @@ -234,12 +236,24 @@ void thp_read_settings(struct thp_settings *settings) settings->hugepages[i].enabled = thp_read_string(path, thp_enabled_strings); } + + for (i = 0; i < NR_ORDERS; i++) { + if (!((1 << i) & shmem_orders)) { + settings->shmem_hugepages[i].enabled = SHMEM_NEVER; + continue; + } + snprintf(path, PATH_MAX, "hugepages-%ukB/shmem_enabled", + (getpagesize() >> 10) << i); + settings->shmem_hugepages[i].enabled = + thp_read_string(path, shmem_enabled_strings); + } } void thp_write_settings(struct thp_settings *settings) { struct khugepaged_settings *khugepaged = &settings->khugepaged; unsigned long orders = thp_supported_orders(); + unsigned long shmem_orders = thp_shmem_supported_orders(); char path[PATH_MAX]; int enabled; int i; @@ -271,6 +285,15 @@ void thp_write_settings(struct thp_settings *settings) enabled = settings->hugepages[i].enabled; thp_write_string(path, thp_enabled_strings[enabled]); } + + for (i = 0; i < NR_ORDERS; i++) { + if (!((1 << i) & shmem_orders)) + continue; + snprintf(path, PATH_MAX, "hugepages-%ukB/shmem_enabled", + (getpagesize() >> 10) << i); + enabled = settings->shmem_hugepages[i].enabled; + thp_write_string(path, shmem_enabled_strings[enabled]); + } } struct thp_settings *thp_current_settings(void) @@ -324,17 +347,18 @@ void thp_set_read_ahead_path(char *path) dev_queue_read_ahead_path[sizeof(dev_queue_read_ahead_path) - 1] = '\0'; } -unsigned long thp_supported_orders(void) +static unsigned long __thp_supported_orders(bool is_shmem) { unsigned long orders = 0; char path[PATH_MAX]; char buf[256]; - int ret; - int i; + int ret, i; + char anon_dir[] = "enabled"; + char shmem_dir[] = "shmem_enabled"; for (i = 0; i < NR_ORDERS; i++) { - ret = snprintf(path, PATH_MAX, THP_SYSFS "hugepages-%ukB/enabled", - (getpagesize() >> 10) << i); + ret = snprintf(path, PATH_MAX, THP_SYSFS "hugepages-%ukB/%s", + (getpagesize() >> 10) << i, is_shmem ? shmem_dir : anon_dir); if (ret >= PATH_MAX) { printf("%s: Pathname is too long\n", __func__); exit(EXIT_FAILURE); @@ -347,3 +371,13 @@ unsigned long thp_supported_orders(void) return orders; } + +unsigned long thp_supported_orders(void) +{ + return __thp_supported_orders(false); +} + +unsigned long thp_shmem_supported_orders(void) +{ + return __thp_supported_orders(true); +} diff --git a/tools/testing/selftests/mm/thp_settings.h b/tools/testing/selftests/mm/thp_settings.h index 71cbff05f4c7ff3b4ec67ee79cdf2129fcdf9f13..876235a23460ca2c050d99700e78d5b15abba2ed 100644 --- a/tools/testing/selftests/mm/thp_settings.h +++ b/tools/testing/selftests/mm/thp_settings.h @@ -22,10 +22,11 @@ enum thp_defrag { }; enum shmem_enabled { + SHMEM_NEVER, SHMEM_ALWAYS, SHMEM_WITHIN_SIZE, SHMEM_ADVISE, - SHMEM_NEVER, + SHMEM_INHERIT, SHMEM_DENY, SHMEM_FORCE, }; @@ -46,6 +47,10 @@ struct khugepaged_settings { unsigned long pages_to_scan; }; +struct shmem_hugepages_settings { + enum shmem_enabled enabled; +}; + struct thp_settings { enum thp_enabled thp_enabled; enum thp_defrag thp_defrag; @@ -54,6 +59,7 @@ struct thp_settings { struct khugepaged_settings khugepaged; unsigned long read_ahead_kb; struct hugepages_settings hugepages[NR_ORDERS]; + struct shmem_hugepages_settings shmem_hugepages[NR_ORDERS]; }; int read_file(const char *path, char *buf, size_t buflen); @@ -76,5 +82,6 @@ void thp_save_settings(void); void thp_set_read_ahead_path(char *path); unsigned long thp_supported_orders(void); +unsigned long thp_shmem_supported_orders(void); #endif /* __THP_SETTINGS_H__ */