From c01cfed1544b76d0c70f102a713a5145ee1cd861 Mon Sep 17 00:00:00 2001
From: Baolin Wang
Date: Thu, 9 Jan 2025 10:07:13 +0800
Subject: [PATCH 1/6] mm: shmem: fix incorrect index alignment for within_size policy

mainline inclusion
from mainline-v6.13-rc1
commit d0e6983a6d1719738cf8d13982a68094f0a1872a
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IBGFBA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d0e6983a6d1719738cf8d13982a68094f0a1872a

--------------------------------

With the shmem per-size within_size policy enabled, using an incorrect
'order' size to round_up() the index can lead to incorrect i_size checks,
resulting in inappropriately large orders being returned.

Change to use '1 << order' to round_up() the index to fix this issue.
Additionally, add an 'aligned_index' variable to avoid affecting the
index checks.

Link: https://lkml.kernel.org/r/77d8ef76a7d3d646e9225e9af88a76549a68aab1.1734593154.git.baolin.wang@linux.alibaba.com
Fixes: e7a2ab7b3bb5 ("mm: shmem: add mTHP support for anonymous shmem")
Signed-off-by: Baolin Wang
Acked-by: David Hildenbrand
Cc: Hugh Dickins
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton
Signed-off-by: Wang Lian
---
 mm/shmem.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index f9e48c353a9d..3d0b81b8b885 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1682,6 +1682,7 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode,
 	unsigned long mask = READ_ONCE(huge_shmem_orders_always);
 	unsigned long within_size_orders = READ_ONCE(huge_shmem_orders_within_size);
 	unsigned long vm_flags = vma ? vma->vm_flags : 0;
+	pgoff_t aligned_index;
 	bool global_huge;
 	loff_t i_size;
 	int order;
@@ -1716,9 +1717,9 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode,
 	/* Allow mTHP that will be fully within i_size. */
 	order = highest_order(within_size_orders);
 	while (within_size_orders) {
-		index = round_up(index + 1, order);
+		aligned_index = round_up(index + 1, 1 << order);
 		i_size = round_up(i_size_read(inode), PAGE_SIZE);
-		if (i_size >> PAGE_SHIFT >= index) {
+		if (i_size >> PAGE_SHIFT >= aligned_index) {
 			mask |= within_size_orders;
 			break;
 		}
--
Gitee
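
[Editor's note, not part of the patch: a minimal userspace sketch of the
alignment bug fixed above. round_up() is the power-of-two rounding macro
from include/linux/math.h; order 4 and the 12-page i_size are made-up
values. The old code aligned the index to 'order' (4) rather than to the
folio size in pages (1 << 4 = 16), so the i_size check could pass even
though the order-4 folio would extend beyond EOF.]

#include <stdio.h>

/* Power-of-two round_up, as in include/linux/math.h */
#define round_up(x, y) ((((x) - 1) | ((y) - 1)) + 1)

int main(void)
{
        int order = 4;                   /* a 16-page (64KB on 4K pages) mTHP order */
        unsigned long index = 3;         /* faulting page index */
        unsigned long i_size_pages = 12; /* i_size rounded up, in pages */

        /* Buggy: aligns to 'order' (4), not to the folio size (16 pages) */
        unsigned long old = round_up(index + 1, order);
        /* Fixed: aligns to the number of pages covered by this order */
        unsigned long new = round_up(index + 1, 1 << order);

        printf("old check: %lu >= %lu -> %s (order-4 folio wrongly allowed)\n",
               i_size_pages, old, i_size_pages >= old ? "true" : "false");
        printf("new check: %lu >= %lu -> %s (folio would cross i_size)\n",
               i_size_pages, new, i_size_pages >= new ? "true" : "false");
        return 0;
}
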
From 5c093a1c3d730668f27ca679009bb9fefdf7aca5 Mon Sep 17 00:00:00 2001
From: Baolin Wang
Date: Thu, 9 Jan 2025 10:10:50 +0800
Subject: [PATCH 2/6] mm: shmem: fix the update of 'shmem_falloc->nr_unswapped'

mainline inclusion
from mainline-v6.13-rc1
commit d77b90d2b2642655b5f60953c36ad887257e1802
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IBGFBA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d77b90d2b2642655b5f60953c36ad887257e1802

--------------------------------

The 'shmem_falloc->nr_unswapped' is used to record how many times
writepage refused to swap out because fallocate() is allocating, but
after shmem gained support for swapping out large folios, the update of
'shmem_falloc->nr_unswapped' no longer uses the correct number of pages
in the large folio, which may lead to fallocate() not exiting as soon as
possible.

Anyway, this is found through code inspection, and I am not sure whether
it would actually cause serious issues.

Link: https://lkml.kernel.org/r/f66a0119d0564c2c37c84f045835b870d1b2196f.1734593154.git.baolin.wang@linux.alibaba.com
Fixes: 809bc86517cc ("mm: shmem: support large folio swap out")
Signed-off-by: Baolin Wang
Acked-by: David Hildenbrand
Cc: Hugh Dickins
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton
Signed-off-by: Wang Lian
---
 mm/shmem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 3d0b81b8b885..856e63a08b40 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1524,7 +1524,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 				   !shmem_falloc->waitq &&
 				   index >= shmem_falloc->start &&
 				   index < shmem_falloc->next)
-				shmem_falloc->nr_unswapped++;
+				shmem_falloc->nr_unswapped += nr_pages;
 			else
 				shmem_falloc = NULL;
 			spin_unlock(&inode->i_lock);
--
Gitee

From 13564f7bc07b82cf0ca287ae31ba0f1915fd77fe Mon Sep 17 00:00:00 2001
From: Baolin Wang
Date: Thu, 9 Jan 2025 10:15:31 +0800
Subject: [PATCH 3/6] docs: mm: fix the incorrect 'FileHugeMapped' field

mainline inclusion
from mainline-v6.13-rc1
commit 472098f23323c39cc6269d7b7bf76cba62830a4c
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IBGFBA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=472098f23323c39cc6269d7b7bf76cba62830a4c

--------------------------------

'/proc/PID/smaps' does not have a 'FileHugeMapped' field to count file
transparent huge pages; the 'FilePmdMapped' field should be used instead.
Fix it.

Link: https://lkml.kernel.org/r/d520ce3aba2b03b088be30bece732426a939049a.1734425264.git.baolin.wang@linux.alibaba.com
Signed-off-by: Baolin Wang
Acked-by: David Hildenbrand
Signed-off-by: Andrew Morton
Signed-off-by: Wang Lian
---
 Documentation/admin-guide/mm/transhuge.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
index 5acca1f4c8d3..a3bb6fab003d 100644
--- a/Documentation/admin-guide/mm/transhuge.rst
+++ b/Documentation/admin-guide/mm/transhuge.rst
@@ -436,7 +436,7 @@ AnonHugePmdMapped).
 The number of file transparent huge pages mapped to userspace is available
 by reading ShmemPmdMapped and ShmemHugePages fields in ``/proc/meminfo``.
 To identify what applications are mapping file transparent huge pages, it
-is necessary to read ``/proc/PID/smaps`` and count the FileHugeMapped fields
+is necessary to read ``/proc/PID/smaps`` and count the FilePmdMapped fields
 for each mapping.

 Note that reading the smaps file is expensive and reading it
--
Gitee
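
[Editor's note, not part of the patch: a small C sketch of the procedure
the corrected documentation describes, summing the FilePmdMapped fields
of one process's smaps. The PID 1234 path is a placeholder; reading
another process's smaps may require appropriate privileges.]

#include <stdio.h>

int main(void)
{
        char line[256];
        unsigned long kb, total = 0;
        FILE *f = fopen("/proc/1234/smaps", "r");

        if (!f)
                return 1;
        /* Each mapping contributes one "FilePmdMapped: N kB" line */
        while (fgets(line, sizeof(line), f)) {
                if (sscanf(line, "FilePmdMapped: %lu kB", &kb) == 1)
                        total += kb;
        }
        fclose(f);
        printf("FilePmdMapped total: %lu kB\n", total);
        return 0;
}
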
From bf5cb2bf8e4158df17fef87c4f6215f237a0bb21 Mon Sep 17 00:00:00 2001
From: Hugh Dickins
Date: Mon, 13 Jan 2025 15:20:56 +0800
Subject: [PATCH 4/6] mm: shmem: fix minor off-by-one in shrinkable calculation

mainline inclusion
from mainline-v6.12-rc1
commit de5b85262e2038a5ae5d281ddf43d35acb2bfa60
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IBGFBA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=de5b85262e2038a5ae5d281ddf43d35acb2bfa60

--------------------------------

There has been a long-standing and very minor off-by-one, where
shmem_get_folio_gfp() decides if a large folio extends beyond i_size far
enough to leave a page or more for freeing later under pressure.

This is not something needed for stable: but it will be proportionately
more significant as support for smaller large folios is added, and is
best fixed before duplicating the check in other places.

Link: https://lkml.kernel.org/r/d8e75079-af2d-8519-56df-6be1dccc247a@google.com
Fixes: 779750d20b93 ("shmem: split huge pages beyond i_size under memory pressure")
Signed-off-by: Hugh Dickins
Reviewed-by: David Hildenbrand
Reviewed-by: Baolin Wang
Signed-off-by: Andrew Morton
Signed-off-by: Wang Lian
---
 mm/shmem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 856e63a08b40..266e13b98f7b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2370,7 +2370,7 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
 	alloced = true;
 	if (folio_test_large(folio) &&
 	    DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE) <
-	    folio_next_index(folio) - 1) {
+	    folio_next_index(folio)) {
 		struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 		struct shmem_inode_info *info = SHMEM_I(inode);
 		/*
--
Gitee
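
[Editor's note, not part of the patch: a userspace sketch of the
off-by-one. With a 16-page folio covering indices 32..47 and i_size
ending within page 46, exactly one tail page lies wholly beyond EOF; the
old '< folio_next_index(folio) - 1' test missed it, while the corrected
'< folio_next_index(folio)' test catches it. All numbers are made up.]

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
#define PAGE_SIZE 4096UL

int main(void)
{
        /* A 16-page folio covering indices 32..47 (folio_next_index == 48),
         * with i_size ending at the start of page 47: page 47 is entirely
         * beyond EOF and could be freed by splitting. */
        unsigned long folio_next_index = 48;
        unsigned long i_size = 47 * PAGE_SIZE;
        unsigned long size_pages = DIV_ROUND_UP(i_size, PAGE_SIZE); /* 47 */

        printf("old: %lu < %lu -> %d (one freeable page missed)\n",
               size_pages, folio_next_index - 1,
               size_pages < folio_next_index - 1);
        printf("new: %lu < %lu -> %d (inode put on shrinklist)\n",
               size_pages, folio_next_index,
               size_pages < folio_next_index);
        return 0;
}
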
From 6ac04fc4522db987d7e5b128162f016a97bd7f55 Mon Sep 17 00:00:00 2001
From: Hugh Dickins
Date: Mon, 13 Jan 2025 15:36:22 +0800
Subject: [PATCH 5/6] mm: shmem: extend shmem_unused_huge_shrink() to all sizes

mainline inclusion
from mainline-v6.12-rc1
commit 15444054a537aca115bb077a77e99a9cc5ae11e6
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IBGFBA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=15444054a537aca115bb077a77e99a9cc5ae11e6

--------------------------------

Although shmem_get_folio_gfp() is correctly putting inodes on the
shrinklist according to the folio size, shmem_unused_huge_shrink() was
still dealing with that shrinklist in terms of HPAGE_PMD_SIZE.

Generalize that; and to handle the mixture of sizes more sensibly, let
shmem_alloc_and_add_folio() give it a number of pages to be freed
(approximate: no need to minimize that with an exact calculation) instead
of a number of inodes to split.

[akpm@linux-foundation.org: comment tweak, per David]
Link: https://lkml.kernel.org/r/d8c40850-6774-7a93-1e2c-8d941683b260@google.com
Signed-off-by: Hugh Dickins
Reviewed-by: David Hildenbrand
Cc: Baolin Wang
Cc: Hugh Dickins
Cc: Kirill A. Shutemov
Signed-off-by: Andrew Morton
Signed-off-by: Wang Lian
---
 mm/shmem.c | 45 ++++++++++++++++++++-------------------------
 1 file changed, 20 insertions(+), 25 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 266e13b98f7b..ef8724d4300f 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -632,15 +632,14 @@ static const char *shmem_format_huge(int huge)
 #endif

 static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
-		struct shrink_control *sc, unsigned long nr_to_split)
+		struct shrink_control *sc, unsigned long nr_to_free)
 {
 	LIST_HEAD(list), *pos, *next;
-	LIST_HEAD(to_remove);
 	struct inode *inode;
 	struct shmem_inode_info *info;
 	struct folio *folio;
 	unsigned long batch = sc ? sc->nr_to_scan : 128;
-	int split = 0;
+	unsigned long split = 0, freed = 0;

 	if (list_empty(&sbinfo->shrinklist))
 		return SHRINK_STOP;
@@ -658,13 +657,6 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
 			goto next;
 		}

-		/* Check if there's anything to gain */
-		if (round_up(inode->i_size, PAGE_SIZE) ==
-		    round_up(inode->i_size, HPAGE_PMD_SIZE)) {
-			list_move(&info->shrinklist, &to_remove);
-			goto next;
-		}
-
 		list_move(&info->shrinklist, &list);
next:
 		sbinfo->shrinklist_len--;
@@ -673,34 +665,36 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
 	}
 	spin_unlock(&sbinfo->shrinklist_lock);

-	list_for_each_safe(pos, next, &to_remove) {
-		info = list_entry(pos, struct shmem_inode_info, shrinklist);
-		inode = &info->vfs_inode;
-		list_del_init(&info->shrinklist);
-		iput(inode);
-	}
-
 	list_for_each_safe(pos, next, &list) {
+		pgoff_t next, end;
+		loff_t i_size;
 		int ret;
-		pgoff_t index;

 		info = list_entry(pos, struct shmem_inode_info, shrinklist);
 		inode = &info->vfs_inode;

-		if (nr_to_split && split >= nr_to_split)
+		if (nr_to_free && freed >= nr_to_free)
 			goto move_back;

-		index = (inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT;
-		folio = filemap_get_folio(inode->i_mapping, index);
-		if (IS_ERR(folio))
+		i_size = i_size_read(inode);
+		folio = filemap_get_entry(inode->i_mapping, i_size / PAGE_SIZE);
+		if (!folio || xa_is_value(folio))
 			goto drop;

-		/* No huge page at the end of the file: nothing to split */
+		/* No large folio at the end of the file: nothing to split */
 		if (!folio_test_large(folio)) {
 			folio_put(folio);
 			goto drop;
 		}

+		/* Check if there is anything to gain from splitting */
+		next = folio_next_index(folio);
+		end = shmem_fallocend(inode, DIV_ROUND_UP(i_size, PAGE_SIZE));
+		if (end <= folio->index || end >= next) {
+			folio_put(folio);
+			goto drop;
+		}
+
 		/*
 		 * Move the inode on the list back to shrinklist if we failed
 		 * to lock the page at this time.
 		 */
@@ -721,6 +715,7 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
 		if (ret)
 			goto move_back;

+		freed += next - end;
 		split++;
drop:
 		list_del_init(&info->shrinklist);
@@ -765,7 +760,7 @@ static long shmem_unused_huge_count(struct super_block *sb,
 #define shmem_huge SHMEM_HUGE_DENY

 static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
-		struct shrink_control *sc, unsigned long nr_to_split)
+		struct shrink_control *sc, unsigned long nr_to_free)
 {
 	return 0;
 }
@@ -1870,7 +1865,7 @@ static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf,
 	 * Try to reclaim some space by splitting a few
 	 * large folios beyond i_size on the filesystem.
 	 */
-	shmem_unused_huge_shrink(sbinfo, NULL, 2);
+	shmem_unused_huge_shrink(sbinfo, NULL, pages);
 	/*
 	 * And do a shmem_recalc_inode() to account for freed pages:
 	 * except our folio is there in cache, so not quite balanced.
--
Gitee
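
[Editor's note, not part of the patch: a sketch of the new "anything to
gain" accounting with made-up numbers. Here 'end' stands in for
shmem_fallocend(inode, DIV_ROUND_UP(i_size, PAGE_SIZE)), assuming no
fallocate() extends beyond i_size; 'freed' is approximate, per the
commit message.]

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
#define PAGE_SIZE 4096UL

int main(void)
{
        /* A 16-page folio at indices 32..47; i_size ends within page 40,
         * so 'end' (the first page the file no longer needs) is 41. */
        unsigned long folio_index = 32, folio_next = 48;
        unsigned long i_size = 40 * PAGE_SIZE + 123;
        unsigned long end = DIV_ROUND_UP(i_size, PAGE_SIZE); /* 41 */

        if (end <= folio_index || end >= folio_next)
                printf("nothing to gain from splitting\n");
        else
                /* Pages 41..47 become freeable: 48 - 41 = 7 pages */
                printf("splitting frees ~%lu pages\n", folio_next - end);
        return 0;
}
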
From 89e8f6011260063ed616b17dce2f4317995aea15 Mon Sep 17 00:00:00 2001
From: Hugh Dickins
Date: Tue, 21 Jan 2025 13:22:52 +0800
Subject: [PATCH 6/6] mm: shmem: fix ShmemHugePages at swapout

mainline inclusion
from mainline-v6.12-rc1
commit dad2dc9c92e0f93f33cebcb0595b8daa3d57473f
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IBGFBA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=dad2dc9c92e0f93f33cebcb0595b8daa3d57473f

--------------------------------

/proc/meminfo ShmemHugePages has been showing overlarge amounts (more
than Shmem) after swapping out THPs: we forgot to update NR_SHMEM_THPS.

Add shmem_update_stats(), to avoid repetition and the risk of making that
mistake again: the call from shmem_delete_from_page_cache() is the
bugfix; the call from shmem_replace_folio() is reassuring, but not really
a bugfix (replace corrects misplaced swapin readahead, but huge swapin
readahead would be a mistake).

Link: https://lkml.kernel.org/r/5ba477c8-a569-70b5-923e-09ab221af45b@google.com
Fixes: 809bc86517cc ("mm: shmem: support large folio swap out")
Signed-off-by: Hugh Dickins
Reviewed-by: Shakeel Butt
Reviewed-by: Yosry Ahmed
Reviewed-by: Baolin Wang
Tested-by: Baolin Wang
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton
Conflicts:
	mm/shmem.c
[Context conflicts in shmem.c due to missing commit 7aad25b4b47e]
Signed-off-by: Wang Lian
---
 mm/shmem.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index ef8724d4300f..d6afa94b5bc2 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -773,6 +773,14 @@ static bool shmem_huge_global_enabled(struct inode *inode, pgoff_t index,
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */

+static void shmem_update_stats(struct folio *folio, int nr_pages)
+{
+	if (folio_test_pmd_mappable(folio))
+		__lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, nr_pages);
+	__lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr_pages);
+	__lruvec_stat_mod_folio(folio, NR_SHMEM, nr_pages);
+}
+
 /*
  * Somewhat like filemap_add_folio, but error if expected item has gone.
  */
@@ -807,10 +815,7 @@ static int shmem_add_to_page_cache(struct folio *folio,
 		xas_store(&xas, folio);
 		if (xas_error(&xas))
 			goto unlock;
-		if (folio_test_pmd_mappable(folio))
-			__lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, nr);
-		__lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr);
-		__lruvec_stat_mod_folio(folio, NR_SHMEM, nr);
+		shmem_update_stats(folio, nr);
 		shmem_reliable_folio_add(folio, nr);
 		mapping->nrpages += nr;
 unlock:
@@ -839,8 +844,7 @@ static void shmem_delete_from_page_cache(struct folio *folio, void *radswap)
 	error = shmem_replace_entry(mapping, folio->index, folio, radswap);
 	folio->mapping = NULL;
 	mapping->nrpages -= nr;
-	__lruvec_stat_mod_folio(folio, NR_FILE_PAGES, -nr);
-	__lruvec_stat_mod_folio(folio, NR_SHMEM, -nr);
+	shmem_update_stats(folio, -nr);
 	shmem_reliable_folio_add(folio, -nr);
 	xa_unlock_irq(&mapping->i_pages);
 	folio_put_refs(folio, nr);
@@ -1966,11 +1970,9 @@ static int shmem_replace_folio(struct folio **foliop, gfp_t gfp,
 	}
 	if (!error) {
 		mem_cgroup_replace_folio(old, new);
-		__lruvec_stat_mod_folio(new, NR_FILE_PAGES, nr_pages);
-		__lruvec_stat_mod_folio(new, NR_SHMEM, nr_pages);
+		shmem_update_stats(new, nr_pages);
 		shmem_reliable_folio_add(new, nr_pages);
-		__lruvec_stat_mod_folio(old, NR_FILE_PAGES, -nr_pages);
-		__lruvec_stat_mod_folio(old, NR_SHMEM, -nr_pages);
+		shmem_update_stats(old, -nr_pages);
 		shmem_reliable_folio_add(old, -nr_pages);
 	}
 	xa_unlock_irq(&swap_mapping->i_pages);
--
Gitee
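
[Editor's note, not part of the patch: a toy model of the three counters
that shmem_update_stats() keeps in step. The bool parameter stands in for
folio_test_pmd_mappable(), and 512 pages models one PMD-sized THP on
x86_64; it shows how the old swapout path left ShmemHugePages stale.]

#include <stdio.h>
#include <stdbool.h>

/* Toy model of the lruvec counters involved (not kernel code). */
static long nr_file_pages, nr_shmem, nr_shmem_thps;

/* Mirrors the shape of the shmem_update_stats() helper added above. */
static void shmem_update_stats(bool pmd_mappable, int nr_pages)
{
        if (pmd_mappable)
                nr_shmem_thps += nr_pages;
        nr_file_pages += nr_pages;
        nr_shmem += nr_pages;
}

int main(void)
{
        shmem_update_stats(true, 512);  /* add one PMD-sized (512-page) folio */

        /* Old swapout path: only FILE_PAGES/SHMEM were decremented, so
         * ShmemHugePages stayed at 512 after the THP left the page cache. */
        nr_file_pages -= 512;
        nr_shmem -= 512;
        printf("old: ShmemHugePages=%ld (stale), Shmem=%ld\n",
               nr_shmem_thps, nr_shmem);

        /* Fixed path: one helper keeps all three counters in step. */
        nr_shmem_thps = 512;  /* reset the toy counters for the fixed run */
        nr_file_pages = 512;
        nr_shmem = 512;
        shmem_update_stats(true, -512);
        printf("new: ShmemHugePages=%ld, Shmem=%ld\n",
               nr_shmem_thps, nr_shmem);
        return 0;
}
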