diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
index f9d692f049f62be52ff1caeb7f7b50e02a71adf0..b6e5ba22176a01bdd407d6c8365cfbf08063b90e 100644
--- a/Documentation/admin-guide/mm/transhuge.rst
+++ b/Documentation/admin-guide/mm/transhuge.rst
@@ -213,8 +213,8 @@ possible to enable/disable it by configurate the corresponding bit::
 
 The kernel could try to enable mappings for different sizes, eg, 64K on
 arm64, BIT0 for file mapping, BIT1 for anonymous mapping, and THP size
-page, BIT3 for anonymous mapping, where 64K anonymous mapping for arm64
-is dependent on BIT3 being turned on, the above feature are disabled by
+page, BIT2 for anonymous mapping, where 2M anonymous mapping for arm64
+is dependent on BIT2 being turned on, the above feature are disabled by
 default, and could enable the above feature by writing the corresponding
 bit to 1::
 
@@ -232,12 +232,11 @@ it back by writing 0::
 	echo 0 >/sys/kernel/mm/transparent_hugepage/pcp_allow_high_order
 	echo 4 >/sys/kernel/mm/transparent_hugepage/pcp_allow_high_order
 
-khugepaged will be automatically started when one or more hugepage
-sizes are enabled (either by directly setting "always" or "madvise",
-or by setting "inherit" while the top-level enabled is set to "always"
-or "madvise"), and it'll be automatically shutdown when the last
-hugepage size is disabled (either by directly setting "never", or by
-setting "inherit" while the top-level enabled is set to "never").
+khugepaged will be automatically started when PMD-sized THP is enabled
+(either the per-size anon control or the top-level control is set to
+"always" or "madvise"), and it'll be automatically shut down when
+PMD-sized THP is disabled (when both the per-size anon control and the
+top-level control are "never").
 
 Khugepaged controls
 -------------------
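The activation rule spelled out in the new khugepaged paragraph can be checked from userspace. The sketch below is an illustration only, not part of this series: it reads the top-level control and the per-size anon control and applies the same rule. The hugepages-2048kB directory name assumes a 2M PMD size (4K base pages), read_selected() is a helper invented for the demo, and the kernel-side hugepage_pmd_enabled() check additionally accepts a global "always"/"madvise" when CONFIG_READ_ONLY_THP_FOR_FS is enabled.

/*
 * Illustration only -- not part of the patch. Mirrors the documented rule:
 * PMD-sized THP counts as enabled when the per-size anon control is
 * "always"/"madvise", or "inherit" while the top-level control is
 * "always"/"madvise".
 */
#include <stdio.h>
#include <string.h>

static int read_selected(const char *path, char *buf, size_t len)
{
	FILE *f = fopen(path, "r");
	char line[256], *start, *end;

	if (!f)
		return -1;
	if (!fgets(line, sizeof(line), f)) {
		fclose(f);
		return -1;
	}
	fclose(f);

	start = strchr(line, '[');		/* active value is bracketed */
	end = start ? strchr(start, ']') : NULL;
	if (!start || !end)
		return -1;
	snprintf(buf, len, "%.*s", (int)(end - start - 1), start + 1);
	return 0;
}

int main(void)
{
	char global[32], pmd[32];
	int enabled;

	if (read_selected("/sys/kernel/mm/transparent_hugepage/enabled",
			  global, sizeof(global)) ||
	    read_selected("/sys/kernel/mm/transparent_hugepage/hugepages-2048kB/enabled",
			  pmd, sizeof(pmd)))
		return 1;

	enabled = !strcmp(pmd, "always") || !strcmp(pmd, "madvise") ||
		  (!strcmp(pmd, "inherit") &&
		   (!strcmp(global, "always") || !strcmp(global, "madvise")));

	printf("PMD-sized THP %s => khugepaged %s\n",
	       enabled ? "enabled" : "disabled",
	       enabled ? "runs" : "stays stopped");
	return 0;
}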
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9b877f4732f60601cab0746c9a6013d23c188de4..76ede16f79d9cb313e33b5d70a99e17deacf7925 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3976,18 +3976,19 @@ static int ext4_iomap_write_begin(struct file *file,
 	*fsdata = delalloc ? (void *)0 : (void *)FALL_BACK_TO_NONDELALLOC;
 
 retry:
-	iter.pos = pos;
-	iter.len = len;
-
 	folio = iomap_get_folio(&iter, pos, len);
 	if (IS_ERR(folio))
 		return PTR_ERR(folio);
 
-	WARN_ON_ONCE(pos + len > folio_pos(folio) + folio_size(folio));
+	if (pos + len > folio_pos(folio) + folio_size(folio))
+		len = folio_pos(folio) + folio_size(folio) - pos;
 
 	if (iomap_is_fully_dirty(folio, offset_in_folio(folio, pos), len))
 		goto out;
 
+	iter.pos = pos;
+	iter.len = len;
+
 	do {
 		int length;
 
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 0da01df3b2832b31c4ea2c38b504f7dd66dca7e8..c016cb753b5595d810463a271c61fe6db22cc306 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -80,14 +80,20 @@ extern struct kobj_attribute shmem_enabled_attr;
 #define THP_ORDERS_ALL_ANON	((BIT(PMD_ORDER + 1) - 1) & ~(BIT(0) | BIT(1)))
 
 /*
- * Mask of all large folio orders supported for file THP.
+ * Mask of all large folio orders supported for file THP. Folios in a DAX
+ * file are never split and the MAX_PAGECACHE_ORDER limit does not apply
+ * to them.
  */
-#define THP_ORDERS_ALL_FILE	(BIT(PMD_ORDER) | BIT(PUD_ORDER))
+#define THP_ORDERS_ALL_FILE_DAX		\
+	(BIT(PMD_ORDER) | BIT(PUD_ORDER))
+#define THP_ORDERS_ALL_FILE_DEFAULT	\
+	((BIT(MAX_PAGECACHE_ORDER + 1) - 1) & ~BIT(0))
 
 /*
  * Mask of all large folio orders supported for THP.
  */
-#define THP_ORDERS_ALL	(THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE)
+#define THP_ORDERS_ALL	\
+	(THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE_DAX | THP_ORDERS_ALL_FILE_DEFAULT)
 
 #define TVA_SMAPS		(1 << 0)	/* Will be used for procfs */
 #define TVA_IN_PF		(1 << 1)	/* Page fault handler */
@@ -124,18 +130,6 @@ static inline bool hugepage_global_always(void)
 			(1<<TRANSPARENT_HUGEPAGE_FLAG);
 }
 
-static inline bool hugepage_flags_enabled(void)
-{
-	/*
-	 * We cover both the anon and the file-backed case here; we must return
-	 * true if globally enabled, even when all anon sizes are set to never.
-	 * So we don't need to look at huge_anon_orders_inherit.
-	 */
-	return hugepage_global_enabled() ||
-	       huge_anon_orders_always ||
-	       huge_anon_orders_madvise;
-}
-
 static inline int highest_order(unsigned long orders)
 {
 	return fls_long(orders) - 1;
diff --git a/mm/filemap.c b/mm/filemap.c
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ ... @@ ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
 	loff_t pos = iocb->ki_pos;
 	struct address_space *mapping = file->f_mapping;
 	const struct address_space_operations *a_ops = mapping->a_ops;
+	size_t chunk = mapping_max_folio_size(mapping);
 	long status = 0;
 	ssize_t written = 0;
 
 	do {
 		struct page *page;
-		unsigned long offset;	/* Offset into pagecache page */
-		unsigned long bytes;	/* Bytes to write to page */
+		struct folio *folio;
+		size_t offset;		/* Offset into folio */
+		size_t bytes;		/* Bytes to write to folio */
 		size_t copied;		/* Bytes copied from user */
 		void *fsdata = NULL;
 
-		offset = (pos & (PAGE_SIZE - 1));
-		bytes = min_t(unsigned long, PAGE_SIZE - offset,
-						iov_iter_count(i));
+		bytes = iov_iter_count(i);
+retry:
+		offset = pos & (chunk - 1);
+		bytes = min(chunk - offset, bytes);
+		balance_dirty_pages_ratelimited(mapping);
 
-again:
 		/*
 		 * Bring in the user page that we will copy from _first_.
 		 * Otherwise there's a nasty deadlock on copying from the
@@ -4074,11 +4077,16 @@ ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
 		if (unlikely(status < 0))
 			break;
 
+		folio = page_folio(page);
+		offset = offset_in_folio(folio, pos);
+		if (bytes > folio_size(folio) - offset)
+			bytes = folio_size(folio) - offset;
+
 		if (mapping_writably_mapped(mapping))
-			flush_dcache_page(page);
+			flush_dcache_folio(folio);
 
-		copied = copy_page_from_iter_atomic(page, offset, bytes, i);
-		flush_dcache_page(page);
+		copied = copy_folio_from_iter_atomic(folio, offset, bytes, i);
+		flush_dcache_folio(folio);
 
 		status = a_ops->write_end(file, mapping, pos, bytes, copied,
 						page, fsdata);
@@ -4096,14 +4104,16 @@ ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
 			 * halfway through, might be a race with munmap,
 			 * might be severe memory pressure.
 			 */
-			if (copied)
+			if (chunk > PAGE_SIZE)
+				chunk /= 2;
+			if (copied) {
 				bytes = copied;
-			goto again;
+				goto retry;
+			}
+		} else {
+			pos += status;
+			written += status;
 		}
-		pos += status;
-		written += status;
-
-		balance_dirty_pages_ratelimited(mapping);
 	} while (iov_iter_count(i));
 
 	if (!written)
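To make the new retry arithmetic in generic_perform_write() concrete, here is a toy, userspace-only model, not kernel code: chunk stands in for mapping_max_folio_size() (2M assumed), a single simulated short copy triggers the same halving that the new retry: path performs, and offset/bytes are recomputed against the smaller mask on the next pass. The sizes and the forced fault are made up for illustration.

/* Toy model of the chunked-write arithmetic above -- not kernel code. */
#include <stdio.h>

#define CHUNK_START	(2UL << 20)	/* assumed max folio size: 2 MiB */
#define PAGE_SIZE	4096UL

int main(void)
{
	unsigned long pos = 3 * PAGE_SIZE + 123;	/* arbitrary start */
	unsigned long remaining = 5UL << 20;		/* 5 MiB to write */
	unsigned long chunk = CHUNK_START;
	int faulted_once = 0;

	while (remaining) {
		unsigned long offset = pos & (chunk - 1);
		unsigned long bytes = chunk - offset;

		if (bytes > remaining)
			bytes = remaining;

		/* Pretend the first large copy comes up short. */
		if (!faulted_once && bytes > PAGE_SIZE) {
			faulted_once = 1;
			if (chunk > PAGE_SIZE)
				chunk /= 2;	/* same fallback as the patch */
			printf("short copy at pos %lu, retry with chunk %lu\n",
			       pos, chunk);
			continue;
		}

		printf("write pos=%lu offset=%lu bytes=%lu\n", pos, offset, bytes);
		pos += bytes;
		remaining -= bytes;
	}
	return 0;
}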
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d743502c70f09740d453071940b3e8bef9478ea9..16d8ed7f46bd8ddac2b2e31c52d6170e8f42e20b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -84,9 +84,17 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
 	bool smaps = tva_flags & TVA_SMAPS;
 	bool in_pf = tva_flags & TVA_IN_PF;
 	bool enforce_sysfs = tva_flags & TVA_ENFORCE_SYSFS;
+	unsigned long supported_orders;
+
 	/* Check the intersection of requested and supported orders. */
-	orders &= vma_is_anonymous(vma) ?
-			THP_ORDERS_ALL_ANON : THP_ORDERS_ALL_FILE;
+	if (vma_is_anonymous(vma))
+		supported_orders = THP_ORDERS_ALL_ANON;
+	else if (vma_is_dax(vma))
+		supported_orders = THP_ORDERS_ALL_FILE_DAX;
+	else
+		supported_orders = THP_ORDERS_ALL_FILE_DEFAULT;
+
+	orders &= supported_orders;
 	if (!orders)
 		return 0;
 
@@ -652,6 +660,13 @@ static ssize_t thpsize_enabled_store(struct kobject *kobj,
 	} else
 		ret = -EINVAL;
 
+	if (ret > 0) {
+		int err;
+
+		err = start_stop_khugepaged();
+		if (err)
+			ret = err;
+	}
 	return ret;
 }
 
@@ -3348,22 +3363,11 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
 	return ret;
 }
 
-void folio_undo_large_rmappable(struct folio *folio)
+void __folio_undo_large_rmappable(struct folio *folio)
 {
 	struct deferred_split *ds_queue;
 	unsigned long flags;
 
-	if (folio_order(folio) <= 1)
-		return;
-
-	/*
-	 * At this point, there is no one trying to add the folio to
-	 * deferred_list. If folio is not in deferred_list, it's safe
-	 * to check without acquiring the split_queue_lock.
-	 */
-	if (data_race(list_empty(&folio->_deferred_list)))
-		return;
-
 	ds_queue = get_deferred_split_queue(folio);
 	spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
 	if (!list_empty(&folio->_deferred_list)) {
diff --git a/mm/internal.h b/mm/internal.h
index 0ecbaa392054e0f57035445051bbb80e60edfbe5..7db2957ef3a0975bcc51e3a799518215f88076e1 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -593,7 +593,22 @@ static inline void folio_set_order(struct folio *folio, unsigned int order)
 #endif
 }
 
-void folio_undo_large_rmappable(struct folio *folio);
+void __folio_undo_large_rmappable(struct folio *folio);
+static inline void folio_undo_large_rmappable(struct folio *folio)
+{
+	if (folio_order(folio) <= 1 || !folio_test_large_rmappable(folio))
+		return;
+
+	/*
+	 * At this point, there is no one trying to add the folio to
+	 * deferred_list. If folio is not in deferred_list, it's safe
+	 * to check without acquiring the split_queue_lock.
+	 */
+	if (data_race(list_empty(&folio->_deferred_list)))
+		return;
+
+	__folio_undo_large_rmappable(folio);
+}
 
 static inline struct folio *page_rmappable_folio(struct page *page)
 {
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index bc1aaf5b99edacea147c380256b52702a45396be..d13033eb7eaabb0adcddab33557f3c79baf8861c 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -422,6 +422,26 @@ static inline int hpage_collapse_test_exit_or_disable(struct mm_struct *mm)
 	       test_bit(MMF_DISABLE_THP, &mm->flags);
 }
 
+static bool hugepage_pmd_enabled(void)
+{
+	/*
+	 * We cover both the anon and the file-backed case here; file-backed
+	 * hugepages, when configured in, are determined by the global control.
+	 * Anon pmd-sized hugepages are determined by the pmd-size control.
+	 */
+	if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
+	    hugepage_global_enabled())
+		return true;
+	if (test_bit(PMD_ORDER, &huge_anon_orders_always))
+		return true;
+	if (test_bit(PMD_ORDER, &huge_anon_orders_madvise))
+		return true;
+	if (test_bit(PMD_ORDER, &huge_anon_orders_inherit) &&
+	    hugepage_global_enabled())
+		return true;
+	return false;
+}
+
 void __khugepaged_enter(struct mm_struct *mm)
 {
 	struct khugepaged_mm_slot *mm_slot;
@@ -458,7 +478,7 @@ void khugepaged_enter_vma(struct vm_area_struct *vma,
 			  unsigned long vm_flags)
 {
 	if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags) &&
-	    hugepage_flags_enabled()) {
+	    hugepage_pmd_enabled()) {
 		if (thp_vma_allowable_order(vma, vm_flags, TVA_ENFORCE_SYSFS,
 					    PMD_ORDER))
 			__khugepaged_enter(vma->vm_mm);
@@ -2505,8 +2525,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, int *result,
 
 static int khugepaged_has_work(void)
 {
-	return !list_empty(&khugepaged_scan.mm_head) &&
-		hugepage_flags_enabled();
+	return !list_empty(&khugepaged_scan.mm_head) && hugepage_pmd_enabled();
 }
 
 static int khugepaged_wait_event(void)
@@ -2579,7 +2598,7 @@ static void khugepaged_wait_work(void)
 		return;
 	}
 
-	if (hugepage_flags_enabled())
+	if (hugepage_pmd_enabled())
 		wait_event_freezable(khugepaged_wait, khugepaged_wait_event());
 }
 
@@ -2610,7 +2629,7 @@ static void set_recommended_min_free_kbytes(void)
 	int nr_zones = 0;
 	unsigned long recommended_min;
 
-	if (!hugepage_flags_enabled()) {
+	if (!hugepage_pmd_enabled()) {
 		calculate_min_free_kbytes();
 		goto update_wmarks;
 	}
@@ -2660,7 +2679,7 @@ int start_stop_khugepaged(void)
 	int err = 0;
 
 	mutex_lock(&khugepaged_mutex);
-	if (hugepage_flags_enabled()) {
+	if (hugepage_pmd_enabled()) {
 		if (!khugepaged_thread)
 			khugepaged_thread = kthread_run(khugepaged, NULL,
 							"khugepaged");
@@ -2686,7 +2705,7 @@ int start_stop_khugepaged(void)
 void khugepaged_min_free_kbytes_update(void)
 {
 	mutex_lock(&khugepaged_mutex);
-	if (hugepage_flags_enabled() && khugepaged_thread)
+	if (hugepage_pmd_enabled() && khugepaged_thread)
 		set_recommended_min_free_kbytes();
 	mutex_unlock(&khugepaged_mutex);
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index fcf08f3dc53fe1abcb86a94a4ce719159c27ed07..ff22aeac06a4c872f2bcbedb9b12ee3da4b6c99b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -8684,17 +8684,6 @@ void mem_cgroup_migrate(struct folio *old, struct folio *new)
 	/* Transfer the charge and the css ref */
 	commit_charge(new, memcg);
 
-	/*
-	 * If the old folio is a large folio and is in the split queue, it needs
-	 * to be removed from the split queue now, in case getting an incorrect
-	 * split queue in destroy_large_folio() after the memcg of the old folio
-	 * is cleared.
-	 *
-	 * In addition, the old folio is about to be freed after migration, so
-	 * removing from the split queue a bit earlier seems reasonable.
-	 */
-	if (folio_test_large(old) && folio_test_large_rmappable(old))
-		folio_undo_large_rmappable(old);
 	old->memcg_data = 0;
 }
 
diff --git a/mm/migrate.c b/mm/migrate.c
index b5d9d8feacfad3b3d869413c5e62ee31b70002ec..f2f3f3cf3fe27264779cb7f23a229fd313217a9e 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -413,6 +413,15 @@ int folio_migrate_mapping(struct address_space *mapping,
 		if (folio_ref_count(folio) != expected_count)
 			return -EAGAIN;
 
+		/* Take off deferred split queue while frozen and memcg set */
+		if (folio_test_large(folio) &&
+		    folio_test_large_rmappable(folio)) {
+			if (!folio_ref_freeze(folio, expected_count))
+				return -EAGAIN;
+			folio_undo_large_rmappable(folio);
+			folio_ref_unfreeze(folio, expected_count);
+		}
+
 		/* No turning back from here */
 		newfolio->index = folio->index;
 		newfolio->mapping = folio->mapping;
@@ -431,6 +440,9 @@ int folio_migrate_mapping(struct address_space *mapping,
 		return -EAGAIN;
 	}
 
+	/* Take off deferred split queue while frozen and memcg set */
+	folio_undo_large_rmappable(folio);
+
 	/*
 	 * Now we know that no one else is looking at the folio:
 	 * no turning back from here.
@@ -1480,11 +1492,17 @@ static int unmap_and_move_huge_page(new_folio_t get_new_folio,
 	return rc;
 }
 
-static inline int try_split_folio(struct folio *folio, struct list_head *split_folios)
+static inline int try_split_folio(struct folio *folio, struct list_head *split_folios,
+				  enum migrate_mode mode)
 {
 	int rc;
 
-	folio_lock(folio);
+	if (mode == MIGRATE_ASYNC) {
+		if (!folio_trylock(folio))
+			return -EAGAIN;
+	} else {
+		folio_lock(folio);
+	}
 	rc = split_folio_to_list(folio, split_folios);
 	folio_unlock(folio);
 	if (!rc)
@@ -1660,6 +1678,10 @@ static int migrate_pages_batch(struct list_head *from,
 			 * migrate_pages() may report success with (split but
 			 * unmigrated) pages still on its fromlist; whereas it
 			 * always reports success when its fromlist is empty.
+			 * stats->nr_thp_failed should be increased too,
+			 * otherwise the stats become inconsistent when
+			 * migrate_pages_batch() is called via migrate_pages()
+			 * with MIGRATE_SYNC and MIGRATE_ASYNC.
 			 *
 			 * Only check it without removing it from the list.
 			 * Since the folio can be on deferred_split_scan()
@@ -1674,8 +1696,9 @@ static int migrate_pages_batch(struct list_head *from,
 			 */
 			if (nr_pages > 2 &&
 			    !list_empty(&folio->_deferred_list)) {
-				if (try_split_folio(folio, split_folios) == 0) {
+				if (!try_split_folio(folio, split_folios, mode)) {
 					nr_failed++;
+					stats->nr_thp_failed += is_thp;
 					stats->nr_thp_split += is_thp;
 					stats->nr_split++;
 					continue;
@@ -1695,7 +1718,7 @@ static int migrate_pages_batch(struct list_head *from,
 			if (!thp_migration_supported() && is_thp) {
 				nr_failed++;
 				stats->nr_thp_failed++;
-				if (!try_split_folio(folio, split_folios)) {
+				if (!try_split_folio(folio, split_folios, mode)) {
 					stats->nr_thp_split++;
 					stats->nr_split++;
 					continue;
@@ -1727,7 +1750,7 @@ static int migrate_pages_batch(struct list_head *from,
 			stats->nr_thp_failed += is_thp;
 			/* Large folio NUMA faulting doesn't split to retry. */
 			if (is_large && !nosplit) {
-				int ret = try_split_folio(folio, split_folios);
+				int ret = try_split_folio(folio, split_folios, mode);
 
 				if (!ret) {
 					stats->nr_thp_split += is_thp;
@@ -2600,7 +2623,6 @@ int migrate_misplaced_folio(struct folio *folio, struct vm_area_struct *vma,
 	int nr_remaining;
 	unsigned int nr_succeeded;
 	LIST_HEAD(migratepages);
-	int nr_pages = folio_nr_pages(folio);
 
 	/*
 	 * Don't migrate file folios that are mapped in multiple processes
@@ -2629,12 +2651,8 @@ int migrate_misplaced_folio(struct folio *folio, struct vm_area_struct *vma,
 				     NULL, node, MIGRATE_ASYNC,
 				     MR_NUMA_MISPLACED, &nr_succeeded);
 	if (nr_remaining) {
-		if (!list_empty(&migratepages)) {
-			list_del(&folio->lru);
-			node_stat_mod_folio(folio, NR_ISOLATED_ANON +
-					folio_is_file_lru(folio), -nr_pages);
-			folio_putback_lru(folio);
-		}
+		if (!list_empty(&migratepages))
+			putback_movable_pages(&migratepages);
 		isolated = 0;
 	}
 	if (nr_succeeded) {
diff --git a/mm/mmap.c b/mm/mmap.c
index 1d8def3db125e4f50228827c177f54d00e8bf874..27ba0bb1acde01911457c71ffd2f99e4b676c519 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -106,7 +106,7 @@ void vma_set_page_prot(struct vm_area_struct *vma)
  * Requires inode->i_mapping->i_mmap_rwsem
  */
 static void __remove_shared_vm_struct(struct vm_area_struct *vma,
-		struct file *file, struct address_space *mapping)
+		struct address_space *mapping)
 {
 	if (vma->vm_flags & VM_SHARED)
 		mapping_unmap_writable(mapping);
@@ -127,7 +127,7 @@ void unlink_file_vma(struct vm_area_struct *vma)
 	if (file) {
 		struct address_space *mapping = file->f_mapping;
 		i_mmap_lock_write(mapping);
-		__remove_shared_vm_struct(vma, file, mapping);
+		__remove_shared_vm_struct(vma, mapping);
 		i_mmap_unlock_write(mapping);
 	}
 }
@@ -394,26 +394,30 @@ static void __vma_link_file(struct vm_area_struct *vma,
 	flush_dcache_mmap_unlock(mapping);
 }
 
+static void vma_link_file(struct vm_area_struct *vma)
+{
+	struct file *file = vma->vm_file;
+	struct address_space *mapping;
+
+	if (file) {
+		mapping = file->f_mapping;
+		i_mmap_lock_write(mapping);
+		__vma_link_file(vma, mapping);
+		i_mmap_unlock_write(mapping);
+	}
+}
+
 static int vma_link(struct mm_struct *mm, struct vm_area_struct *vma)
 {
 	VMA_ITERATOR(vmi, mm, 0);
-	struct address_space *mapping = NULL;
 
 	vma_iter_config(&vmi, vma->vm_start, vma->vm_end);
 	if (vma_iter_prealloc(&vmi, vma))
 		return -ENOMEM;
 
 	vma_start_write(vma);
-
 	vma_iter_store(&vmi, vma);
-
-	if (vma->vm_file) {
-		mapping = vma->vm_file->f_mapping;
-		i_mmap_lock_write(mapping);
-		__vma_link_file(vma, mapping);
-		i_mmap_unlock_write(mapping);
-	}
-
+	vma_link_file(vma);
 	mm->map_count++;
 	validate_mm(mm);
 	return 0;
@@ -521,10 +525,9 @@ static inline void vma_complete(struct vma_prepare *vp,
 	}
 
 	if (vp->remove && vp->file) {
-		__remove_shared_vm_struct(vp->remove, vp->file, vp->mapping);
+		__remove_shared_vm_struct(vp->remove, vp->mapping);
 		if (vp->remove2)
-			__remove_shared_vm_struct(vp->remove2, vp->file,
-						  vp->mapping);
+			__remove_shared_vm_struct(vp->remove2, vp->mapping);
 	} else if (vp->insert) {
 		/*
 		 * split_vma has split insert from vma, and needs
@@ -2875,16 +2878,7 @@ static unsigned long __mmap_region(struct mm_struct *mm,
 	vma_start_write(vma);
 	vma_iter_store(&vmi, vma);
 	mm->map_count++;
-	if (vma->vm_file) {
-		i_mmap_lock_write(vma->vm_file->f_mapping);
-		if (vma->vm_flags & VM_SHARED)
-			mapping_allow_writable(vma->vm_file->f_mapping);
-
-		flush_dcache_mmap_lock(vma->vm_file->f_mapping);
-		vma_interval_tree_insert(vma, &vma->vm_file->f_mapping->i_mmap);
-		flush_dcache_mmap_unlock(vma->vm_file->f_mapping);
-		i_mmap_unlock_write(vma->vm_file->f_mapping);
-	}
+	vma_link_file(vma);
 
 	/*
 	 * vma_merge() calls khugepaged_enter_vma() either, the below
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 22addac04e98e35570b391d51410a2b2fdecad92..52382ba2423229ba8cd05746346fe20aedc8b9c9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -621,12 +621,14 @@ compaction_capture(struct capture_control *capc, struct page *page,
 		return false;
 
 	/*
-	 * Do not let lower order allocations pollute a movable pageblock.
+	 * Do not let lower order allocations pollute a movable pageblock
+	 * unless compaction is also requesting movable pages.
 	 * This might let an unmovable request use a reclaimable pageblock
 	 * and vice-versa but no more than normal fallback logic which can
 	 * have trouble finding a high-order free page.
 	 */
-	if (order < pageblock_order && migratetype == MIGRATE_MOVABLE)
+	if (order < pageblock_order && migratetype == MIGRATE_MOVABLE &&
+	    capc->cc->migratetype != MIGRATE_MOVABLE)
 		return false;
 
 	capc->page = page;
@@ -2566,8 +2568,7 @@ void free_unref_folios(struct folio_batch *folios)
 			continue;
 		}
 
-		if (order > 0 && folio_test_large_rmappable(folio))
-			folio_undo_large_rmappable(folio);
+		folio_undo_large_rmappable(folio);
 		if (!free_unref_page_prepare(&folio->page, pfn, order))
 			continue;
 
diff --git a/mm/readahead.c b/mm/readahead.c
index 689e003951fe72fff2c2230912a8814de78af10a..a8911f7c161a725700af79e9e17db060de6c347f 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -506,13 +506,11 @@ void page_cache_ra_order(struct readahead_control *ractl,
 
 	limit = min(limit, index + ra->size - 1);
 
-	if (new_order < MAX_PAGECACHE_ORDER) {
+	if (new_order < MAX_PAGECACHE_ORDER)
 		new_order += 2;
-		if (new_order > MAX_PAGECACHE_ORDER)
-			new_order = MAX_PAGECACHE_ORDER;
-		while ((1 << new_order) > ra->size)
-			new_order--;
-	}
+
+	new_order = min_t(unsigned int, MAX_PAGECACHE_ORDER, new_order);
+	new_order = min_t(unsigned int, new_order, ilog2(ra->size));
 
 	/* See comment in page_cache_ra_unbounded() */
 	nofs = memalloc_nofs_save();
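As a worked example of the clamping now done in page_cache_ra_order(), the standalone program below (plain C, not kernel code) reproduces the arithmetic for a few readahead window sizes. MAX_PAGECACHE_ORDER is assumed to be 9 here, the usual HPAGE_PMD_ORDER with 4K pages; the real value is configuration dependent, and clamp_order()/ilog2() are local stand-ins. The ilog2() clamp has the same effect as the removed while-loop for any non-zero ra->size.

/* Worked example of the readahead order clamping -- illustration only. */
#include <stdio.h>

#define MAX_PAGECACHE_ORDER	9	/* assumed; config dependent */

static unsigned int ilog2(unsigned long v)
{
	unsigned int order = 0;

	while (v >>= 1)
		order++;
	return order;
}

static unsigned int clamp_order(unsigned int new_order, unsigned long ra_size)
{
	if (new_order < MAX_PAGECACHE_ORDER)
		new_order += 2;
	if (new_order > MAX_PAGECACHE_ORDER)
		new_order = MAX_PAGECACHE_ORDER;
	if (new_order > ilog2(ra_size))	/* cap at the readahead window */
		new_order = ilog2(ra_size);
	return new_order;
}

int main(void)
{
	unsigned long sizes[] = { 4, 16, 64, 256, 1024 };	/* pages */
	unsigned int order = 4;					/* current folio order */

	for (unsigned int i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("ra->size=%4lu pages -> new_order=%u\n",
		       sizes[i], clamp_order(order, sizes[i]));
	return 0;
}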
diff --git a/mm/slub.c b/mm/slub.c
index 7fcd18261c1e1e55ce33146ff4341f32e69274bd..bcbfd720b57471fafadbcd41aabe8f95f8187635 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2490,7 +2490,7 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
 	struct slab new;
 	struct slab old;
 
-	if (slab->freelist) {
+	if (READ_ONCE(slab->freelist)) {
 		stat(s, DEACTIVATE_REMOTE_FREES);
 		tail = DEACTIVATE_TO_TAIL;
 	}
@@ -5448,7 +5448,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
 			else if (flags & SO_OBJECTS)
 				WARN_ON_ONCE(1);
 			else
-				x = slab->slabs;
+				x = data_race(slab->slabs);
 			total += x;
 			nodes[node] += x;
 		}
@@ -5653,7 +5653,7 @@ static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
 
 		slab = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
 
 		if (slab)
-			slabs += slab->slabs;
+			slabs += data_race(slab->slabs);
 	}
 #endif
@@ -5667,7 +5667,7 @@ static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
 		slab = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
 		if (slab) {
-			slabs = READ_ONCE(slab->slabs);
+			slabs = data_race(slab->slabs);
 			objects = (slabs * oo_objects(s->oo)) / 2;
 			len += sysfs_emit_at(buf, len, " C%d=%d(%d)",
 					     cpu, objects, slabs);
 		}
diff --git a/mm/swap.c b/mm/swap.c
index 1c9e8f70d6b5c823c95ad20a4345787dbb32449a..358bf849406223f6cb64f05f66dea7b2084e7133 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -123,8 +123,7 @@ void __folio_put(struct folio *folio)
 	}
 
 	page_cache_release(folio);
-	if (folio_test_large(folio) && folio_test_large_rmappable(folio))
-		folio_undo_large_rmappable(folio);
+	folio_undo_large_rmappable(folio);
 	mem_cgroup_uncharge(folio);
 	free_unref_page(&folio->page, folio_order(folio));
 }
@@ -999,10 +998,7 @@ void folios_put_refs(struct folio_batch *folios, unsigned int *refs)
 			free_huge_folio(folio);
 			continue;
 		}
-		if (folio_test_large(folio) &&
-		    folio_test_large_rmappable(folio))
-			folio_undo_large_rmappable(folio);
-
+		folio_undo_large_rmappable(folio);
 		__page_cache_release(folio, &lruvec, &flags);
 
 		if (j != i)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 744e5c8bd66b7d7fa6912bd2aa756c0023481011..941a98e7ed395a2c077ee94119af27dc152b8dbe 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -896,7 +896,7 @@ static int scan_swap_map_slots(struct swap_info_struct *si,
 		last_in_cluster = offset + SWAPFILE_CLUSTER - 1;
 
 		/* Locate the first empty (unaligned) cluster */
-		for (; last_in_cluster <= si->highest_bit; offset++) {
+		for (; last_in_cluster <= READ_ONCE(si->highest_bit); offset++) {
 			if (si->swap_map[offset])
 				last_in_cluster = offset + SWAPFILE_CLUSTER;
 			else if (offset == last_in_cluster) {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 37019d51c31d1394f0f24f2ecde721acf7867bb4..3337907ae5e9dc6f993975744e4db157ad631086 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2135,9 +2135,7 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
 		 */
 		nr_reclaimed += nr_pages;
 
-		if (folio_test_large(folio) &&
-		    folio_test_large_rmappable(folio))
-			folio_undo_large_rmappable(folio);
+		folio_undo_large_rmappable(folio);
 		if (folio_batch_add(&free_folios, folio) == 0) {
 			mem_cgroup_uncharge_folios(&free_folios);
 			try_to_unmap_flush();
@@ -2545,9 +2543,7 @@ static unsigned int move_folios_to_lru(struct lruvec *lruvec,
 		if (unlikely(folio_put_testzero(folio))) {
 			__folio_clear_lru_flags(folio);
 
-			if (folio_test_large(folio) &&
-			    folio_test_large_rmappable(folio))
-				folio_undo_large_rmappable(folio);
+			folio_undo_large_rmappable(folio);
 			if (folio_batch_add(&free_folios, folio) == 0) {
 				spin_unlock_irq(&lruvec->lru_lock);
 				mem_cgroup_uncharge_folios(&free_folios);
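A note on the two annotations used in the slub.c and swapfile.c hunks: READ_ONCE() is used where the racy value feeds control flow (the slab freelist test, the scan loop bound), while data_race() marks reads whose result is purely diagnostic (the sysfs slab counts) and may legitimately be stale. The sketch below is a userspace illustration with simplified stand-in macros, not the real <linux/compiler.h> definitions, which additionally cooperate with KCSAN; the variables and functions are invented for the demo.

/* Userspace illustration of READ_ONCE() vs data_race() -- stand-ins only. */
#include <stdio.h>

/* Simplified stand-ins for the kernel macros. */
#define READ_ONCE(x)	(*(const volatile typeof(x) *)&(x))
#define data_race(expr)	(expr)

static unsigned long highest_bit;	/* updated concurrently elsewhere */
static unsigned long nr_slabs;		/* diagnostic counter */
static unsigned long scanned;

static void scan(void)
{
	/*
	 * The loop bound feeds control flow, so each iteration does one
	 * non-torn load of the shared variable: READ_ONCE().
	 */
	for (unsigned long off = 0; off <= READ_ONCE(highest_bit); off++)
		scanned++;
}

static void report(void)
{
	/*
	 * A sysfs-style statistic may be stale without harming correctness;
	 * data_race() documents the intentional race without implying any
	 * ordering.
	 */
	printf("slabs=%lu scanned=%lu\n", data_race(nr_slabs), scanned);
}

int main(void)
{
	highest_bit = 3;
	nr_slabs = 42;
	scan();
	report();
	return 0;
}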