diff --git a/include/linux/dynamic_pool.h b/include/linux/dynamic_pool.h
index d6ef9214c58608d03cfe25649b6fbb7bd6dad47b..1d41c6a853c3a8bb796b34c72d08ad318aa05a74 100644
--- a/include/linux/dynamic_pool.h
+++ b/include/linux/dynamic_pool.h
@@ -111,6 +111,7 @@ void dynamic_pool_bind_file(struct hugetlbfs_inode_info *p, struct hstate *h);
 void dynamic_pool_unbind_file(struct hugetlbfs_inode_info *p);
 int dynamic_pool_hugetlb_acct_memory(struct hstate *h, long delta,
				      struct hugetlbfs_inode_info *p);
+bool dynamic_pool_should_alloc(gfp_t gfp_mask, unsigned int order);
 struct folio *dynamic_pool_alloc_hugepage(struct hugetlbfs_inode_info *p,
					   struct hstate *h, bool reserved);
 void dynamic_pool_free_hugepage(struct folio *folio, bool restore_reserve);
@@ -186,6 +187,11 @@ static inline int dynamic_pool_hugetlb_acct_memory(struct hstate *h, long delta,
	return -ENOMEM;
 }
 
+static inline bool dynamic_pool_should_alloc(gfp_t gfp_mask, unsigned int order)
+{
+	return false;
+}
+
 static inline struct folio *dynamic_pool_alloc_hugepage(struct hugetlbfs_inode_info *p,
							 struct hstate *h, bool reserved)
 {
diff --git a/include/linux/mem_reliable.h b/include/linux/mem_reliable.h
index 15f69349a2a863fa25b59406f2ddb935c31bd8b4..d8cabf94f4a32a8aaf53cc93fd23f94945003e85 100644
--- a/include/linux/mem_reliable.h
+++ b/include/linux/mem_reliable.h
@@ -51,6 +51,9 @@ static inline bool page_reliable(struct page *page)
	if (!page)
		return false;
 
+	if (PagePool(page))
+		return false;
+
	return page_zonenum(page) < ZONE_MOVABLE;
 }
 
diff --git a/mm/compaction.c b/mm/compaction.c
index 771e9629b95c77e1e9868b40af873356f418e234..e9fe6777c8a3c2e9dc38d63e4e27f98199abacb8 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -23,6 +23,7 @@
 #include
 #include
 #include
+#include <linux/dynamic_pool.h>
 #include "internal.h"
 
 #ifdef CONFIG_COMPACTION
@@ -2024,6 +2025,9 @@ static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
			continue;
		}
 
+		if (page_in_dynamic_pool(page))
+			continue;
+
		/*
		 * If isolation recently failed, do not retry. Only check the
		 * pageblock once. COMPACT_CLUSTER_MAX causes a pageblock
diff --git a/mm/dynamic_pool.c b/mm/dynamic_pool.c
index 82cbaa27f3d23bc3e40c07185c00ef32fd39bc7f..15a491a76a582a885283f70f04c3085e05c0dcbd 100644
--- a/mm/dynamic_pool.c
+++ b/mm/dynamic_pool.c
@@ -420,7 +420,7 @@ static int dpool_promote_pool(struct dynamic_pool *dpool, int type)
	src_pool = &dpool->pool[type + 1];
	dst_pool = &dpool->pool[type];
 
-	spin_lock(&dpool->lock);
+	spin_lock_irq(&dpool->lock);
	if (!dst_pool->split_pages)
		goto unlock;
 
@@ -438,13 +438,13 @@ static int dpool_promote_pool(struct dynamic_pool *dpool, int type)
			 * there is no way to free spage_next, so
			 * it is safe to unlock here.
			 */
-			spin_unlock(&dpool->lock);
+			spin_unlock_irq(&dpool->lock);
			cond_resched();
			lru_add_drain_all();
			dpool_disable_pcp_pool(dpool, true);
			do_migrate_range(spage->start_pfn,
					 spage->start_pfn + nr_pages);
-			spin_lock(&dpool->lock);
+			spin_lock_irq(&dpool->lock);
			dpool_enable_pcp_pool(dpool);
			ret = dpool_promote_huge_page(src_pool, dst_pool, spage);
			break;
@@ -463,7 +463,7 @@ static int dpool_promote_pool(struct dynamic_pool *dpool, int type)
	}
 
 unlock:
-	spin_unlock(&dpool->lock);
+	spin_unlock_irq(&dpool->lock);
	if (!ret)
		kfree(spage);
	trace_dpool_promote(dpool, type, page, ret);
@@ -479,11 +479,12 @@ static void dpool_refill_pcp_pool(struct dynamic_pool *dpool,
 {
	struct pages_pool *pool = &dpool->pool[PAGES_POOL_4K];
	struct page *page, *next;
+	unsigned long flags;
	int i = 0;
 
	lockdep_assert_held(&pcp_pool->lock);
 
-	spin_lock(&dpool->lock);
+	spin_lock_irqsave(&dpool->lock, flags);
	if (!pool->free_pages && dpool_demote_pool_locked(dpool, PAGES_POOL_2M))
		goto unlock;
 
@@ -498,7 +499,7 @@
	}
 
 unlock:
-	spin_unlock(&dpool->lock);
+	spin_unlock_irqrestore(&dpool->lock, flags);
 }
 
 static void dpool_drain_pcp_pool(struct dynamic_pool *dpool,
@@ -507,11 +508,12 @@ static void dpool_drain_pcp_pool(struct dynamic_pool *dpool,
 {
	struct pages_pool *pool = &dpool->pool[PAGES_POOL_4K];
	struct page *page, *next;
+	unsigned long flags;
	int i = 0;
 
	lockdep_assert_held(&pcp_pool->lock);
 
-	spin_lock(&dpool->lock);
+	spin_lock_irqsave(&dpool->lock, flags);
	list_for_each_entry_safe(page, next, &pcp_pool->freelist, lru) {
		list_move_tail(&page->lru, &pool->freelist);
		__SetPageDpool(page);
@@ -523,7 +525,7 @@
	pool->used_pages += pcp_pool->used_pages;
	pcp_pool->used_pages = 0;
 
-	spin_unlock(&dpool->lock);
+	spin_unlock_irqrestore(&dpool->lock, flags);
 }
 
 static void dpool_drain_all_pcp_pool(struct dynamic_pool *dpool)
@@ -682,7 +684,7 @@ int dynamic_pool_can_attach(struct task_struct *tsk, struct mem_cgroup *memcg)
	return ret;
 }
 
-static bool dpool_should_alloc(gfp_t gfp_mask, unsigned int order)
+bool dynamic_pool_should_alloc(gfp_t gfp_mask, unsigned int order)
 {
	gfp_t gfp = gfp_mask & GFP_HIGHUSER_MOVABLE;
 
@@ -700,6 +702,11 @@ static bool dpool_should_alloc(gfp_t gfp_mask, unsigned int order)
	if ((gfp | __GFP_IO | __GFP_FS) != GFP_HIGHUSER_MOVABLE)
		return false;
 
+#ifdef CONFIG_MEMORY_RELIABLE
+	if (mem_reliable_is_enabled() && (gfp_mask & GFP_RELIABLE))
+		return false;
+#endif
+
	return true;
 }
 
@@ -714,7 +721,7 @@ struct page *dynamic_pool_alloc_page(gfp_t gfp, unsigned int order,
	if (!dpool_enabled)
		return NULL;
 
-	if (!dpool_should_alloc(gfp, order))
+	if (!dynamic_pool_should_alloc(gfp, order))
		return NULL;
 
	dpool = dpool_get_from_task(current);
@@ -885,16 +892,16 @@ struct folio *dynamic_pool_alloc_hugepage(struct hugetlbfs_inode_info *p,
	if (!dpool)
		return NULL;
 
-	spin_lock_irqsave(&dpool->lock, flags);
-	if (!dpool->online)
-		goto unlock;
-
	if (hstate_is_gigantic(h))
		type = PAGES_POOL_1G;
	else
		type = PAGES_POOL_2M;
	pool = &dpool->pool[type];
 
+	spin_lock_irqsave(&dpool->lock, flags);
+	if (!dpool->online)
+		goto unlock;
+
	list_for_each_entry(folio, &pool->freelist, lru) {
		if (folio_test_hwpoison(folio))
			continue;
@@ -938,13 +945,14 @@ void dynamic_pool_free_hugepage(struct folio *folio, bool restore_reserve)
		return;
	}
 
-	spin_lock_irqsave(&dpool->lock, flags);
	if (hstate_is_gigantic(h))
		type = PAGES_POOL_1G;
	else
		type = PAGES_POOL_2M;
	pool = &dpool->pool[type];
 
+	spin_lock_irqsave(&dpool->lock, flags);
+
	if (folio_test_hwpoison(folio))
		goto unlock;
 
@@ -1204,10 +1212,10 @@ static int dpool_fill_from_hugetlb(struct dynamic_pool *dpool, void *arg)
	if (!h)
		return -EINVAL;
 
-	spin_lock(&hugetlb_lock);
+	spin_lock_irq(&hugetlb_lock);
	if ((h->free_huge_pages_node[nid] < nr_pages) ||
	    (h->free_huge_pages - h->resv_huge_pages < nr_pages)) {
-		spin_unlock(&hugetlb_lock);
+		spin_unlock_irq(&hugetlb_lock);
		return -ENOMEM;
	}
 
@@ -1228,24 +1236,24 @@ static int dpool_fill_from_hugetlb(struct dynamic_pool *dpool, void *arg)
		list_move(&page->lru, &page_list);
		count++;
	}
-	spin_unlock(&hugetlb_lock);
+	spin_unlock_irq(&hugetlb_lock);
 
	list_for_each_entry_safe(page, next, &page_list, lru) {
		if (hugetlb_vmemmap_restore(h, page)) {
-			spin_lock(&hugetlb_lock);
+			spin_lock_irq(&hugetlb_lock);
			enqueue_hugetlb_folio(h, folio);
-			spin_unlock(&hugetlb_lock);
+			spin_unlock_irq(&hugetlb_lock);
			pr_err("restore hugetlb_vmemmap failed page 0x%px\n",
			       page);
			continue;
		}
 
		__SetPageDpool(page);
-		spin_lock(&dpool->lock);
+		spin_lock_irq(&dpool->lock);
		list_move(&page->lru, &pool->freelist);
		pool->free_pages++;
		dpool->total_pages++;
-		spin_unlock(&dpool->lock);
+		spin_unlock_irq(&dpool->lock);
	}
 
	return 0;
@@ -1262,7 +1270,7 @@ static int dpool_drain_to_hugetlb(struct dynamic_pool *dpool)
	if (!h)
		return -EINVAL;
 
-	spin_lock(&dpool->lock);
+	spin_lock_irq(&dpool->lock);
	list_for_each_entry_safe(page, next, &pool->freelist, lru) {
		WARN_ON(PageHWPoison(page));
		idx = hugepage_index(page_to_pfn(page));
@@ -1273,13 +1281,13 @@ static int dpool_drain_to_hugetlb(struct dynamic_pool *dpool)
		pool->free_pages--;
		dpool->total_pages--;
	}
-	spin_unlock(&dpool->lock);
+	spin_unlock_irq(&dpool->lock);
 
	list_for_each_entry_safe(page, next, &page_list, lru) {
		hugetlb_vmemmap_optimize(h, page);
-		spin_lock(&hugetlb_lock);
+		spin_lock_irq(&hugetlb_lock);
		enqueue_hugetlb_folio(h, page_folio(page));
-		spin_unlock(&hugetlb_lock);
+		spin_unlock_irq(&hugetlb_lock);
	}
 
	return dpool->total_pages ? -ENOMEM : 0;
@@ -1303,20 +1311,20 @@ static int dpool_merge_all(struct dynamic_pool *dpool)
		}
	}
 
-	spin_lock(&dpool->lock);
+	spin_lock_irq(&dpool->lock);
	if (pool->split_pages || pool->used_huge_pages || pool->resv_huge_pages) {
		ret = -ENOMEM;
		pr_err("some 2M pages are still in use or mmap, delete failed: ");
		pr_cont_cgroup_name(dpool->memcg->css.cgroup);
		pr_cont("\n");
-		spin_unlock(&dpool->lock);
+		spin_unlock_irq(&dpool->lock);
		goto out;
	}
 
	pool->free_pages += pool->nr_huge_pages;
	pool->nr_huge_pages = 0;
	pool->free_huge_pages = 0;
-	spin_unlock(&dpool->lock);
+	spin_unlock_irq(&dpool->lock);
 
	pool = &dpool->pool[PAGES_POOL_1G];
	while (pool->split_pages) {
@@ -1331,20 +1339,20 @@ static int dpool_merge_all(struct dynamic_pool *dpool)
		}
	}
 
-	spin_lock(&dpool->lock);
+	spin_lock_irq(&dpool->lock);
	if (pool->split_pages || pool->used_huge_pages || pool->resv_huge_pages) {
		ret = -ENOMEM;
		pr_err("some 1G pages are still in use or mmap, delete failed: ");
		pr_cont_cgroup_name(dpool->memcg->css.cgroup);
		pr_cont("\n");
-		spin_unlock(&dpool->lock);
+		spin_unlock_irq(&dpool->lock);
		goto out;
	}
 
	pool->free_pages += pool->nr_huge_pages;
	pool->nr_huge_pages = 0;
	pool->free_huge_pages = 0;
-	spin_unlock(&dpool->lock);
+	spin_unlock_irq(&dpool->lock);
 
	ret = 0;
 out:
@@ -1436,7 +1444,7 @@ void dynamic_pool_show(struct mem_cgroup *memcg, struct seq_file *m)
	}
 
	dpool_disable_pcp_pool(dpool, false);
-	spin_lock(&dpool->lock);
+	spin_lock_irq(&dpool->lock);
 
	/*
	 * no others can modify the count because pcp pool is disabled and
@@ -1471,7 +1479,7 @@ void dynamic_pool_show(struct mem_cgroup *memcg, struct seq_file *m)
	seq_printf(m, "4K_free_pages %lu\n", free_pages);
	seq_printf(m, "4K_used_pages %ld\n", used_pages);
 
-	spin_unlock(&dpool->lock);
+	spin_unlock_irq(&dpool->lock);
	dpool_enable_pcp_pool(dpool);
	dpool_put(dpool);
 }
@@ -1494,22 +1502,25 @@ int dynamic_pool_reserve_hugepage(struct mem_cgroup *memcg,
		goto unlock;
 
	pool = &dpool->pool[type];
-	spin_lock(&dpool->lock);
+	spin_lock_irq(&dpool->lock);
	if (nr_pages > pool->nr_huge_pages) {
		delta = nr_pages - pool->nr_huge_pages;
		while (delta > pool->free_pages &&
-		       !dpool_demote_pool_locked(dpool, type - 1))
-			cond_resched_lock(&dpool->lock);
+		       !dpool_demote_pool_locked(dpool, type - 1)) {
+			spin_unlock_irq(&dpool->lock);
+			cond_resched();
+			spin_lock_irq(&dpool->lock);
+		}
		/* Only try merge pages for 2M pages */
		if (type == PAGES_POOL_2M) {
			while (delta > pool->free_pages) {
-				spin_unlock(&dpool->lock);
+				spin_unlock_irq(&dpool->lock);
				cond_resched();
				if (dpool_promote_pool(dpool, type)) {
-					spin_lock(&dpool->lock);
+					spin_lock_irq(&dpool->lock);
					break;
				}
-				spin_lock(&dpool->lock);
+				spin_lock_irq(&dpool->lock);
			}
		}
		delta = min(delta, pool->free_pages);
@@ -1523,7 +1534,7 @@ int dynamic_pool_reserve_hugepage(struct mem_cgroup *memcg,
		pool->free_huge_pages -= delta;
		pool->free_pages += delta;
	}
-	spin_unlock(&dpool->lock);
+	spin_unlock_irq(&dpool->lock);
	dpool_put(dpool);
 
	ret = 0;
@@ -1559,7 +1570,7 @@ static int dpool_fill_from_pagelist(struct dynamic_pool *dpool, void *arg)
	memcpy(dpool->pfn_ranges, info->pfn_ranges,
	       sizeof(struct range) * dpool->range_cnt);
 
-	spin_lock(&dpool->lock);
+	spin_lock_irq(&dpool->lock);
	for (i = 0; i < dpool->range_cnt; i++) {
		struct range *range = &dpool->pfn_ranges[i];
 
@@ -1586,7 +1597,7 @@ static int dpool_fill_from_pagelist(struct dynamic_pool *dpool, void *arg)
	ret = 0;
 
 unlock:
-	spin_unlock(&dpool->lock);
+	spin_unlock_irq(&dpool->lock);
 
	return ret;
 }
@@ -1604,7 +1615,7 @@ static int dpool_migrate_used_pages(struct dynamic_pool *dpool)
	int range_cnt = dpool->range_cnt;
	int i;
 
-	spin_lock(&dpool->lock);
+	spin_lock_irq(&dpool->lock);
	dpool->nr_poisoned_pages = 0;
 
	for (i = 0; i < range_cnt; i++) {
@@ -1615,11 +1626,11 @@ static int dpool_migrate_used_pages(struct dynamic_pool *dpool)
			struct page *page = pfn_to_page(pfn);
 
			/* Unlock and try migration. */
-			spin_unlock(&dpool->lock);
+			spin_unlock_irq(&dpool->lock);
			cond_resched();
 
			if (PageDpool(page)) {
-				spin_lock(&dpool->lock);
+				spin_lock_irq(&dpool->lock);
				continue;
			}
 
@@ -1628,11 +1639,11 @@ static int dpool_migrate_used_pages(struct dynamic_pool *dpool)
			lru_add_drain_all();
			do_migrate_range(pfn, pfn + 1);
-			spin_lock(&dpool->lock);
+			spin_lock_irq(&dpool->lock);
		}
	}
 
-	spin_unlock(&dpool->lock);
+	spin_unlock_irq(&dpool->lock);
 
	return 0;
 }
@@ -1696,6 +1707,7 @@ void dynamic_pool_show_meminfo(struct seq_file *m)
	struct pages_pool *pool;
	unsigned long free_pages = 0;
	long used_pages = 0;
+	unsigned long flags;
 
	if (!dpool_enabled || !enable_dpagelist)
		return;
@@ -1706,11 +1718,11 @@ void dynamic_pool_show_meminfo(struct seq_file *m)
	pool = &dpool->pool[PAGES_POOL_4K];
 
	dpool_disable_pcp_pool(dpool, false);
-	spin_lock(&dpool->lock);
+	spin_lock_irqsave(&dpool->lock, flags);
	dpool_sum_pcp_pool(dpool, &free_pages, &used_pages);
	free_pages += pool->free_pages;
	used_pages += pool->used_pages;
-	spin_unlock(&dpool->lock);
+	spin_unlock_irqrestore(&dpool->lock, flags);
	dpool_enable_pcp_pool(dpool);
 
 out:
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0d36485728989177c30aef98bb97ad1a66282822..b45e35e8af2c53f48924f4559d043ee834156c35 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3258,7 +3258,8 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
 
	spin_unlock_irq(&hugetlb_lock);
 
-	hugetlb_set_folio_subpool(folio, spool);
+	if (!page_from_dynamic_pool(folio_page(folio, 0)))
+		hugetlb_set_folio_subpool(folio, spool);
 
	map_commit = vma_commit_reservation(h, vma, addr);
	if (unlikely(map_chg > map_commit)) {
diff --git a/mm/migrate.c b/mm/migrate.c
index 1e88f81d23694618899890ab68f911fad9616263..f13c0fcb989c117bd7cd42687fe9414a68d1c573 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -50,6 +50,7 @@
 #include
 #include
 #include
+#include <linux/dynamic_pool.h>
 #include
 
@@ -2009,6 +2010,9 @@ struct folio *alloc_migration_target(struct folio *src, unsigned long private)
	if (folio_test_hugetlb(src)) {
		struct hstate *h = folio_hstate(src);
 
+		if (page_in_dynamic_pool(folio_page(src, 0)))
+			return NULL;
+
		gfp_mask = htlb_modify_alloc_mask(h, gfp_mask);
		return alloc_hugetlb_folio_nodemask(h, nid,
						mtc->nmask, gfp_mask);
@@ -2525,6 +2529,9 @@ static int numamigrate_isolate_folio(pg_data_t *pgdat, struct folio *folio)
		return 0;
	}
 
+	if (page_in_dynamic_pool(folio_page(folio, 0)))
+		return 0;
+
	if (!folio_isolate_lru(folio))
		return 0;
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b063dd0d456dc826895c3bacb09aa9a697f4af9f..815b0c0212fd68443acc8724887065d195c959d7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4098,6 +4098,17 @@ static inline void mem_reliable_fallback_slowpath(gfp_t gfp_mask,
			return;
		}
	}
+
+static inline bool mem_reliable_fallback_dpool(gfp_t gfp_mask, unsigned int order)
+{
+	if (!reliable_allow_fb_enabled())
+		return false;
+
+	if (!(gfp_mask & GFP_RELIABLE))
+		return false;
+
+	return dynamic_pool_should_alloc(gfp_mask & ~GFP_RELIABLE, order);
+}
 #else
 static inline struct zone *mem_reliable_fallback_zone(gfp_t gfp_mask,
						      struct alloc_context *ac)
 {
@@ -4106,6 +4117,10 @@ static inline struct zone *mem_reliable_fallback_zone(gfp_t gfp_mask,
 }
 static inline void mem_reliable_fallback_slowpath(gfp_t gfp_mask,
						   struct alloc_context *ac) {}
+static inline bool mem_reliable_fallback_dpool(gfp_t gfp_mask, unsigned int order)
+{
+	return false;
+}
 #endif
 
 static inline struct page *
@@ -4765,6 +4780,18 @@ struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid,
	if (likely(page))
		goto out;
 
+	/*
+	 * Fall back to dpool if mirrored memory is not enough.
+	 *
+	 * Kswapd and direct reclaim are not triggered here, but that is
+	 * fine: a later normal memory allocation can still trigger them,
+	 * so there is no problem.
+	 */
+	if (mem_reliable_fallback_dpool(gfp, order)) {
+		gfp &= ~GFP_RELIABLE;
+		goto retry;
+	}
+
	alloc_gfp = gfp;
	ac.spread_dirty_pages = false;
 
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index bcf99ba747a05aa9e0015ab431fba20c768d7c29..fefc8a9269447a077c6cf2b2aab01a1fbb046503 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -10,6 +10,7 @@
 #include
 #include
 #include
+#include <linux/dynamic_pool.h>
 #include "internal.h"
 
 #define CREATE_TRACE_POINTS
@@ -329,6 +330,9 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
	start_pfn = max(ALIGN_DOWN(isolate_pageblock, MAX_ORDER_NR_PAGES),
			zone->zone_start_pfn);
 
+	if (page_in_dynamic_pool(pfn_to_page(isolate_pageblock)))
+		return -EBUSY;
+
	if (skip_isolation) {
		int mt __maybe_unused = get_pageblock_migratetype(pfn_to_page(isolate_pageblock));
 
@@ -558,8 +562,9 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
	     pfn < isolate_end - pageblock_nr_pages;
	     pfn += pageblock_nr_pages) {
		page = __first_valid_page(pfn, pageblock_nr_pages);
-		if (page && set_migratetype_isolate(page, migratetype, flags,
-					start_pfn, end_pfn)) {
+		if (page && (page_in_dynamic_pool(page) ||
+			     set_migratetype_isolate(page, migratetype, flags,
+						     start_pfn, end_pfn))) {
			undo_isolate_page_range(isolate_start, pfn, migratetype);
			unset_migratetype_isolate(
				pfn_to_page(isolate_end - pageblock_nr_pages),