diff --git a/fs/Kconfig b/fs/Kconfig
index a5ed26b093b72f8c5655dfd61d2fc3772640f697..385602ba0d9951f3bb90d403a95b024ebf017c13 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -262,7 +262,7 @@ config HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON
 
 config DYNAMIC_HUGETLB
 	bool "Dynamic HugeTLB"
-	depends on X86_64
+	depends on X86_64 || (ARM64 && ARM64_4K_PAGES)
 	depends on HUGETLBFS
 	depends on MEMCG && CGROUP_HUGETLB
 	help
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index cfdd8cffe6d7ebafdd262f44fcd8e063cd7d84d6..2c101a812dee9cf0c425bec0a9bb542bdc7ae548 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -1202,7 +1202,7 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
 	 */
 	mpol_shared_policy_init(&p->policy, NULL);
 	/* Initialize hpool here in case of a quick call to destroy */
-	link_hpool(p);
+	link_hpool(p, sbinfo->hstate);
 
 	return &p->vfs_inode;
 }
diff --git a/include/linux/dynamic_hugetlb.h b/include/linux/dynamic_hugetlb.h
index 5dcba8e8b9334832d1ffc958c5199f38f7fbf916..eff31669e2108976413bb1f59cb254f1360b23c2 100644
--- a/include/linux/dynamic_hugetlb.h
+++ b/include/linux/dynamic_hugetlb.h
@@ -66,7 +66,7 @@ enum huge_pages_pool_type {
 struct dhugetlb_pool {
 	int nid;
 	spinlock_t lock;
-	spinlock_t reserved_lock;
+	KABI_DEPRECATE(spinlock_t, reserved_lock)
 	atomic_t refcnt;
 	unsigned long normal_pages_disabled;
 
@@ -74,6 +74,18 @@ struct dhugetlb_pool {
 	unsigned long total_huge_pages;
 	struct huge_pages_pool hpages_pool[HUGE_PAGES_POOL_MAX];
+
+	/* The dhugetlb_pool structure is only used by the core kernel. It is
+	 * accessed only by the memory cgroup and hugetlb core code, so
+	 * changes made to the dhugetlb_pool structure should not affect
+	 * third-party kernel modules.
+	 */
+	KABI_EXTEND(struct mutex reserved_lock)
+
+	/*
+	 * The percpu_pool[] should only be used by the dynamic hugetlb core.
+	 * External kernel modules should not use it.
+	 */
 	struct percpu_pages_pool percpu_pool[0];
 };
 
@@ -97,13 +109,14 @@ bool free_page_to_dhugetlb_pool(struct page *page);
 void free_page_list_to_dhugetlb_pool(struct list_head *list);
 int task_has_mem_in_hpool(struct task_struct *tsk);
-void link_hpool(struct hugetlbfs_inode_info *p);
+void link_hpool(struct hugetlbfs_inode_info *p, struct hstate *h);
 void unlink_hpool(struct hugetlbfs_inode_info *p);
 bool file_has_mem_in_hpool(struct hugetlbfs_inode_info *p);
 int dhugetlb_acct_memory(struct hstate *h, long delta, struct hugetlbfs_inode_info *p);
 struct page *alloc_huge_page_from_dhugetlb_pool(struct hstate *h, struct dhugetlb_pool *hpool, bool need_unreserved);
 void free_huge_page_to_dhugetlb_pool(struct page *page, bool restore_reserve);
+bool page_belong_to_dynamic_hugetlb(struct page *page);
 
 #else
@@ -147,7 +160,7 @@ static inline int task_has_mem_in_hpool(struct task_struct *tsk)
 }
 
 #ifdef CONFIG_HUGETLBFS
-static inline void link_hpool(struct hugetlbfs_inode_info *p)
+static inline void link_hpool(struct hugetlbfs_inode_info *p, struct hstate *h)
 {
 }
 static inline void unlink_hpool(struct hugetlbfs_inode_info *p)
@@ -171,6 +184,11 @@ static inline void free_huge_page_to_dhugetlb_pool(struct page *page,
 						    bool restore_reserve)
 {
 }
+static inline
+bool page_belong_to_dynamic_hugetlb(struct page *page)
+{
+	return false;
+}
 #endif
 
 #endif /* CONFIG_DYNAMIC_HUGETLB */
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 600cda4ea1be53ee22d9e6340ba33e325a4ac06c..3056b0985c1bd0e2520c717b74dbf9bf30441834 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -372,7 +372,7 @@ struct mem_cgroup {
 	struct deferred_split deferred_split_queue;
 #endif
 
-#ifdef CONFIG_DYNAMIC_HUGETLB
+#if defined(CONFIG_DYNAMIC_HUGETLB) && defined(CONFIG_X86_64)
 	struct dhugetlb_pool *hpool;
 #endif
 #ifndef __GENKSYMS__
@@ -392,7 +392,11 @@ struct mem_cgroup {
 	KABI_RESERVE(3)
 	KABI_RESERVE(4)
 #endif
+#if defined(CONFIG_DYNAMIC_HUGETLB) && defined(CONFIG_ARM64)
+	KABI_USE(5, struct dhugetlb_pool *hpool)
+#else
 	KABI_RESERVE(5)
+#endif
 	KABI_RESERVE(6)
 	KABI_RESERVE(7)
 	KABI_RESERVE(8)
diff --git a/mm/dynamic_hugetlb.c b/mm/dynamic_hugetlb.c
index 6b615009c3a46554bc710906936690a30ebc8fbc..8199ef893f4a6ddc56ebddce7f8531722e742b9a 100644
--- a/mm/dynamic_hugetlb.c
+++ b/mm/dynamic_hugetlb.c
@@ -5,6 +5,7 @@
 #include
 #include
+#include
 #include
 #include
@@ -54,7 +55,8 @@ static void __hpool_split_gigantic_page(struct dhugetlb_pool *hpool, struct page
 {
 	int nr_pages = 1 << (PUD_SHIFT - PAGE_SHIFT);
 	int nr_blocks = 1 << (PMD_SHIFT - PAGE_SHIFT);
-	int i, pfn = page_to_pfn(page);
+	unsigned long pfn = page_to_pfn(page);
+	int i;
 
 	lockdep_assert_held(&hpool->lock);
 	atomic_set(compound_mapcount_ptr(page), 0);
@@ -447,6 +449,19 @@ static struct dhugetlb_pool *find_hpool_by_dhugetlb_pagelist(struct page *page)
 	return hpool;
 }
 
+bool page_belong_to_dynamic_hugetlb(struct page *page)
+{
+	struct dhugetlb_pool *hpool;
+
+	if (!dhugetlb_enabled)
+		return false;
+
+	hpool = find_hpool_by_dhugetlb_pagelist(page);
+	if (hpool)
+		return true;
+	return false;
+}
+
 static struct dhugetlb_pool *find_hpool_by_task(struct task_struct *tsk)
 {
 	struct mem_cgroup *memcg;
@@ -515,6 +530,13 @@ static struct page *__alloc_page_from_dhugetlb_pool(void)
 
 	spin_lock_irqsave(&percpu_pool->lock, flags);
 	do {
+		/*
+		 * Before discarding the bad page, set the PagePool flag to
+		 * distinguish it from a free page, and increase used_pages
+		 * to guarantee used + free == total.
+		 */
+		if (page)
+			SetPagePool(page);
 		page = NULL;
 		if (percpu_pool->free_pages == 0) {
 			int ret;
@@ -530,8 +552,8 @@ static struct page *__alloc_page_from_dhugetlb_pool(void)
 		page = list_entry(percpu_pool->head_page.next, struct page, lru);
 		list_del(&page->lru);
 		percpu_pool->free_pages--;
+		percpu_pool->used_pages++;
 	} while (page && check_new_page(page));
-	percpu_pool->used_pages++;
 	SetPagePool(page);
 
 unlock:
@@ -618,13 +640,19 @@ void free_page_list_to_dhugetlb_pool(struct list_head *list)
 	}
 }
 
-void link_hpool(struct hugetlbfs_inode_info *p)
+void link_hpool(struct hugetlbfs_inode_info *p, struct hstate *h)
 {
+	unsigned long size;
+
 	if (!dhugetlb_enabled || !p)
 		return;
 
-	p->hpool = find_hpool_by_task(current);
-	if (!get_hpool_unless_zero(p->hpool))
+	size = huge_page_size(h);
+	if (size == PMD_SIZE || size == PUD_SIZE) {
+		p->hpool = find_hpool_by_task(current);
+		if (!get_hpool_unless_zero(p->hpool))
+			p->hpool = NULL;
+	} else
 		p->hpool = NULL;
 }
@@ -733,8 +761,15 @@ void free_huge_page_to_dhugetlb_pool(struct page *page, bool restore_reserve)
 	}
 
 	spin_lock(&hpool->lock);
+	/*
+	 * memory_failure() will free the hwpoisoned hugepage and then try to
+	 * dissolve it, freeing its subpages to the buddy system. Since pages
+	 * in a dhugetlb_pool must not be freed to the buddy system, isolate
+	 * the hugepage here directly and skip the later dissolution.
+	 */
+	if (PageHWPoison(page))
+		goto out;
 	ClearPagePool(page);
-	set_compound_page_dtor(page, NULL_COMPOUND_DTOR);
 	if (hstate_is_gigantic(h))
 		hpages_pool = &hpool->hpages_pool[HUGE_PAGES_POOL_1G];
 	else
@@ -750,6 +785,7 @@ void free_huge_page_to_dhugetlb_pool(struct page *page, bool restore_reserve)
 	}
 	trace_dynamic_hugetlb_alloc_free(hpool, page, hpages_pool->free_huge_pages,
					 DHUGETLB_FREE, huge_page_size(h));
+out:
 	spin_unlock(&hpool->lock);
 	put_hpool(hpool);
 }
@@ -859,7 +895,7 @@ static int hugetlb_pool_create(struct mem_cgroup *memcg, unsigned long nid)
 		return -ENOMEM;
 
 	spin_lock_init(&hpool->lock);
-	spin_lock_init(&hpool->reserved_lock);
+	mutex_init(&hpool->reserved_lock);
 	hpool->nid = nid;
 	atomic_set(&hpool->refcnt, 1);
@@ -972,7 +1008,7 @@ static ssize_t update_reserved_pages(struct mem_cgroup *memcg, char *buf, int hp
 	if (!get_hpool_unless_zero(hpool))
 		return -EINVAL;
 
-	spin_lock(&hpool->reserved_lock);
+	mutex_lock(&hpool->reserved_lock);
 	spin_lock(&hpool->lock);
 	hpages_pool = &hpool->hpages_pool[hpages_pool_idx];
 	if (nr_pages > hpages_pool->nr_huge_pages) {
@@ -1008,7 +1044,7 @@ static ssize_t update_reserved_pages(struct mem_cgroup *memcg, char *buf, int hp
 		hpages_pool->free_normal_pages += delta;
 	}
 	spin_unlock(&hpool->lock);
-	spin_unlock(&hpool->reserved_lock);
+	mutex_unlock(&hpool->reserved_lock);
 	put_hpool(hpool);
 	return 0;
 }
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 03eca3aec0f6edf52b2b47c1d9538f32e7b2ed97..6c3ebbbccabd4afd5081310d79510a916c0c9209 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2023,6 +2023,13 @@ int dissolve_free_huge_page(struct page *page)
 	if (!PageHuge(page))
 		return 0;
 
+	/*
+	 * A page belonging to dynamic hugetlb is isolated as a whole
+	 * when freed. See free_huge_page_to_dhugetlb_pool() for details.
+	 */
+	if (page_belong_to_dynamic_hugetlb(page))
+		return -EBUSY;
+
 	spin_lock_irq(&hugetlb_lock);
 	if (!PageHuge(page)) {
 		rc = 0;