From ffbdb9dde7ab5a888c2a7ed91029eb79e2d2b453 Mon Sep 17 00:00:00 2001
From: Wang Wensheng
Date: Wed, 13 Sep 2023 11:12:15 +0800
Subject: [PATCH 1/5] mm/mmap: Don't merge vma from sharepool

ascend inclusion
category: Feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I7YF5R

---------------------------------------------

A vma mapped from the share pool must not be merged, because the
underlying sp_area cannot be merged. Check this in is_mergeable_vma()
instead of vma_merge().

Signed-off-by: Wang Wensheng
---
 mm/mmap.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mm/mmap.c b/mm/mmap.c
index 7d04e54494b6..1e5a20d81aa9 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -827,6 +827,10 @@ static inline bool is_mergeable_vma(struct vm_area_struct *vma,
 		return false;
 	if (!anon_vma_name_eq(anon_vma_name(vma), anon_name))
 		return false;
+	/* don't merge this kind of vma as sp_area couldn't be merged */
+	if (sp_check_vm_share_pool(vm_flags))
+		return false;
+
 	return true;
 }
 
@@ -976,10 +980,6 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
 	if (vm_flags & VM_SPECIAL)
 		return NULL;
 
-	/* don't merge this kind of vma as sp_area couldn't be merged */
-	if (sp_check_vm_share_pool(vm_flags))
-		return NULL;
-
 	/* Does the input range span an existing VMA? (cases 5 - 8) */
 	curr = find_vma_intersection(mm, prev ? prev->vm_end : 0, end);
 
--
Gitee
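is_mergeable_vma() is the common predicate behind can_vma_merge_before()
and can_vma_merge_after(), so the share-pool restriction now sits beside
the other mergeability checks instead of being special-cased at a single
call site. For reference, a plausible sketch of the predicate itself
(the real body lives in include/linux/share_pool.h; the VM_SHARE_POOL
flag name is an assumption, not something these patches show):

static inline bool sp_check_vm_share_pool(unsigned long vm_flags)
{
	/*
	 * Only veto merging when sharepool is active and the vma
	 * actually belongs to it (assumed VM_SHARE_POOL marker).
	 */
	if (sp_is_enabled() && (vm_flags & VM_SHARE_POOL))
		return true;

	return false;
}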
From 468db0c4541d5b4758f03fb3f36bc5d445ab4f9a Mon Sep 17 00:00:00 2001
From: Wang Wensheng
Date: Wed, 13 Sep 2023 11:12:16 +0800
Subject: [PATCH 2/5] mm/sharepool: Use mmap_write_[un]lock helper

ascend inclusion
category: Feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I7YF5R

---------------------------------------------

Use the mmap_write_[un]lock helpers. Since the PER_VMA_LOCK series was
merged, mmap_write_unlock() is no longer equivalent to a bare
up_write(&mm->mmap_lock).

Signed-off-by: Wang Wensheng
---
 mm/share_pool.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/mm/share_pool.c b/mm/share_pool.c
index 1ca5b867147d..66f14fd38d25 100644
--- a/mm/share_pool.c
+++ b/mm/share_pool.c
@@ -1777,9 +1777,9 @@ static void sp_munmap(struct mm_struct *mm, unsigned long addr,
 {
 	int err;
 
-	down_write(&mm->mmap_lock);
+	mmap_write_lock(mm);
 	if (unlikely(!mmget_not_zero(mm))) {
-		up_write(&mm->mmap_lock);
+		mmap_write_unlock(mm);
 		pr_warn("munmap: target mm is exiting\n");
 		return;
 	}
@@ -1789,7 +1789,7 @@ static void sp_munmap(struct mm_struct *mm, unsigned long addr,
 	if (err)
 		pr_err("failed to unmap VA %pK when sp munmap, %d\n", (void *)addr, err);
 
-	up_write(&mm->mmap_lock);
+	mmap_write_unlock(mm);
 	mmput_async(mm);
 }
 
@@ -2090,9 +2090,9 @@ static int sp_map_spa_to_mm(struct mm_struct *mm, struct sp_area *spa,
 	unsigned long mmap_addr;
 	unsigned long populate = 0;
 
-	down_write(&mm->mmap_lock);
+	mmap_write_lock(mm);
 	if (unlikely(!mmget_not_zero(mm))) {
-		up_write(&mm->mmap_lock);
+		mmap_write_unlock(mm);
 		pr_warn("sp_map: target mm is exiting\n");
 		return SP_SKIP_ERR;
 	}
@@ -2100,19 +2100,19 @@ static int sp_map_spa_to_mm(struct mm_struct *mm, struct sp_area *spa,
 	/* when success, mmap_addr == spa->va_start */
 	mmap_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot);
 	if (IS_ERR_VALUE(mmap_addr)) {
-		up_write(&mm->mmap_lock);
+		mmap_write_unlock(mm);
 		mmput_async(mm);
 		pr_err("%s, sp mmap failed %ld\n", str, mmap_addr);
 		return (int)mmap_addr;
 	}
 
 	if (spa->type == SPA_TYPE_ALLOC) {
-		up_write(&mm->mmap_lock);
+		mmap_write_unlock(mm);
 		ret = sp_alloc_populate(mm, spa, populate, ac);
 		if (ret) {
-			down_write(&mm->mmap_lock);
+			mmap_write_lock(mm);
 			do_munmap(mm, mmap_addr, spa_size(spa), NULL);
-			up_write(&mm->mmap_lock);
+			mmap_write_unlock(mm);
 		}
 	} else {
 		ret = sp_k2u_populate(mm, spa);
@@ -2120,7 +2120,7 @@ static int sp_map_spa_to_mm(struct mm_struct *mm, struct sp_area *spa,
 			do_munmap(mm, mmap_addr, spa_size(spa), NULL);
 			pr_info("k2u populate failed, %d\n", ret);
 		}
-		up_write(&mm->mmap_lock);
+		mmap_write_unlock(mm);
 	}
 
 	mmput_async(mm);
@@ -2735,11 +2735,11 @@ void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int tgid)
 		return ERR_PTR(-EPERM);
 	}
 
-	down_write(&mm->mmap_lock);
+	mmap_write_lock(mm);
 	ret = __sp_walk_page_range(uva, size, mm, &sp_walk_data);
 	if (ret) {
 		pr_err_ratelimited("walk page range failed %d\n", ret);
-		up_write(&mm->mmap_lock);
+		mmap_write_unlock(mm);
 		return ERR_PTR(ret);
 	}
 
@@ -2749,7 +2749,7 @@ void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int tgid)
 	else
 		p = vmap(sp_walk_data.pages, sp_walk_data.page_count, VM_MAP,
 			 PAGE_KERNEL);
-	up_write(&mm->mmap_lock);
+	mmap_write_unlock(mm);
 
 	if (!p) {
 		pr_err("vmap(huge) in u2k failed\n");
@@ -2892,9 +2892,9 @@ int mg_sp_walk_page_range(unsigned long uva, unsigned long size,
 		return -ESRCH;
 	}
 
-	down_write(&mm->mmap_lock);
+	mmap_write_lock(mm);
 	ret = __sp_walk_page_range(uva, size, mm, sp_walk_data);
-	up_write(&mm->mmap_lock);
+	mmap_write_unlock(mm);
 	mmput(mm);
 	put_task_struct(tsk);
 
--
Gitee
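For context, on a 6.4-era kernel with CONFIG_PER_VMA_LOCK the write-side
helpers are no longer thin wrappers around the rwsem. Abridged from
include/linux/mmap_lock.h, roughly:

static inline void mmap_write_lock(struct mm_struct *mm)
{
	__mmap_lock_trace_start_locking(mm, true);
	down_write(&mm->mmap_lock);
	__mmap_lock_trace_acquire_returned(mm, true, true);
}

static inline void mmap_write_unlock(struct mm_struct *mm)
{
	__mmap_lock_trace_released(mm, true);
	vma_end_write_all(mm);	/* bump mm_lock_seq so per-VMA lock holders revalidate */
	up_write(&mm->mmap_lock);
}

A raw up_write(&mm->mmap_lock) skips vma_end_write_all() and leaves the
per-VMA write marks stale, which is why the open-coded locking had to go.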
From 9bf7fb8809c9e6db701464d6818fe6c350aedd22 Mon Sep 17 00:00:00 2001
From: Wang Wensheng
Date: Wed, 13 Sep 2023 11:12:17 +0800
Subject: [PATCH 3/5] mm/sharepool: Return -ENOMEM when hugepage allocation fails

ascend inclusion
category: Feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I7YF5R

---------------------------------------------

Return -ENOMEM instead of -EFAULT when populating a hugepage-backed
sp_area fails, so that the caller can fall back to normal pages.

Signed-off-by: Wang Wensheng
---
 mm/share_pool.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mm/share_pool.c b/mm/share_pool.c
index 66f14fd38d25..1b9498fc0187 100644
--- a/mm/share_pool.c
+++ b/mm/share_pool.c
@@ -2064,6 +2064,8 @@ static int sp_alloc_populate(struct mm_struct *mm, struct sp_area *spa,
 	 * depends on this feature (and MAP_LOCKED) to work correctly.
 	 */
 	ret = do_mm_populate(mm, spa->va_start, populate, 0);
+	if (ac && (ac->sp_flags & SP_HUGEPAGE) && unlikely(ret == -EFAULT))
+		ret = -ENOMEM;
 	if (ret) {
 		if (unlikely(fatal_signal_pending(current)))
 			pr_warn("allocation failed, current thread is killed\n");
--
Gitee

From 3a0a8c0684adc64ab950eb1e844c827e00429fff Mon Sep 17 00:00:00 2001
From: Wang Wensheng
Date: Wed, 13 Sep 2023 11:12:18 +0800
Subject: [PATCH 4/5] mm/sharepool: Protect the va reserved for sharepool

ascend inclusion
category: Feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I7YF5R

---------------------------------------------

Add protection for the VA range reserved for sharepool: forbid
munmap() and mremap() from operating on that range, and reject mmap()
address hints that fall inside it unless MAP_SHARE_POOL is specified.

Signed-off-by: Wang Wensheng
---
 include/linux/share_pool.h | 31 ++++++++++++++++++++-----------
 mm/mmap.c                  |  9 +++++++++
 mm/mremap.c                |  4 ++++
 3 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h
index b0711eea4b73..693ceea5999b 100644
--- a/include/linux/share_pool.h
+++ b/include/linux/share_pool.h
@@ -158,6 +158,23 @@ static inline bool sp_check_vm_share_pool(unsigned long vm_flags)
 	return false;
 }
 
+static inline bool sp_check_addr(unsigned long addr)
+{
+	if (sp_is_enabled() && mg_is_sharepool_addr(addr))
+		return true;
+	else
+		return false;
+}
+
+static inline bool sp_check_mmap_addr(unsigned long addr, unsigned long flags)
+{
+	if (sp_is_enabled() && mg_is_sharepool_addr(addr) &&
+	    !(flags & MAP_SHARE_POOL))
+		return true;
+	else
+		return false;
+}
+
 #else /* CONFIG_SHARE_POOL */
 
 static inline int mg_sp_group_add_task(int tgid, unsigned long prot, int spg_id)
@@ -233,14 +250,6 @@ static inline bool mg_is_sharepool_addr(unsigned long addr)
 	return false;
 }
 
-static inline void spa_overview_show(struct seq_file *seq)
-{
-}
-
-static inline void spg_overview_show(struct seq_file *seq)
-{
-}
-
 static inline bool sp_is_enabled(void)
 {
 	return false;
@@ -255,14 +264,14 @@ static inline bool sp_check_vm_share_pool(unsigned long vm_flags)
 	return false;
 }
 
-static inline bool is_vm_huge_special(struct vm_area_struct *vma)
+static inline bool sp_check_addr(unsigned long addr)
 {
 	return false;
 }
 
-static inline int sp_node_id(struct vm_area_struct *vma)
+static inline bool sp_check_mmap_addr(unsigned long addr, unsigned long flags)
 {
-	return numa_node_id();
+	return false;
 }
 
 #endif /* !CONFIG_SHARE_POOL */
diff --git a/mm/mmap.c b/mm/mmap.c
index 1e5a20d81aa9..eb24efdba25d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1765,6 +1765,9 @@ generic_get_unmapped_area(struct file *filp, unsigned long addr,
 	if (len > mmap_end - mmap_min_addr)
 		return -ENOMEM;
 
+	if (sp_check_mmap_addr(addr, flags))
+		return -EINVAL;
+
 	if (flags & MAP_FIXED)
 		return addr;
 
@@ -1814,6 +1817,9 @@ generic_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
 	if (len > mmap_end - mmap_min_addr)
 		return -ENOMEM;
 
+	if (sp_check_mmap_addr(addr, flags))
+		return -EINVAL;
+
 	if (flags & MAP_FIXED)
 		return addr;
 
@@ -3083,6 +3089,9 @@ static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
 	LIST_HEAD(uf);
 	VMA_ITERATOR(vmi, mm, start);
 
+	if (sp_check_addr(start))
+		return -EINVAL;
+
 	if (mmap_write_lock_killable(mm))
 		return -EINTR;
 
diff --git a/mm/mremap.c b/mm/mremap.c
index b11ce6c92099..add907c0a9af 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -25,6 +25,7 @@
 #include <linux/uaccess.h>
 #include <linux/userfaultfd_k.h>
 #include <linux/mempolicy.h>
+#include <linux/share_pool.h>
 
 #include <asm/cacheflush.h>
 #include <asm/tlb.h>
@@ -936,6 +937,9 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 	if (offset_in_page(addr))
 		return ret;
 
+	if (sp_check_addr(addr) || sp_check_addr(new_addr))
+		return ret;
+
 	old_len = PAGE_ALIGN(old_len);
 	new_len = PAGE_ALIGN(new_len);
 
--
Gitee
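The userspace-visible behaviour, sketched as a hypothetical probe (how
sp_va is obtained is a placeholder for the real sharepool API; only the
EINVAL expectations follow from this patch):

#define _GNU_SOURCE
#include <sys/mman.h>
#include <assert.h>
#include <errno.h>

static void probe_sp_va(void *sp_va, size_t len)
{
	/* Tearing down a sharepool VA directly is rejected in __vm_munmap()... */
	assert(munmap(sp_va, len) == -1 && errno == EINVAL);

	/* ...and mremap() refuses to move or resize it. */
	assert(mremap(sp_va, len, 2 * len, MREMAP_MAYMOVE) == MAP_FAILED &&
	       errno == EINVAL);
}

A plain mmap() hint inside the reserved range likewise falls out of
generic_get_unmapped_area() with -EINVAL unless MAP_SHARE_POOL is passed.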
From b7593c7ecc76a571c7947b2211547ffca2611e15 Mon Sep 17 00:00:00 2001
From: Wang Wensheng
Date: Wed, 13 Sep 2023 11:12:19 +0800
Subject: [PATCH 5/5] mm/sharepool: Mmap for the current process first

ascend inclusion
category: Feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I7YF5R

---------------------------------------------

When a user calls mg_sp_alloc(), the physical memory is allocated in
do_mm_populate() on behalf of the first process in the group, so the
pages are charged to that process's memcg. This is unreasonable and may
drive the first process's memcg into OOM. Start with the current
process instead, so that the pages are charged to the memcg of the
caller.

Signed-off-by: Wang Wensheng
---
 mm/share_pool.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/mm/share_pool.c b/mm/share_pool.c
index 1b9498fc0187..99a46d9f05a1 100644
--- a/mm/share_pool.c
+++ b/mm/share_pool.c
@@ -2129,16 +2129,28 @@ static int sp_map_spa_to_mm(struct mm_struct *mm, struct sp_area *spa,
 	return ret;
 }
 
-static int sp_alloc_mmap_populate(struct sp_area *spa, struct sp_alloc_context *ac)
+static int sp_alloc_mmap_populate(struct sp_area *spa, struct sp_alloc_context *ac,
+				  struct sp_group_node *spg_node)
 {
-	int ret = -EINVAL;
+	int ret = 0;
 	int mmap_ret = 0;
 	struct mm_struct *mm;
-	struct sp_group_node *spg_node;
+	bool reach_current = false;
+
+	mmap_ret = sp_map_spa_to_mm(current->mm, spa, spg_node->prot, ac, "sp_alloc");
+	if (mmap_ret) {
+		/* Don't skip error for current process */
+		mmap_ret = (mmap_ret == SP_SKIP_ERR) ? -EINVAL : mmap_ret;
+		goto fallocate;
+	}
 
 	/* create mapping for each process in the group */
 	list_for_each_entry(spg_node, &spa->spg->proc_head, proc_node) {
 		mm = spg_node->master->mm;
+		if (mm == current->mm) {
+			reach_current = true;
+			continue;
+		}
 		mmap_ret = sp_map_spa_to_mm(mm, spa, spg_node->prot, ac, "sp_alloc");
 		if (mmap_ret) {
 			/*
@@ -2158,7 +2170,9 @@ static int sp_alloc_mmap_populate(struct sp_area *spa, struct sp_alloc_context *
 
 unmap:
 	__sp_free(spa, mm);
-
+	if (!reach_current)
+		sp_munmap(current->mm, spa->va_start, spa_size(spa));
+fallocate:
 	/*
 	 * Sometimes do_mm_populate() allocates some memory and then failed to
 	 * allocate more. (e.g. memory use reaches cgroup limit.)
@@ -2212,7 +2226,7 @@ static void *__mg_sp_alloc_nodemask(unsigned long size, unsigned long sp_flags,
 		goto out;
 	}
 
-	ret = sp_alloc_mmap_populate(spa, &ac);
+	ret = sp_alloc_mmap_populate(spa, &ac, spg_node);
 	if (ret == -ENOMEM && sp_alloc_fallback(spa, &ac))
 		goto try_again;
 
--
Gitee
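The ordering matters because the memcg charge follows the mm being
populated, and a shared page is charged only once, by whichever process
faults it in first. Abridged from a 6.x-era mm/memcontrol.c for
reference:

int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp)
{
	struct mem_cgroup *memcg;
	int ret;

	/* The charge lands on the memcg owning @mm, not on current's. */
	memcg = get_mem_cgroup_from_mm(mm);
	ret = charge_memcg(folio, memcg, gfp);
	css_put(&memcg->css);

	return ret;
}

With current->mm mapped and populated first, the pages that
mg_sp_alloc() faults in are billed to the caller's memcg rather than to
whichever process happens to head the group list.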