From f38529cdeb40ac23061db21674699c349327d2fa Mon Sep 17 00:00:00 2001 From: Xu Yu Date: Tue, 29 Aug 2023 11:11:22 +0800 Subject: [PATCH] anolis: mm: thp: introduce memcg interface to control THP behavior ANBZ: #6273 This introduces a memcg interface to allow the administrator to disable anonymous, shmem, file THP, respectively, for tasks in this memcg. Although prohibiting tasks in specific memcg from using THP does not prevent fragmentation, it can reduce THP contention and memory waste. This interface uses bits to operate, and the bit to disable anonymous, shmem, file THP is 0, 1, 2, respectively. For example, to disable anonymous THP: $ echo 0x1 > /path/to/memcg/memory.thp_control And to disable shmem and file THP: $ echo 0x6 > /path/to/memcg/memory.thp_control And to disable all THP: $ echo 0x7 > /path/to/memcg/memory.thp_control Note that this interface is not applicable to root memcg. Signed-off-by: Xu Yu Reviewed-by: Rongwei Wang --- .../admin-guide/cgroup-v1/memory.rst | 29 +++++++++++ include/linux/huge_mm.h | 42 +++++++++++++++- include/linux/khugepaged.h | 3 ++ include/linux/memcontrol.h | 1 + mm/memcontrol.c | 48 +++++++++++++++++++ mm/shmem.c | 3 +- 6 files changed, 123 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/cgroup-v1/memory.rst b/Documentation/admin-guide/cgroup-v1/memory.rst index 289616e45a28..7a996f156124 100644 --- a/Documentation/admin-guide/cgroup-v1/memory.rst +++ b/Documentation/admin-guide/cgroup-v1/memory.rst @@ -107,6 +107,7 @@ Brief summary of control files. memory.wmark_scale_factor the gap between wmark_low and wmark_high, percentage of max limit, default is 50 or 0.5% of max limit. The max value is 1000 or 10% of max limit. + memory.thp_control set/show THP controls. ==================================== ========================================== 1. History @@ -1029,6 +1030,34 @@ enable/disable the feature in each memcg. Write "1" to enable the priority oom a 3. 
Start reclamation in the background when the limit is not yet hit but the usage is getting closer +15. THP Control +=============== + +The memory.thp_control file is for THP behavior controls. At present, this +allows the administrator to disable anonymous, shmem, file THP, respectively, +for tasks in this memcg. + +Although prohibiting tasks in specific memcg from using THP does not +prevent fragmentation, it can reduce THP contention and memory +waste. + +This interface uses bits to operate, and the bit to disable anonymous, +shmem, file THP is 0, 1, 2, respectively. + +For example, to disable anonymous THP: + + # echo 0x1 > /path/to/memcg/memory.thp_control + +And to disable shmem and file THP: + + # echo 0x6 > /path/to/memcg/memory.thp_control + +And to disable all THP: + + # echo 0x7 > /path/to/memcg/memory.thp_control + +Note that this interface is not applicable to root memcg. + Summary ======= diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 61b1cca51c89..3cabea13d5ea 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -122,6 +122,40 @@ extern struct kobj_attribute shmem_enabled_attr; extern unsigned long transparent_hugepage_flags; +enum memcg_thp_flag { + MEMCG_DISABLE_ANON_THP, + MEMCG_DISABLE_SHMEM_THP, + MEMCG_DISABLE_FILE_THP, + NR_MEMCG_THP_FLAG, +}; + +#ifdef CONFIG_MEMCG +extern bool memcg_thp_control_test(struct mm_struct *mm, + enum memcg_thp_flag flag); +#else +static inline bool memcg_thp_control_test(struct mm_struct *mm, + enum memcg_thp_flag flag) +{ + return false; +} +#endif + +static inline bool memcg_transhuge_vma_enabled(struct vm_area_struct *vma) +{ + struct mm_struct *mm = vma->vm_mm; + + if (vma_is_anonymous(vma)) + return !memcg_thp_control_test(mm, MEMCG_DISABLE_ANON_THP); + + if (vma_is_shmem(vma)) + return !memcg_thp_control_test(mm, MEMCG_DISABLE_SHMEM_THP); + + if (vma->vm_file) + return !memcg_thp_control_test(mm, MEMCG_DISABLE_FILE_THP); + + return true; +} + static inline bool 
transhuge_vma_suitable(struct vm_area_struct *vma, unsigned long haddr) { @@ -144,6 +178,8 @@ static inline bool transhuge_vma_enabled(struct vm_area_struct *vma, if ((vm_flags & VM_NOHUGEPAGE) || test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags)) return false; + if (!memcg_transhuge_vma_enabled(vma)) + return false; return true; } @@ -219,10 +255,12 @@ static inline bool hugetext_vma_enabled(struct vm_area_struct *vma, if (!(vm_flags & VM_EXEC)) return false; - if (hugetext_file_enabled() && vma_is_hugetext_file(vma, vm_flags)) + if (hugetext_file_enabled() && vma_is_hugetext_file(vma, vm_flags) + && !memcg_thp_control_test(vma->vm_mm, MEMCG_DISABLE_FILE_THP)) return true; - if (hugetext_anon_enabled() && vma_is_hugetext_anon(vma, vm_flags)) + if (hugetext_anon_enabled() && vma_is_hugetext_anon(vma, vm_flags) + && !memcg_thp_control_test(vma->vm_mm, MEMCG_DISABLE_ANON_THP)) return true; return false; diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h index 7b8ac40ef1a9..39a7458b4b47 100644 --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h @@ -100,6 +100,9 @@ static inline void khugepaged_exit(struct mm_struct *mm) static inline int khugepaged_enter(struct vm_area_struct *vma, unsigned long vm_flags) { + if (unlikely(!memcg_transhuge_vma_enabled(vma))) + return 0; + if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags)) if ((khugepaged_always() || (shmem_file(vma->vm_file) && shmem_huge_enabled(vma)) || diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index c39dd5829756..6967258103aa 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -449,6 +449,7 @@ struct mem_cgroup { struct deferred_split deferred_split_queue; int thp_reclaim; struct thp_reclaim_ctrl tr_ctrl; + unsigned long thp_control; #endif #ifdef CONFIG_MEMSLI diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ea1068b4cd01..29cc8eb5c49d 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6828,6 +6828,47 @@ static int __init 
setup_thp_reclaim_proactive_init(char *str) } __setup("tr.proactive=", setup_thp_reclaim_proactive_init); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static int memcg_thp_control_show(struct seq_file *m, void *v) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); + unsigned long thp_control = memcg->thp_control; + + seq_printf(m, "0x%lx\n", thp_control); + return 0; +} + +static ssize_t memcg_thp_control_write(struct kernfs_open_file *of, + char *buf, size_t count, loff_t off) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + unsigned long thp_control; + int ret; + + buf = strstrip(buf); + ret = kstrtoul(buf, 0, &thp_control); + if (ret || thp_control >= (1 << NR_MEMCG_THP_FLAG)) + return -EINVAL; + + memcg->thp_control = thp_control; + return count; +} + +bool memcg_thp_control_test(struct mm_struct *mm, enum memcg_thp_flag flag) +{ + struct mem_cgroup *memcg; + unsigned long thp_control = 0; + + memcg = get_mem_cgroup_from_mm(mm); + if (memcg) { + thp_control = memcg->thp_control; + css_put(&memcg->css); + } + + return test_bit(flag, &thp_control); +} +#endif + static struct cftype mem_cgroup_legacy_files[] = { { .name = "usage_in_bytes", @@ -7152,6 +7193,12 @@ static struct cftype mem_cgroup_legacy_files[] = { .seq_show = memcg_thp_reclaim_ctrl_show, .write = memcg_thp_reclaim_ctrl_write, }, + { + .name = "thp_control", + .flags = CFTYPE_NOT_ON_ROOT, + .seq_show = memcg_thp_control_show, + .write = memcg_thp_control_write, + }, #endif { }, /* terminate */ @@ -7441,6 +7488,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) memcg->thp_reclaim = parent->thp_reclaim; memcg->tr_ctrl.threshold = parent->tr_ctrl.threshold; memcg->tr_ctrl.proactive = parent->tr_ctrl.proactive; + memcg->thp_control = parent->thp_control; #endif kidled_memcg_inherit_parent_buckets(parent, memcg); memcg->reap_background = parent->reap_background; diff --git a/mm/shmem.c b/mm/shmem.c index ffbcee8806e6..4622177acaeb 100644 --- a/mm/shmem.c +++ 
b/mm/shmem.c @@ -2129,7 +2129,8 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf) sgp = SGP_CACHE; if ((vma->vm_flags & VM_NOHUGEPAGE) || - test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags)) + test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags) || + memcg_thp_control_test(vma->vm_mm, MEMCG_DISABLE_SHMEM_THP)) sgp = SGP_NOHUGE; else if (vma->vm_flags & VM_HUGEPAGE) sgp = SGP_HUGE; -- Gitee