diff --git a/Documentation/admin-guide/cgroup-v1/memory.rst b/Documentation/admin-guide/cgroup-v1/memory.rst index 289616e45a28dd5efe9941f7fbab49ee6cd622b9..7a996f1561242f26f0e6069831984ad5e6e19c54 100644 --- a/Documentation/admin-guide/cgroup-v1/memory.rst +++ b/Documentation/admin-guide/cgroup-v1/memory.rst @@ -107,6 +107,7 @@ Brief summary of control files. memory.wmark_scale_factor the gap between wmark_low and wmark_high, percentage of max limit, default is 50 or 0.5% of max limit. The max value is 1000 or 10% of max limit. + memory.thp_control set/show THP controls. ==================================== ========================================== 1. History @@ -1029,6 +1030,34 @@ enable/disable the feature in each memcg. Write "1" to enable the priority oom a 3. Start reclamation in the background when the limit is not yet hit but the usage is getting closer +15. THP Control +=============== + +The memory.thp_control file controls THP behavior. At present, it allows +the administrator to disable anonymous, shmem and file THP individually +for tasks in this memcg. + +Although prohibiting tasks in a specific memcg from using THP does not +prevent fragmentation, it can reduce THP contention and memory +waste. + +This interface is a bitmask: bits 0, 1 and 2 disable anonymous, shmem +and file THP, respectively. + +For example, to disable anonymous THP: + + # echo 0x1 > /path/to/memcg/memory.thp_control + +And to disable shmem and file THP: + + # echo 0x6 > /path/to/memcg/memory.thp_control + +And to disable all THP: + + # echo 0x7 > /path/to/memcg/memory.thp_control + +Note that this interface is not applicable to the root memcg. 
+ Summary ======= diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 61b1cca51c892241edac7b2c1cdb5a42128a439a..3cabea13d5ea1220e2c45bc2ea88442aaad2d315 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -122,6 +122,40 @@ extern struct kobj_attribute shmem_enabled_attr; extern unsigned long transparent_hugepage_flags; +enum memcg_thp_flag { + MEMCG_DISABLE_ANON_THP, /* bit 0: anonymous THP */ + MEMCG_DISABLE_SHMEM_THP, /* bit 1: shmem THP */ + MEMCG_DISABLE_FILE_THP, /* bit 2: file THP */ + NR_MEMCG_THP_FLAG, /* number of flag bits, not a flag itself */ +}; +/* Test @flag for the memcg looked up from @mm; always false without memcg. */ +#ifdef CONFIG_MEMCG +extern bool memcg_thp_control_test(struct mm_struct *mm, + enum memcg_thp_flag flag); +#else +static inline bool memcg_thp_control_test(struct mm_struct *mm, + enum memcg_thp_flag flag) +{ + return false; +} +#endif +/* False if the memcg of vma->vm_mm has disabled THP for this VMA's type. */ +static inline bool memcg_transhuge_vma_enabled(struct vm_area_struct *vma) +{ + struct mm_struct *mm = vma->vm_mm; + + if (vma_is_anonymous(vma)) + return !memcg_thp_control_test(mm, MEMCG_DISABLE_ANON_THP); + + if (vma_is_shmem(vma)) + return !memcg_thp_control_test(mm, MEMCG_DISABLE_SHMEM_THP); + + if (vma->vm_file) + return !memcg_thp_control_test(mm, MEMCG_DISABLE_FILE_THP); + + return true; /* none of anon/shmem/file: no memcg restriction applies */ +} + static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, unsigned long haddr) { @@ -144,6 +178,8 @@ static inline bool transhuge_vma_enabled(struct vm_area_struct *vma, if ((vm_flags & VM_NOHUGEPAGE) || test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags)) return false; + if (!memcg_transhuge_vma_enabled(vma)) + return false; return true; } @@ -219,10 +255,12 @@ static inline bool hugetext_vma_enabled(struct vm_area_struct *vma, if (!(vm_flags & VM_EXEC)) return false; - if (hugetext_file_enabled() && vma_is_hugetext_file(vma, vm_flags)) + if (hugetext_file_enabled() && vma_is_hugetext_file(vma, vm_flags) + && !memcg_thp_control_test(vma->vm_mm, MEMCG_DISABLE_FILE_THP)) return true; - if (hugetext_anon_enabled() && vma_is_hugetext_anon(vma, vm_flags)) + if (hugetext_anon_enabled() && vma_is_hugetext_anon(vma, vm_flags) + && 
!memcg_thp_control_test(vma->vm_mm, MEMCG_DISABLE_ANON_THP)) return true; return false; diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h index 7b8ac40ef1a9c25f6ff7f75b18aa5914194325a6..39a7458b4b479593be4e006e28adecda263c9128 100644 --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h @@ -100,6 +100,9 @@ static inline void khugepaged_exit(struct mm_struct *mm) static inline int khugepaged_enter(struct vm_area_struct *vma, unsigned long vm_flags) { + if (unlikely(!memcg_transhuge_vma_enabled(vma))) + return 0; + if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags)) if ((khugepaged_always() || (shmem_file(vma->vm_file) && shmem_huge_enabled(vma)) || diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index c39dd5829756f8ffa502208e0374d5ab7ae12aff..6967258103aa28accc0c3af75d3249118d19b306 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -449,6 +449,7 @@ struct mem_cgroup { struct deferred_split deferred_split_queue; int thp_reclaim; struct thp_reclaim_ctrl tr_ctrl; + unsigned long thp_control; #endif #ifdef CONFIG_MEMSLI diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ea1068b4cd01829ea817c9cfffe4f81c6af3e3b3..29cc8eb5c49df4d10aa955b0708a568f75e772ef 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6828,6 +6828,50 @@ static int __init setup_thp_reclaim_proactive_init(char *str) } __setup("tr.proactive=", setup_thp_reclaim_proactive_init); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +/* Print this memcg's THP disable bitmask in hex. */ +static int memcg_thp_control_show(struct seq_file *m, void *v) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); + unsigned long thp_control = READ_ONCE(memcg->thp_control); + + seq_printf(m, "0x%lx\n", thp_control); + return 0; +} + +/* Set the THP disable bitmask; undefined bits or bad input give -EINVAL. */ +static ssize_t memcg_thp_control_write(struct kernfs_open_file *of, + char *buf, size_t count, loff_t off) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + unsigned long thp_control; + int ret; + + buf = strstrip(buf); + ret = kstrtoul(buf, 0, &thp_control); + 
if (ret || thp_control >= (1UL << NR_MEMCG_THP_FLAG)) + return -EINVAL; + + WRITE_ONCE(memcg->thp_control, thp_control); + return count; +} + +/* Test @flag in the mask of @mm's memcg; lockless, so use READ_ONCE(). */ +bool memcg_thp_control_test(struct mm_struct *mm, enum memcg_thp_flag flag) +{ + struct mem_cgroup *memcg; + unsigned long thp_control = 0; + + memcg = get_mem_cgroup_from_mm(mm); + if (memcg) { + thp_control = READ_ONCE(memcg->thp_control); + css_put(&memcg->css); + } + + return test_bit(flag, &thp_control); +} +#endif + static struct cftype mem_cgroup_legacy_files[] = { { .name = "usage_in_bytes", @@ -7152,6 +7196,12 @@ static struct cftype mem_cgroup_legacy_files[] = { .seq_show = memcg_thp_reclaim_ctrl_show, .write = memcg_thp_reclaim_ctrl_write, }, + { + .name = "thp_control", + .flags = CFTYPE_NOT_ON_ROOT, + .seq_show = memcg_thp_control_show, + .write = memcg_thp_control_write, + }, #endif { }, /* terminate */ @@ -7441,6 +7491,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) memcg->thp_reclaim = parent->thp_reclaim; memcg->tr_ctrl.threshold = parent->tr_ctrl.threshold; memcg->tr_ctrl.proactive = parent->tr_ctrl.proactive; + memcg->thp_control = READ_ONCE(parent->thp_control); #endif kidled_memcg_inherit_parent_buckets(parent, memcg); memcg->reap_background = parent->reap_background; diff --git a/mm/shmem.c b/mm/shmem.c index ffbcee8806e643427d5bbd8dc45ec9e0387b920e..4622177acaeb20281ecb77d05735aceb7f4f60e8 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2129,7 +2129,8 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf) sgp = SGP_CACHE; if ((vma->vm_flags & VM_NOHUGEPAGE) || - test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags)) + test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags) || + memcg_thp_control_test(vma->vm_mm, MEMCG_DISABLE_SHMEM_THP)) sgp = SGP_NOHUGE; else if (vma->vm_flags & VM_HUGEPAGE) sgp = SGP_HUGE;