diff --git a/fs/exec.c b/fs/exec.c
index 792d62632e92aece555f19f5622a3493c712db03..43378e25abcb63ff9bd04e3ba10a9a1c04fc25cb 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -65,6 +65,9 @@
 #include
 #include
 #include
+#ifndef __GENKSYMS__
+#include <linux/ksm.h>
+#endif
 
 #include
 #include
@@ -252,6 +255,14 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
 		goto err_free;
 	}
 
+	/*
+	 * Need to be called with mmap write lock
+	 * held, to avoid race with ksmd.
+	 */
+	err = ksm_execve(mm);
+	if (err)
+		goto err_ksm;
+
 	/*
 	 * Place the stack at the largest stack address the architecture
 	 * supports. Later, we'll move this to an appropriate place. We don't
@@ -273,6 +284,8 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
 	bprm->p = vma->vm_end - sizeof(void *);
 	return 0;
 err:
+	ksm_exit(mm);
+err_ksm:
 	mmap_write_unlock(mm);
 err_free:
 	bprm->vma = NULL;
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 4e02e8a770a90f640e952ec46879fd84c92db78d..debef5446114fbb9a2dece3f40e2a608f351b218 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -45,6 +45,14 @@ static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 	return 0;
 }
 
+static inline int ksm_execve(struct mm_struct *mm)
+{
+	if (test_bit(MMF_VM_MERGE_ANY, &mm->flags))
+		return __ksm_enter(mm);
+
+	return 0;
+}
+
 static inline void ksm_exit(struct mm_struct *mm)
 {
 	if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
@@ -83,6 +91,11 @@ static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 	return 0;
 }
 
+static inline int ksm_execve(struct mm_struct *mm)
+{
+	return 0;
+}
+
 static inline void ksm_exit(struct mm_struct *mm)
 {
 }
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 287c54141a90935f833c0ea321d8eb8678e855e0..ef3a6a8e640f78ab7b961f1e1154aa36433b2001 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -414,7 +414,11 @@ struct mem_cgroup {
 #else
 	KABI_RESERVE(7)
 #endif
+#ifdef CONFIG_KSM
+	KABI_USE(8, bool auto_ksm_enabled)
+#else
 	KABI_RESERVE(8)
+#endif
 
 	struct mem_cgroup_per_node *nodeinfo[0];
 	/* WARNING: nodeinfo must be the last member here */
diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
index 6a4d85c7a5f34369a7083d4d66081a96fa2ad5da..103ca84e379ce30f92f3362ee019b3337cb6a57d 100644
--- a/include/linux/sched/coredump.h
+++ b/include/linux/sched/coredump.h
@@ -70,13 +70,15 @@ static inline int get_dumpable(struct mm_struct *mm)
 #define MMF_UNSTABLE		22	/* mm is unstable for copy_from_user */
 #define MMF_HUGE_ZERO_PAGE	23	/* mm has ever used the global huge zero page */
 #define MMF_DISABLE_THP		24	/* disable THP for all VMAs */
+#define MMF_DISABLE_THP_MASK	(1 << MMF_DISABLE_THP)
 #define MMF_OOM_VICTIM		25	/* mm is the oom victim */
 #define MMF_OOM_REAP_QUEUED	26	/* mm was queued for oom_reaper */
 #define MMF_MULTIPROCESS	27	/* mm is shared between processes */
-#define MMF_DISABLE_THP_MASK	(1 << MMF_DISABLE_THP)
+#define MMF_VM_MERGE_ANY	29
+#define MMF_VM_MERGE_ANY_MASK	(1 << MMF_VM_MERGE_ANY)
 
 #define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
-				 MMF_DISABLE_THP_MASK)
+				 MMF_DISABLE_THP_MASK | MMF_VM_MERGE_ANY_MASK)
 
 #define MMF_VM_MERGE_ANY	29
 #endif /* _LINUX_SCHED_COREDUMP_H */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index db44ade93455fd4cae019e7ad3c7e373de86f1c7..9007c3554771afeab314d5610fe0813daecc274b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5772,7 +5772,7 @@ static ssize_t memcg_high_async_ratio_write(struct kernfs_open_file *of,
 }
 
 #ifdef CONFIG_KSM
-static int memcg_set_ksm_for_tasks(struct mem_cgroup *memcg, bool enable)
+static int __memcg_set_ksm_for_tasks(struct mem_cgroup *memcg, bool enable)
 {
 	struct task_struct *task;
 	struct mm_struct *mm;
@@ -5806,6 +5806,27 @@ static int memcg_set_ksm_for_tasks(struct mem_cgroup *memcg, bool enable)
 	return ret;
 }
 
+static int memcg_set_ksm_for_tasks(struct mem_cgroup *memcg, bool enable)
+{
+	struct mem_cgroup *iter;
+	int ret = 0;
+
+	for_each_mem_cgroup_tree(iter, memcg) {
+		if (READ_ONCE(iter->auto_ksm_enabled) == enable)
+			continue;
+
+		ret = __memcg_set_ksm_for_tasks(iter, enable);
+		if (ret) {
+			mem_cgroup_iter_break(memcg, iter);
+			break;
+		}
+
+		WRITE_ONCE(iter->auto_ksm_enabled, enable);
+	}
+
+	return ret;
+}
+
 static int memory_ksm_show(struct seq_file *m, void *v)
 {
 	unsigned long ksm_merging_pages = 0;
@@ -5833,6 +5854,7 @@ static int memory_ksm_show(struct seq_file *m, void *v)
 	}
 	css_task_iter_end(&it);
 
+	seq_printf(m, "auto ksm enabled: %d\n", READ_ONCE(memcg->auto_ksm_enabled));
 	seq_printf(m, "merge any tasks: %u\n", tasks);
 	seq_printf(m, "ksm_rmap_items %lu\n", ksm_rmap_items);
 	seq_printf(m, "ksm_merging_pages %lu\n", ksm_merging_pages);
@@ -5855,12 +5877,48 @@ static ssize_t memory_ksm_write(struct kernfs_open_file *of, char *buf,
 	if (err)
 		return err;
 
+	if (READ_ONCE(memcg->auto_ksm_enabled) == enable)
+		return nbytes;
+
 	err = memcg_set_ksm_for_tasks(memcg, enable);
 	if (err)
 		return err;
 
 	return nbytes;
 }
+
+static void memcg_attach_ksm(struct cgroup_taskset *tset)
+{
+	struct cgroup_subsys_state *css;
+	struct mem_cgroup *memcg;
+	struct task_struct *task;
+
+	cgroup_taskset_first(tset, &css);
+	memcg = mem_cgroup_from_css(css);
+	if (!READ_ONCE(memcg->auto_ksm_enabled))
+		return;
+
+	cgroup_taskset_for_each(task, css, tset) {
+		struct mm_struct *mm = get_task_mm(task);
+
+		if (!mm)
+			continue;
+
+		if (mmap_write_lock_killable(mm)) {
+			mmput(mm);
+			continue;
+		}
+
+		ksm_enable_merge_any(mm);
+
+		mmap_write_unlock(mm);
+		mmput(mm);
+	}
+}
+#else
+static inline void memcg_attach_ksm(struct cgroup_taskset *tset)
+{
+}
 #endif /* CONFIG_KSM */
 
 #ifdef CONFIG_CGROUP_V1_WRITEBACK
@@ -6430,6 +6488,9 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 	}
 
 	hugetlb_pool_inherit(memcg, parent);
+#ifdef CONFIG_KSM
+	memcg->auto_ksm_enabled = READ_ONCE(parent->auto_ksm_enabled);
+#endif
 
 	error = memcg_online_kmem(memcg);
 	if (error)
@@ -7345,6 +7406,12 @@ static void mem_cgroup_move_charge(void)
 	atomic_dec(&mc.from->moving_account);
 }
 
+static void mem_cgroup_attach(struct cgroup_taskset *tset)
+{
+	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
+		memcg_attach_ksm(tset);
+}
+
 static void mem_cgroup_move_task(void)
 {
 	if (mc.to) {
@@ -7360,6 +7427,9 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
 static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset)
 {
 }
+static void mem_cgroup_attach(struct cgroup_taskset *tset)
+{
+}
 static void mem_cgroup_move_task(void)
 {
 }
@@ -7623,6 +7693,7 @@ struct cgroup_subsys memory_cgrp_subsys = {
 	.css_rstat_flush = mem_cgroup_css_rstat_flush,
 	.can_attach = mem_cgroup_can_attach,
 	.cancel_attach = mem_cgroup_cancel_attach,
+	.attach = mem_cgroup_attach,
 	.post_attach = mem_cgroup_move_task,
 	.bind = mem_cgroup_bind,
 	.dfl_cftypes = memory_files,
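
For context, the memory.ksm control file extended above is a cgroup v1 interface: the new .attach callback only calls memcg_attach_ksm() when the memory controller is not on the default hierarchy. Below is a minimal userspace sketch of how the knob would be exercised; the mount point /sys/fs/cgroup/memory and the cgroup name "test" are illustrative assumptions, not part of this patch.

/*
 * Hypothetical exerciser for the memory.ksm knob added above.
 * Assumes a cgroup v1 memory hierarchy mounted at /sys/fs/cgroup/memory
 * with an existing cgroup "test"; both paths are assumptions.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/fs/cgroup/memory/test/memory.ksm";
	char buf[256];
	ssize_t n;
	int fd;

	/* Enable auto-KSM for every task in the cgroup and its subtree. */
	fd = open(path, O_WRONLY);
	if (fd < 0 || write(fd, "1", 1) != 1) {
		perror("write memory.ksm");
		return 1;
	}
	close(fd);

	/* Read back the per-cgroup state memory_ksm_show() reports, e.g.
	 * "auto ksm enabled: 1", "merge any tasks: <n>", "ksm_rmap_items",
	 * "ksm_merging_pages". */
	fd = open(path, O_RDONLY);
	if (fd < 0) {
		perror("read memory.ksm");
		return 1;
	}
	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		fputs(buf, stdout);
	}
	close(fd);
	return 0;
}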