diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 9d2f717c1f7c1cdf66ec458eec264a6d7b10acec..faeabaab5a9c96783acafd763d58375e9225d69d 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -154,9 +154,11 @@ CONFIG_ARCH_SUPPORTS_INT128=y CONFIG_NUMA_BALANCING=y CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y CONFIG_CGROUPS=y +CONFIG_CGROUP_V1_KILL=y CONFIG_PAGE_COUNTER=y # CONFIG_CGROUP_FAVOR_DYNMODS is not set CONFIG_MEMCG=y +CONFIG_MEMCG_V1_RECLAIM=y CONFIG_MEMCG_KMEM=y CONFIG_BLK_CGROUP=y CONFIG_CGROUP_WRITEBACK=y diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index b40100e166838208b2a77a09d011f190c52ceb53..54932bcd661556c21f441b121b015857a8b48c79 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -176,9 +176,11 @@ CONFIG_ARCH_SUPPORTS_INT128=y CONFIG_NUMA_BALANCING=y CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y CONFIG_CGROUPS=y +CONFIG_CGROUP_V1_KILL=y CONFIG_PAGE_COUNTER=y # CONFIG_CGROUP_FAVOR_DYNMODS is not set CONFIG_MEMCG=y +CONFIG_MEMCG_V1_RECLAIM=y CONFIG_MEMCG_KMEM=y CONFIG_BLK_CGROUP=y CONFIG_CGROUP_WRITEBACK=y diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e4e24da16d2c398493e197cfb97f25be9940e3d0..133e2e5b0ae07314d7a337b932acba401ddb33f5 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -333,6 +333,11 @@ struct mem_cgroup { struct lru_gen_mm_list mm_list; #endif +#ifdef CONFIG_MEMCG_V1_RECLAIM + int high_async_ratio; + bool high_async_reclaim; +#endif + struct mem_cgroup_per_node *nodeinfo[]; }; @@ -1113,6 +1118,20 @@ static inline void count_memcg_event_mm(struct mm_struct *mm, rcu_read_unlock(); } +static bool memcg_event_add(struct mem_cgroup *memcg, + enum memcg_memory_event event) +{ + if (!mem_cgroup_is_root(memcg)) + return true; + +#ifdef CONFIG_MEMCG_V1_RECLAIM + if (event == MEMCG_OOM_KILL && !cgroup_subsys_on_dfl(memory_cgrp_subsys)) + return true; +#endif + + return false; +} + static inline void memcg_memory_event(struct mem_cgroup *memcg, enum memcg_memory_event event) { @@ -1129,13 +1148,14 @@ static inline void memcg_memory_event(struct mem_cgroup *memcg, cgroup_file_notify(&memcg->swap_events_file); else cgroup_file_notify(&memcg->events_file); - +#ifndef CONFIG_MEMCG_V1_RECLAIM if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) break; +#endif if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS) break; } while ((memcg = parent_mem_cgroup(memcg)) && - !mem_cgroup_is_root(memcg)); + memcg_event_add(memcg, event)); } static inline void memcg_memory_event_mm(struct mm_struct *mm, diff --git a/init/Kconfig b/init/Kconfig index 6d35728b94b2b3b12bfb47c6fcea047dbf145be6..d4d9ba7e1b480d752e31d4134c42e9ab5c94b403 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -944,6 +944,11 @@ config MEMCG help Provides control over the memory footprint of tasks in a cgroup. +config MEMCG_V1_RECLAIM + bool "Enable Per Memory Cgroup Qos Reclaim in Cgroup V1" + depends on MEMCG + default n + config MEMCG_KMEM bool depends on MEMCG @@ -1175,6 +1180,11 @@ config SOCK_CGROUP_DATA bool default n +config CGROUP_V1_KILL + bool "Kill All Tasks In Cgroup V1" + default n + depends on CGROUPS + endif # CGROUPS menuconfig NAMESPACES diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index c56071f150f2ae74362f6a397c22687409372736..d5a197d4b0ec0da7a23605b675d97295d1a2251a 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -264,6 +264,9 @@ int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node, int __cgroup_task_count(const struct cgroup *cgrp); int cgroup_task_count(const struct cgroup *cgrp); +ssize_t cgroup_kill_write(struct kernfs_open_file *of, char *buf, size_t nbytes, + loff_t off); + /* * rstat.c */ diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 76db6c67e39a9207dfecbc1a5fd375401e6a6142..134a15e1d83a9af2af2c8174e7ff3ac9dca6a751 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -660,6 +660,13 @@ struct cftype cgroup1_base_files[] = { .write = cgroup_release_agent_write, .max_write_len = PATH_MAX - 1, }, +#ifdef CONFIG_CGROUP_V1_KILL + { + .name = "cgroup.kill", + .flags = CFTYPE_NOT_ON_ROOT, + .write = cgroup_kill_write, + }, +#endif { } /* terminate */ }; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 518725b57200c2fe6d1d01a2280bd5156073ae0c..8db5e7eb242b3220bd5ea9c97fe1f9e053eb32d6 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -3965,8 +3965,8 @@ static void cgroup_kill(struct cgroup *cgrp) __cgroup_kill(dsct); } -static ssize_t cgroup_kill_write(struct kernfs_open_file *of, char *buf, - size_t nbytes, loff_t off) +ssize_t cgroup_kill_write(struct kernfs_open_file *of, char *buf, size_t nbytes, + loff_t off) { ssize_t ret = 0; int kill; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 8a881ab21f6cbd97c6464c30a3496936c7cd367a..ec4160b7fbbb0492d162189e8fe6054e56ea0510 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -105,6 +105,18 @@ static bool do_memsw_account(void) #define THRESHOLDS_EVENTS_TARGET 128 #define SOFTLIMIT_EVENTS_TARGET 1024 +/* + * memcg warning watermark = memory.high * memcg->high_async_ratio / + * HIGH_ASYNC_RATIO_BASE. + * when memcg usage is larger than warning watermark, but smaller than + * memory.high, start memcg async reclaim; + * when memcg->high_async_ratio is HIGH_ASYNC_RATIO_BASE, memcg async + * relcaim is disabled; + */ + +#define HIGH_ASYNC_RATIO_BASE 100 +#define HIGH_ASYNC_RATIO_GAP 10 + /* * Cgroups above their limits are maintained in a RB-Tree, independent of * their hierarchy representation @@ -2406,12 +2418,52 @@ static unsigned long reclaim_high(struct mem_cgroup *memcg, return nr_reclaimed; } +#ifdef CONFIG_MEMCG_V1_RECLAIM +static bool is_high_async_reclaim(struct mem_cgroup *memcg) +{ + int ratio = READ_ONCE(memcg->high_async_ratio); + unsigned long memcg_high = READ_ONCE(memcg->memory.high); + + if (ratio == HIGH_ASYNC_RATIO_BASE || memcg_high == PAGE_COUNTER_MAX) + return false; + + return page_counter_read(&memcg->memory) > + memcg_high * ratio / HIGH_ASYNC_RATIO_BASE; +} + +static void async_reclaim_high(struct mem_cgroup *memcg) +{ + unsigned long nr_pages, pflags; + unsigned long memcg_high = READ_ONCE(memcg->memory.high); + unsigned long memcg_usage = page_counter_read(&memcg->memory); + int ratio = READ_ONCE(memcg->high_async_ratio) - HIGH_ASYNC_RATIO_GAP; + unsigned long safe_pages = memcg_high * ratio / HIGH_ASYNC_RATIO_BASE; + + if (!is_high_async_reclaim(memcg)) { + WRITE_ONCE(memcg->high_async_reclaim, false); + return; + } + + psi_memstall_enter(&pflags); + nr_pages = memcg_usage > safe_pages ? memcg_usage - safe_pages : + MEMCG_CHARGE_BATCH; + try_to_free_mem_cgroup_pages(memcg, nr_pages, GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP); + psi_memstall_leave(&pflags); + WRITE_ONCE(memcg->high_async_reclaim, false); +} +#endif + static void high_work_func(struct work_struct *work) { - struct mem_cgroup *memcg; + struct mem_cgroup *memcg = container_of(work, struct mem_cgroup, + high_work); - memcg = container_of(work, struct mem_cgroup, high_work); - reclaim_high(memcg, MEMCG_CHARGE_BATCH, GFP_KERNEL); +#ifdef CONFIG_MEMCG_V1_RECLAIM + if (READ_ONCE(memcg->high_async_reclaim)) + async_reclaim_high(memcg); + else +#endif + reclaim_high(memcg, MEMCG_CHARGE_BATCH, GFP_KERNEL); } /* @@ -2799,6 +2851,13 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask, } continue; } +#ifdef CONFIG_MEMCG_V1_RECLAIM + if (is_high_async_reclaim(memcg) && !mem_high) { + WRITE_ONCE(memcg->high_async_reclaim, true); + schedule_work(&memcg->high_work); + break; + } +#endif if (mem_high || swap_high) { /* @@ -4530,6 +4589,11 @@ static int mem_cgroup_oom_control_read(struct seq_file *sf, void *v) seq_printf(sf, "under_oom %d\n", (bool)memcg->under_oom); seq_printf(sf, "oom_kill %lu\n", atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL])); +#ifdef CONFIG_MEMCG_V1_RECLAIM + seq_printf(sf, "oom_kill_local %lu\n", + atomic_long_read(&memcg->memory_events_local[MEMCG_OOM_KILL])); +#endif + return 0; } @@ -5010,6 +5074,215 @@ static int mem_cgroup_slab_show(struct seq_file *m, void *p) } #endif +static int seq_puts_memcg_tunable(struct seq_file *m, unsigned long value) +{ + if (value == PAGE_COUNTER_MAX) + seq_puts(m, "max\n"); + else + seq_printf(m, "%llu\n", (u64)value * PAGE_SIZE); + + return 0; +} + +static int memory_min_show(struct seq_file *m, void *v) +{ + return seq_puts_memcg_tunable(m, + READ_ONCE(mem_cgroup_from_seq(m)->memory.min)); +} + +static ssize_t memory_min_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + unsigned long min; + int err; + + buf = strstrip(buf); + err = page_counter_memparse(buf, "max", &min); + if (err) + return err; + + page_counter_set_min(&memcg->memory, min); + + return nbytes; +} + +static int memory_low_show(struct seq_file *m, void *v) +{ + return seq_puts_memcg_tunable(m, + READ_ONCE(mem_cgroup_from_seq(m)->memory.low)); +} + +static ssize_t memory_low_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + unsigned long low; + int err; + + buf = strstrip(buf); + err = page_counter_memparse(buf, "max", &low); + if (err) + return err; + + page_counter_set_low(&memcg->memory, low); + + return nbytes; +} + +static int memory_high_show(struct seq_file *m, void *v) +{ + return seq_puts_memcg_tunable(m, + READ_ONCE(mem_cgroup_from_seq(m)->memory.high)); +} + +static ssize_t memory_high_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + unsigned int nr_retries = MAX_RECLAIM_RETRIES; + bool drained = false; + unsigned long high; + int err; + + buf = strstrip(buf); + err = page_counter_memparse(buf, "max", &high); + if (err) + return err; + + page_counter_set_high(&memcg->memory, high); + + for (;;) { + unsigned long nr_pages = page_counter_read(&memcg->memory); + unsigned long reclaimed; + + if (nr_pages <= high) + break; + + if (signal_pending(current)) + break; + + if (!drained) { + drain_all_stock(memcg); + drained = true; + continue; + } + + reclaimed = try_to_free_mem_cgroup_pages(memcg, nr_pages - high, + GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP); + + if (!reclaimed && !nr_retries--) + break; + } + + memcg_wb_domain_size_changed(memcg); + return nbytes; +} + +#ifdef CONFIG_MEMCG_V1_RECLAIM +static void __memcg_events_show(struct seq_file *m, atomic_long_t *events) +{ + seq_printf(m, "low %lu\n", atomic_long_read(&events[MEMCG_LOW])); + seq_printf(m, "high %lu\n", atomic_long_read(&events[MEMCG_HIGH])); + seq_printf(m, "limit_in_bytes %lu\n", + atomic_long_read(&events[MEMCG_MAX])); + seq_printf(m, "oom %lu\n", atomic_long_read(&events[MEMCG_OOM])); +} + +static int memcg_events_show(struct seq_file *m, void *v) +{ + struct mem_cgroup *memcg = mem_cgroup_from_seq(m); + + __memcg_events_show(m, memcg->memory_events); + return 0; +} + +static int memcg_events_local_show(struct seq_file *m, void *v) +{ + struct mem_cgroup *memcg = mem_cgroup_from_seq(m); + + __memcg_events_show(m, memcg->memory_events_local); + return 0; +} + +static int memcg_high_async_ratio_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%d\n", + READ_ONCE(mem_cgroup_from_seq(m)->high_async_ratio)); + return 0; +} + +static ssize_t memcg_high_async_ratio_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, + loff_t off) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + int ret, high_async_ratio; + + buf = strstrip(buf); + if (!buf) + return -EINVAL; + + ret = kstrtoint(buf, 0, &high_async_ratio); + if (ret) + return ret; + + if (high_async_ratio > HIGH_ASYNC_RATIO_BASE || + high_async_ratio <= HIGH_ASYNC_RATIO_GAP) + return -EINVAL; + + WRITE_ONCE(memcg->high_async_ratio, high_async_ratio); + + return nbytes; +} +#endif + +static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + unsigned int nr_retries = MAX_RECLAIM_RETRIES; + unsigned long nr_to_reclaim, nr_reclaimed = 0; + unsigned int reclaim_options; + int err; + + buf = strstrip(buf); + err = page_counter_memparse(buf, "", &nr_to_reclaim); + if (err) + return err; + + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && + mem_cgroup_is_root(memcg)) + return -EINVAL; + + reclaim_options = MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE; + while (nr_reclaimed < nr_to_reclaim) { + unsigned long reclaimed; + + if (signal_pending(current)) + return -EINTR; + + /* + * This is the final attempt, drain percpu lru caches in the + * hope of introducing more evictable pages for + * try_to_free_mem_cgroup_pages(). + */ + if (!nr_retries) + lru_add_drain_all(); + + reclaimed = try_to_free_mem_cgroup_pages(memcg, + min(nr_to_reclaim - nr_reclaimed, SWAP_CLUSTER_MAX), + GFP_KERNEL, reclaim_options); + + if (!reclaimed && !nr_retries--) + return -EAGAIN; + + nr_reclaimed += reclaimed; + } + + return nbytes; +} + static int memory_stat_show(struct seq_file *m, void *v); static struct cftype mem_cgroup_legacy_files[] = { @@ -5138,6 +5411,48 @@ static struct cftype mem_cgroup_legacy_files[] = { .write = mem_cgroup_reset, .read_u64 = mem_cgroup_read_u64, }, +#ifdef CONFIG_MEMCG_V1_RECLAIM + { + .name = "min", + .flags = CFTYPE_NOT_ON_ROOT, + .seq_show = memory_min_show, + .write = memory_min_write, + }, + { + .name = "low", + .flags = CFTYPE_NOT_ON_ROOT, + .seq_show = memory_low_show, + .write = memory_low_write, + }, + { + .name = "high", + .flags = CFTYPE_NOT_ON_ROOT, + .seq_show = memory_high_show, + .write = memory_high_write, + }, + { + .name = "events", + .flags = CFTYPE_NOT_ON_ROOT, + .file_offset = offsetof(struct mem_cgroup, events_file), + .seq_show = memcg_events_show, + }, + { + .name = "events.local", + .flags = CFTYPE_NOT_ON_ROOT, + .file_offset = offsetof(struct mem_cgroup, events_local_file), + .seq_show = memcg_events_local_show, + }, + { + .name = "high_async_ratio", + .flags = CFTYPE_NOT_ON_ROOT, + .seq_show = memcg_high_async_ratio_show, + .write = memcg_high_async_ratio_write, + }, + { + .name = "reclaim", + .write = memory_reclaim, + }, +#endif { }, /* terminate */ }; @@ -5364,6 +5679,9 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) WRITE_ONCE(memcg->soft_limit, PAGE_COUNTER_MAX); #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP) memcg->zswap_max = PAGE_COUNTER_MAX; +#endif +#ifdef CONFIG_MEMCG_V1_RECLAIM + memcg->high_async_ratio = HIGH_ASYNC_RATIO_BASE; #endif page_counter_set_high(&memcg->swap, PAGE_COUNTER_MAX); if (parent) { @@ -6402,16 +6720,6 @@ static void mem_cgroup_attach(struct cgroup_taskset *tset) } #endif /* CONFIG_LRU_GEN */ -static int seq_puts_memcg_tunable(struct seq_file *m, unsigned long value) -{ - if (value == PAGE_COUNTER_MAX) - seq_puts(m, "max\n"); - else - seq_printf(m, "%llu\n", (u64)value * PAGE_SIZE); - - return 0; -} - static u64 memory_current_read(struct cgroup_subsys_state *css, struct cftype *cft) { @@ -6428,101 +6736,6 @@ static u64 memory_peak_read(struct cgroup_subsys_state *css, return (u64)memcg->memory.watermark * PAGE_SIZE; } -static int memory_min_show(struct seq_file *m, void *v) -{ - return seq_puts_memcg_tunable(m, - READ_ONCE(mem_cgroup_from_seq(m)->memory.min)); -} - -static ssize_t memory_min_write(struct kernfs_open_file *of, - char *buf, size_t nbytes, loff_t off) -{ - struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); - unsigned long min; - int err; - - buf = strstrip(buf); - err = page_counter_memparse(buf, "max", &min); - if (err) - return err; - - page_counter_set_min(&memcg->memory, min); - - return nbytes; -} - -static int memory_low_show(struct seq_file *m, void *v) -{ - return seq_puts_memcg_tunable(m, - READ_ONCE(mem_cgroup_from_seq(m)->memory.low)); -} - -static ssize_t memory_low_write(struct kernfs_open_file *of, - char *buf, size_t nbytes, loff_t off) -{ - struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); - unsigned long low; - int err; - - buf = strstrip(buf); - err = page_counter_memparse(buf, "max", &low); - if (err) - return err; - - page_counter_set_low(&memcg->memory, low); - - return nbytes; -} - -static int memory_high_show(struct seq_file *m, void *v) -{ - return seq_puts_memcg_tunable(m, - READ_ONCE(mem_cgroup_from_seq(m)->memory.high)); -} - -static ssize_t memory_high_write(struct kernfs_open_file *of, - char *buf, size_t nbytes, loff_t off) -{ - struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); - unsigned int nr_retries = MAX_RECLAIM_RETRIES; - bool drained = false; - unsigned long high; - int err; - - buf = strstrip(buf); - err = page_counter_memparse(buf, "max", &high); - if (err) - return err; - - page_counter_set_high(&memcg->memory, high); - - for (;;) { - unsigned long nr_pages = page_counter_read(&memcg->memory); - unsigned long reclaimed; - - if (nr_pages <= high) - break; - - if (signal_pending(current)) - break; - - if (!drained) { - drain_all_stock(memcg); - drained = true; - continue; - } - - reclaimed = try_to_free_mem_cgroup_pages(memcg, nr_pages - high, - GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP); - - if (!reclaimed && !nr_retries--) - break; - } - - memcg_wb_domain_size_changed(memcg); - return nbytes; -} - static int memory_max_show(struct seq_file *m, void *v) { return seq_puts_memcg_tunable(m, @@ -6687,48 +6900,6 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of, return nbytes; } -static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf, - size_t nbytes, loff_t off) -{ - struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); - unsigned int nr_retries = MAX_RECLAIM_RETRIES; - unsigned long nr_to_reclaim, nr_reclaimed = 0; - unsigned int reclaim_options; - int err; - - buf = strstrip(buf); - err = page_counter_memparse(buf, "", &nr_to_reclaim); - if (err) - return err; - - reclaim_options = MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE; - while (nr_reclaimed < nr_to_reclaim) { - unsigned long reclaimed; - - if (signal_pending(current)) - return -EINTR; - - /* - * This is the final attempt, drain percpu lru caches in the - * hope of introducing more evictable pages for - * try_to_free_mem_cgroup_pages(). - */ - if (!nr_retries) - lru_add_drain_all(); - - reclaimed = try_to_free_mem_cgroup_pages(memcg, - min(nr_to_reclaim - nr_reclaimed, SWAP_CLUSTER_MAX), - GFP_KERNEL, reclaim_options); - - if (!reclaimed && !nr_retries--) - return -EAGAIN; - - nr_reclaimed += reclaimed; - } - - return nbytes; -} - static struct cftype memory_files[] = { { .name = "current",