From 901b37204cbebc72bedaec7639730b969d6b5745 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Tue, 28 May 2024 18:14:36 +0800 Subject: [PATCH 1/6] mm/dynamic_hugetlb: support to allocate page by specifying memcg hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9H66Y CVE: NA -------------------------------- Next patch will support to allocate page from a hpool for zram and zspool if they are bounded with a memcg, and the memcg is bounded with the hpool. As a preparation, support to allocate page by specifying memcg. Signed-off-by: Liu Shixin --- include/linux/dynamic_hugetlb.h | 7 +++++-- mm/dynamic_hugetlb.c | 25 ++++++++++++++++++++----- mm/page_alloc.c | 2 +- 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/include/linux/dynamic_hugetlb.h b/include/linux/dynamic_hugetlb.h index 06b20a24dfe6..10bff6f093e0 100644 --- a/include/linux/dynamic_hugetlb.h +++ b/include/linux/dynamic_hugetlb.h @@ -103,7 +103,8 @@ void hugetlb_pool_inherit(struct mem_cgroup *memcg, struct mem_cgroup *parent); int hugetlb_pool_destroy(struct cgroup *cgrp); void __init dynamic_hugetlb_init(void); -struct page *alloc_page_from_dhugetlb_pool(gfp_t gfp, unsigned int order, +struct page *alloc_page_from_dhugetlb_pool(struct mem_cgroup *memcg, + gfp_t gfp, unsigned int order, unsigned int flags); bool free_page_to_dhugetlb_pool(struct page *page); void free_page_list_to_dhugetlb_pool(struct list_head *list); @@ -142,7 +143,9 @@ static inline void __init dynamic_hugetlb_init(void) { } -static inline struct page *alloc_page_from_dhugetlb_pool(gfp_t gfp, unsigned int order, +static inline struct page *alloc_page_from_dhugetlb_pool(struct mem_cgroup *memcg, + gfp_t gfp, + unsigned int order, unsigned int flags) { return NULL; diff --git a/mm/dynamic_hugetlb.c b/mm/dynamic_hugetlb.c index 8199ef893f4a..176b98584420 100644 --- a/mm/dynamic_hugetlb.c +++ b/mm/dynamic_hugetlb.c @@ -509,10 +509,9 @@ static bool should_allocate_from_dhugetlb_pool(gfp_t gfp_mask) return true; } -static struct page *__alloc_page_from_dhugetlb_pool(void) +static struct page *__alloc_page_from_dpool(struct dhugetlb_pool *hpool) { struct percpu_pages_pool *percpu_pool; - struct dhugetlb_pool *hpool; struct page *page = NULL; unsigned long flags; @@ -563,9 +562,11 @@ static struct page *__alloc_page_from_dhugetlb_pool(void) return page; } -struct page *alloc_page_from_dhugetlb_pool(gfp_t gfp, unsigned int order, +struct page *alloc_page_from_dhugetlb_pool(struct mem_cgroup *memcg, + gfp_t gfp, unsigned int order, unsigned int flags) { + struct dhugetlb_pool *hpool; struct page *page = NULL; if (!dhugetlb_enabled) @@ -574,11 +575,25 @@ struct page *alloc_page_from_dhugetlb_pool(gfp_t gfp, unsigned int order, if (order != 0) return NULL; - if (should_allocate_from_dhugetlb_pool(gfp)) - page = __alloc_page_from_dhugetlb_pool(); + if (memcg) { + hpool = memcg->hpool; + if (!hpool) + return NULL; + goto alloc_page; + } + + if (!should_allocate_from_dhugetlb_pool(gfp)) + return NULL; + + hpool = find_hpool_by_task(current); + if (!hpool) + return NULL; +alloc_page: + page = __alloc_page_from_dpool(hpool); if (page) prep_new_page(page, order, gfp, flags); + return page; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1ba392f11e6b..e5878707222f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5397,7 +5397,7 @@ struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid, alloc_flags |= alloc_flags_nofragment(ac.preferred_zoneref->zone, gfp); /* Before alloc from buddy system, alloc 
from hpool firstly */ - page = alloc_page_from_dhugetlb_pool(alloc_gfp, order, alloc_flags); + page = alloc_page_from_dhugetlb_pool(NULL, alloc_gfp, order, alloc_flags); if (page) goto out; -- Gitee From b2610fef24c8e0d81303e3ab0d0f0be8965d85c5 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Tue, 28 May 2024 18:14:37 +0800 Subject: [PATCH 2/6] zram: set memcg for zram device hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9H66Y CVE: NA -------------------------------- Add a new zram file mem_cgroup to set memcg for zram device. The memcg will be record in zs_pool too when create zs_pool. Signed-off-by: Liu Shixin --- drivers/block/zram/zram_drv.c | 107 +++++++++++++++++++++++++++++++++- drivers/block/zram/zram_drv.h | 4 ++ include/linux/memcontrol.h | 3 + include/linux/zsmalloc.h | 3 + mm/Kconfig | 8 +++ mm/memcontrol.c | 27 +++++++++ mm/zsmalloc.c | 27 +++++++++ 7 files changed, 178 insertions(+), 1 deletion(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 955f0c4d358f..0c2c068b2374 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1265,6 +1265,106 @@ static DEVICE_ATTR_RO(bd_stat); #endif static DEVICE_ATTR_RO(debug_stat); +#ifdef CONFIG_MEMCG_ZRAM +static inline int init_memcg(struct zram *zram, struct mem_cgroup *memcg) +{ + if (init_done(zram)) + return -EINVAL; + + if (zram->memcg) + css_put(&zram->memcg->css); + + zram->memcg = memcg; + + return 0; +} + +static inline void reset_memcg(struct zram *zram) +{ + struct mem_cgroup *memcg = zram->memcg; + + if (!memcg) + return; + + zram->memcg = NULL; + css_put(&memcg->css); +} + + +static ssize_t mem_cgroup_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + struct mem_cgroup *memcg = zram->memcg; + + if (mem_cgroup_disabled() || !memcg) + return scnprintf(buf, PAGE_SIZE, "none\n"); + + if (!cgroup_path(memcg->css.cgroup, buf, PATH_MAX)) + return scnprintf(buf, PAGE_SIZE, "none\n"); + + return scnprintf(buf, PAGE_SIZE, "%s\n", buf); +} + +static ssize_t mem_cgroup_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct zram *zram = dev_to_zram(dev); + struct mem_cgroup *memcg; + char *kbuf; + size_t sz; + int ret = 0; + + if (mem_cgroup_disabled()) + return -EINVAL; + + kbuf = kmalloc(PATH_MAX, GFP_KERNEL); + if (!kbuf) + return -ENOMEM; + + strlcpy(kbuf, buf, PATH_MAX); + sz = strlen(kbuf); + if (sz > 0 && kbuf[sz - 1] == '\n') + kbuf[sz - 1] = 0x00; + + if (!strcmp(kbuf, "none")) { + memcg = NULL; + } else { + memcg = memcg_get_from_path(kbuf, PATH_MAX); + if (!memcg) { + ret = -EINVAL; + goto out; + } + } + + down_write(&zram->init_lock); + ret = init_memcg(zram, memcg); + if (ret && memcg) + css_put(&memcg->css); + up_write(&zram->init_lock); + +out: + kfree(kbuf); + return ret ? 
ret : len; +} +static DEVICE_ATTR_RW(mem_cgroup); + +static inline struct zs_pool *zram_create_pool(struct zram *zram) +{ + return zs_create_pool_with_memcg(zram->disk->disk_name, + zram->memcg); +} +#else +static inline void reset_memcg(struct zram *zram) +{ +} + +static inline struct zs_pool *zram_create_pool(struct zram *zram) +{ + return zs_create_pool(zram->disk->disk_name); +} +#endif + static void zram_meta_free(struct zram *zram, u64 disksize) { size_t num_pages = disksize >> PAGE_SHIFT; @@ -1287,7 +1387,7 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize) if (!zram->table) return false; - zram->mem_pool = zs_create_pool(zram->disk->disk_name); + zram->mem_pool = zram_create_pool(zram); if (!zram->mem_pool) { vfree(zram->table); return false; @@ -2141,6 +2241,7 @@ static void zram_reset_device(struct zram *zram) zram->limit_pages = 0; if (!init_done(zram)) { + reset_memcg(zram); up_write(&zram->init_lock); return; } @@ -2156,6 +2257,7 @@ static void zram_reset_device(struct zram *zram) zram_destroy_comps(zram); memset(&zram->stats, 0, sizeof(zram->stats)); reset_bdev(zram); + reset_memcg(zram); comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor); up_write(&zram->init_lock); @@ -2338,6 +2440,9 @@ static struct attribute *zram_disk_attrs[] = { #ifdef CONFIG_ZRAM_MULTI_COMP &dev_attr_recomp_algorithm.attr, &dev_attr_recompress.attr, +#endif +#ifdef CONFIG_MEMCG_ZRAM + &dev_attr_mem_cgroup.attr, #endif NULL, }; diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index eb13d0299f89..8987e77ac7ee 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -18,6 +18,7 @@ #include #include #include +#include #include "zcomp.h" @@ -142,5 +143,8 @@ struct zram { #ifdef CONFIG_ZRAM_MEMORY_TRACKING struct dentry *debugfs_dir; #endif +#ifdef CONFIG_MEMCG_ZRAM + struct mem_cgroup *memcg; +#endif }; #endif diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ef3a6a8e640f..2804701f75dd 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1325,6 +1325,9 @@ int mem_cgroup_force_empty(struct mem_cgroup *memcg); int memcg_get_swap_type(struct page *page); void memcg_remove_swapfile(int type); +#ifdef CONFIG_MEMCG_ZRAM +struct mem_cgroup *memcg_get_from_path(char *path, size_t buflen); +#endif #else /* CONFIG_MEMCG */ #define MEM_CGROUP_ID_SHIFT 0 diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h index a48cd0ffe57d..2c09676f9178 100644 --- a/include/linux/zsmalloc.h +++ b/include/linux/zsmalloc.h @@ -41,6 +41,9 @@ struct zs_pool_stats { struct zs_pool; struct zs_pool *zs_create_pool(const char *name); +#ifdef CONFIG_MEMCG_ZRAM +struct zs_pool *zs_create_pool_with_memcg(const char *name, void *memcg); +#endif void zs_destroy_pool(struct zs_pool *pool); unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t flags); diff --git a/mm/Kconfig b/mm/Kconfig index 70c85533aada..c54be242232d 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -521,6 +521,14 @@ config MEMCG_SWAP_QOS memcg swap control include memory force swapin, swapfile control and swap limit. +config MEMCG_ZRAM + bool "Enable Memory Cgroup charge of zram usage" + depends on MEMCG_SWAP_QOS && ZRAM + depends on X86 || ARM64 + default n + help + Support to charge zram usage in memory cgroup. 
+ config ETMEM_SCAN tristate "module: etmem page scan for etmem support" depends on ETMEM diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9007c3554771..575f382358e9 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3628,6 +3628,33 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css, } } +#ifdef CONFIG_MEMCG_ZRAM +struct mem_cgroup *memcg_get_from_path(char *path, size_t buflen) +{ + struct mem_cgroup *memcg; + char *memcg_path; + + if (mem_cgroup_disabled()) + return NULL; + + memcg_path = kzalloc(buflen, GFP_KERNEL); + if (!memcg_path) + return NULL; + + for_each_mem_cgroup(memcg) { + cgroup_path(memcg->css.cgroup, memcg_path, buflen); + if (!strcmp(path, memcg_path) && css_tryget_online(&memcg->css)) { + mem_cgroup_iter_break(NULL, memcg); + break; + } + } + + kfree(memcg_path); + return memcg; +} +EXPORT_SYMBOL(memcg_get_from_path); +#endif + #ifdef CONFIG_MEMCG_KMEM static int memcg_online_kmem(struct mem_cgroup *memcg) { diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 6079f5625abb..85aba62d777d 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -57,6 +57,7 @@ #include #include #include +#include #define ZSPAGE_MAGIC 0x58 @@ -274,6 +275,9 @@ struct zs_pool { atomic_long_t isolated_pages; bool destroying; #endif +#ifdef CONFIG_MEMCG_ZRAM + struct mem_cgroup *memcg; +#endif }; struct zspage { @@ -2527,10 +2531,33 @@ struct zs_pool *zs_create_pool(const char *name) } EXPORT_SYMBOL_GPL(zs_create_pool); +#ifdef CONFIG_MEMCG_ZRAM +static inline void zs_set_memcg(struct zs_pool *pool, void *memcg) +{ + if (pool) + pool->memcg = memcg; +} + +struct zs_pool *zs_create_pool_with_memcg(const char *name, void *memcg) +{ + struct zs_pool *pool = zs_create_pool(name); + + zs_set_memcg(pool, memcg); + + return pool; +} +EXPORT_SYMBOL_GPL(zs_create_pool_with_memcg); +#else +static inline void zs_set_memcg(struct zs_pool *pool, void *memcg) +{ +} +#endif + void zs_destroy_pool(struct zs_pool *pool) { int i; + zs_set_memcg(pool, NULL); zs_unregister_shrinker(pool); zs_unregister_migration(pool); zs_pool_stat_destroy(pool); -- Gitee From 9a54ec6b1a0199d0c650b38c5df43bd24296e58b Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Tue, 28 May 2024 18:14:38 +0800 Subject: [PATCH 3/6] zram: charge zram usage in memory cgroup hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9H66Y CVE: NA -------------------------------- The previous patch allowed zram and its zspool to be bound to a memcg. Now support charging the usage of zspages and zram->table to that memory cgroup. This helps limit memory usage in container environments and prevents zram resource contention between containers. Add a new file memory.zram_usage_in_bytes to show the zram usage; the zram usage is also added to memory.usage_in_bytes.
Now the stats are as follows: memory.zram_usage_in_bytes = zram_usage memory.usage_in_bytes = program_usage + zram_usage memory.memsw.usage_in_bytes = program_usage + swap_usage Signed-off-by: Liu Shixin --- drivers/block/zram/zram_drv.c | 32 +++++++++++++++++++-- include/linux/memcontrol.h | 5 ++++ mm/memcontrol.c | 52 +++++++++++++++++++++++++++++++++++ mm/zsmalloc.c | 31 +++++++++++++++++++++ 4 files changed, 117 insertions(+), 3 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 0c2c068b2374..a56a549ee92e 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1354,6 +1354,20 @@ static inline struct zs_pool *zram_create_pool(struct zram *zram) return zs_create_pool_with_memcg(zram->disk->disk_name, zram->memcg); } + +static inline void zram_charge_memory(struct zram *zram, unsigned long size) +{ + unsigned long nr_pages = ALIGN(size, PAGE_SIZE) >> PAGE_SHIFT; + + memcg_charge_zram(zram->memcg, nr_pages); +} + +static inline void zram_uncharge_memory(struct zram *zram, unsigned long size) +{ + unsigned long nr_pages = ALIGN(size, PAGE_SIZE) >> PAGE_SHIFT; + + memcg_uncharge_zram(zram->memcg, nr_pages); +} #else static inline void reset_memcg(struct zram *zram) { @@ -1363,11 +1377,20 @@ static inline struct zs_pool *zram_create_pool(struct zram *zram) { return zs_create_pool(zram->disk->disk_name); } + +static inline void zram_charge_memory(struct zram *zram, unsigned long size) +{ +} + +static inline void zram_uncharge_memory(struct zram *zram, unsigned long size) +{ +} #endif static void zram_meta_free(struct zram *zram, u64 disksize) { size_t num_pages = disksize >> PAGE_SHIFT; + unsigned long size = array_size(num_pages, sizeof(*zram->table)); size_t index; /* Free all pages that are still in this zram device */ @@ -1376,14 +1399,15 @@ static void zram_meta_free(struct zram *zram, u64 disksize) zs_destroy_pool(zram->mem_pool); vfree(zram->table); + zram_uncharge_memory(zram, size); } static bool zram_meta_alloc(struct zram *zram, u64 disksize) { - size_t num_pages; + size_t num_pages = disksize >> PAGE_SHIFT; + unsigned long size = array_size(num_pages, sizeof(*zram->table)); - num_pages = disksize >> PAGE_SHIFT; - zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table))); + zram->table = vzalloc(size); if (!zram->table) return false; @@ -1393,6 +1417,8 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize) return false; } + zram_charge_memory(zram, size); + if (!huge_class_size) huge_class_size = zs_huge_class_size(zram->mem_pool); return true; diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 2804701f75dd..450300805b21 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -248,6 +248,9 @@ struct obj_cgroup { struct swap_device { unsigned long max; int type; +#ifdef CONFIG_MEMCG_ZRAM + atomic64_t zram_usage; +#endif }; /* @@ -1327,6 +1330,8 @@ void memcg_remove_swapfile(int type); #ifdef CONFIG_MEMCG_ZRAM struct mem_cgroup *memcg_get_from_path(char *path, size_t buflen); +void memcg_charge_zram(struct mem_cgroup *memcg, unsigned int nr_pages); +void memcg_uncharge_zram(struct mem_cgroup *memcg, unsigned int nr_pages); #endif #else /* CONFIG_MEMCG */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 575f382358e9..f7cdcdfa81b1 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3653,6 +3653,49 @@ struct mem_cgroup *memcg_get_from_path(char *path, size_t buflen) return memcg; } EXPORT_SYMBOL(memcg_get_from_path); + +static inline void 
memcg_zram_usage_init(struct mem_cgroup *memcg) +{ + atomic64_set(&memcg->swap_dev->zram_usage, 0); +} + +void memcg_charge_zram(struct mem_cgroup *memcg, unsigned int nr_pages) +{ + if (mem_cgroup_disabled() || !memcg) + return; + + if (cgroup_subsys_on_dfl(memory_cgrp_subsys)) + return; + + page_counter_charge(&memcg->memory, nr_pages); + atomic_long_add(nr_pages, &memcg->swap_dev->zram_usage); +} +EXPORT_SYMBOL_GPL(memcg_charge_zram); + +void memcg_uncharge_zram(struct mem_cgroup *memcg, unsigned int nr_pages) +{ + if (mem_cgroup_disabled() || !memcg) + return; + + if (cgroup_subsys_on_dfl(memory_cgrp_subsys)) + return; + + page_counter_uncharge(&memcg->memory, nr_pages); + atomic_long_sub(nr_pages, &memcg->swap_dev->zram_usage); +} +EXPORT_SYMBOL_GPL(memcg_uncharge_zram); + +static u64 mem_cgroup_zram_usage(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(css); + + return (u64)atomic64_read(&memcg->swap_dev->zram_usage) * PAGE_SIZE; +} +#else +static inline void memcg_zram_usage_init(struct mem_cgroup *memcg) +{ +} #endif #ifdef CONFIG_MEMCG_KMEM @@ -4251,6 +4294,8 @@ static void memcg_swap_device_init(struct mem_cgroup *memcg, WRITE_ONCE(memcg->swap_dev->type, READ_ONCE(parent->swap_dev->type)); } + + memcg_zram_usage_init(memcg); } u64 memcg_swapmax_read(struct cgroup_subsys_state *css, struct cftype *cft) @@ -6247,6 +6292,13 @@ static struct cftype mem_cgroup_legacy_files[] = { .write = memcg_swapfile_write, .seq_show = memcg_swapfile_read, }, +#ifdef CONFIG_MEMCG_ZRAM + { + .name = "zram_usage_in_bytes", + .flags = CFTYPE_NOT_ON_ROOT, + .read_u64 = mem_cgroup_zram_usage, + }, +#endif #endif { .name = "high_async_ratio", diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 85aba62d777d..934101f9f09e 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -935,6 +935,35 @@ static int trylock_zspage(struct zspage *zspage) return 0; } +#ifdef CONFIG_MEMCG_ZRAM +static inline void zs_charge_memory(struct zs_pool *pool, + unsigned long nr_pages) +{ + /* + * Since only zram configures memcg for zs_pool, + * charge the memory in zram usage. 
+ */ + memcg_charge_zram(pool->memcg, nr_pages); +} + +static inline void zs_uncharge_memory(struct zs_pool *pool, + unsigned long nr_pages) +{ + /* See zs_charge_memory() for detail */ + memcg_uncharge_zram(pool->memcg, nr_pages); +} +#else +static inline void zs_charge_memory(struct zs_pool *pool, + unsigned long nr_pages) +{ +} + +static inline void zs_uncharge_memory(struct zs_pool *pool, + unsigned long nr_pages) +{ +} +#endif + static void __free_zspage(struct zs_pool *pool, struct size_class *class, struct zspage *zspage) { @@ -965,6 +994,7 @@ static void __free_zspage(struct zs_pool *pool, struct size_class *class, zs_stat_dec(class, OBJ_ALLOCATED, class->objs_per_zspage); atomic_long_sub(class->pages_per_zspage, &pool->pages_allocated); + zs_uncharge_memory(pool, class->pages_per_zspage); } static void free_zspage(struct zs_pool *pool, struct size_class *class, @@ -1484,6 +1514,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp) record_obj(handle, obj); atomic_long_add(class->pages_per_zspage, &pool->pages_allocated); + zs_charge_memory(pool, class->pages_per_zspage); zs_stat_inc(class, OBJ_ALLOCATED, class->objs_per_zspage); /* We completely set up zspage so mark them as movable */ -- Gitee From 01a114252153947914853d8a97c52808ef9b9f81 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Tue, 28 May 2024 18:14:39 +0800 Subject: [PATCH 4/6] zram: support to allocate page from hpool for zram hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9H66Y CVE: NA -------------------------------- Once zram is bound to a memcg that has a hpool as its memory provider, zram's memory should preferably be allocated from that hpool. The zspool pages used to be allocated with alloc_page(); replace that with zs_alloc_page() to get this behaviour. zram->table is allocated by vzalloc(), which in turn calls alloc_pages_node(); add vzalloc_with_memcg() and __vmalloc_alloc_pages() to replace vzalloc() and alloc_pages_node() respectively for the same purpose.
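All of the new allocation helpers follow the same "try the bound memcg's hpool first, then fall back to the normal allocator" pattern. A minimal sketch of that fallback (the helper name here is illustrative only; the real implementations are zs_alloc_page() and __vmalloc_alloc_pages() in the diff below):

  /* Prefer the memcg's hpool; fall back to the buddy allocator. */
  static struct page *alloc_page_prefer_hpool(struct mem_cgroup *memcg, gfp_t gfp)
  {
          struct page *page;

          if (!memcg)
                  return alloc_page(gfp);

          /* order-0 only: the dynamic hugetlb pool hands out single pages here */
          page = alloc_page_from_dhugetlb_pool(memcg, gfp, 0, 0);
          if (!page)
                  page = alloc_page(gfp);

          return page;
  }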
Signed-off-by: Liu Shixin --- drivers/block/zram/zram_drv.c | 14 +++++- include/linux/vmalloc.h | 3 ++ mm/dynamic_hugetlb.c | 3 +- mm/vmalloc.c | 90 +++++++++++++++++++++++++++++++++-- mm/zsmalloc.c | 22 ++++++++- 5 files changed, 125 insertions(+), 7 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index a56a549ee92e..90e2c989a553 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1368,6 +1368,12 @@ static inline void zram_uncharge_memory(struct zram *zram, unsigned long size) memcg_uncharge_zram(zram->memcg, nr_pages); } + +static inline struct zram_table_entry *zram_table_alloc(struct zram *zram, + unsigned long size) +{ + return vzalloc_with_memcg(size, zram->memcg); +} #else static inline void reset_memcg(struct zram *zram) { @@ -1385,6 +1391,12 @@ static inline void zram_charge_memory(struct zram *zram, unsigned long size) static inline void zram_uncharge_memory(struct zram *zram, unsigned long size) { } + +static inline struct zram_table_entry *zram_table_alloc(struct zram *zram, + unsigned long size) +{ + return vzalloc(size); +} #endif static void zram_meta_free(struct zram *zram, u64 disksize) @@ -1407,7 +1419,7 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize) size_t num_pages = disksize >> PAGE_SHIFT; unsigned long size = array_size(num_pages, sizeof(*zram->table)); - zram->table = vzalloc(size); + zram->table = zram_table_alloc(zram, size); if (!zram->table) return false; diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 1ebe364ed29a..c0c9a3476700 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -131,6 +131,9 @@ static inline unsigned long vmalloc_nr_pages(void) { return 0; } extern void *vmalloc(unsigned long size); extern void *vzalloc(unsigned long size); +#ifdef CONFIG_MEMCG_ZRAM +extern void *vzalloc_with_memcg(unsigned long size, void *memcg); +#endif extern void *vmalloc_user(unsigned long size); extern void *vmalloc_node(unsigned long size, int node); extern void *vzalloc_node(unsigned long size, int node); diff --git a/mm/dynamic_hugetlb.c b/mm/dynamic_hugetlb.c index 176b98584420..10960a585d5b 100644 --- a/mm/dynamic_hugetlb.c +++ b/mm/dynamic_hugetlb.c @@ -515,8 +515,6 @@ static struct page *__alloc_page_from_dpool(struct dhugetlb_pool *hpool) struct page *page = NULL; unsigned long flags; - hpool = find_hpool_by_task(current); - if (!get_hpool_unless_zero(hpool)) return NULL; @@ -596,6 +594,7 @@ struct page *alloc_page_from_dhugetlb_pool(struct mem_cgroup *memcg, return page; } +EXPORT_SYMBOL_GPL(alloc_page_from_dhugetlb_pool); static void __free_page_to_dhugetlb_pool(struct page *page) { diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 6d802924d9e8..f0aaae496ec4 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2889,9 +2889,36 @@ void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot) EXPORT_SYMBOL_GPL(vmap_pfn); #endif /* CONFIG_VMAP_PFN */ +#include + +#ifdef CONFIG_MEMCG_ZRAM +static inline struct page *__vmalloc_alloc_pages(int node, gfp_t gfp_mask, + unsigned int order, + struct mem_cgroup *memcg) +{ + struct page *page; + + if (!memcg) + return alloc_pages_node(node, gfp_mask, order); + + page = alloc_page_from_dhugetlb_pool(memcg, gfp_mask, order, 0); + if (!page) + page = alloc_pages_node(node, gfp_mask, order); + + return page; +} +#else +static inline struct page *__vmalloc_alloc_pages(int node, gfp_t gfp_mask, + unsigned int order, + struct mem_cgroup *memcg) +{ + return alloc_pages_node(node, gfp_mask, order); +} 
+#endif + static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot, unsigned int page_shift, - int node) + int node, struct mem_cgroup *memcg) { const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO; unsigned long addr = (unsigned long)area->addr; @@ -2940,7 +2967,8 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, int p; /* Compound pages required for remap_vmalloc_page */ - page = alloc_pages_node(node, gfp_mask | __GFP_COMP, page_order); + page = __vmalloc_alloc_pages(node, gfp_mask | __GFP_COMP, + page_order, memcg); if (unlikely(!page)) { /* Successfully allocated i pages, free them in __vfree() */ area->nr_pages = i; @@ -3050,7 +3078,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, goto fail; } - addr = __vmalloc_area_node(area, gfp_mask, prot, shift, node); + addr = __vmalloc_area_node(area, gfp_mask, prot, shift, node, NULL); if (!addr) goto fail; @@ -3180,6 +3208,62 @@ void *vzalloc(unsigned long size) } EXPORT_SYMBOL(vzalloc); +#ifdef CONFIG_MEMCG_ZRAM +static void *__vmalloc_with_memcg(unsigned long size, gfp_t gfp_mask, + struct mem_cgroup *memcg, const void *caller) +{ + struct vm_struct *area; + void *addr; + + if (WARN_ON_ONCE(!size)) + return NULL; + + if ((size >> PAGE_SHIFT) > totalram_pages()) { + warn_alloc(gfp_mask, NULL, + "vmalloc size %lu: exceeds total pages", size); + return NULL; + } + + area = __get_vm_area_node(size, 1, PAGE_SHIFT, VM_ALLOC | + VM_UNINITIALIZED, VMALLOC_START, + VMALLOC_END, NUMA_NO_NODE, + gfp_mask, caller); + if (!area) { + warn_alloc(gfp_mask, NULL, + "vmalloc size %lu: vm_struct allocation failed", size); + return NULL; + } + + addr = __vmalloc_area_node(area, gfp_mask, PAGE_KERNEL, PAGE_SHIFT, + NUMA_NO_NODE, memcg); + if (!addr) + return NULL; + + /* + * In this function, newly allocated vm_struct has VM_UNINITIALIZED + * flag. It means that vm_struct is not fully initialized. + * Now, it is fully initialized, so remove this flag here. 
+ */ + clear_vm_uninitialized_flag(area); + + size = PAGE_ALIGN(size); + kmemleak_vmalloc(area, size, gfp_mask); + + return addr; +} + +void *vzalloc_with_memcg(unsigned long size, void *memcg) +{ + if (!memcg) + return vzalloc(size); + + return __vmalloc_with_memcg(size, GFP_KERNEL | __GFP_ZERO, + (struct mem_cgroup *)memcg, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(vzalloc_with_memcg); +#endif + /** * vmalloc_user - allocate zeroed virtually contiguous memory for userspace * @size: allocation size diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 934101f9f09e..55a1d9ea16ce 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -952,6 +952,21 @@ static inline void zs_uncharge_memory(struct zs_pool *pool, /* See zs_charge_memory() for detail */ memcg_uncharge_zram(pool->memcg, nr_pages); } + +static inline struct page *zs_alloc_page(struct zs_pool *pool, gfp_t gfp) +{ + struct mem_cgroup *memcg = pool->memcg; + struct page *page; + + if (!memcg) + return alloc_page(gfp); + + page = alloc_page_from_dhugetlb_pool(memcg, gfp, 0, 0); + if (!page) + page = alloc_page(gfp); + + return page; +} #else static inline void zs_charge_memory(struct zs_pool *pool, unsigned long nr_pages) @@ -962,6 +977,11 @@ static inline void zs_uncharge_memory(struct zs_pool *pool, unsigned long nr_pages) { } + +static inline struct page *zs_alloc_page(struct zs_pool *pool, gfp_t gfp) +{ + return alloc_page(gfp); +} #endif static void __free_zspage(struct zs_pool *pool, struct size_class *class, @@ -1111,7 +1131,7 @@ static struct zspage *alloc_zspage(struct zs_pool *pool, for (i = 0; i < class->pages_per_zspage; i++) { struct page *page; - page = alloc_page(gfp); + page = zs_alloc_page(pool, gfp); if (!page) { while (--i >= 0) { dec_zone_page_state(pages[i], NR_ZSPAGES); -- Gitee From f684362ad9444ea75584f31e3a019142ff4ea152 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Tue, 28 May 2024 18:14:40 +0800 Subject: [PATCH 5/6] mm/swap: support to allocate page from hpool for swapcache hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9H66Y CVE: NA -------------------------------- When swapping in asynchronously, if the swap entry belongs to a memcg and that memcg is bound to a hpool, try to allocate the page from the hpool first. If the swapin is triggered by a page fault, there is no need to allocate from the hpool proactively: the faulting task already belongs to the memcg, so the page will be allocated from the hpool naturally.
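Put differently, the swapcache path resolves the owning memcg from the swap entry first and only then decides where to allocate from; condensed from the new memcg_alloc_page_vma() in the diff below:

  struct page *page = NULL;

  memcg = get_mem_cgroup_from_swap(entry);   /* via lookup_swap_cgroup_id() */
  if (memcg) {
          page = alloc_page_from_dhugetlb_pool(memcg, gfp_mask, 0, 0);
          css_put(&memcg->css);
  }
  if (!page)
          page = alloc_page_vma(gfp_mask, vma, addr);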
Signed-off-by: Liu Shixin --- include/linux/memcontrol.h | 10 +++++++++ mm/memcontrol.c | 44 ++++++++++++++++++++++++++++++++++++++ mm/swap_state.c | 2 +- 3 files changed, 55 insertions(+), 1 deletion(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 450300805b21..bac4aade25b4 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -808,6 +808,9 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *target, page_counter_read(&memcg->memory); } +struct page *memcg_alloc_page_vma(swp_entry_t entry, gfp_t gfp_mask, + struct vm_area_struct *vma, unsigned long addr); + int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); void mem_cgroup_uncharge(struct page *page); @@ -1411,6 +1414,13 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *target, return false; } +static inline struct page *memcg_alloc_page_vma(swp_entry_t entry, gfp_t gfp_mask, + struct vm_area_struct *vma, + unsigned long addr) +{ + return alloc_page_vma(gfp_mask, vma, addr); +} + static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f7cdcdfa81b1..3195991c98ae 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -7954,6 +7954,50 @@ void mem_cgroup_calculate_protection(struct mem_cgroup *root, atomic_long_read(&parent->memory.children_low_usage))); } +#ifdef CONFIG_DYNAMIC_HUGETLB +static struct mem_cgroup *get_mem_cgroup_from_swap(swp_entry_t entry) +{ + struct mem_cgroup *memcg; + unsigned short id; + + if (mem_cgroup_disabled()) + return NULL; + + id = lookup_swap_cgroup_id(entry); + + rcu_read_lock(); + memcg = mem_cgroup_from_id(id); + if (memcg && !css_tryget_online(&memcg->css)) + memcg = NULL; + rcu_read_unlock(); + + return memcg; +} + +struct page *memcg_alloc_page_vma(swp_entry_t entry, gfp_t gfp_mask, + struct vm_area_struct *vma, unsigned long addr) +{ + struct mem_cgroup *memcg; + struct page *page = NULL; + + memcg = get_mem_cgroup_from_swap(entry); + if (memcg) { + page = alloc_page_from_dhugetlb_pool(memcg, gfp_mask, 0, 0); + css_put(&memcg->css); + } + if (!page) + page = alloc_page_vma(gfp_mask, vma, addr); + + return page; +} +#else +struct page *memcg_alloc_page_vma(swp_entry_t entry, gfp_t gfp_mask, + struct vm_area_struct *vma, unsigned long addr) +{ + return alloc_page_vma(gfp_mask, vma, addr); +} +#endif + /** * mem_cgroup_charge - charge a newly allocated page to a cgroup * @page: page to charge diff --git a/mm/swap_state.c b/mm/swap_state.c index 69d71c4be7b8..f0929da6225a 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -491,7 +491,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, * before marking swap_map SWAP_HAS_CACHE, when -EEXIST will * cause any racers to loop around until we add it to cache. */ - page = alloc_page_vma(gfp_mask, vma, addr); + page = memcg_alloc_page_vma(entry, gfp_mask, vma, addr); if (!page) return NULL; -- Gitee From e84d94e0ae099d868807c0ee234c24772a44c024 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Tue, 28 May 2024 18:14:41 +0800 Subject: [PATCH 6/6] openeuler_defconfig: enable memcg_zram for x86_64 and arm64 hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9H66Y CVE: NA -------------------------------- Enable CONFIG_MEMCG_ZRAM to support memcg_zram feature. 
Signed-off-by: Liu Shixin --- arch/arm64/configs/openeuler_defconfig | 1 + arch/x86/configs/openeuler_defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index a8f83a4e2a4f..a485c4f43f03 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -1121,6 +1121,7 @@ CONFIG_CLEANCACHE=y CONFIG_FRONTSWAP=y CONFIG_MEMCG_QOS=y CONFIG_MEMCG_SWAP_QOS=y +CONFIG_MEMCG_ZRAM=y CONFIG_ETMEM_SCAN=m CONFIG_ETMEM_SWAP=m CONFIG_ETMEM=y diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index 86a2e4ea72da..bdc4005cf7a4 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -1054,6 +1054,7 @@ CONFIG_CLEANCACHE=y CONFIG_FRONTSWAP=y CONFIG_MEMCG_QOS=y CONFIG_MEMCG_SWAP_QOS=y +CONFIG_MEMCG_ZRAM=y CONFIG_ETMEM_SCAN=m CONFIG_ETMEM_SWAP=m CONFIG_ETMEM=y -- Gitee
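For reference, an end-to-end usage sketch for the series as a whole. This is a hypothetical setup: it assumes a legacy cgroup v1 memory hierarchy mounted at /sys/fs/cgroup/memory, an existing zram0 device, and a memcg "mycontainer" that already has a dynamic hugetlb pool configured as its memory provider.

  # Bind zram0 to the memcg; this must be done before disksize is set,
  # and the value is the cgroup path of the memcg.
  echo /mycontainer > /sys/block/zram0/mem_cgroup
  cat /sys/block/zram0/mem_cgroup

  # Set up and use the device as usual.
  echo 1G > /sys/block/zram0/disksize
  mkswap /dev/zram0
  swapon /dev/zram0

  # zram usage charged to the memcg is reported here and is also
  # included in memory.usage_in_bytes.
  cat /sys/fs/cgroup/memory/mycontainer/memory.zram_usage_in_bytes

  # Writing "none" unbinds the memcg again (also only while the
  # device is not initialized).
  echo none > /sys/block/zram0/mem_cgroup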