From e077a1d88f292cd8c823be684ecd46b5092b3bab Mon Sep 17 00:00:00 2001 From: Lu Jialin Date: Tue, 5 Sep 2023 06:53:00 +0000 Subject: [PATCH 1/2] cgroup: Add cgroup1_get_from_id() hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I7VR9L ------------------------------- Add cgroup1_get_from_id() function to help cgroupv1 get cgroup through cgroup inode; The patch also export cgroup_tryget_css(), which will be used later Signed-off-by: Lu Jialin --- include/linux/cgroup-defs.h | 2 ++ include/linux/cgroup.h | 1 + kernel/cgroup/cgroup.c | 41 +++++++++++++++++++++++++++++++++++-- 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 8a0d5466c7be..e9c5608fac27 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -762,6 +762,8 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) percpu_up_read(&cgroup_threadgroup_rwsem); } +struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp, + struct cgroup_subsys *ss); #else /* CONFIG_CGROUPS */ #define CGROUP_SUBSYS_COUNT 0 diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b400d8d278c0..a54af75aff14 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -634,6 +634,7 @@ static inline void cgroup_kthread_ready(void) } void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen); +struct cgroup *cgroup1_get_from_id(struct cgroup_root *root, u64 id); struct cgroup *cgroup_get_from_id(u64 id); #else /* !CONFIG_CGROUPS */ diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index d65411063781..6f49515a0c88 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -501,8 +501,8 @@ static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp, * Find and get @cgrp's css associated with @ss. If the css doesn't exist * or is offline, %NULL is returned. 
*/ -static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp, - struct cgroup_subsys *ss) +struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp, + struct cgroup_subsys *ss) { struct cgroup_subsys_state *css; @@ -6348,6 +6348,43 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, return retval; } +struct cgroup *cgroup1_get_from_id(struct cgroup_root *root, u64 id) +{ + struct kernfs_node *kn; + struct cgroup *cgrp, *root_cgrp; + + kn = kernfs_find_and_get_node_by_id(root->kf_root, id); + if (!kn) + return ERR_PTR(-ENOENT); + + if (kernfs_type(kn) != KERNFS_DIR) { + kernfs_put(kn); + return ERR_PTR(-ENOENT); + } + + rcu_read_lock(); + + cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv); + if (cgrp && !cgroup_tryget(cgrp)) + cgrp = NULL; + + rcu_read_unlock(); + kernfs_put(kn); + + if (!cgrp) + return ERR_PTR(-ENOENT); + + spin_lock_irq(&css_set_lock); + root_cgrp = current_cgns_cgroup_from_root(root); + spin_unlock_irq(&css_set_lock); + if (!cgroup_is_descendant(cgrp, root_cgrp)) { + cgroup_put(cgrp); + return ERR_PTR(-ENOENT); + } + + return cgrp; +} + /** * cgroup_fork - initialize cgroup related fields during copy_process() * @child: pointer to task_struct of forking parent process. -- Gitee From 17087410372a874e9bb891d94b7d366bbeed26d4 Mon Sep 17 00:00:00 2001 From: Lu Jialin Date: Tue, 5 Sep 2023 06:53:01 +0000 Subject: [PATCH 2/2] cgroup: support cgroup writeback on cgroupv1 hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I7VR9L ------------------------------- In cgroupv1, cgroup writeback is not supported because of two problems: 1) Blkcg_css and memcg_css are mounted on different cgroup trees. Therefore, blkcg_css cannot be found according to a certain memcg_css. 2) Buffer I/O is performed by a kthread, which is in the root_blkcg. Therefore, blkcg cannot limit wbps and wiops of buffer I/O. We solve the two problems to support cgroup writeback on cgroupv1. 
1) A memcg is attached to the blkcg_root css when the memcg is created. 2) We add a member "wb_blkio_ino" in mem_cgroup_legacy_files. Users can attach a memcg to a certain blkcg by echoing the inode number of the blkcg cgroup into the wb_blkio_ino file of the memcg. 3) inode_cgwb_enabled() returns true when memcg and io are both mounted on cgroupv2 or both on cgroupv1. 4) Buffer I/O can find a blkcg according to its memcg. Thus, a memcg can find a certain blkcg, and cgroup writeback can be supported on cgroupv1. Signed-off-by: Lu Jialin --- arch/arm64/configs/openeuler_defconfig | 1 + arch/x86/configs/openeuler_defconfig | 1 + block/blk-cgroup.c | 3 + block/blk-cgroup.h | 3 + include/linux/backing-dev.h | 29 +++++- include/linux/cgroup.h | 2 + include/linux/memcontrol.h | 5 ++ init/Kconfig | 5 ++ kernel/cgroup/cgroup.c | 5 ++ mm/backing-dev.c | 118 ++++++++++++++++++++++++- mm/memcontrol.c | 84 +++++++++++++++++- 11 files changed, 249 insertions(+), 7 deletions(-) diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 698df5e71350..0a6f9efcd104 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -159,6 +159,7 @@ CONFIG_MEMCG=y CONFIG_MEMCG_KMEM=y CONFIG_BLK_CGROUP=y CONFIG_CGROUP_WRITEBACK=y +CONFIG_CGROUP_V1_WRITEBACK=y CONFIG_CGROUP_SCHED=y CONFIG_FAIR_GROUP_SCHED=y CONFIG_CFS_BANDWIDTH=y diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index 295cd120ab64..f46de474f8fa 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -182,6 +182,7 @@ CONFIG_MEMCG=y CONFIG_MEMCG_KMEM=y CONFIG_BLK_CGROUP=y CONFIG_CGROUP_WRITEBACK=y +CONFIG_CGROUP_V1_WRITEBACK=y CONFIG_CGROUP_SCHED=y CONFIG_FAIR_GROUP_SCHED=y CONFIG_CFS_BANDWIDTH=y diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index dce1548a7a0c..1cf679da6aaa 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1347,6 +1347,9 @@ blkcg_css_alloc(struct 
cgroup_subsys_state *parent_css) INIT_HLIST_HEAD(&blkcg->blkg_list); #ifdef CONFIG_CGROUP_WRITEBACK INIT_LIST_HEAD(&blkcg->cgwb_list); +#endif +#ifdef CONFIG_CGROUP_V1_WRITEBACK + INIT_LIST_HEAD(&blkcg->memcg_list); #endif list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 624c03c8fe64..ff2544e574c7 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -114,6 +114,9 @@ struct blkcg { #ifdef CONFIG_CGROUP_WRITEBACK struct list_head cgwb_list; #endif +#ifdef CONFIG_CGROUP_V1_WRITEBACK + struct list_head memcg_list; +#endif }; static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index fbad4fcd408e..8c73163ea0f2 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -150,6 +150,26 @@ static inline bool mapping_can_writeback(struct address_space *mapping) return inode_to_bdi(mapping->host)->capabilities & BDI_CAP_WRITEBACK; } +#ifdef CONFIG_CGROUP_V1_WRITEBACK +void wb_kill_memcg_blkcg(struct cgroup_subsys_state *css); +void wb_attach_memcg_to_blkcg(struct cgroup_subsys_state *memcg_css, + struct cgroup_subsys_state *blkcg_css); +bool cgroup1_writeback_enabled(void); +#else +static inline void wb_kill_memcg_blkcg(struct cgroup_subsys_state *css) +{ +} +static inline void +wb_attach_memcg_to_blkcg(struct cgroup_subsys_state *memcg_css, + struct cgroup_subsys_state *blkcg_css) +{ +} +static inline bool cgroup1_writeback_enabled(void) +{ + return false; +} +#endif /* CONFIG_CGROUP_V1_WRITEBACK */ + #ifdef CONFIG_CGROUP_WRITEBACK struct bdi_writeback *wb_get_lookup(struct backing_dev_info *bdi, @@ -175,10 +195,11 @@ static inline bool inode_cgwb_enabled(struct inode *inode) { struct backing_dev_info *bdi = inode_to_bdi(inode); - return cgroup_subsys_on_dfl(memory_cgrp_subsys) && - cgroup_subsys_on_dfl(io_cgrp_subsys) && - (bdi->capabilities & BDI_CAP_WRITEBACK) && - (inode->i_sb->s_iflags & 
SB_I_CGROUPWB); + return ((cgroup_subsys_on_dfl(memory_cgrp_subsys) && + cgroup_subsys_on_dfl(io_cgrp_subsys)) || + cgroup1_writeback_enabled()) && + (bdi->capabilities & BDI_CAP_WRITEBACK) && + (inode->i_sb->s_iflags & SB_I_CGROUPWB); } /** diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index a54af75aff14..75e0e8ecc958 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -634,7 +634,9 @@ static inline void cgroup_kthread_ready(void) } void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen); +#ifdef CONFIG_CGROUP_V1_WRITEBACK struct cgroup *cgroup1_get_from_id(struct cgroup_root *root, u64 id); +#endif struct cgroup *cgroup_get_from_id(u64 id); #else /* !CONFIG_CGROUPS */ diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 222d7370134c..02e8999915b3 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -313,6 +313,11 @@ struct mem_cgroup { struct memcg_cgwb_frn cgwb_frn[MEMCG_CGWB_FRN_CNT]; #endif +#ifdef CONFIG_CGROUP_V1_WRITEBACK + struct cgroup_subsys_state *wb_blk_css; + struct list_head memcg_node; +#endif + /* List of events which userspace want to receive */ struct list_head event_list; spinlock_t event_list_lock; diff --git a/init/Kconfig b/init/Kconfig index d96c76143610..fb04d39eb00f 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -975,6 +975,11 @@ config CGROUP_WRITEBACK depends on MEMCG && BLK_CGROUP default y +config CGROUP_V1_WRITEBACK + bool "Support Cgroup Writeback On Cgroupv1" + depends on CGROUP_WRITEBACK + default n + menuconfig CGROUP_SCHED bool "CPU controller" default n diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 6f49515a0c88..ce82e3fcc767 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -59,6 +59,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -5605,6 +5606,7 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp, list_del_rcu(&css->sibling); err_free_css: 
list_del_rcu(&css->rstat_css_node); + wb_kill_memcg_blkcg(css); INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn); queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork); return ERR_PTR(err); @@ -5874,6 +5876,7 @@ static void kill_css(struct cgroup_subsys_state *css) */ css_get(css); + wb_kill_memcg_blkcg(css); /* * cgroup core guarantees that, by the time ->css_offline() is * invoked, no new css reference will be given out via @@ -6348,6 +6351,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, return retval; } +#ifdef CONFIG_CGROUP_V1_WRITEBACK struct cgroup *cgroup1_get_from_id(struct cgroup_root *root, u64 id) { struct kernfs_node *kn; @@ -6384,6 +6388,7 @@ struct cgroup *cgroup1_get_from_id(struct cgroup_root *root, u64 id) return cgrp; } +#endif /** * cgroup_fork - initialize cgroup related fields during copy_process() diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 7da9727fcdf3..214672155cab 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -569,6 +569,30 @@ static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb) spin_unlock_irq(&cgwb_lock); } +#ifdef CONFIG_CGROUP_V1_WRITEBACK +#include "../block/blk-cgroup.h" +static struct cgroup_subsys_state *cgwbv1_get_blkcss(struct mem_cgroup *memcg) +{ + struct cgroup_subsys_state *blkcg_css; + + rcu_read_lock(); + blkcg_css = memcg->wb_blk_css; + if (!css_tryget_online(blkcg_css)) { + blkcg_css = blkcg_root_css; + css_get(blkcg_css); + } + rcu_read_unlock(); + + return blkcg_css; +} +#else +static inline struct cgroup_subsys_state * +cgwbv1_get_blkcss(struct mem_cgroup *memcg) +{ + return NULL; +} +#endif + static int cgwb_create(struct backing_dev_info *bdi, struct cgroup_subsys_state *memcg_css, gfp_t gfp) { @@ -580,7 +604,11 @@ static int cgwb_create(struct backing_dev_info *bdi, int ret = 0; memcg = mem_cgroup_from_css(memcg_css); - blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys); + if (cgroup1_writeback_enabled()) + blkcg_css = 
cgwbv1_get_blkcss(memcg); + else + blkcg_css = cgroup_get_e_css(memcg_css->cgroup, + &io_cgrp_subsys); memcg_cgwb_list = &memcg->cgwb_list; blkcg_cgwb_list = blkcg_get_cgwb_list(blkcg_css); @@ -699,9 +727,14 @@ struct bdi_writeback *wb_get_lookup(struct backing_dev_info *bdi, wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id); if (wb) { struct cgroup_subsys_state *blkcg_css; + struct mem_cgroup *memcg = mem_cgroup_from_css(memcg_css); /* see whether the blkcg association has changed */ - blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys); + if (cgroup1_writeback_enabled()) + blkcg_css = cgwbv1_get_blkcss(memcg); + else + blkcg_css = cgroup_get_e_css(memcg_css->cgroup, + &io_cgrp_subsys); if (unlikely(wb->blkcg_css != blkcg_css || !wb_tryget(wb))) wb = NULL; css_put(blkcg_css); @@ -1128,3 +1161,84 @@ const char *bdi_dev_name(struct backing_dev_info *bdi) return bdi->dev_name; } EXPORT_SYMBOL_GPL(bdi_dev_name); + +#ifdef CONFIG_CGROUP_V1_WRITEBACK + +#include "../kernel/cgroup/cgroup-internal.h" + +static bool cgroup1_writeback __read_mostly; + +bool cgroup1_writeback_enabled(void) +{ + return !cgroup_subsys_on_dfl(memory_cgrp_subsys) && + !cgroup_subsys_on_dfl(io_cgrp_subsys) && cgroup1_writeback; +} + +static void wb_kill_memcg(struct cgroup_subsys_state *memcg_css) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(memcg_css); + + list_del_init(&memcg->memcg_node); + css_put(memcg->wb_blk_css); +} + +static void wb_kill_blkcg(struct cgroup_subsys_state *blkcg_css) +{ + struct mem_cgroup *memcg, *tmp; + struct blkcg *blkcg = css_to_blkcg(blkcg_css); + struct blkcg *root_blkcg = css_to_blkcg(blkcg_root_css); + + list_for_each_entry_safe(memcg, tmp, &blkcg->memcg_list, memcg_node) { + css_get(blkcg_root_css); + memcg->wb_blk_css = blkcg_root_css; + list_move(&memcg->memcg_node, &root_blkcg->memcg_list); + css_put(blkcg_css); + } +} + +void wb_kill_memcg_blkcg(struct cgroup_subsys_state *css) +{ + struct cgroup_subsys *ss = css->ss; + + if 
(!cgroup1_writeback) + return; + + lockdep_assert_held(&cgroup_mutex); + + if (ss->id == io_cgrp_id) + wb_kill_blkcg(css); + else if (ss->id == memory_cgrp_id) + wb_kill_memcg(css); +} + +void wb_attach_memcg_to_blkcg(struct cgroup_subsys_state *memcg_css, + struct cgroup_subsys_state *blkcg_css) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(memcg_css); + struct cgroup_subsys_state *pre_blkcss = memcg->wb_blk_css; + struct blkcg *blkcg = css_to_blkcg(blkcg_css); + + if (!cgroup1_writeback) + return; + + lockdep_assert_held(&cgroup_mutex); + + css_get(blkcg_css); + memcg->wb_blk_css = blkcg_css; + if (pre_blkcss == NULL) + list_add(&memcg->memcg_node, &blkcg->memcg_list); + else { + list_move(&memcg->memcg_node, &blkcg->memcg_list); + css_put(pre_blkcss); + } +} + +static int __init enable_cgroup1_writeback(char *s) +{ + cgroup1_writeback = true; + + return 1; +} +__setup("cgroup1_writeback", enable_cgroup1_writeback); +#endif + diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 4b27e245a055..fec60307a561 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -371,7 +371,8 @@ struct cgroup_subsys_state *mem_cgroup_css_from_folio(struct folio *folio) { struct mem_cgroup *memcg = folio_memcg(folio); - if (!memcg || !cgroup_subsys_on_dfl(memory_cgrp_subsys)) + if (!memcg || !(cgroup_subsys_on_dfl(memory_cgrp_subsys) || + cgroup1_writeback_enabled())) memcg = root_mem_cgroup; return &memcg->css; @@ -5059,6 +5060,77 @@ static int mem_cgroup_slab_show(struct seq_file *m, void *p) } #endif +#ifdef CONFIG_CGROUP_V1_WRITEBACK +#include "../kernel/cgroup/cgroup-internal.h" + +static int wb_blkio_show(struct seq_file *m, void *v) +{ + char *path; + ino_t blkcg_id; + struct cgroup *blkcg_cgroup; + struct cgroup_subsys_state *blkcg_css; + struct mem_cgroup *memcg = mem_cgroup_from_seq(m); + + if (!cgroup1_writeback_enabled()) + return -EOPNOTSUPP; + + path = kzalloc(PATH_MAX, GFP_KERNEL); + if (!path) + return -ENOMEM; + + mutex_lock(&cgroup_mutex); + blkcg_css = 
memcg->wb_blk_css; + blkcg_cgroup = blkcg_css->cgroup; + blkcg_id = cgroup_ino(blkcg_cgroup); + cgroup_path(blkcg_cgroup, path, PATH_MAX); + mutex_unlock(&cgroup_mutex); + seq_printf(m, "wb_blkio_path:%s\n", path); + seq_printf(m, "wb_blkio_ino:%lu\n", blkcg_id); + kfree(path); + + return 0; +} + +static ssize_t wb_blkio_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) +{ + int ret = 0; + u64 cgrp_id; + struct cgroup_root *root; + struct cgroup *blk_cgroup; + struct cgroup_subsys_state *blkcg_css; + struct cgroup_subsys_state *memcg_css = of_css(of); + + if (!cgroup1_writeback_enabled()) + return -EOPNOTSUPP; + + buf = strstrip(buf); + ret = kstrtou64(buf, 0, &cgrp_id); + if (ret) + return ret; + + mutex_lock(&cgroup_mutex); + root = blkcg_root_css->cgroup->root; + blk_cgroup = cgroup1_get_from_id(root, cgrp_id); + if (IS_ERR(blk_cgroup)) { + mutex_unlock(&cgroup_mutex); + return -EINVAL; + } + blkcg_css = cgroup_tryget_css(blk_cgroup, &io_cgrp_subsys); + if (!blkcg_css) + goto out_unlock; + wb_attach_memcg_to_blkcg(memcg_css, blkcg_css); + css_put(blkcg_css); + +out_unlock: + cgroup_put(blk_cgroup); + mutex_unlock(&cgroup_mutex); + + return ret < 0 ? ret : nbytes; +} +#endif + + static struct cftype mem_cgroup_legacy_files[] = { { .name = "usage_in_bytes", @@ -5185,6 +5257,15 @@ static struct cftype mem_cgroup_legacy_files[] = { .write = mem_cgroup_reset, .read_u64 = mem_cgroup_read_u64, }, +#ifdef CONFIG_CGROUP_V1_WRITEBACK + { + .name = "wb_blkio_ino", + .flags = CFTYPE_NOT_ON_ROOT, + .seq_show = wb_blkio_show, + .write = wb_blkio_write, + }, +#endif + { }, /* terminate */ }; @@ -5440,6 +5521,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) static_branch_inc(&memcg_bpf_enabled_key); #endif + wb_attach_memcg_to_blkcg(&memcg->css, blkcg_root_css); return &memcg->css; } -- Gitee