From 2dbb5275f291a3512a16679af08a59cc6562d571 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Mon, 17 Jun 2024 11:45:32 +0800 Subject: [PATCH 1/9] block: add a switch to enable hungtask check for io hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9QJ1S CVE: NA -------------------------------- Because slow io or an io timeout handler can take a long time, the hungtask check is suppressed for io waits in order to prevent false positive warnings. However, this also causes the kernel to stay silent if io really hangs. It's quite complicated to distinguish whether io is slow or hung, so this patch adds a switch to enable the hungtask check. The switch defaults to the value of CONFIG_BLK_IO_HUNG_TASK_CHECK (which itself defaults to n), and can be turned off by: 1) disabling config BLK_IO_HUNG_TASK_CHECK 2) adding blk_core.io_hung_task_check=0 to the boot cmdline 3) echo 0 > /sys/module/blk_core/parameters/io_hung_task_check Note that the user has to be careful when using this with hungtask panic enabled, since there could be false positive hungtask warnings. Signed-off-by: Yu Kuai --- arch/arm64/configs/openeuler_defconfig | 1 + arch/powerpc/configs/openeuler_defconfig | 1 + arch/x86/configs/openeuler_defconfig | 1 + block/Kconfig | 11 +++++++++++ block/bio.c | 2 +- block/blk-core.c | 13 ++++++++++++- block/blk-exec.c | 2 +- block/blk.h | 1 + 8 files changed, 29 insertions(+), 3 deletions(-) diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 05b50ca381b1..6c809c2369dc 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -964,6 +964,7 @@ CONFIG_BLK_DEBUG_FS=y CONFIG_BLK_DEBUG_FS_ZONED=y # CONFIG_BLK_SED_OPAL is not set # CONFIG_BLK_INLINE_ENCRYPTION is not set +# CONFIG_BLK_IO_HUNG_TASK_CHEC is not set CONFIG_BLK_DEV_DUMPINFO=y CONFIG_BLK_BIO_DISPATCH_ASYNC=y diff --git a/arch/powerpc/configs/openeuler_defconfig b/arch/powerpc/configs/openeuler_defconfig index cbc315cc4dc1..fc0ca355dcaa 100644 --- a/arch/powerpc/configs/openeuler_defconfig +++
b/arch/powerpc/configs/openeuler_defconfig @@ -687,6 +687,7 @@ CONFIG_BLK_DEBUG_FS_ZONED=y CONFIG_BLK_SED_OPAL=y CONFIG_BLK_INLINE_ENCRYPTION=y # CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK is not set +# CONFIG_BLK_IO_HUNG_TASK_CHECK is not set # # Partition Types diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index f3b810d0cf47..7903ccb27e66 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -942,6 +942,7 @@ CONFIG_BLK_DEBUG_FS=y CONFIG_BLK_DEBUG_FS_ZONED=y # CONFIG_BLK_SED_OPAL is not set # CONFIG_BLK_INLINE_ENCRYPTION is not set +# CONFIG_BLK_IO_HUNG_TASK_CHECK is not set CONFIG_BLK_DEV_DUMPINFO=y CONFIG_BLK_BIO_DISPATCH_ASYNC=y diff --git a/block/Kconfig b/block/Kconfig index 24c6bb87727d..1b8220766e3a 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -220,6 +220,17 @@ config BLK_BIO_DISPATCH_ASYNC the pressure on the busy CPUs. If unsure, say N. +config BLK_IO_HUNG_TASK_CHECK + bool "Enable io hung task check" + default n + depends on DETECT_HUNG_TASK + help + Enabling this lets the block layer detect hungtask for io, noted + if this is set, hungtask will complain about slow io even if such + io is not hanged. Be careful to enable hungtask panic in this case. + + If unsure, say N. 
+ menu "Partition Types" source "block/partitions/Kconfig" diff --git a/block/bio.c b/block/bio.c index 8c64c93e96c8..123b44ba17cb 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1169,7 +1169,7 @@ int submit_bio_wait(struct bio *bio) submit_bio(bio); /* Prevent hang_check timer from firing at us during very long I/O */ - hang_check = sysctl_hung_task_timeout_secs; + hang_check = sysctl_hung_task_timeout_secs && !io_hung_task_check; if (hang_check) while (!wait_for_completion_io_timeout(&done, hang_check * (HZ/2))) diff --git a/block/blk-core.c b/block/blk-core.c index e3e2659d0673..48e3ee71af02 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -77,6 +77,17 @@ static int __init precise_iostat_setup(char *str) } __setup("precise_iostat=", precise_iostat_setup); +/* + * Noted if this is set, hungtask will complain about slow io even if such io is + * not hanged. Be careful to enable hungtask panic in this case. + */ +#ifdef CONFIG_BLK_IO_HUNG_TASK_CHECK +bool io_hung_task_check = true; +#else +bool io_hung_task_check; +#endif +module_param_named(io_hung_task_check, io_hung_task_check, bool, 0644); + /* * For queue allocation */ @@ -2148,7 +2159,7 @@ void blk_io_schedule(void) /* Prevent hang_check timer from firing at us during very long I/O */ unsigned long timeout = sysctl_hung_task_timeout_secs * HZ / 2; - if (timeout) + if (timeout && !io_hung_task_check) io_schedule_timeout(timeout); else io_schedule(); diff --git a/block/blk-exec.c b/block/blk-exec.c index b2676de4c6a5..497aa52cd51e 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -87,7 +87,7 @@ blk_status_t blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk, blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq); /* Prevent hang_check timer from firing at us during very long I/O */ - hang_check = sysctl_hung_task_timeout_secs; + hang_check = sysctl_hung_task_timeout_secs && !io_hung_task_check; if (hang_check) while (!wait_for_completion_io_timeout(&wait, hang_check * (HZ/2))); 
else diff --git a/block/blk.h b/block/blk.h index c86d27d80ba0..720e82c60f9d 100644 --- a/block/blk.h +++ b/block/blk.h @@ -15,6 +15,7 @@ #define BLK_MAX_TIMEOUT (5 * HZ) extern struct dentry *blk_debugfs_root; +extern bool io_hung_task_check; struct blk_flush_queue { unsigned int flush_pending_idx:1; -- Gitee From 4b7dd16a2dda8f8b67c95dbf9d9f998d901250ad Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Mon, 17 Jun 2024 11:45:33 +0800 Subject: [PATCH 2/9] blk-throttle: add a config to control hierarchical throttle in cgroup v1 hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9QJ1S CVE: NA ------------------------------- The feature that enable default hierarchy for io throttle in cgroup v1 can only be enabled with CONFIG_BLK_DEV_SUPPORT_LEGACY_GLOBAL_LIMIT. Signed-off-by: Yu Kuai --- arch/arm64/configs/openeuler_defconfig | 1 + arch/x86/configs/openeuler_defconfig | 1 + block/Kconfig | 11 +++++++++++ block/blk-throttle.c | 17 ++++++++++++++--- 4 files changed, 27 insertions(+), 3 deletions(-) diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 6c809c2369dc..e9d7d9e85a1c 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -955,6 +955,7 @@ CONFIG_BLK_DEV_INTEGRITY_T10=m CONFIG_BLK_DEV_ZONED=y CONFIG_BLK_DEV_THROTTLING=y # CONFIG_BLK_DEV_THROTTLING_LOW is not set +CONFIG_BLK_DEV_SUPPORT_LEGACY_GLOBAL_LIMIT=y # CONFIG_BLK_CMDLINE_PARSER is not set CONFIG_BLK_WBT=y # CONFIG_BLK_CGROUP_IOLATENCY is not set diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index 7903ccb27e66..39b9252f156a 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -933,6 +933,7 @@ CONFIG_BLK_DEV_INTEGRITY_T10=m CONFIG_BLK_DEV_ZONED=y CONFIG_BLK_DEV_THROTTLING=y # CONFIG_BLK_DEV_THROTTLING_LOW is not set +CONFIG_BLK_DEV_SUPPORT_LEGACY_GLOBAL_LIMIT=y # CONFIG_BLK_CMDLINE_PARSER is not set 
CONFIG_BLK_WBT=y # CONFIG_BLK_CGROUP_IOLATENCY is not set diff --git a/block/Kconfig b/block/Kconfig index 1b8220766e3a..a1026e1b8f4d 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -114,6 +114,17 @@ config BLK_DEV_THROTTLING_LOW Note, this is an experimental interface and could be changed someday. +config BLK_DEV_SUPPORT_LEGACY_GLOBAL_LIMIT + bool "Block layer global limit in cgroup v1" + depends on BLK_DEV_THROTTLING=y + default n + help + blkio subsytem is not under default hierarchy in cgroup v1 by default, + Enabling this will support globlal limit in cgroup v1. + + Note, a cmdline "blkcg_global_limit=1" is still required to enabled this + feature. + config BLK_CMDLINE_PARSER bool "Block device command line partition parser" help diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 050ddf0ad002..e5da0664b16c 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -43,9 +43,15 @@ static struct blkcg_policy blkcg_policy_throtl; /* A workqueue to queue throttle related work */ static struct workqueue_struct *kthrotld_workqueue; +#ifdef BLK_DEV_SUPPORT_LEGACY_GLOBAL_LIMIT /* True if global limit is enabled in cgroup v1 */ static bool global_limit; +static inline bool blkcg_global_limit_enabled(void) +{ + return global_limit; +} + static int __init setup_global_limit(char *str) { if (!strcmp(str, "1") || !strcmp(str, "Y") || !strcmp(str, "y")) @@ -55,7 +61,12 @@ static int __init setup_global_limit(char *str) } __setup("blkcg_global_limit=", setup_global_limit); - +#else +static inline bool blkcg_global_limit_enabled(void) +{ + return false; +} +#endif /* * To implement hierarchical throttling, throtl_grps form a tree and bios * are dispatched upwards level by level until they reach the top and get @@ -571,8 +582,8 @@ static void throtl_pd_init(struct blkg_policy_data *pd) * regardless of the position of the group in the hierarchy. 
*/ sq->parent_sq = &td->service_queue; - if ((cgroup_subsys_on_dfl(io_cgrp_subsys) || global_limit) && - blkg->parent) + if ((cgroup_subsys_on_dfl(io_cgrp_subsys) || + blkcg_global_limit_enabled()) && blkg->parent) sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue; tg->td = td; } -- Gitee From f156a50bf683f37fb3152e64e2e4618aa9f88caf Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Mon, 17 Jun 2024 11:45:34 +0800 Subject: [PATCH 3/9] blk-throttle: fix missing prefix "CONFIG_" hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9QJ1S CVE: NA ------------------------------- The global limit of blk-throttle never takes effect because commit a633be8a278f ("[Huawei] blk-throttle: add a config to control hierarchical throttle in cgroup v1") omitted the "CONFIG_" prefix in its #ifdef macro, so the guarded code is never compiled in. Fixes: a633be8a278f ("[Huawei] blk-throttle: add a config to control hierarchical throttle in cgroup v1") Signed-off-by: Yu Kuai --- block/blk-throttle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index e5da0664b16c..9fa00b8b1ac2 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -43,7 +43,7 @@ static struct blkcg_policy blkcg_policy_throtl; /* A workqueue to queue throttle related work */ static struct workqueue_struct *kthrotld_workqueue; -#ifdef BLK_DEV_SUPPORT_LEGACY_GLOBAL_LIMIT +#ifdef CONFIG_BLK_DEV_SUPPORT_LEGACY_GLOBAL_LIMIT /* True if global limit is enabled in cgroup v1 */ static bool global_limit; -- Gitee From 62e9bb3fa2f3d945d4ead74eb52d5decced58ea1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 11:45:35 +0800 Subject: [PATCH 4/9] block: serialize all debugfs operations using q->debugfs_mutex mainline inclusion from mainline-v5.19-rc4 commit 5cf9c91ba927119fc6606b938b1895bb2459d3bc category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9QJ1S CVE: NA Reference:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=5cf9c91ba927119fc6606b938b1895bb2459d3bc ---------------------------------------- Various places like I/O schedulers or the QOS infrastructure try to register debugfs files on demand, which can race with creating and removing the main queue debugfs directory. Use the existing debugfs_mutex to serialize all debugfs operations that rely on q->debugfs_dir or the directories hanging off it. To make the teardown code a little simpler, declare all debugfs dentry pointers, and not just the main one, unconditionally in blkdev.h. Move debugfs_mutex next to the dentries that it protects and document what it is used for. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220614074827.458955-3-hch@lst.de Signed-off-by: Jens Axboe Conflicts: block/blk-sysfs.c include/linux/blkdev.h kernel/trace/blktrace.c block/blk-mq-debugfs.c block/blk-mq-sched.c Signed-off-by: Yu Kuai --- block/blk-mq-debugfs.c | 25 ++++++++++++++++++++----- block/blk-mq-debugfs.h | 5 ----- block/blk-mq-sched.c | 11 +++++++++++ block/blk-rq-qos.c | 2 ++ block/blk-rq-qos.h | 7 ++++++- block/blk-sysfs.c | 20 ++++++++++---------- include/linux/blkdev.h | 8 ++++---- kernel/trace/blktrace.c | 3 --- 8 files changed, 53 insertions(+), 28 deletions(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index de587a442a90..c8563eff3300 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -894,11 +894,6 @@ void blk_mq_debugfs_register(struct request_queue *q) } } -void blk_mq_debugfs_unregister(struct request_queue *q) -{ - q->sched_debugfs_dir = NULL; -} - static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx) { @@ -932,6 +927,8 @@ void blk_mq_debugfs_register_hctx(struct request_queue *q, void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx) { + if (!hctx->queue->debugfs_dir) + return; debugfs_remove_recursive(hctx->debugfs_dir); hctx->sched_debugfs_dir =
NULL; hctx->debugfs_dir = NULL; @@ -959,6 +956,8 @@ void blk_mq_debugfs_register_sched(struct request_queue *q) { struct elevator_type *e = q->elevator->type; + lockdep_assert_held(&q->debugfs_mutex); + /* * If the parent directory has not been created yet, return, we will be * called again later on and the directory/files will be created then. @@ -976,12 +975,18 @@ void blk_mq_debugfs_register_sched(struct request_queue *q) void blk_mq_debugfs_unregister_sched(struct request_queue *q) { + lockdep_assert_held(&q->debugfs_mutex); + debugfs_remove_recursive(q->sched_debugfs_dir); q->sched_debugfs_dir = NULL; } void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos) { + lockdep_assert_held(&rqos->q->debugfs_mutex); + + if (!rqos->q->debugfs_dir) + return; debugfs_remove_recursive(rqos->debugfs_dir); rqos->debugfs_dir = NULL; } @@ -991,6 +996,8 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) struct request_queue *q = rqos->q; const char *dir_name = rq_qos_id_to_name(rqos->id); + lockdep_assert_held(&q->debugfs_mutex); + if (rqos->debugfs_dir || !rqos->ops->debugfs_attrs) return; @@ -1006,6 +1013,8 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q) { + lockdep_assert_held(&q->debugfs_mutex); + debugfs_remove_recursive(q->rqos_debugfs_dir); q->rqos_debugfs_dir = NULL; } @@ -1015,6 +1024,8 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q, { struct elevator_type *e = q->elevator->type; + lockdep_assert_held(&q->debugfs_mutex); + if (!e->hctx_debugfs_attrs) return; @@ -1026,6 +1037,10 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q, void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx) { + lockdep_assert_held(&hctx->queue->debugfs_mutex); + + if (!hctx->queue->debugfs_dir) + return; debugfs_remove_recursive(hctx->sched_debugfs_dir); hctx->sched_debugfs_dir = NULL; } diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h index 
a68aa6041a10..891c3af6f611 100644 --- a/block/blk-mq-debugfs.h +++ b/block/blk-mq-debugfs.h @@ -19,7 +19,6 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq); int blk_mq_debugfs_rq_show(struct seq_file *m, void *v); void blk_mq_debugfs_register(struct request_queue *q); -void blk_mq_debugfs_unregister(struct request_queue *q); void blk_mq_debugfs_register_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx); void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx); @@ -40,10 +39,6 @@ static inline void blk_mq_debugfs_register(struct request_queue *q) { } -static inline void blk_mq_debugfs_unregister(struct request_queue *q) -{ -} - static inline void blk_mq_debugfs_register_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx) { diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index c92d25b71a72..8620a5d75c62 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -610,7 +610,9 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) if (ret) goto err_free_map_and_rqs; + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_register_sched(q); + mutex_unlock(&q->debugfs_mutex); queue_for_each_hw_ctx(q, hctx, i) { if (e->ops.init_hctx) { @@ -623,7 +625,9 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) return ret; } } + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_register_sched_hctx(q, hctx); + mutex_unlock(&q->debugfs_mutex); } return 0; @@ -664,14 +668,21 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e) unsigned int flags = 0; queue_for_each_hw_ctx(q, hctx, i) { + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_sched_hctx(hctx); + mutex_unlock(&q->debugfs_mutex); + if (e->type->ops.exit_hctx && hctx->sched_data) { e->type->ops.exit_hctx(hctx, i); hctx->sched_data = NULL; } flags = hctx->flags; } + + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_sched(q); + mutex_unlock(&q->debugfs_mutex); + if (e->type->ops.exit_sched) 
e->type->ops.exit_sched(e); blk_mq_sched_tags_teardown(q, flags); diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index e83af7bc7591..249a6f05dd3b 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -294,7 +294,9 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data, void rq_qos_exit(struct request_queue *q) { + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_queue_rqos(q); + mutex_unlock(&q->debugfs_mutex); while (q->rq_qos) { struct rq_qos *rqos = q->rq_qos; diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h index 37c59d7d6ba7..af1c2ca157d7 100644 --- a/block/blk-rq-qos.h +++ b/block/blk-rq-qos.h @@ -118,8 +118,11 @@ static inline int rq_qos_add(struct request_queue *q, struct rq_qos *rqos) blk_mq_unfreeze_queue(q); - if (rqos->ops->debugfs_attrs) + if (rqos->ops->debugfs_attrs) { + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_register_rqos(rqos); + mutex_unlock(&q->debugfs_mutex); + } return 0; ebusy: @@ -150,7 +153,9 @@ static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos) blk_mq_unfreeze_queue(q); + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_rqos(rqos); + mutex_unlock(&q->debugfs_mutex); } typedef bool (acquire_inflight_cb_t)(struct rq_wait *rqw, void *private_data); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 078aace75204..2d5396483c86 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -897,14 +897,13 @@ static void blk_release_queue(struct kobject *kobj) if (queue_is_mq(q)) blk_mq_release(q); - blk_trace_shutdown(q); mutex_lock(&q->debugfs_mutex); + blk_trace_shutdown(q); debugfs_remove_recursive(q->debugfs_dir); + q->debugfs_dir = NULL; + q->sched_debugfs_dir = NULL; mutex_unlock(&q->debugfs_mutex); - if (queue_is_mq(q)) - blk_mq_debugfs_unregister(q); - bioset_exit(&q->bio_split); ida_simple_remove(&blk_queue_ida, q->id); @@ -974,17 +973,18 @@ int blk_register_queue(struct gendisk *disk) goto unlock; } + if (queue_is_mq(q)) + __blk_mq_register_dev(dev, q); + 
mutex_lock(&q->sysfs_lock); + mutex_lock(&q->debugfs_mutex); q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent), blk_debugfs_root); - mutex_unlock(&q->debugfs_mutex); - - if (queue_is_mq(q)) { - __blk_mq_register_dev(dev, q); + if (queue_is_mq(q)) blk_mq_debugfs_register(q); - } - mutex_lock(&q->sysfs_lock); + mutex_unlock(&q->debugfs_mutex); + if (q->elevator) { ret = elv_register_queue(q, false); if (ret) { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f27a0916a75e..06578d0a562f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -551,7 +551,6 @@ struct request_queue { unsigned int sg_timeout; unsigned int sg_reserved_size; int node; - struct mutex debugfs_mutex; #ifdef CONFIG_BLK_DEV_IO_TRACE struct blk_trace __rcu *blk_trace; #endif @@ -599,11 +598,12 @@ struct request_queue { struct bio_set bio_split; struct dentry *debugfs_dir; - -#ifdef CONFIG_BLK_DEBUG_FS struct dentry *sched_debugfs_dir; struct dentry *rqos_debugfs_dir; -#endif + /* + * Serializes all debugfs metadata operations using the above dentries. 
+ */ + struct mutex debugfs_mutex; bool mq_sysfs_init_done; diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index bc98d5e4e033..246ed13b49ca 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -773,12 +773,9 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) **/ void blk_trace_shutdown(struct request_queue *q) { - mutex_lock(&q->debugfs_mutex); if (rcu_dereference_protected(q->blk_trace, lockdep_is_held(&q->debugfs_mutex))) __blk_trace_remove(q); - - mutex_unlock(&q->debugfs_mutex); } #ifdef CONFIG_BLK_CGROUP -- Gitee From 47a21ce48460335d68827ec7538ba960dcb7a54a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 11:45:36 +0800 Subject: [PATCH 5/9] block: remove per-disk debugfs files in blk_unregister_queue mainline inclusion from mainline-v5.19-rc4 commit 99d055b4fd4bbb309c6cdb51a0d420669f777944 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9QJ1S CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=99d055b4fd4bbb309c6cdb51a0d420669f777944 ---------------------------------------- The block debugfs files are created in blk_register_queue, which is called by add_disk and use a naming scheme based on the disk_name. After del_gendisk returns that name can be reused and thus we must not leave these debugfs files around, otherwise the kernel is unhappy and spews messages like: Directory XXXXX with parent 'block' already present! and the newly created devices will not have working debugfs files. Move the unregistration to blk_unregister_queue instead (which matches the sysfs unregistration) to make sure the debugfs life time rules match those of the disk name. As part of the move also make sure the whole debugfs unregistration is inside a single debugfs_mutex critical section. 
Note that this breaks blktests block/002, which checks that the debugfs directory has not been removed while blktests is running, but that particular check should simply be removed from the test case. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220614074827.458955-4-hch@lst.de Signed-off-by: Jens Axboe Signed-off-by: Yu Kuai --- block/blk-mq-debugfs.c | 8 -------- block/blk-mq-debugfs.h | 5 ----- block/blk-rq-qos.c | 4 ---- block/blk-sysfs.c | 16 ++++++++-------- 4 files changed, 8 insertions(+), 25 deletions(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index c8563eff3300..9d15022dea10 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -1011,14 +1011,6 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) debugfs_create_files(rqos->debugfs_dir, rqos, rqos->ops->debugfs_attrs); } -void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q) -{ - lockdep_assert_held(&q->debugfs_mutex); - - debugfs_remove_recursive(q->rqos_debugfs_dir); - q->rqos_debugfs_dir = NULL; -} - void blk_mq_debugfs_register_sched_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx) { diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h index 891c3af6f611..f6898560b1f3 100644 --- a/block/blk-mq-debugfs.h +++ b/block/blk-mq-debugfs.h @@ -33,7 +33,6 @@ void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx); void blk_mq_debugfs_register_rqos(struct rq_qos *rqos); void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos); -void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q); #else static inline void blk_mq_debugfs_register(struct request_queue *q) { @@ -80,10 +79,6 @@ static inline void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) static inline void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos) { } - -static inline void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q) -{ -} #endif #ifdef CONFIG_BLK_DEBUG_FS_ZONED diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c 
index 249a6f05dd3b..d3a75693adbf 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -294,10 +294,6 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data, void rq_qos_exit(struct request_queue *q) { - mutex_lock(&q->debugfs_mutex); - blk_mq_debugfs_unregister_queue_rqos(q); - mutex_unlock(&q->debugfs_mutex); - while (q->rq_qos) { struct rq_qos *rqos = q->rq_qos; q->rq_qos = rqos->next; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 2d5396483c86..01b3d82b62ae 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -897,13 +897,6 @@ static void blk_release_queue(struct kobject *kobj) if (queue_is_mq(q)) blk_mq_release(q); - mutex_lock(&q->debugfs_mutex); - blk_trace_shutdown(q); - debugfs_remove_recursive(q->debugfs_dir); - q->debugfs_dir = NULL; - q->sched_debugfs_dir = NULL; - mutex_unlock(&q->debugfs_mutex); - bioset_exit(&q->bio_split); ida_simple_remove(&blk_queue_ida, q->id); @@ -1066,8 +1059,15 @@ void blk_unregister_queue(struct gendisk *disk) /* Now that we've deleted all child objects, we can delete the queue. */ kobject_uevent(&q->kobj, KOBJ_REMOVE); kobject_del(&q->kobj); - mutex_unlock(&q->sysfs_dir_lock); + mutex_lock(&q->debugfs_mutex); + blk_trace_shutdown(q); + debugfs_remove_recursive(q->debugfs_dir); + q->debugfs_dir = NULL; + q->sched_debugfs_dir = NULL; + q->rqos_debugfs_dir = NULL; + mutex_unlock(&q->debugfs_mutex); + kobject_put(&disk_to_dev(disk)->kobj); } -- Gitee From fbb132a7ae7879d62d75bf9f0861a375b07bd365 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Mon, 17 Jun 2024 11:45:37 +0800 Subject: [PATCH 6/9] block: protect blk_mq_debugfs_register/unregister_hctx() with 'debugfs_mutex' hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9QJ1S CVE: NA ---------------------------------------- All operations to create and remove files under 'q->debugfs_dir' should be protected by 'q->debugfs_mutex'. 
Signed-off-by: Yu Kuai --- block/blk-mq-debugfs.c | 8 ++++++++ block/blk-mq.c | 2 ++ 2 files changed, 10 insertions(+) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 9d15022dea10..ab4c066d43c2 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -913,6 +913,8 @@ void blk_mq_debugfs_register_hctx(struct request_queue *q, char name[20]; int i; + lockdep_assert_held(&q->debugfs_mutex); + if (!q->debugfs_dir) return; @@ -927,6 +929,8 @@ void blk_mq_debugfs_register_hctx(struct request_queue *q, void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx) { + lockdep_assert_held(&hctx->queue->debugfs_mutex); + if (!hctx->queue->debugfs_dir) return; debugfs_remove_recursive(hctx->debugfs_dir); @@ -939,8 +943,10 @@ void blk_mq_debugfs_register_hctxs(struct request_queue *q) struct blk_mq_hw_ctx *hctx; int i; + mutex_lock(&q->debugfs_mutex); queue_for_each_hw_ctx(q, hctx, i) blk_mq_debugfs_register_hctx(q, hctx); + mutex_unlock(&q->debugfs_mutex); } void blk_mq_debugfs_unregister_hctxs(struct request_queue *q) @@ -948,8 +954,10 @@ void blk_mq_debugfs_unregister_hctxs(struct request_queue *q) struct blk_mq_hw_ctx *hctx; int i; + mutex_lock(&q->debugfs_mutex); queue_for_each_hw_ctx(q, hctx, i) blk_mq_debugfs_unregister_hctx(hctx); + mutex_unlock(&q->debugfs_mutex); } void blk_mq_debugfs_register_sched(struct request_queue *q) diff --git a/block/blk-mq.c b/block/blk-mq.c index a28957dfb757..5670dfeac85a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2873,7 +2873,9 @@ static void blk_mq_exit_hw_queues(struct request_queue *q, queue_for_each_hw_ctx(q, hctx, i) { if (i == nr_queue) break; + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_hctx(hctx); + mutex_unlock(&q->debugfs_mutex); blk_mq_exit_hctx(q, set, hctx, i); } } -- Gitee From e5ea13c9c99bc17ce804c2207bb2129ee3dcde09 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Mon, 17 Jun 2024 11:45:38 +0800 Subject: [PATCH 7/9] block: shutdown blktrace in blk_release_queue() hulk 
inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9QJ1S CVE: NA ---------------------------------------- Commit 99d055b4fd4b ("block: remove per-disk debugfs files in blk_unregister_queue") moved blk_trace_shutdown() from blk_release_queue() to blk_unregister_queue(). However, blktrace can still be enabled through ioctl after blk_unregister_queue(), and blktrace will be leaked in this case. Fix the problem by calling blk_trace_shutdown() in blk_release_queue(). Signed-off-by: Yu Kuai --- block/blk-sysfs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 01b3d82b62ae..e908363740b6 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -897,6 +897,10 @@ static void blk_release_queue(struct kobject *kobj) if (queue_is_mq(q)) blk_mq_release(q); + mutex_lock(&q->debugfs_mutex); + blk_trace_shutdown(q); + mutex_unlock(&q->debugfs_mutex); + bioset_exit(&q->bio_split); ida_simple_remove(&blk_queue_ida, q->id); -- Gitee From fefc7d41919b4707cd0d3580f44ca0439b2d71fd Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Mon, 17 Jun 2024 11:45:39 +0800 Subject: [PATCH 8/9] block: support enable/disable blk-mq debugfs dynamically hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9QJ1S CVE: NA ---------------------------------------- After a disk is created, debugfs inodes and dentries will be created together with it, and the memory used for debugfs can't be freed until disk removal. The number of debugfs inodes and dentries depends on the number of cpus and hctxs. For example, testing on a 128-core environment, with default module parameters, each loop device will cost 1679KB memory, and debugfs will cost 336KB (20%). The memory cost of debugfs for a single disk seems small, but if a big machine contains thousands of disks, the total cost can reach the GB range. This memory overhead can be avoided by disabling CONFIG_BLK_DEBUG_FS.
This patch add a disk level switch that can enable/disable debugfs dynamically, so that user can disable debugfs if they care about the memory overhead, in the meantime, debugfs can be enabled again in demand. Signed-off-by: Yu Kuai --- arch/arm64/configs/openeuler_defconfig | 1 + arch/powerpc/configs/openeuler_defconfig | 1 + arch/x86/configs/openeuler_defconfig | 1 + block/Kconfig | 15 +++++ block/blk-mq-debugfs.c | 74 +++++++++++++++++++++--- block/blk-mq-debugfs.h | 5 ++ block/blk-mq.c | 9 +++ block/blk-sysfs.c | 54 +++++++++++++++++ include/linux/blkdev.h | 3 + 9 files changed, 156 insertions(+), 7 deletions(-) diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index e9d7d9e85a1c..c956adb2fb57 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -963,6 +963,7 @@ CONFIG_BLK_CGROUP_IOCOST=y CONFIG_BLK_WBT_MQ=y CONFIG_BLK_DEBUG_FS=y CONFIG_BLK_DEBUG_FS_ZONED=y +CONFIG_BLK_DEBUG_FS_SWITCH=y # CONFIG_BLK_SED_OPAL is not set # CONFIG_BLK_INLINE_ENCRYPTION is not set # CONFIG_BLK_IO_HUNG_TASK_CHEC is not set diff --git a/arch/powerpc/configs/openeuler_defconfig b/arch/powerpc/configs/openeuler_defconfig index fc0ca355dcaa..c285107c6d97 100644 --- a/arch/powerpc/configs/openeuler_defconfig +++ b/arch/powerpc/configs/openeuler_defconfig @@ -684,6 +684,7 @@ CONFIG_BLK_CGROUP_IOCOST=y CONFIG_BLK_WBT_MQ=y CONFIG_BLK_DEBUG_FS=y CONFIG_BLK_DEBUG_FS_ZONED=y +CONFIG_BLK_DEBUG_FS_SWITCH=y CONFIG_BLK_SED_OPAL=y CONFIG_BLK_INLINE_ENCRYPTION=y # CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK is not set diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index 39b9252f156a..e657e4cfdbf9 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -941,6 +941,7 @@ CONFIG_BLK_CGROUP_IOCOST=y CONFIG_BLK_WBT_MQ=y CONFIG_BLK_DEBUG_FS=y CONFIG_BLK_DEBUG_FS_ZONED=y +CONFIG_BLK_DEBUG_FS_SWITCH=y # CONFIG_BLK_SED_OPAL is not set # 
CONFIG_BLK_INLINE_ENCRYPTION is not set # CONFIG_BLK_IO_HUNG_TASK_CHECK is not set diff --git a/block/Kconfig b/block/Kconfig index a1026e1b8f4d..d01e418a55d9 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -189,6 +189,21 @@ config BLK_DEBUG_FS_ZONED bool default BLK_DEBUG_FS && BLK_DEV_ZONED +config BLK_DEBUG_FS_SWITCH + bool "Disk level switch to enable/disable debugfs dynamically" + depends on BLK_DEBUG_FS + depends on 64BIT + default y + help + After a disk is created, debugfs inode and dentry will be created + together, and the memory used for debugfs can't be freed until disk + removal. + + Enabling this will add a disk level switch that can enable/disable + debugfs dynamically, so that user can disable debugfs if they care + about the memory overhead, in the meantime, debugfs can be enabled + again in demand. + config BLK_SED_OPAL bool "Logic for interfacing with Opal enabled SEDs" help diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index ab4c066d43c2..39f5604d8359 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -132,6 +132,9 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(HCTX_ACTIVE), QUEUE_FLAG_NAME(NOWAIT), QUEUE_FLAG_NAME(DISPATCH_ASYNC), +#ifdef CONFIG_BLK_DEBUG_FS_SWITCH + QUEUE_FLAG_NAME(DEBUGFS), +#endif }; #undef QUEUE_FLAG_NAME @@ -861,11 +864,27 @@ static void debugfs_create_files(struct dentry *parent, void *data, (void *)attr, &blk_mq_debugfs_fops); } +static bool blk_mq_debugfs_enabled(struct request_queue *q) +{ + if (IS_ERR_OR_NULL(q->debugfs_dir)) + return false; + +#ifdef CONFIG_BLK_DEBUG_FS_SWITCH + if (!test_bit(QUEUE_FLAG_DEBUGFS, &q->queue_flags)) + return false; +#endif + + return true; +} + void blk_mq_debugfs_register(struct request_queue *q) { struct blk_mq_hw_ctx *hctx; int i; + if (!blk_mq_debugfs_enabled(q)) + return; + debugfs_create_files(q->debugfs_dir, q, blk_mq_debugfs_queue_attrs); /* @@ -894,6 +913,46 @@ void blk_mq_debugfs_register(struct request_queue *q) } } 
+static void debugfs_remove_files(struct dentry *parent, + const struct blk_mq_debugfs_attr *attr) +{ + if (IS_ERR_OR_NULL(parent)) + return; + + for (; attr->name; attr++) + debugfs_lookup_and_remove(attr->name, parent); +} + +void blk_mq_debugfs_unregister(struct request_queue *q) +{ + struct blk_mq_hw_ctx *hctx; + unsigned long i; + + spin_lock(&q->queue_lock); + if (q->rq_qos) { + struct rq_qos *rqos = q->rq_qos; + + while (rqos) { + rqos->debugfs_dir = NULL; + rqos = rqos->next; + } + } + spin_unlock(&q->queue_lock); + + debugfs_remove_recursive(q->rqos_debugfs_dir); + q->rqos_debugfs_dir = NULL; + + queue_for_each_hw_ctx(q, hctx, i) { + if (hctx->debugfs_dir) + blk_mq_debugfs_unregister_hctx(hctx); + } + + if (q->sched_debugfs_dir) + blk_mq_debugfs_unregister_sched(q); + + debugfs_remove_files(q->debugfs_dir, blk_mq_debugfs_queue_attrs); +} + static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx) { @@ -915,7 +974,7 @@ void blk_mq_debugfs_register_hctx(struct request_queue *q, lockdep_assert_held(&q->debugfs_mutex); - if (!q->debugfs_dir) + if (!blk_mq_debugfs_enabled(q)) return; snprintf(name, sizeof(name), "hctx%u", hctx->queue_num); @@ -931,7 +990,7 @@ void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx) { lockdep_assert_held(&hctx->queue->debugfs_mutex); - if (!hctx->queue->debugfs_dir) + if (!blk_mq_debugfs_enabled(hctx->queue)) return; debugfs_remove_recursive(hctx->debugfs_dir); hctx->sched_debugfs_dir = NULL; @@ -970,7 +1029,7 @@ void blk_mq_debugfs_register_sched(struct request_queue *q) * If the parent directory has not been created yet, return, we will be * called again later on and the directory/files will be created then. 
*/ - if (!q->debugfs_dir) + if (!blk_mq_debugfs_enabled(q)) return; if (!e->queue_debugfs_attrs) @@ -993,7 +1052,7 @@ void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos) { lockdep_assert_held(&rqos->q->debugfs_mutex); - if (!rqos->q->debugfs_dir) + if (!blk_mq_debugfs_enabled(rqos->q)) return; debugfs_remove_recursive(rqos->debugfs_dir); rqos->debugfs_dir = NULL; @@ -1006,7 +1065,8 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) lockdep_assert_held(&q->debugfs_mutex); - if (rqos->debugfs_dir || !rqos->ops->debugfs_attrs) + if (rqos->debugfs_dir || !rqos->ops->debugfs_attrs || + !blk_mq_debugfs_enabled(q)) return; if (!q->rqos_debugfs_dir) @@ -1026,7 +1086,7 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q, lockdep_assert_held(&q->debugfs_mutex); - if (!e->hctx_debugfs_attrs) + if (!e->hctx_debugfs_attrs || !blk_mq_debugfs_enabled(q)) return; hctx->sched_debugfs_dir = debugfs_create_dir("sched", @@ -1039,7 +1099,7 @@ void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx) { lockdep_assert_held(&hctx->queue->debugfs_mutex); - if (!hctx->queue->debugfs_dir) + if (!blk_mq_debugfs_enabled(hctx->queue)) return; debugfs_remove_recursive(hctx->sched_debugfs_dir); hctx->sched_debugfs_dir = NULL; diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h index f6898560b1f3..3a2c43a9a0ae 100644 --- a/block/blk-mq-debugfs.h +++ b/block/blk-mq-debugfs.h @@ -19,6 +19,7 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq); int blk_mq_debugfs_rq_show(struct seq_file *m, void *v); void blk_mq_debugfs_register(struct request_queue *q); +void blk_mq_debugfs_unregister(struct request_queue *q); void blk_mq_debugfs_register_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx); void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx); @@ -38,6 +39,10 @@ static inline void blk_mq_debugfs_register(struct request_queue *q) { } +static inline void blk_mq_debugfs_unregister(struct request_queue *q) +{ +} + static 
inline void blk_mq_debugfs_register_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx) { diff --git a/block/blk-mq.c b/block/blk-mq.c index 5670dfeac85a..407098e8f210 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -47,6 +47,11 @@ bool mq_unfair_dtag = true; module_param_named(unfair_dtag, mq_unfair_dtag, bool, 0444); +#ifdef CONFIG_BLK_DEBUG_FS_SWITCH +bool enable_debugfs = true; +module_param_named(enable_debugfs, enable_debugfs, bool, 0444); +#endif + static DEFINE_PER_CPU(struct list_head, blk_cpu_done); static void blk_mq_poll_stats_start(struct request_queue *q); @@ -3540,6 +3545,10 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, q->tag_set = set; q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; +#ifdef CONFIG_BLK_DEBUG_FS_SWITCH + if (enable_debugfs) + blk_queue_flag_set(QUEUE_FLAG_DEBUGFS, q); +#endif if (set->nr_maps > HCTX_TYPE_POLL && set->map[HCTX_TYPE_POLL].nr_queues) blk_queue_flag_set(QUEUE_FLAG_POLL, q); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index e908363740b6..6c72dec47e82 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -702,6 +702,57 @@ static struct queue_sysfs_entry queue_dispatch_async_cpus_entry = { QUEUE_RW_ENTRY(queue_dispatch_async, "dispatch_async"); #endif +#ifdef CONFIG_BLK_DEBUG_FS_SWITCH +static ssize_t queue_debugfs_show(struct request_queue *q, char *page) +{ + return queue_var_show(test_bit(QUEUE_FLAG_DEBUGFS, &q->queue_flags), + page); +} + +static ssize_t queue_debugfs_store(struct request_queue *q, const char *page, + size_t count) +{ + unsigned long val; + ssize_t ret; + bool enabled; + int err; + + if (!queue_is_mq(q)) + return count; + + if (!blk_queue_registered(q)) + return -ENODEV; + + ret = queue_var_store(&val, page, count); + if (ret < 0) + return ret; + + err = blk_queue_enter(q, 0); + if (err) + return err; + + mutex_lock(&q->debugfs_mutex); + enabled = test_bit(QUEUE_FLAG_DEBUGFS, &q->queue_flags); + if (!!val == enabled) + goto unlock; + + if (val) { + 
blk_queue_flag_set(QUEUE_FLAG_DEBUGFS, q); + blk_mq_debugfs_register(q); + } else { + blk_mq_debugfs_unregister(q); + blk_queue_flag_clear(QUEUE_FLAG_DEBUGFS, q); + } + +unlock: + mutex_unlock(&q->debugfs_mutex); + blk_queue_exit(q); + return ret; +} + +QUEUE_RW_ENTRY(queue_debugfs, "debugfs"); +#endif + static struct attribute *queue_attrs[] = { &queue_requests_entry.attr, &queue_ra_entry.attr, @@ -752,6 +803,9 @@ static struct attribute *queue_attrs[] = { #endif #ifdef CONFIG_BLK_DEV_THROTTLING_LOW &blk_throtl_sample_time_entry.attr, +#endif +#ifdef CONFIG_BLK_DEBUG_FS_SWITCH + &queue_debugfs_entry.attr, #endif NULL, }; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 06578d0a562f..4b3f35eafc60 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -663,6 +663,9 @@ struct request_queue { #define QUEUE_FLAG_HCTX_WAIT 30 /* support to dispatch bio asynchronously */ #define QUEUE_FLAG_DISPATCH_ASYNC 31 +#ifdef CONFIG_BLK_DEBUG_FS_SWITCH +#define QUEUE_FLAG_DEBUGFS 32 /* supports debugfs */ +#endif #define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_SAME_COMP) | \ -- Gitee From d760c7506c1a0179b9bcdb6fb16669b289e6cde3 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Mon, 17 Jun 2024 11:45:40 +0800 Subject: [PATCH 9/9] block: fix kabi broken in struct request_queue hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9QJ1S CVE: NA ---------------------------------------- Move debugfs_mutex back to its old position to prevent kABI breakage.
Signed-off-by: Yu Kuai --- block/blk-sysfs.c | 4 ++-- include/linux/blkdev.h | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 6c72dec47e82..4714e5363202 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -1121,10 +1121,10 @@ void blk_unregister_queue(struct gendisk *disk) mutex_lock(&q->debugfs_mutex); blk_trace_shutdown(q); + if (queue_is_mq(q)) + blk_mq_debugfs_unregister(q); debugfs_remove_recursive(q->debugfs_dir); q->debugfs_dir = NULL; - q->sched_debugfs_dir = NULL; - q->rqos_debugfs_dir = NULL; mutex_unlock(&q->debugfs_mutex); kobject_put(&disk_to_dev(disk)->kobj); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4b3f35eafc60..8b32899dbf0d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -551,6 +551,10 @@ struct request_queue { unsigned int sg_timeout; unsigned int sg_reserved_size; int node; + /* + * Serializes all debugfs metadata operations using the debugfs dentries below. + */ + struct mutex debugfs_mutex; #ifdef CONFIG_BLK_DEV_IO_TRACE struct blk_trace __rcu *blk_trace; #endif @@ -598,12 +602,10 @@ struct request_queue { struct bio_set bio_split; struct dentry *debugfs_dir; +#ifdef CONFIG_BLK_DEBUG_FS struct dentry *sched_debugfs_dir; struct dentry *rqos_debugfs_dir; - /* - * Serializes all debugfs metadata operations using the above dentries. - */ - struct mutex debugfs_mutex; +#endif bool mq_sysfs_init_done; -- Gitee