From 5b624e35bc4d5657520cf07f73cb808710dda6fe Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 22 May 2024 03:00:18 +0800 Subject: [PATCH 01/11] block: add a switch to enable hungtask check for io hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9QJ1S CVE: NA -------------------------------- Because slow io or the io timeout handler can take a long time, the hungtask check is disabled for io in order to prevent false positive warnings. However, this also causes the kernel to be silent if io really hangs. It's quite complicated to distinguish whether io is slow or hung, so this patch adds a switch to enable the hungtask check. The switch is enabled by default, and can be turned off by: 1) disable config BLK_IO_HUNG_TASK_CHECK 2) add blk_core.io_hung_task_check=0 to boot cmd 3) echo 0 > /sys/module/blk_core/parameters/io_hung_task_check Note that users have to be careful when using this with hungtask panic enabled, since there could be false positive hungtask warnings. Signed-off-by: Yu Kuai --- arch/arm64/configs/openeuler_defconfig | 1 + arch/powerpc/configs/openeuler_defconfig | 1 + arch/x86/configs/openeuler_defconfig | 1 + block/Kconfig | 9 +++++++++ block/bio.c | 2 +- block/blk-core.c | 13 ++++++++++++- block/blk-exec.c | 2 +- block/blk.h | 1 + 8 files changed, 27 insertions(+), 3 deletions(-) diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 34061d75a0d21..576b61de82a10 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -951,6 +951,7 @@ CONFIG_BLK_DEBUG_FS=y CONFIG_BLK_DEBUG_FS_ZONED=y # CONFIG_BLK_SED_OPAL is not set # CONFIG_BLK_INLINE_ENCRYPTION is not set +CONFIG_BLK_IO_HUNG_TASK_CHECK=y CONFIG_BLK_DEV_DUMPINFO=y # diff --git a/arch/powerpc/configs/openeuler_defconfig b/arch/powerpc/configs/openeuler_defconfig index 74118e43ff05f..a144065cb5a5e 100644 --- a/arch/powerpc/configs/openeuler_defconfig +++ b/arch/powerpc/configs/openeuler_defconfig @@ -687,6 +687,7
@@ CONFIG_BLK_DEBUG_FS_ZONED=y CONFIG_BLK_SED_OPAL=y CONFIG_BLK_INLINE_ENCRYPTION=y # CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK is not set +CONFIG_BLK_IO_HUNG_TASK_CHECK=y # # Partition Types diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index 1835f38f2947c..1a5e6e3a4e78f 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -932,6 +932,7 @@ CONFIG_BLK_DEBUG_FS=y CONFIG_BLK_DEBUG_FS_ZONED=y # CONFIG_BLK_SED_OPAL is not set # CONFIG_BLK_INLINE_ENCRYPTION is not set +CONFIG_BLK_IO_HUNG_TASK_CHECK=y CONFIG_BLK_DEV_DUMPINFO=y # diff --git a/block/Kconfig b/block/Kconfig index e5c965f1ea258..e213002d5e73e 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -209,6 +209,15 @@ config BLK_DEV_DUMPINFO Dump info when open an write opened block device exclusively or open an exclusive opened device for write +config BLK_IO_HUNG_TASK_CHECK + bool "Enable io hung task check" + depends on DETECT_HUNG_TASK + default y + help + Enabling this lets the block layer detect hungtask for io, noted + if this is set, hungtask will complain about slow io even if such + io is not hanged. Be careful to enable hungtask panic in this case. 
+ menu "Partition Types" source "block/partitions/Kconfig" diff --git a/block/bio.c b/block/bio.c index 8c64c93e96c8a..123b44ba17cbf 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1169,7 +1169,7 @@ int submit_bio_wait(struct bio *bio) submit_bio(bio); /* Prevent hang_check timer from firing at us during very long I/O */ - hang_check = sysctl_hung_task_timeout_secs; + hang_check = sysctl_hung_task_timeout_secs && !io_hung_task_check; if (hang_check) while (!wait_for_completion_io_timeout(&done, hang_check * (HZ/2))) diff --git a/block/blk-core.c b/block/blk-core.c index f91f8e8be482d..e291ed8ee01e2 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -77,6 +77,17 @@ static int __init precise_iostat_setup(char *str) } __setup("precise_iostat=", precise_iostat_setup); +/* + * Noted if this is set, hungtask will complain about slow io even if such io is + * not hanged. Be careful to enable hungtask panic in this case. + */ +#ifdef CONFIG_BLK_IO_HUNG_TASK_CHECK +bool io_hung_task_check = true; +#else +bool io_hung_task_check; +#endif +module_param_named(io_hung_task_check, io_hung_task_check, bool, 0644); + /* * For queue allocation */ @@ -1879,7 +1890,7 @@ void blk_io_schedule(void) /* Prevent hang_check timer from firing at us during very long I/O */ unsigned long timeout = sysctl_hung_task_timeout_secs * HZ / 2; - if (timeout) + if (timeout && !io_hung_task_check) io_schedule_timeout(timeout); else io_schedule(); diff --git a/block/blk-exec.c b/block/blk-exec.c index b2676de4c6a57..497aa52cd51ec 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -87,7 +87,7 @@ blk_status_t blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk, blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq); /* Prevent hang_check timer from firing at us during very long I/O */ - hang_check = sysctl_hung_task_timeout_secs; + hang_check = sysctl_hung_task_timeout_secs && !io_hung_task_check; if (hang_check) while (!wait_for_completion_io_timeout(&wait, hang_check * 
(HZ/2))); else diff --git a/block/blk.h b/block/blk.h index 4bbcc971d4f73..2e487673b3a75 100644 --- a/block/blk.h +++ b/block/blk.h @@ -15,6 +15,7 @@ #define BLK_MAX_TIMEOUT (5 * HZ) extern struct dentry *blk_debugfs_root; +extern bool io_hung_task_check; struct blk_flush_queue { unsigned int flush_pending_idx:1; -- Gitee From 96297cb3a2834802e7ccf24fb3d1bc8ab10acf8a Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 22 May 2024 03:00:19 +0800 Subject: [PATCH 02/11] block: disable BLK_IO_HUNG_TASK_CHECK by default hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9QJ1S CVE: NA -------------------------------- Enable this will cause new hungtask warnings, hence disable it by default. Signed-off-by: Yu Kuai --- arch/arm64/configs/openeuler_defconfig | 2 +- arch/powerpc/configs/openeuler_defconfig | 2 +- arch/x86/configs/openeuler_defconfig | 2 +- block/Kconfig | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 576b61de82a10..e360b836280e9 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -951,7 +951,7 @@ CONFIG_BLK_DEBUG_FS=y CONFIG_BLK_DEBUG_FS_ZONED=y # CONFIG_BLK_SED_OPAL is not set # CONFIG_BLK_INLINE_ENCRYPTION is not set -CONFIG_BLK_IO_HUNG_TASK_CHECK=y +# CONFIG_BLK_IO_HUNG_TASK_CHECK is not set CONFIG_BLK_DEV_DUMPINFO=y # diff --git a/arch/powerpc/configs/openeuler_defconfig b/arch/powerpc/configs/openeuler_defconfig index a144065cb5a5e..68f642dd09f2d 100644 --- a/arch/powerpc/configs/openeuler_defconfig +++ b/arch/powerpc/configs/openeuler_defconfig @@ -687,7 +687,7 @@ CONFIG_BLK_DEBUG_FS_ZONED=y CONFIG_BLK_SED_OPAL=y CONFIG_BLK_INLINE_ENCRYPTION=y # CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK is not set -CONFIG_BLK_IO_HUNG_TASK_CHECK=y +# CONFIG_BLK_IO_HUNG_TASK_CHECK is not set # # Partition Types diff --git a/arch/x86/configs/openeuler_defconfig 
b/arch/x86/configs/openeuler_defconfig index 1a5e6e3a4e78f..da47a010813d6 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -932,7 +932,7 @@ CONFIG_BLK_DEBUG_FS=y CONFIG_BLK_DEBUG_FS_ZONED=y # CONFIG_BLK_SED_OPAL is not set # CONFIG_BLK_INLINE_ENCRYPTION is not set -CONFIG_BLK_IO_HUNG_TASK_CHECK=y +# CONFIG_BLK_IO_HUNG_TASK_CHECK is not set CONFIG_BLK_DEV_DUMPINFO=y # diff --git a/block/Kconfig b/block/Kconfig index e213002d5e73e..1e733b2b14d81 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -212,7 +212,7 @@ config BLK_DEV_DUMPINFO config BLK_IO_HUNG_TASK_CHECK bool "Enable io hung task check" depends on DETECT_HUNG_TASK - default y + default n help Enabling this lets the block layer detect hungtask for io, noted if this is set, hungtask will complain about slow io even if such -- Gitee From ced270bb7b0eeefeeb053d1dfdeb966b5fbbfaa1 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 22 May 2024 03:00:20 +0800 Subject: [PATCH 03/11] blk-throttle: add a config to control hierarchical throttle in cgroup v1 hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9QJ1S CVE:NA ------------------------------- The feature that enable default hierarchy for io throttle in cgroup v1 can only be enabled with CONFIG_BLK_DEV_SUPPORT_LEGACY_GLOBAL_LIMIT. 
Signed-off-by: Yu Kuai --- arch/arm64/configs/openeuler_defconfig | 1 + arch/x86/configs/openeuler_defconfig | 1 + block/Kconfig | 11 +++++++++++ block/blk-throttle.c | 17 ++++++++++++++--- 4 files changed, 27 insertions(+), 3 deletions(-) diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index e360b836280e9..10c941186dbe5 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -942,6 +942,7 @@ CONFIG_BLK_DEV_INTEGRITY_T10=m CONFIG_BLK_DEV_ZONED=y CONFIG_BLK_DEV_THROTTLING=y # CONFIG_BLK_DEV_THROTTLING_LOW is not set +CONFIG_BLK_DEV_SUPPORT_LEGACY_GLOBAL_LIMIT=y # CONFIG_BLK_CMDLINE_PARSER is not set CONFIG_BLK_WBT=y # CONFIG_BLK_CGROUP_IOLATENCY is not set diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index da47a010813d6..0b09969e84689 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -923,6 +923,7 @@ CONFIG_BLK_DEV_INTEGRITY_T10=m CONFIG_BLK_DEV_ZONED=y CONFIG_BLK_DEV_THROTTLING=y # CONFIG_BLK_DEV_THROTTLING_LOW is not set +CONFIG_BLK_DEV_SUPPORT_LEGACY_GLOBAL_LIMIT=y # CONFIG_BLK_CMDLINE_PARSER is not set CONFIG_BLK_WBT=y # CONFIG_BLK_CGROUP_IOLATENCY is not set diff --git a/block/Kconfig b/block/Kconfig index 1e733b2b14d81..498d18f1d10ed 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -114,6 +114,17 @@ config BLK_DEV_THROTTLING_LOW Note, this is an experimental interface and could be changed someday. +config BLK_DEV_SUPPORT_LEGACY_GLOBAL_LIMIT + bool "Block layer global limit in cgroup v1" + depends on BLK_DEV_THROTTLING=y + default n + help + blkio subsytem is not under default hierarchy in cgroup v1 by default, + Enabling this will support globlal limit in cgroup v1. + + Note, a cmdline "blkcg_global_limit=1" is still required to enabled this + feature. 
+ config BLK_CMDLINE_PARSER bool "Block device command line partition parser" help diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 050ddf0ad0027..e5da0664b16c7 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -43,9 +43,15 @@ static struct blkcg_policy blkcg_policy_throtl; /* A workqueue to queue throttle related work */ static struct workqueue_struct *kthrotld_workqueue; +#ifdef BLK_DEV_SUPPORT_LEGACY_GLOBAL_LIMIT /* True if global limit is enabled in cgroup v1 */ static bool global_limit; +static inline bool blkcg_global_limit_enabled(void) +{ + return global_limit; +} + static int __init setup_global_limit(char *str) { if (!strcmp(str, "1") || !strcmp(str, "Y") || !strcmp(str, "y")) @@ -55,7 +61,12 @@ static int __init setup_global_limit(char *str) } __setup("blkcg_global_limit=", setup_global_limit); - +#else +static inline bool blkcg_global_limit_enabled(void) +{ + return false; +} +#endif /* * To implement hierarchical throttling, throtl_grps form a tree and bios * are dispatched upwards level by level until they reach the top and get @@ -571,8 +582,8 @@ static void throtl_pd_init(struct blkg_policy_data *pd) * regardless of the position of the group in the hierarchy. 
*/ sq->parent_sq = &td->service_queue; - if ((cgroup_subsys_on_dfl(io_cgrp_subsys) || global_limit) && - blkg->parent) + if ((cgroup_subsys_on_dfl(io_cgrp_subsys) || + blkcg_global_limit_enabled()) && blkg->parent) sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue; tg->td = td; } -- Gitee From 9e89811376f238344a0c8d58e1d8a5b688ace9a3 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 22 May 2024 03:00:21 +0800 Subject: [PATCH 04/11] blk-throttle: fix missing prefix "CONFIG_" hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9QJ1S CVE:NA ------------------------------- The global limit of blk-throttle never takes effect because commit a633be8a278f ("[Huawei] blk-throttle: add a config to control hierarchical throttle in cgroup v1") omitted the prefix "CONFIG_" in the ifdef macro, so the code guarded by it is never compiled in. Fixes: a633be8a278f ("[Huawei] blk-throttle: add a config to control hierarchical throttle in cgroup v1") Signed-off-by: Yu Kuai --- block/blk-throttle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index e5da0664b16c7..9fa00b8b1ac26 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -43,7 +43,7 @@ static struct blkcg_policy blkcg_policy_throtl; /* A workqueue to queue throttle related work */ static struct workqueue_struct *kthrotld_workqueue; -#ifdef BLK_DEV_SUPPORT_LEGACY_GLOBAL_LIMIT +#ifdef CONFIG_BLK_DEV_SUPPORT_LEGACY_GLOBAL_LIMIT /* True if global limit is enabled in cgroup v1 */ static bool global_limit; -- Gitee From c82781d8426de8ea8618e14e29cc70a093cf3a82 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 22 May 2024 03:00:22 +0800 Subject: [PATCH 05/11] block: serialize all debugfs operations using q->debugfs_mutex mainline inclusion from mainline-v5.19-rc4 commit 5cf9c91ba927119fc6606b938b1895bb2459d3bc category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9QJ1S CVE: NA Reference:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=5cf9c91ba927119fc6606b938b1895bb2459d3bc ---------------------------------------- Various places like I/O schedulers or the QOS infrastructure try to register debugfs files on demans, which can race with creating and removing the main queue debugfs directory. Use the existing debugfs_mutex to serialize all debugfs operations that rely on q->debugfs_dir or the directories hanging off it. To make the teardown code a little simpler declare all debugfs dentry pointers and not just the main one uncoditionally in blkdev.h. Move debugfs_mutex next to the dentries that it protects and document what it is used for. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220614074827.458955-3-hch@lst.de Signed-off-by: Jens Axboe Conflicts: block/blk-sysfs.c include/linux/blkdev.h kernel/trace/blktrace.c block/blk-mq-debugfs.c block/blk-mq-sched.c Signed-off-by: Yu Kuai --- block/blk-mq-debugfs.c | 25 ++++++++++++++++++++----- block/blk-mq-debugfs.h | 5 ----- block/blk-mq-sched.c | 11 +++++++++++ block/blk-rq-qos.c | 2 ++ block/blk-rq-qos.h | 7 ++++++- block/blk-sysfs.c | 20 ++++++++++---------- include/linux/blkdev.h | 8 ++++---- kernel/trace/blktrace.c | 3 --- 8 files changed, 53 insertions(+), 28 deletions(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index a879f94782e4c..d8623d9a7b18e 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -892,11 +892,6 @@ void blk_mq_debugfs_register(struct request_queue *q) } } -void blk_mq_debugfs_unregister(struct request_queue *q) -{ - q->sched_debugfs_dir = NULL; -} - static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx) { @@ -930,6 +925,8 @@ void blk_mq_debugfs_register_hctx(struct request_queue *q, void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx) { + if (!hctx->queue->debugfs_dir) + return; debugfs_remove_recursive(hctx->debugfs_dir); hctx->sched_debugfs_dir = 
NULL; hctx->debugfs_dir = NULL; @@ -957,6 +954,8 @@ void blk_mq_debugfs_register_sched(struct request_queue *q) { struct elevator_type *e = q->elevator->type; + lockdep_assert_held(&q->debugfs_mutex); + /* * If the parent directory has not been created yet, return, we will be * called again later on and the directory/files will be created then. @@ -974,12 +973,18 @@ void blk_mq_debugfs_register_sched(struct request_queue *q) void blk_mq_debugfs_unregister_sched(struct request_queue *q) { + lockdep_assert_held(&q->debugfs_mutex); + debugfs_remove_recursive(q->sched_debugfs_dir); q->sched_debugfs_dir = NULL; } void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos) { + lockdep_assert_held(&rqos->q->debugfs_mutex); + + if (!rqos->q->debugfs_dir) + return; debugfs_remove_recursive(rqos->debugfs_dir); rqos->debugfs_dir = NULL; } @@ -989,6 +994,8 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) struct request_queue *q = rqos->q; const char *dir_name = rq_qos_id_to_name(rqos->id); + lockdep_assert_held(&q->debugfs_mutex); + if (rqos->debugfs_dir || !rqos->ops->debugfs_attrs) return; @@ -1004,6 +1011,8 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q) { + lockdep_assert_held(&q->debugfs_mutex); + debugfs_remove_recursive(q->rqos_debugfs_dir); q->rqos_debugfs_dir = NULL; } @@ -1013,6 +1022,8 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q, { struct elevator_type *e = q->elevator->type; + lockdep_assert_held(&q->debugfs_mutex); + if (!e->hctx_debugfs_attrs) return; @@ -1024,6 +1035,10 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q, void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx) { + lockdep_assert_held(&hctx->queue->debugfs_mutex); + + if (!hctx->queue->debugfs_dir) + return; debugfs_remove_recursive(hctx->sched_debugfs_dir); hctx->sched_debugfs_dir = NULL; } diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h index 
a68aa6041a10d..891c3af6f611e 100644 --- a/block/blk-mq-debugfs.h +++ b/block/blk-mq-debugfs.h @@ -19,7 +19,6 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq); int blk_mq_debugfs_rq_show(struct seq_file *m, void *v); void blk_mq_debugfs_register(struct request_queue *q); -void blk_mq_debugfs_unregister(struct request_queue *q); void blk_mq_debugfs_register_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx); void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx); @@ -40,10 +39,6 @@ static inline void blk_mq_debugfs_register(struct request_queue *q) { } -static inline void blk_mq_debugfs_unregister(struct request_queue *q) -{ -} - static inline void blk_mq_debugfs_register_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx) { diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index c92d25b71a728..8620a5d75c62a 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -610,7 +610,9 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) if (ret) goto err_free_map_and_rqs; + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_register_sched(q); + mutex_unlock(&q->debugfs_mutex); queue_for_each_hw_ctx(q, hctx, i) { if (e->ops.init_hctx) { @@ -623,7 +625,9 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) return ret; } } + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_register_sched_hctx(q, hctx); + mutex_unlock(&q->debugfs_mutex); } return 0; @@ -664,14 +668,21 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e) unsigned int flags = 0; queue_for_each_hw_ctx(q, hctx, i) { + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_sched_hctx(hctx); + mutex_unlock(&q->debugfs_mutex); + if (e->type->ops.exit_hctx && hctx->sched_data) { e->type->ops.exit_hctx(hctx, i); hctx->sched_data = NULL; } flags = hctx->flags; } + + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_sched(q); + mutex_unlock(&q->debugfs_mutex); + if (e->type->ops.exit_sched) 
e->type->ops.exit_sched(e); blk_mq_sched_tags_teardown(q, flags); diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index e83af7bc75919..249a6f05dd3bd 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -294,7 +294,9 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data, void rq_qos_exit(struct request_queue *q) { + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_queue_rqos(q); + mutex_unlock(&q->debugfs_mutex); while (q->rq_qos) { struct rq_qos *rqos = q->rq_qos; diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h index 37c59d7d6ba7f..af1c2ca157d71 100644 --- a/block/blk-rq-qos.h +++ b/block/blk-rq-qos.h @@ -118,8 +118,11 @@ static inline int rq_qos_add(struct request_queue *q, struct rq_qos *rqos) blk_mq_unfreeze_queue(q); - if (rqos->ops->debugfs_attrs) + if (rqos->ops->debugfs_attrs) { + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_register_rqos(rqos); + mutex_unlock(&q->debugfs_mutex); + } return 0; ebusy: @@ -150,7 +153,9 @@ static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos) blk_mq_unfreeze_queue(q); + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_rqos(rqos); + mutex_unlock(&q->debugfs_mutex); } typedef bool (acquire_inflight_cb_t)(struct rq_wait *rqw, void *private_data); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index c95be9626a098..9e74668f98cc6 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -804,14 +804,13 @@ static void blk_release_queue(struct kobject *kobj) if (queue_is_mq(q)) blk_mq_release(q); - blk_trace_shutdown(q); mutex_lock(&q->debugfs_mutex); + blk_trace_shutdown(q); debugfs_remove_recursive(q->debugfs_dir); + q->debugfs_dir = NULL; + q->sched_debugfs_dir = NULL; mutex_unlock(&q->debugfs_mutex); - if (queue_is_mq(q)) - blk_mq_debugfs_unregister(q); - bioset_exit(&q->bio_split); ida_simple_remove(&blk_queue_ida, q->id); @@ -881,17 +880,18 @@ int blk_register_queue(struct gendisk *disk) goto unlock; } + if (queue_is_mq(q)) + __blk_mq_register_dev(dev, q); + 
mutex_lock(&q->sysfs_lock); + mutex_lock(&q->debugfs_mutex); q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent), blk_debugfs_root); - mutex_unlock(&q->debugfs_mutex); - - if (queue_is_mq(q)) { - __blk_mq_register_dev(dev, q); + if (queue_is_mq(q)) blk_mq_debugfs_register(q); - } - mutex_lock(&q->sysfs_lock); + mutex_unlock(&q->debugfs_mutex); + if (q->elevator) { ret = elv_register_queue(q, false); if (ret) { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 50b4fd0a06873..8208922552b53 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -540,7 +540,6 @@ struct request_queue { unsigned int sg_timeout; unsigned int sg_reserved_size; int node; - struct mutex debugfs_mutex; #ifdef CONFIG_BLK_DEV_IO_TRACE struct blk_trace __rcu *blk_trace; #endif @@ -588,11 +587,12 @@ struct request_queue { struct bio_set bio_split; struct dentry *debugfs_dir; - -#ifdef CONFIG_BLK_DEBUG_FS struct dentry *sched_debugfs_dir; struct dentry *rqos_debugfs_dir; -#endif + /* + * Serializes all debugfs metadata operations using the above dentries. 
+ */ + struct mutex debugfs_mutex; bool mq_sysfs_init_done; diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index bc98d5e4e033f..246ed13b49ca9 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -773,12 +773,9 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) **/ void blk_trace_shutdown(struct request_queue *q) { - mutex_lock(&q->debugfs_mutex); if (rcu_dereference_protected(q->blk_trace, lockdep_is_held(&q->debugfs_mutex))) __blk_trace_remove(q); - - mutex_unlock(&q->debugfs_mutex); } #ifdef CONFIG_BLK_CGROUP -- Gitee From 2991c61674ad9ff0c7579066b7b8081c4db00415 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 22 May 2024 03:00:23 +0800 Subject: [PATCH 06/11] block: remove per-disk debugfs files in blk_unregister_queue mainline inclusion from mainline-v5.19-rc4 commit 99d055b4fd4bbb309c6cdb51a0d420669f777944 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9QJ1S CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=99d055b4fd4bbb309c6cdb51a0d420669f777944 ---------------------------------------- The block debugfs files are created in blk_register_queue, which is called by add_disk and use a naming scheme based on the disk_name. After del_gendisk returns that name can be reused and thus we must not leave these debugfs files around, otherwise the kernel is unhappy and spews messages like: Directory XXXXX with parent 'block' already present! and the newly created devices will not have working debugfs files. Move the unregistration to blk_unregister_queue instead (which matches the sysfs unregistration) to make sure the debugfs life time rules match those of the disk name. As part of the move also make sure the whole debugfs unregistration is inside a single debugfs_mutex critical section. 
Note that this breaks blktests block/002, which checks that the debugfs directory has not been removed while blktests is running, but that particular check should simply be removed from the test case. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220614074827.458955-4-hch@lst.de Signed-off-by: Jens Axboe Signed-off-by: Yu Kuai --- block/blk-mq-debugfs.c | 8 -------- block/blk-mq-debugfs.h | 5 ----- block/blk-rq-qos.c | 4 ---- block/blk-sysfs.c | 16 ++++++++-------- 4 files changed, 8 insertions(+), 25 deletions(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index d8623d9a7b18e..1e4a5ff73bd8a 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -1009,14 +1009,6 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) debugfs_create_files(rqos->debugfs_dir, rqos, rqos->ops->debugfs_attrs); } -void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q) -{ - lockdep_assert_held(&q->debugfs_mutex); - - debugfs_remove_recursive(q->rqos_debugfs_dir); - q->rqos_debugfs_dir = NULL; -} - void blk_mq_debugfs_register_sched_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx) { diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h index 891c3af6f611e..f6898560b1f39 100644 --- a/block/blk-mq-debugfs.h +++ b/block/blk-mq-debugfs.h @@ -33,7 +33,6 @@ void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx); void blk_mq_debugfs_register_rqos(struct rq_qos *rqos); void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos); -void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q); #else static inline void blk_mq_debugfs_register(struct request_queue *q) { @@ -80,10 +79,6 @@ static inline void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) static inline void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos) { } - -static inline void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q) -{ -} #endif #ifdef CONFIG_BLK_DEBUG_FS_ZONED diff --git a/block/blk-rq-qos.c 
b/block/blk-rq-qos.c index 249a6f05dd3bd..d3a75693adbf4 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -294,10 +294,6 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data, void rq_qos_exit(struct request_queue *q) { - mutex_lock(&q->debugfs_mutex); - blk_mq_debugfs_unregister_queue_rqos(q); - mutex_unlock(&q->debugfs_mutex); - while (q->rq_qos) { struct rq_qos *rqos = q->rq_qos; q->rq_qos = rqos->next; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 9e74668f98cc6..ef38e165bb302 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -804,13 +804,6 @@ static void blk_release_queue(struct kobject *kobj) if (queue_is_mq(q)) blk_mq_release(q); - mutex_lock(&q->debugfs_mutex); - blk_trace_shutdown(q); - debugfs_remove_recursive(q->debugfs_dir); - q->debugfs_dir = NULL; - q->sched_debugfs_dir = NULL; - mutex_unlock(&q->debugfs_mutex); - bioset_exit(&q->bio_split); ida_simple_remove(&blk_queue_ida, q->id); @@ -973,8 +966,15 @@ void blk_unregister_queue(struct gendisk *disk) /* Now that we've deleted all child objects, we can delete the queue. */ kobject_uevent(&q->kobj, KOBJ_REMOVE); kobject_del(&q->kobj); - mutex_unlock(&q->sysfs_dir_lock); + mutex_lock(&q->debugfs_mutex); + blk_trace_shutdown(q); + debugfs_remove_recursive(q->debugfs_dir); + q->debugfs_dir = NULL; + q->sched_debugfs_dir = NULL; + q->rqos_debugfs_dir = NULL; + mutex_unlock(&q->debugfs_mutex); + kobject_put(&disk_to_dev(disk)->kobj); } -- Gitee From 1a862ebb77df4c480eb30955bd3e0c29b8bde821 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 22 May 2024 03:00:24 +0800 Subject: [PATCH 07/11] block: protect blk_mq_debugfs_register/unregister_hctx() with 'debugfs_mutex' hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9QJ1S CVE: NA ---------------------------------------- All operations to create and remove files under 'q->debugfs_dir' should be protected by 'q->debugfs_mutex'. 
Signed-off-by: Yu Kuai --- block/blk-mq-debugfs.c | 8 ++++++++ block/blk-mq.c | 2 ++ 2 files changed, 10 insertions(+) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 1e4a5ff73bd8a..e2b8942311c9a 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -911,6 +911,8 @@ void blk_mq_debugfs_register_hctx(struct request_queue *q, char name[20]; int i; + lockdep_assert_held(&q->debugfs_mutex); + if (!q->debugfs_dir) return; @@ -925,6 +927,8 @@ void blk_mq_debugfs_register_hctx(struct request_queue *q, void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx) { + lockdep_assert_held(&hctx->queue->debugfs_mutex); + if (!hctx->queue->debugfs_dir) return; debugfs_remove_recursive(hctx->debugfs_dir); @@ -937,8 +941,10 @@ void blk_mq_debugfs_register_hctxs(struct request_queue *q) struct blk_mq_hw_ctx *hctx; int i; + mutex_lock(&q->debugfs_mutex); queue_for_each_hw_ctx(q, hctx, i) blk_mq_debugfs_register_hctx(q, hctx); + mutex_unlock(&q->debugfs_mutex); } void blk_mq_debugfs_unregister_hctxs(struct request_queue *q) @@ -946,8 +952,10 @@ void blk_mq_debugfs_unregister_hctxs(struct request_queue *q) struct blk_mq_hw_ctx *hctx; int i; + mutex_lock(&q->debugfs_mutex); queue_for_each_hw_ctx(q, hctx, i) blk_mq_debugfs_unregister_hctx(hctx); + mutex_unlock(&q->debugfs_mutex); } void blk_mq_debugfs_register_sched(struct request_queue *q) diff --git a/block/blk-mq.c b/block/blk-mq.c index a28957dfb7574..5670dfeac85a4 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2873,7 +2873,9 @@ static void blk_mq_exit_hw_queues(struct request_queue *q, queue_for_each_hw_ctx(q, hctx, i) { if (i == nr_queue) break; + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_hctx(hctx); + mutex_unlock(&q->debugfs_mutex); blk_mq_exit_hctx(q, set, hctx, i); } } -- Gitee From 51e11e45a97208ab5920b32848db6709e159bf95 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 22 May 2024 03:00:25 +0800 Subject: [PATCH 08/11] block: shutdown blktrace in blk_release_queue() 
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9QJ1S CVE: NA ---------------------------------------- Commit 99d055b4fd4b ("block: remove per-disk debugfs files in blk_unregister_queue") moved blk_trace_shutdown() from blk_release_queue() to blk_unregister_queue(). However, blktrace can still be enabled through ioctl after blk_unregister_queue(), and blktrace will be leaked in this case. Fix the problem by calling blk_trace_shutdown() in blk_release_queue(). Signed-off-by: Yu Kuai --- block/blk-sysfs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index ef38e165bb302..dbe5d15c511ff 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -804,6 +804,10 @@ static void blk_release_queue(struct kobject *kobj) if (queue_is_mq(q)) blk_mq_release(q); + mutex_lock(&q->debugfs_mutex); + blk_trace_shutdown(q); + mutex_unlock(&q->debugfs_mutex); + bioset_exit(&q->bio_split); ida_simple_remove(&blk_queue_ida, q->id); -- Gitee From 7db89c3d8d0e090e073cf678fb2e6a37bc6cb4be Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 22 May 2024 03:00:26 +0800 Subject: [PATCH 09/11] block: support enable/disable blk-mq debugfs dynamically hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9QJ1S CVE: NA ---------------------------------------- After a disk is created, debugfs inodes and dentries will be created together, and the memory used for debugfs can't be freed until disk removal. The number of debugfs inodes and dentries depends on the number of cpus and hctxs. For example, testing on a 128-core environment, with default module parameters, each loop device will cost 1679KB memory, and debugfs will cost 336KB (20%). The memory cost for debugfs for a disk seems little, but if a big machine contains thousands of disks, the cost will be xxGB. This memory overhead can be avoided by disabling CONFIG_BLK_DEBUG_FS.
This patch adds a disk-level switch that can enable/disable debugfs dynamically, so that users can disable debugfs if they care about the memory overhead; meanwhile, debugfs can be enabled again on demand. Signed-off-by: Yu Kuai --- block/blk-mq-debugfs.c | 64 +++++++++++++++++++++++++++++++++++++----- block/blk-mq-debugfs.h | 5 ++++ block/blk-sysfs.c | 54 +++++++++++++++++++++++++++++++++++ include/linux/blkdev.h | 4 ++- 4 files changed, 119 insertions(+), 8 deletions(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index e2b8942311c9a..f600ac9677a5d 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -859,11 +859,20 @@ static void debugfs_create_files(struct dentry *parent, void *data, (void *)attr, &blk_mq_debugfs_fops); } +static bool blk_mq_debugfs_enabled(struct request_queue *q) +{ + return !IS_ERR_OR_NULL(q->debugfs_dir) && + test_bit(QUEUE_FLAG_DEBUGFS, &q->queue_flags); +} + void blk_mq_debugfs_register(struct request_queue *q) { struct blk_mq_hw_ctx *hctx; int i; + if (!blk_mq_debugfs_enabled(q)) + return; + debugfs_create_files(q->debugfs_dir, q, blk_mq_debugfs_queue_attrs); /* @@ -892,6 +901,46 @@ void blk_mq_debugfs_register(struct request_queue *q) } } +static void debugfs_remove_files(struct dentry *parent, + const struct blk_mq_debugfs_attr *attr) +{ + if (IS_ERR_OR_NULL(parent)) + return; + + for (; attr->name; attr++) + debugfs_lookup_and_remove(attr->name, parent); +} + +void blk_mq_debugfs_unregister(struct request_queue *q) +{ + struct blk_mq_hw_ctx *hctx; + unsigned long i; + + spin_lock(&q->queue_lock); + if (q->rq_qos) { + struct rq_qos *rqos = q->rq_qos; + + while (rqos) { + rqos->debugfs_dir = NULL; + rqos = rqos->next; + } + } + spin_unlock(&q->queue_lock); + + debugfs_remove_recursive(q->rqos_debugfs_dir); + q->rqos_debugfs_dir = NULL; + + queue_for_each_hw_ctx(q, hctx, i) { + if (hctx->debugfs_dir) + blk_mq_debugfs_unregister_hctx(hctx); + } + + if (q->sched_debugfs_dir) + 
blk_mq_debugfs_unregister_sched(q); + + debugfs_remove_files(q->debugfs_dir, blk_mq_debugfs_queue_attrs); +} + static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx) { @@ -913,7 +962,7 @@ void blk_mq_debugfs_register_hctx(struct request_queue *q, lockdep_assert_held(&q->debugfs_mutex); - if (!q->debugfs_dir) + if (!blk_mq_debugfs_enabled(q)) return; snprintf(name, sizeof(name), "hctx%u", hctx->queue_num); @@ -929,7 +978,7 @@ void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx) { lockdep_assert_held(&hctx->queue->debugfs_mutex); - if (!hctx->queue->debugfs_dir) + if (!blk_mq_debugfs_enabled(hctx->queue)) return; debugfs_remove_recursive(hctx->debugfs_dir); hctx->sched_debugfs_dir = NULL; @@ -968,7 +1017,7 @@ void blk_mq_debugfs_register_sched(struct request_queue *q) * If the parent directory has not been created yet, return, we will be * called again later on and the directory/files will be created then. */ - if (!q->debugfs_dir) + if (!blk_mq_debugfs_enabled(q)) return; if (!e->queue_debugfs_attrs) @@ -991,7 +1040,7 @@ void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos) { lockdep_assert_held(&rqos->q->debugfs_mutex); - if (!rqos->q->debugfs_dir) + if (!blk_mq_debugfs_enabled(rqos->q)) return; debugfs_remove_recursive(rqos->debugfs_dir); rqos->debugfs_dir = NULL; @@ -1004,7 +1053,8 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) lockdep_assert_held(&q->debugfs_mutex); - if (rqos->debugfs_dir || !rqos->ops->debugfs_attrs) + if (rqos->debugfs_dir || !rqos->ops->debugfs_attrs || + !blk_mq_debugfs_enabled(q)) return; if (!q->rqos_debugfs_dir) @@ -1024,7 +1074,7 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q, lockdep_assert_held(&q->debugfs_mutex); - if (!e->hctx_debugfs_attrs) + if (!e->hctx_debugfs_attrs || !blk_mq_debugfs_enabled(q)) return; hctx->sched_debugfs_dir = debugfs_create_dir("sched", @@ -1037,7 +1087,7 @@ void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx) 
{ lockdep_assert_held(&hctx->queue->debugfs_mutex); - if (!hctx->queue->debugfs_dir) + if (!blk_mq_debugfs_enabled(hctx->queue)) return; debugfs_remove_recursive(hctx->sched_debugfs_dir); hctx->sched_debugfs_dir = NULL; diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h index f6898560b1f39..3a2c43a9a0ae8 100644 --- a/block/blk-mq-debugfs.h +++ b/block/blk-mq-debugfs.h @@ -19,6 +19,7 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq); int blk_mq_debugfs_rq_show(struct seq_file *m, void *v); void blk_mq_debugfs_register(struct request_queue *q); +void blk_mq_debugfs_unregister(struct request_queue *q); void blk_mq_debugfs_register_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx); void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx); @@ -38,6 +39,10 @@ static inline void blk_mq_debugfs_register(struct request_queue *q) { } +static inline void blk_mq_debugfs_unregister(struct request_queue *q) +{ +} + static inline void blk_mq_debugfs_register_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx) { diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index dbe5d15c511ff..ea2d62ace573f 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -619,6 +619,57 @@ QUEUE_RW_ENTRY(queue_iostats, "iostats"); QUEUE_RW_ENTRY(queue_random, "add_random"); QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes"); +#ifdef CONFIG_BLK_DEBUG_FS +static ssize_t queue_debugfs_show(struct request_queue *q, char *page) +{ + return queue_var_show(test_bit(QUEUE_FLAG_DEBUGFS, &q->queue_flags), + page); +} + +static ssize_t queue_debugfs_store(struct request_queue *q, const char *page, + size_t count) +{ + unsigned long val; + ssize_t ret; + bool enabled; + int err; + + if (!queue_is_mq(q)) + return count; + + if (!blk_queue_registered(q)) + return -ENODEV; + + ret = queue_var_store(&val, page, count); + if (ret < 0) + return ret; + + err = blk_queue_enter(q, 0); + if (err) + return err; + + mutex_lock(&q->debugfs_mutex); + enabled = 
test_bit(QUEUE_FLAG_DEBUGFS, &q->queue_flags); + if (!!val == enabled) + goto unlock; + + if (val) { + blk_queue_flag_set(QUEUE_FLAG_DEBUGFS, q); + blk_mq_debugfs_register(q); + } else { + blk_mq_debugfs_unregister(q); + blk_queue_flag_clear(QUEUE_FLAG_DEBUGFS, q); + } + +unlock: + mutex_unlock(&q->debugfs_mutex); + blk_queue_exit(q); + return ret; +} + +QUEUE_RW_ENTRY(queue_debugfs, "debugfs"); +#endif + static struct attribute *queue_attrs[] = { &queue_requests_entry.attr, &queue_ra_entry.attr, @@ -661,6 +712,9 @@ static struct attribute *queue_attrs[] = { &queue_io_timeout_entry.attr, #ifdef CONFIG_BLK_DEV_THROTTLING_LOW &blk_throtl_sample_time_entry.attr, +#endif +#ifdef CONFIG_BLK_DEBUG_FS + &queue_debugfs_entry.attr, #endif NULL, }; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8208922552b53..5ccb9d5f1c29b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -643,10 +643,12 @@ struct request_queue { #define QUEUE_FLAG_NOWAIT 29 /* device supports NOWAIT */ /*at least one blk-mq hctx can't get driver tag */ #define QUEUE_FLAG_HCTX_WAIT 30 +#define QUEUE_FLAG_DEBUGFS 31 /* supports debugfs */ #define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_SAME_COMP) | \ - (1 << QUEUE_FLAG_NOWAIT)) + (1 << QUEUE_FLAG_NOWAIT) | \ + (1 << QUEUE_FLAG_DEBUGFS)) void blk_queue_flag_set(unsigned int flag, struct request_queue *q); void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); -- Gitee From b0beb99955d8a05ba7a091a5018bfbce932afcc0 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 22 May 2024 03:00:27 +0800 Subject: [PATCH 10/11] block: fix kabi broken in struct request_queue hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9QJ1S CVE: NA ---------------------------------------- Signed-off-by: Yu Kuai --- block/blk-sysfs.c | 4 ++-- include/linux/blkdev.h | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/block/blk-sysfs.c 
b/block/blk-sysfs.c index ea2d62ace573f..805073eb1d7d5 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -1028,10 +1028,10 @@ void blk_unregister_queue(struct gendisk *disk) mutex_lock(&q->debugfs_mutex); blk_trace_shutdown(q); + if (queue_is_mq(q)) + blk_mq_debugfs_unregister(q); debugfs_remove_recursive(q->debugfs_dir); q->debugfs_dir = NULL; - q->sched_debugfs_dir = NULL; - q->rqos_debugfs_dir = NULL; mutex_unlock(&q->debugfs_mutex); kobject_put(&disk_to_dev(disk)->kobj); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5ccb9d5f1c29b..19a4bb6c99dd3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -540,6 +540,10 @@ struct request_queue { unsigned int sg_timeout; unsigned int sg_reserved_size; int node; + /* + * Serializes all debugfs metadata operations using the above dentries. + */ + struct mutex debugfs_mutex; #ifdef CONFIG_BLK_DEV_IO_TRACE struct blk_trace __rcu *blk_trace; #endif @@ -587,12 +591,10 @@ struct request_queue { struct bio_set bio_split; struct dentry *debugfs_dir; +#ifdef CONFIG_BLK_DEBUG_FS struct dentry *sched_debugfs_dir; struct dentry *rqos_debugfs_dir; - /* - * Serializes all debugfs metadata operations using the above dentries. - */ - struct mutex debugfs_mutex; +#endif bool mq_sysfs_init_done; -- Gitee From 08d5886c2ca560d07d0b25e29caaf635049241df Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 22 May 2024 03:00:28 +0800 Subject: [PATCH 11/11] block: support to disable debugfs by default hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9QJ1S CVE: NA ---------------------------------------- All block layer debugfs is enabled by default, and it can be disabled by adding "blk_mq.enable_debugfs=0" to the boot command line. Note that users can still enable or disable debugfs for each disk through sysfs. 
Signed-off-by: Yu Kuai --- arch/arm64/configs/openeuler_defconfig | 1 + arch/powerpc/configs/openeuler_defconfig | 1 + arch/x86/configs/openeuler_defconfig | 1 + block/Kconfig | 11 +++++++++++ block/blk-mq.c | 10 ++++++++++ 5 files changed, 24 insertions(+) diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 10c941186dbe5..50e67e81d7d85 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -950,6 +950,7 @@ CONFIG_BLK_WBT=y CONFIG_BLK_WBT_MQ=y CONFIG_BLK_DEBUG_FS=y CONFIG_BLK_DEBUG_FS_ZONED=y +CONFIG_BLK_DEBUG_FS_SWITCH=y # CONFIG_BLK_SED_OPAL is not set # CONFIG_BLK_INLINE_ENCRYPTION is not set # CONFIG_BLK_IO_HUNG_TASK_CHECK is not set diff --git a/arch/powerpc/configs/openeuler_defconfig b/arch/powerpc/configs/openeuler_defconfig index 68f642dd09f2d..61cd711798b0d 100644 --- a/arch/powerpc/configs/openeuler_defconfig +++ b/arch/powerpc/configs/openeuler_defconfig @@ -683,6 +683,7 @@ CONFIG_BLK_CGROUP_IOLATENCY=y CONFIG_BLK_CGROUP_IOCOST=y CONFIG_BLK_WBT_MQ=y CONFIG_BLK_DEBUG_FS=y +CONFIG_BLK_DEBUG_FS_SWITCH=y CONFIG_BLK_DEBUG_FS_ZONED=y CONFIG_BLK_SED_OPAL=y CONFIG_BLK_INLINE_ENCRYPTION=y diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index 0b09969e84689..6529aa3f0e387 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -930,6 +930,7 @@ CONFIG_BLK_WBT=y # CONFIG_BLK_CGROUP_IOCOST is not set CONFIG_BLK_WBT_MQ=y CONFIG_BLK_DEBUG_FS=y +CONFIG_BLK_DEBUG_FS_SWITCH=y CONFIG_BLK_DEBUG_FS_ZONED=y # CONFIG_BLK_SED_OPAL is not set # CONFIG_BLK_INLINE_ENCRYPTION is not set diff --git a/block/Kconfig b/block/Kconfig index 498d18f1d10ed..6818b1738dd77 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -185,6 +185,17 @@ config BLK_DEBUG_FS Unless you are building a kernel for a tiny system, you should say Y here. 
+config BLK_DEBUG_FS_SWITCH + bool "Disable/Enable block layer debugfs by demands" + default y + depends on BLK_DEBUG_FS + help + All block layer debugfs is enabled by default, enabling this option + allow user to disable all debugfs by module parameters. Noted that + user can still enable or disable debugfs by sysfs for each disk. + + If memory resources is limited, you should say Y here. + config BLK_DEBUG_FS_ZONED bool default BLK_DEBUG_FS && BLK_DEV_ZONED diff --git a/block/blk-mq.c b/block/blk-mq.c index 5670dfeac85a4..ea97231a25eef 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -47,6 +47,11 @@ bool mq_unfair_dtag = true; module_param_named(unfair_dtag, mq_unfair_dtag, bool, 0444); +#ifdef CONFIG_BLK_DEBUG_FS_SWITCH +bool enable_debugfs = true; +module_param_named(enable_debugfs, enable_debugfs, bool, 0444); +#endif + static DEFINE_PER_CPU(struct list_head, blk_cpu_done); static void blk_mq_poll_stats_start(struct request_queue *q); @@ -3540,6 +3545,11 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, q->tag_set = set; q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; + +#ifdef CONFIG_BLK_DEBUG_FS_SWITCH + if (!enable_debugfs) + blk_queue_flag_clear(QUEUE_FLAG_DEBUGFS, q); +#endif if (set->nr_maps > HCTX_TYPE_POLL && set->map[HCTX_TYPE_POLL].nr_queues) blk_queue_flag_set(QUEUE_FLAG_POLL, q); -- Gitee