diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 34061d75a0d21be9c981d724c4e05c90b4e1000d..50e67e81d7d85b6bb8b4fa2fd8d6eff4b26b70a3 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -942,6 +942,7 @@ CONFIG_BLK_DEV_INTEGRITY_T10=m CONFIG_BLK_DEV_ZONED=y CONFIG_BLK_DEV_THROTTLING=y # CONFIG_BLK_DEV_THROTTLING_LOW is not set +CONFIG_BLK_DEV_SUPPORT_LEGACY_GLOBAL_LIMIT=y # CONFIG_BLK_CMDLINE_PARSER is not set CONFIG_BLK_WBT=y # CONFIG_BLK_CGROUP_IOLATENCY is not set @@ -949,8 +950,10 @@ CONFIG_BLK_WBT=y CONFIG_BLK_WBT_MQ=y CONFIG_BLK_DEBUG_FS=y CONFIG_BLK_DEBUG_FS_ZONED=y +CONFIG_BLK_DEBUG_FS_SWITCH=y # CONFIG_BLK_SED_OPAL is not set # CONFIG_BLK_INLINE_ENCRYPTION is not set +# CONFIG_BLK_IO_HUNG_TASK_CHECK is not set CONFIG_BLK_DEV_DUMPINFO=y # diff --git a/arch/powerpc/configs/openeuler_defconfig b/arch/powerpc/configs/openeuler_defconfig index 74118e43ff05f1bc29e8687d4c7cc182341bf66a..61cd711798b0da8f674e1e1e1f396fdbb0c3e308 100644 --- a/arch/powerpc/configs/openeuler_defconfig +++ b/arch/powerpc/configs/openeuler_defconfig @@ -683,10 +683,12 @@ CONFIG_BLK_CGROUP_IOLATENCY=y CONFIG_BLK_CGROUP_IOCOST=y CONFIG_BLK_WBT_MQ=y CONFIG_BLK_DEBUG_FS=y +CONFIG_BLK_DEBUG_FS_SWITCH=y CONFIG_BLK_DEBUG_FS_ZONED=y CONFIG_BLK_SED_OPAL=y CONFIG_BLK_INLINE_ENCRYPTION=y # CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK is not set +# CONFIG_BLK_IO_HUNG_TASK_CHECK is not set # # Partition Types diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index 1835f38f2947ccbf8c017d1d72656c378fa04c54..6529aa3f0e387a98ca4923314522004fdf09cce5 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -923,15 +923,18 @@ CONFIG_BLK_DEV_INTEGRITY_T10=m CONFIG_BLK_DEV_ZONED=y CONFIG_BLK_DEV_THROTTLING=y # CONFIG_BLK_DEV_THROTTLING_LOW is not set +CONFIG_BLK_DEV_SUPPORT_LEGACY_GLOBAL_LIMIT=y # CONFIG_BLK_CMDLINE_PARSER is not set 
CONFIG_BLK_WBT=y # CONFIG_BLK_CGROUP_IOLATENCY is not set # CONFIG_BLK_CGROUP_IOCOST is not set CONFIG_BLK_WBT_MQ=y CONFIG_BLK_DEBUG_FS=y +CONFIG_BLK_DEBUG_FS_SWITCH=y CONFIG_BLK_DEBUG_FS_ZONED=y # CONFIG_BLK_SED_OPAL is not set # CONFIG_BLK_INLINE_ENCRYPTION is not set +# CONFIG_BLK_IO_HUNG_TASK_CHECK is not set CONFIG_BLK_DEV_DUMPINFO=y # diff --git a/block/Kconfig b/block/Kconfig index e5c965f1ea258cfe7f3eab4b5b7cbcda3af1b669..6818b1738dd77dbba521197f4fd3e9fbc831cd3d 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -114,6 +114,17 @@ config BLK_DEV_THROTTLING_LOW Note, this is an experimental interface and could be changed someday. +config BLK_DEV_SUPPORT_LEGACY_GLOBAL_LIMIT + bool "Block layer global limit in cgroup v1" + depends on BLK_DEV_THROTTLING=y + default n + help + blkio subsystem is not under default hierarchy in cgroup v1 by default, + Enabling this will support global limit in cgroup v1. + + Note, a cmdline "blkcg_global_limit=1" is still required to enable this + feature. + config BLK_CMDLINE_PARSER bool "Block device command line partition parser" help @@ -174,6 +185,17 @@ config BLK_DEBUG_FS Unless you are building a kernel for a tiny system, you should say Y here. +config BLK_DEBUG_FS_SWITCH + bool "Disable/Enable block layer debugfs by demands" + default y + depends on BLK_DEBUG_FS + help + All block layer debugfs is enabled by default, enabling this option + allows user to disable all debugfs by module parameters. Note that + user can still enable or disable debugfs by sysfs for each disk. + + If memory resources are limited, you should say Y here. 
+ config BLK_DEBUG_FS_ZONED bool default BLK_DEBUG_FS && BLK_DEV_ZONED @@ -209,6 +231,15 @@ config BLK_DEV_DUMPINFO Dump info when open an write opened block device exclusively or open an exclusive opened device for write +config BLK_IO_HUNG_TASK_CHECK + bool "Enable io hung task check" + depends on DETECT_HUNG_TASK + default n + help + Enabling this lets the block layer detect hung tasks for io. Note that + if this is set, hungtask will complain about slow io even if such + io is not hung. Be careful to enable hungtask panic in this case. + menu "Partition Types" source "block/partitions/Kconfig" diff --git a/block/bio.c b/block/bio.c index 8c64c93e96c8afd00ff938ab35a86ce479291a60..123b44ba17cbf03a565780642056157f7ebe0992 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1169,7 +1169,7 @@ int submit_bio_wait(struct bio *bio) submit_bio(bio); /* Prevent hang_check timer from firing at us during very long I/O */ - hang_check = sysctl_hung_task_timeout_secs; + hang_check = io_hung_task_check ? 0 : sysctl_hung_task_timeout_secs; if (hang_check) while (!wait_for_completion_io_timeout(&done, hang_check * (HZ/2))) diff --git a/block/blk-core.c b/block/blk-core.c index f91f8e8be482d1c93809ddb936437db45c0d8c91..e291ed8ee01e2a85b1be902dcfbc167b1e048a60 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -77,6 +77,17 @@ static int __init precise_iostat_setup(char *str) } __setup("precise_iostat=", precise_iostat_setup); +/* + * Note: if this is set, hungtask will complain about slow io even if such io is + * not hung. Be careful to enable hungtask panic in this case. 
+ */ +#ifdef CONFIG_BLK_IO_HUNG_TASK_CHECK +bool io_hung_task_check = true; +#else +bool io_hung_task_check; +#endif +module_param_named(io_hung_task_check, io_hung_task_check, bool, 0644); + /* * For queue allocation */ @@ -1879,7 +1890,7 @@ void blk_io_schedule(void) /* Prevent hang_check timer from firing at us during very long I/O */ unsigned long timeout = sysctl_hung_task_timeout_secs * HZ / 2; - if (timeout) + if (timeout && !io_hung_task_check) io_schedule_timeout(timeout); else io_schedule(); diff --git a/block/blk-exec.c b/block/blk-exec.c index b2676de4c6a57dd66618273be5a0b2b62a034af6..497aa52cd51ec59cf6f2b7d6f2fae310085ce71e 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -87,7 +87,7 @@ blk_status_t blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk, blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq); /* Prevent hang_check timer from firing at us during very long I/O */ - hang_check = sysctl_hung_task_timeout_secs; + hang_check = io_hung_task_check ? 0 : sysctl_hung_task_timeout_secs; if (hang_check) while (!wait_for_completion_io_timeout(&wait, hang_check * (HZ/2))); else diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index a879f94782e4c82b6a4a45af753fd044ff1d8f76..f600ac9677a5d2d5034f9a701249cd211632bc77 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -859,11 +859,20 @@ static void debugfs_create_files(struct dentry *parent, void *data, (void *)attr, &blk_mq_debugfs_fops); } +static bool blk_mq_debugfs_enabled(struct request_queue *q) +{ + return !IS_ERR_OR_NULL(q->debugfs_dir) && + test_bit(QUEUE_FLAG_DEBUGFS, &q->queue_flags); +} + void blk_mq_debugfs_register(struct request_queue *q) { struct blk_mq_hw_ctx *hctx; int i; + if (!blk_mq_debugfs_enabled(q)) + return; + debugfs_create_files(q->debugfs_dir, q, blk_mq_debugfs_queue_attrs); /* @@ -892,9 +901,44 @@ void blk_mq_debugfs_register(struct request_queue *q) } } +static void debugfs_remove_files(struct dentry *parent, + const struct 
blk_mq_debugfs_attr *attr) +{ + if (IS_ERR_OR_NULL(parent)) + return; + + for (; attr->name; attr++) + debugfs_lookup_and_remove(attr->name, parent); +} + void blk_mq_debugfs_unregister(struct request_queue *q) { - q->sched_debugfs_dir = NULL; + struct blk_mq_hw_ctx *hctx; + unsigned long i; + + spin_lock(&q->queue_lock); + if (q->rq_qos) { + struct rq_qos *rqos = q->rq_qos; + + while (rqos) { + rqos->debugfs_dir = NULL; + rqos = rqos->next; + } + } + spin_unlock(&q->queue_lock); + + debugfs_remove_recursive(q->rqos_debugfs_dir); + q->rqos_debugfs_dir = NULL; + + queue_for_each_hw_ctx(q, hctx, i) { + if (hctx->debugfs_dir) + blk_mq_debugfs_unregister_hctx(hctx); + } + + if (q->sched_debugfs_dir) + blk_mq_debugfs_unregister_sched(q); + + debugfs_remove_files(q->debugfs_dir, blk_mq_debugfs_queue_attrs); } static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx, @@ -916,7 +960,9 @@ void blk_mq_debugfs_register_hctx(struct request_queue *q, char name[20]; int i; - if (!q->debugfs_dir) + lockdep_assert_held(&q->debugfs_mutex); + + if (!blk_mq_debugfs_enabled(q)) return; snprintf(name, sizeof(name), "hctx%u", hctx->queue_num); @@ -930,6 +976,10 @@ void blk_mq_debugfs_register_hctx(struct request_queue *q, void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx) { + lockdep_assert_held(&hctx->queue->debugfs_mutex); + + if (!blk_mq_debugfs_enabled(hctx->queue)) + return; debugfs_remove_recursive(hctx->debugfs_dir); hctx->sched_debugfs_dir = NULL; hctx->debugfs_dir = NULL; @@ -940,8 +990,10 @@ void blk_mq_debugfs_register_hctxs(struct request_queue *q) struct blk_mq_hw_ctx *hctx; int i; + mutex_lock(&q->debugfs_mutex); queue_for_each_hw_ctx(q, hctx, i) blk_mq_debugfs_register_hctx(q, hctx); + mutex_unlock(&q->debugfs_mutex); } void blk_mq_debugfs_unregister_hctxs(struct request_queue *q) @@ -949,19 +1001,23 @@ void blk_mq_debugfs_unregister_hctxs(struct request_queue *q) struct blk_mq_hw_ctx *hctx; int i; + mutex_lock(&q->debugfs_mutex); 
queue_for_each_hw_ctx(q, hctx, i) blk_mq_debugfs_unregister_hctx(hctx); + mutex_unlock(&q->debugfs_mutex); } void blk_mq_debugfs_register_sched(struct request_queue *q) { struct elevator_type *e = q->elevator->type; + lockdep_assert_held(&q->debugfs_mutex); + /* * If the parent directory has not been created yet, return, we will be * called again later on and the directory/files will be created then. */ - if (!q->debugfs_dir) + if (!blk_mq_debugfs_enabled(q)) return; if (!e->queue_debugfs_attrs) @@ -974,12 +1030,18 @@ void blk_mq_debugfs_register_sched(struct request_queue *q) void blk_mq_debugfs_unregister_sched(struct request_queue *q) { + lockdep_assert_held(&q->debugfs_mutex); + debugfs_remove_recursive(q->sched_debugfs_dir); q->sched_debugfs_dir = NULL; } void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos) { + lockdep_assert_held(&rqos->q->debugfs_mutex); + + if (!blk_mq_debugfs_enabled(rqos->q)) + return; debugfs_remove_recursive(rqos->debugfs_dir); rqos->debugfs_dir = NULL; } @@ -989,7 +1051,10 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) struct request_queue *q = rqos->q; const char *dir_name = rq_qos_id_to_name(rqos->id); - if (rqos->debugfs_dir || !rqos->ops->debugfs_attrs) + lockdep_assert_held(&q->debugfs_mutex); + + if (rqos->debugfs_dir || !rqos->ops->debugfs_attrs || + !blk_mq_debugfs_enabled(q)) return; if (!q->rqos_debugfs_dir) @@ -1002,18 +1067,14 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) debugfs_create_files(rqos->debugfs_dir, rqos, rqos->ops->debugfs_attrs); } -void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q) -{ - debugfs_remove_recursive(q->rqos_debugfs_dir); - q->rqos_debugfs_dir = NULL; -} - void blk_mq_debugfs_register_sched_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx) { struct elevator_type *e = q->elevator->type; - if (!e->hctx_debugfs_attrs) + lockdep_assert_held(&q->debugfs_mutex); + + if (!e->hctx_debugfs_attrs || !blk_mq_debugfs_enabled(q)) return; hctx->sched_debugfs_dir 
= debugfs_create_dir("sched", @@ -1024,6 +1085,10 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q, void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx) { + lockdep_assert_held(&hctx->queue->debugfs_mutex); + + if (!blk_mq_debugfs_enabled(hctx->queue)) + return; debugfs_remove_recursive(hctx->sched_debugfs_dir); hctx->sched_debugfs_dir = NULL; } diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h index a68aa6041a10dc616ca2bca43a3bb4588f4baf8c..3a2c43a9a0ae881331cdf3c248bd3bd3f9fbe69b 100644 --- a/block/blk-mq-debugfs.h +++ b/block/blk-mq-debugfs.h @@ -34,7 +34,6 @@ void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx); void blk_mq_debugfs_register_rqos(struct rq_qos *rqos); void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos); -void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q); #else static inline void blk_mq_debugfs_register(struct request_queue *q) { @@ -85,10 +84,6 @@ static inline void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) static inline void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos) { } - -static inline void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q) -{ -} #endif #ifdef CONFIG_BLK_DEBUG_FS_ZONED diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index c92d25b71a72844978661adc0a3ccf452b4a3dd1..8620a5d75c62ad5dd577ba0c4cfaeb9c7d5c47d2 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -610,7 +610,9 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) if (ret) goto err_free_map_and_rqs; + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_register_sched(q); + mutex_unlock(&q->debugfs_mutex); queue_for_each_hw_ctx(q, hctx, i) { if (e->ops.init_hctx) { @@ -623,7 +625,9 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) return ret; } } + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_register_sched_hctx(q, hctx); + mutex_unlock(&q->debugfs_mutex); } return 0; @@ -664,14 +668,21 @@ void 
blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e) unsigned int flags = 0; queue_for_each_hw_ctx(q, hctx, i) { + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_sched_hctx(hctx); + mutex_unlock(&q->debugfs_mutex); + if (e->type->ops.exit_hctx && hctx->sched_data) { e->type->ops.exit_hctx(hctx, i); hctx->sched_data = NULL; } flags = hctx->flags; } + + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_sched(q); + mutex_unlock(&q->debugfs_mutex); + if (e->type->ops.exit_sched) e->type->ops.exit_sched(e); blk_mq_sched_tags_teardown(q, flags); diff --git a/block/blk-mq.c b/block/blk-mq.c index a28957dfb7574e37450d4494af4314f4bc2999f6..ea97231a25eefefcf0572a36f01b43909f442c11 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -47,6 +47,11 @@ bool mq_unfair_dtag = true; module_param_named(unfair_dtag, mq_unfair_dtag, bool, 0444); +#ifdef CONFIG_BLK_DEBUG_FS_SWITCH +bool enable_debugfs = true; +module_param_named(enable_debugfs, enable_debugfs, bool, 0444); +#endif + static DEFINE_PER_CPU(struct list_head, blk_cpu_done); static void blk_mq_poll_stats_start(struct request_queue *q); @@ -2873,7 +2878,9 @@ static void blk_mq_exit_hw_queues(struct request_queue *q, queue_for_each_hw_ctx(q, hctx, i) { if (i == nr_queue) break; + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_hctx(hctx); + mutex_unlock(&q->debugfs_mutex); blk_mq_exit_hctx(q, set, hctx, i); } } @@ -3538,6 +3545,11 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, q->tag_set = set; q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; + +#ifdef CONFIG_BLK_DEBUG_FS_SWITCH + if (!enable_debugfs) + blk_queue_flag_clear(QUEUE_FLAG_DEBUGFS, q); +#endif if (set->nr_maps > HCTX_TYPE_POLL && set->map[HCTX_TYPE_POLL].nr_queues) blk_queue_flag_set(QUEUE_FLAG_POLL, q); diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index e83af7bc759194126a645bea59eef2b841603961..d3a75693adbf4ddf9ffb94b07d4302ffcd2ea0cb 100644 --- a/block/blk-rq-qos.c +++ 
b/block/blk-rq-qos.c @@ -294,8 +294,6 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data, void rq_qos_exit(struct request_queue *q) { - blk_mq_debugfs_unregister_queue_rqos(q); - while (q->rq_qos) { struct rq_qos *rqos = q->rq_qos; q->rq_qos = rqos->next; diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h index 37c59d7d6ba7f2333ed37ca7d2653173c79d7945..af1c2ca157d71438625f7820784d7aeba0ec7036 100644 --- a/block/blk-rq-qos.h +++ b/block/blk-rq-qos.h @@ -118,8 +118,11 @@ static inline int rq_qos_add(struct request_queue *q, struct rq_qos *rqos) blk_mq_unfreeze_queue(q); - if (rqos->ops->debugfs_attrs) + if (rqos->ops->debugfs_attrs) { + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_register_rqos(rqos); + mutex_unlock(&q->debugfs_mutex); + } return 0; ebusy: @@ -150,7 +153,9 @@ static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos) blk_mq_unfreeze_queue(q); + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_rqos(rqos); + mutex_unlock(&q->debugfs_mutex); } typedef bool (acquire_inflight_cb_t)(struct rq_wait *rqw, void *private_data); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index c95be9626a09807a07d8b5563540edd3e7a80de7..805073eb1d7d5df1c4e8c03d567c6e88af3859f2 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -619,6 +619,57 @@ QUEUE_RW_ENTRY(queue_iostats, "iostats"); QUEUE_RW_ENTRY(queue_random, "add_random"); QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes"); +#ifdef CONFIG_BLK_DEBUG_FS +static ssize_t queue_debugfs_show(struct request_queue *q, char *page) +{ + return queue_var_show(test_bit(QUEUE_FLAG_DEBUGFS, &q->queue_flags), + page); +} + +static ssize_t queue_debugfs_store(struct request_queue *q, const char *page, + size_t count) +{ + unsigned long val; + ssize_t ret; + bool enabled; + int err; + + if (!queue_is_mq(q)) + return count; + + if (!blk_queue_registered(q)) + return -ENODEV; + + ret = queue_var_store(&val, page, count); + if (ret < 0) + return ret; + + err = blk_queue_enter(q, 0); + if 
(err) + return err; + + mutex_lock(&q->debugfs_mutex); + enabled = test_bit(QUEUE_FLAG_DEBUGFS, &q->queue_flags); + if (!!val == enabled) + goto unlock; + + if (val) { + blk_queue_flag_set(QUEUE_FLAG_DEBUGFS, q); + blk_mq_debugfs_register(q); + } else { + blk_mq_debugfs_unregister(q); + blk_queue_flag_clear(QUEUE_FLAG_DEBUGFS, q); + } + +unlock: + mutex_unlock(&q->debugfs_mutex); + blk_queue_exit(q); + return ret; +} + +QUEUE_RW_ENTRY(queue_debugfs, "debugfs"); +#endif + static struct attribute *queue_attrs[] = { &queue_requests_entry.attr, &queue_ra_entry.attr, @@ -661,6 +712,9 @@ static struct attribute *queue_attrs[] = { &queue_io_timeout_entry.attr, #ifdef CONFIG_BLK_DEV_THROTTLING_LOW &blk_throtl_sample_time_entry.attr, +#endif +#ifdef CONFIG_BLK_DEBUG_FS + &queue_debugfs_entry.attr, #endif NULL, }; @@ -804,14 +858,10 @@ static void blk_release_queue(struct kobject *kobj) if (queue_is_mq(q)) blk_mq_release(q); - blk_trace_shutdown(q); mutex_lock(&q->debugfs_mutex); - debugfs_remove_recursive(q->debugfs_dir); + blk_trace_shutdown(q); mutex_unlock(&q->debugfs_mutex); - if (queue_is_mq(q)) - blk_mq_debugfs_unregister(q); - bioset_exit(&q->bio_split); ida_simple_remove(&blk_queue_ida, q->id); @@ -881,17 +931,18 @@ int blk_register_queue(struct gendisk *disk) goto unlock; } + if (queue_is_mq(q)) + __blk_mq_register_dev(dev, q); + mutex_lock(&q->sysfs_lock); + mutex_lock(&q->debugfs_mutex); q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent), blk_debugfs_root); - mutex_unlock(&q->debugfs_mutex); - - if (queue_is_mq(q)) { - __blk_mq_register_dev(dev, q); + if (queue_is_mq(q)) blk_mq_debugfs_register(q); - } - mutex_lock(&q->sysfs_lock); + mutex_unlock(&q->debugfs_mutex); + if (q->elevator) { ret = elv_register_queue(q, false); if (ret) { @@ -973,8 +1024,15 @@ void blk_unregister_queue(struct gendisk *disk) /* Now that we've deleted all child objects, we can delete the queue. 
*/ kobject_uevent(&q->kobj, KOBJ_REMOVE); kobject_del(&q->kobj); - mutex_unlock(&q->sysfs_dir_lock); + mutex_lock(&q->debugfs_mutex); + blk_trace_shutdown(q); + if (queue_is_mq(q)) + blk_mq_debugfs_unregister(q); + debugfs_remove_recursive(q->debugfs_dir); + q->debugfs_dir = NULL; + mutex_unlock(&q->debugfs_mutex); + kobject_put(&disk_to_dev(disk)->kobj); } diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 050ddf0ad0027c611310493a0860f9cc41aa231b..9fa00b8b1ac2666b38bd7a47a9d222dc1b0b4bed 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -43,9 +43,15 @@ static struct blkcg_policy blkcg_policy_throtl; /* A workqueue to queue throttle related work */ static struct workqueue_struct *kthrotld_workqueue; +#ifdef CONFIG_BLK_DEV_SUPPORT_LEGACY_GLOBAL_LIMIT /* True if global limit is enabled in cgroup v1 */ static bool global_limit; +static inline bool blkcg_global_limit_enabled(void) +{ + return global_limit; +} + static int __init setup_global_limit(char *str) { if (!strcmp(str, "1") || !strcmp(str, "Y") || !strcmp(str, "y")) @@ -55,7 +61,12 @@ static int __init setup_global_limit(char *str) } __setup("blkcg_global_limit=", setup_global_limit); - +#else +static inline bool blkcg_global_limit_enabled(void) +{ + return false; +} +#endif /* * To implement hierarchical throttling, throtl_grps form a tree and bios * are dispatched upwards level by level until they reach the top and get @@ -571,8 +582,8 @@ static void throtl_pd_init(struct blkg_policy_data *pd) * regardless of the position of the group in the hierarchy. 
*/ sq->parent_sq = &td->service_queue; - if ((cgroup_subsys_on_dfl(io_cgrp_subsys) || global_limit) && - blkg->parent) + if ((cgroup_subsys_on_dfl(io_cgrp_subsys) || + blkcg_global_limit_enabled()) && blkg->parent) sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue; tg->td = td; } diff --git a/block/blk.h b/block/blk.h index 4bbcc971d4f734d5a03163748ed5924dbb4c5389..2e487673b3a759300c62460e500d6d33118cec6a 100644 --- a/block/blk.h +++ b/block/blk.h @@ -15,6 +15,7 @@ #define BLK_MAX_TIMEOUT (5 * HZ) extern struct dentry *blk_debugfs_root; +extern bool io_hung_task_check; struct blk_flush_queue { unsigned int flush_pending_idx:1; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 50b4fd0a06873b1cb196ddf37be26e343c5c7ea4..19a4bb6c99dd31a1e2033bc21d56c51785b70197 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -540,6 +540,9 @@ struct request_queue { unsigned int sg_timeout; unsigned int sg_reserved_size; int node; + /* + * Serializes all debugfs metadata operations using the above dentries. 
+ */ struct mutex debugfs_mutex; #ifdef CONFIG_BLK_DEV_IO_TRACE struct blk_trace __rcu *blk_trace; @@ -588,7 +591,6 @@ struct request_queue { struct bio_set bio_split; struct dentry *debugfs_dir; - #ifdef CONFIG_BLK_DEBUG_FS struct dentry *sched_debugfs_dir; struct dentry *rqos_debugfs_dir; @@ -643,10 +645,12 @@ struct request_queue { #define QUEUE_FLAG_NOWAIT 29 /* device supports NOWAIT */ /*at least one blk-mq hctx can't get driver tag */ #define QUEUE_FLAG_HCTX_WAIT 30 +#define QUEUE_FLAG_DEBUGFS 31 /* supports debugfs */ #define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_SAME_COMP) | \ - (1 << QUEUE_FLAG_NOWAIT)) + (1 << QUEUE_FLAG_NOWAIT) | \ + (1UL << QUEUE_FLAG_DEBUGFS)) void blk_queue_flag_set(unsigned int flag, struct request_queue *q); void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index bc98d5e4e033f66fef0dc44f91828d47981b4036..246ed13b49ca915b8b2cf8ac5a25dcc96825fc92 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -773,12 +773,9 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) **/ void blk_trace_shutdown(struct request_queue *q) { - mutex_lock(&q->debugfs_mutex); if (rcu_dereference_protected(q->blk_trace, lockdep_is_held(&q->debugfs_mutex))) __blk_trace_remove(q); - - mutex_unlock(&q->debugfs_mutex); } #ifdef CONFIG_BLK_CGROUP