67 2 5 ffff888105f5a000 UN 0.0 0 0 [khugepaged]
48318 2 1 ffff88836a3e2000 UN 0.0 0 0 [kworker/1:0]
75414 1 1 ffff8883749e6000 UN 0.0 2260 1272 fsstress
75416 1 2 ffff88836c098000 UN 0.0 2260 1272 fsstress
75417 1 6 ffff888364d2a000 UN 0.0 2364 1368 fsstress
75418 1 6 ffff88837464a000 UN 0.0 2260 1268 fsstress
75420 1 7 ffff888366270000 UN 0.0 2596 1476 fsstress
75421 1 6 ffff888366272000 UN 0.0 2260 1256 fsstress
75422 1 2 ffff888366274000 UN 0.0 2336 1252 fsstress
75424 1 1 ffff88837aa2e000 UN 0.0 2260 1208 fsstress
75425 1 1 ffff88836500c000 UN 0.0 2260 1248 fsstress
75427 1 6 ffff888366316000 UN 0.0 2372 1312 fsstress
75428 1 6 ffff888368728000 UN 0.0 2260 1124 fsstress
75429 1 6 ffff88837fc7e000 UN 0.0 2388 1392 fsstress
75430 1 5 ffff8883c229a000 UN 0.0 2260 1272 fsstress
75433 1 2 ffff8883c2298000 UN 0.0 2260 1272 fsstress
75464 2780 2 ffff88837800c000 UN 0.0 212668 2348 tree
1889387 1 1 ffff88836495a000 UN 0.1 243684 16040 (ostnamed)
2415832 1 0 ffff888377d40000 UN 0.1 243684 16040 (ostnamed)
Hi qiuuuuu, welcome to the openEuler Community.
I'm the Bot here serving you. You can find the instructions on how to interact with me at
https://gitee.com/openeuler/community/blob/master/en/sig-infrastructure/command.md.
If you have any questions, please contact the SIG: Kernel, and any of the maintainers: @Xie XiuQi, @YangYingliang, @成坚 (CHENG Jian).
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。
1、首先查看了各个D状态进程的栈信息
发现48318卡在blk_mq_freeze_queue_wait,其他的分别都卡在IO流程中
crash> bt 48318
PID: 48318 TASK: ffff88836a3e2000 CPU: 1 COMMAND: "kworker/1:0"
#0 [ffff888375447ba8] __schedule at ffffffffb2d22a25
#1 [ffff888375447cc0] schedule at ffffffffb2d2358f
#2 [ffff888375447cd8] blk_mq_freeze_queue_wait at ffffffffb23d92b7
#3 [ffff888375447d90] blk_cleanup_queue at ffffffffb23bf305
#4 [ffff888375447da8] __scsi_remove_device at ffffffffb278ae40
#5 [ffff888375447dd0] scsi_remove_device at ffffffffb278afff
#6 [ffff888375447de8] virtscsi_handle_event at ffffffffc04d1983 [virtio_scsi]
#7 [ffff888375447e28] process_one_work at ffffffffb1d80265
#8 [ffff888375447ea8] worker_thread at ffffffffb1d806a1
#9 [ffff888375447f10] kthread at ffffffffb1d8bfc3
#10 [ffff888375447f50] ret_from_fork at ffffffffb2e0023f
crash> bt 75414 75420 75421 75422 75424 75425 75430
PID: 75414 TASK: ffff8883749e6000 CPU: 1 COMMAND: "fsstress"
#0 [ffff8882663cf678] __schedule at ffffffffb2d22a25
#1 [ffff8882663cf790] schedule at ffffffffb2d2358f
#2 [ffff8882663cf7a8] io_schedule at ffffffffb2d23bdc
#3 [ffff8882663cf7c0] __lock_page at ffffffffb2013c98
#4 [ffff8882663cf920] write_cache_pages at ffffffffb203b165
#5 [ffff8882663cfaf8] generic_writepages at ffffffffb203b57b
#6 [ffff8882663cfbc8] do_writepages at ffffffffb203c8c7
#7 [ffff8882663cfca8] __filemap_fdatawrite_range at ffffffffb2019f40
#8 [ffff8882663cfd98] filemap_write_and_wait at ffffffffb2019ff9
#9 [ffff8882663cfdb0] __sync_blockdev at ffffffffb21fd97c
#10 [ffff8882663cfdd0] sync_filesystem at ffffffffb21e8353
#11 [ffff8882663cfdf8] ovl_sync_fs at ffffffffc0fd853a [overlay]
#12 [ffff8882663cfe10] sync_fs_one_sb at ffffffffb21e8221
#13 [ffff8882663cfe30] iterate_supers at ffffffffb218401e
#14 [ffff8882663cfe70] ksys_sync at ffffffffb21e8588
#15 [ffff8882663cff20] __x64_sys_sync at ffffffffb21e861f
#16 [ffff8882663cff28] do_syscall_64 at ffffffffb1c06bc8
#17 [ffff8882663cff50] entry_SYSCALL_64_after_hwframe at ffffffffb2e000ad
RIP: 00007f479ab13347 RSP: 00007ffd4dda9fe8 RFLAGS: 00000202
RAX: ffffffffffffffda RBX: 000000000000005c RCX: 00007f479ab13347
RDX: 0000000000000000 RSI: 00000000473be788 RDI: 000000000000005c
RBP: 0000000051eb851f R8: 00007f479abd4030 R9: 00007f479abd40a0
R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000402c20
R13: 0000000000000001 R14: 0000000000000000 R15: 7fffffffffffffff
ORIG_RAX: 00000000000000a2 CS: 0033 SS: 002b
crash> bt 75416 75417 75418 75427 75428 75429 75433 75464
PID: 75416 TASK: ffff88836c098000 CPU: 2 COMMAND: "fsstress"
#0 [ffff8882e59a7608] __schedule at ffffffffb2d22a25
#1 [ffff8882e59a7720] schedule at ffffffffb2d2358f
#2 [ffff8882e59a7738] io_schedule at ffffffffb2d23bdc
#3 [ffff8882e59a7750] rq_qos_wait at ffffffffb2400fde
#4 [ffff8882e59a7878] wbt_wait at ffffffffb243a051
#5 [ffff8882e59a7910] __rq_qos_throttle at ffffffffb2400a20
#6 [ffff8882e59a7930] blk_mq_make_request at ffffffffb23de038
#7 [ffff8882e59a7a98] generic_make_request at ffffffffb23c393d
#8 [ffff8882e59a7b80] submit_bio at ffffffffb23c3db8
#9 [ffff8882e59a7c48] submit_bio_wait at ffffffffb23b3a5d
#10 [ffff8882e59a7cf0] blkdev_issue_flush at ffffffffb23c8f4c
#11 [ffff8882e59a7d20] ext4_sync_fs at ffffffffc06dd708 [ext4]
#12 [ffff8882e59a7dd0] sync_filesystem at ffffffffb21e8335
#13 [ffff8882e59a7df8] ovl_sync_fs at ffffffffc0fd853a [overlay]
#14 [ffff8882e59a7e10] sync_fs_one_sb at ffffffffb21e8221
#15 [ffff8882e59a7e30] iterate_supers at ffffffffb218401e
#16 [ffff8882e59a7e70] ksys_sync at ffffffffb21e8588
#17 [ffff8882e59a7f20] __x64_sys_sync at ffffffffb21e861f
#18 [ffff8882e59a7f28] do_syscall_64 at ffffffffb1c06bc8
#19 [ffff8882e59a7f50] entry_SYSCALL_64_after_hwframe at ffffffffb2e000ad
RIP: 00007f479ab13347 RSP: 00007ffd4dda9fe8 RFLAGS: 00000202
RAX: ffffffffffffffda RBX: 0000000000000068 RCX: 00007f479ab13347
RDX: 0000000000000000 RSI: 000000003e1b142d RDI: 0000000000000068
RBP: 0000000051eb851f R8: 00007f479abd4034 R9: 00007f479abd40a0
R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000402c20
R13: 0000000000000001 R14: 0000000000000000 R15: 7fffffffffffffff
ORIG_RAX: 00000000000000a2 CS: 0033 SS: 002b
crash> bt 75464
PID: 75464 TASK: ffff88837800c000 CPU: 2 COMMAND: "tree"
#0 [ffff8883604e6f08] __schedule at ffffffffb2d22a25
#1 [ffff8883604e7020] schedule at ffffffffb2d2358f
#2 [ffff8883604e7038] io_schedule at ffffffffb2d23bdc
#3 [ffff8883604e7050] rq_qos_wait at ffffffffb2400fde
#4 [ffff8883604e7178] wbt_wait at ffffffffb243a051
#5 [ffff8883604e7210] __rq_qos_throttle at ffffffffb2400a20
#6 [ffff8883604e7230] blk_mq_make_request at ffffffffb23de038
#7 [ffff8883604e7398] generic_make_request at ffffffffb23c393d
#8 [ffff8883604e7480] submit_bio at ffffffffb23c3db8
#9 [ffff8883604e7548] submit_bh_wbc at ffffffffb21f41e4
#10 [ffff8883604e7598] __sync_dirty_buffer at ffffffffb21f9103
#11 [ffff8883604e75b8] ext4_commit_super at ffffffffc06ec372 [ext4]
#12 [ffff8883604e75e8] ext4_handle_error at ffffffffc06ec905 [ext4]
#13 [ffff8883604e7640] __ext4_error at ffffffffc06ecb14 [ext4]
#14 [ffff8883604e7788] ext4_journal_check_start at ffffffffc06469c5 [ext4]
#15 [ffff8883604e77b8] __ext4_journal_start_sb at ffffffffc0646bfc [ext4]
#16 [ffff8883604e7800] ext4_dirty_inode at ffffffffc068d491 [ext4]
#17 [ffff8883604e7820] __mark_inode_dirty at ffffffffb21dd67a
#18 [ffff8883604e7868] generic_update_time at ffffffffb21b54fe
#19 [ffff8883604e78b0] touch_atime at ffffffffb21b8da5
#20 [ffff8883604e7950] ovl_update_time at ffffffffc0fe3eb5 [overlay]
#21 [ffff8883604e79e0] touch_atime at ffffffffb21b8da5
#22 [ffff8883604e7a80] trailing_symlink at ffffffffb2199923
#23 [ffff8883604e7ac0] path_lookupat at ffffffffb219a8eb
#24 [ffff8883604e7bf8] filename_lookup at ffffffffb219e4cc
#25 [ffff8883604e7d78] vfs_statx at ffffffffb2186916
#26 [ffff8883604e7e28] __do_sys_newstat at ffffffffb2187541
#27 [ffff8883604e7f28] do_syscall_64 at ffffffffb1c06bc8
#28 [ffff8883604e7f50] entry_SYSCALL_64_after_hwframe at ffffffffb2e000ad
RIP: 00007fe100b60dc5 RSP: 00007ffd98125508 RFLAGS: 00000246
RAX: ffffffffffffffda RBX: 00005641dc1c676b RCX: 00007fe100b60dc5
RDX: 00007ffd981255c0 RSI: 00007ffd981255c0 RDI: 00005641dc196750
RBP: 00005641dc1c6410 R8: 0000000000000002 R9: 00000000ffffffff
R10: 00005641dc1c676b R11: 0000000000000246 R12: 0000000000000017
R13: 00005641dc198770 R14: 00007ffd981255c0 R15: 00005641dc1b6eb0
ORIG_RAX: 0000000000000004 CS: 0033 SS: 002b
crash> bt 1889387
PID: 1889387 TASK: ffff88836495a000 CPU: 1 COMMAND: "(ostnamed)"
#0 [ffff8882205f7a78] __schedule at ffffffffb2d22a25
#1 [ffff8882205f7b90] schedule at ffffffffb2d2358f
#2 [ffff8882205f7ba8] rwsem_down_write_failed at ffffffffb2d2954b
#3 [ffff8882205f7d50] down_write at ffffffffb2d282a6
#4 [ffff8882205f7d68] do_mount at ffffffffb21c49fa
#5 [ffff8882205f7ec0] ksys_mount at ffffffffb21c60ed
#6 [ffff8882205f7ef8] __x64_sys_mount at ffffffffb21c61c7
#7 [ffff8882205f7f28] do_syscall_64 at ffffffffb1c06bc8
#8 [ffff8882205f7f50] entry_SYSCALL_64_after_hwframe at ffffffffb2e000ad
RIP: 00007f7f1766205a RSP: 00007fffda6eb4c8 RFLAGS: 00000206
RAX: ffffffffffffffda RBX: 000055ebff27d290 RCX: 00007f7f1766205a
RDX: 0000000000000000 RSI: 000055ebff27d290 RDI: 0000000000000000
RBP: 0000000000000001 R8: 0000000000000000 R9: 000055ebff28e5db
R10: 0000000000001021 R11: 0000000000000206 R12: 000055ebff1f59c0
R13: 000055ebff2a6d40 R14: 000055ebff1fdd00 R15: 000055ebff24d678
ORIG_RAX: 00000000000000a5 CS: 0033 SS: 002b
crash> bt 2415832
PID: 2415832 TASK: ffff888377d40000 CPU: 0 COMMAND: "(ostnamed)"
#0 [ffff8882207cfa78] __schedule at ffffffffb2d22a25
#1 [ffff8882207cfb90] schedule at ffffffffb2d2358f
#2 [ffff8882207cfba8] rwsem_down_write_failed at ffffffffb2d2954b
#3 [ffff8882207cfd50] down_write at ffffffffb2d282a6
#4 [ffff8882207cfd68] do_mount at ffffffffb21c49fa
#5 [ffff8882207cfec0] ksys_mount at ffffffffb21c60ed
#6 [ffff8882207cfef8] __x64_sys_mount at ffffffffb21c61c7
#7 [ffff8882207cff28] do_syscall_64 at ffffffffb1c06bc8
#8 [ffff8882207cff50] entry_SYSCALL_64_after_hwframe at ffffffffb2e000ad
RIP: 00007f7f1766205a RSP: 00007fffda6eb4c8 RFLAGS: 00000206
RAX: ffffffffffffffda RBX: 000055ebff26af70 RCX: 00007f7f1766205a
RDX: 0000000000000000 RSI: 000055ebff26af70 RDI: 0000000000000000
RBP: 0000000000000001 R8: 0000000000000000 R9: 000055ebff24139b
R10: 0000000000001021 R11: 0000000000000206 R12: 000055ebff15a520
R13: 000055ebff261270 R14: 000055ebfcbff510 R15: 000055ebff24d678
ORIG_RAX: 00000000000000a5 CS: 0033 SS: 002b
排查 wbt 的 enable 设置应该没什么问题,最终怀疑到 __rq_qos_throttle 中对 rqos 链表的无保护遍历:
void __rq_qos_throttle(struct rq_qos *rqos, struct bio *bio)
{
do {
if (rqos->ops->throttle)
rqos->ops->throttle(rqos, bio);
rqos = rqos->next;
} while (rqos); <= 加 inflight 时对应的 rqos 可能已被删除,而减 inflight 时该 rqos 又已被重新 add 回来
}
最终看下来确实存在该问题,通过在wbt_init中增加delay可以将该问题必现
#include "blk-rq-qos.h"
@@ -761,6 +762,11 @@ int wbt_init(struct request_queue *q)
/*
* Assign rwb and add the stats callback.
*/
printk("%s:%d %s queue %px start delay\n", current->comm, current->pid, __func__, q);
msleep(10000);
printk("%s:%d %s queue %px end delay\n", current->comm, current->pid, __func__, q);
若写请求在 rq_qos_add 之前下发,且此时已经执行过 rq_qos_throttle(由于 wbt 尚未初始化,inflight 不会增加);随后 wbt 对应的 rqos 初始化完成,rq_qos_track 便会给该 req 设置 tracked 标记,后续 wbt_done 中又会对其 inflight 做减操作,最终导致 inflight 被减成负数。
并发场景
T1 T2 T3
scsi_remove_device
scsi_remove_device
blk_cleanup_queue
Freeze_queue
rq_qos_exit
<= free(rwb) rqos=NULL
Unfreeze_queue
scsi_add_device
device_add
driver_probe_device
sd_probe
sd_probe_async
__device_add_disk
blk_mq_make_request
rq_qos_throttle <= wbt not init, rqos=null<=can't add inflight or wbt->wb_normal = 0
wbt_enable_default
Wbt_init <= rqos=NULL
rq_qos_track <= wbt_init done, add track mask for req or wbt->wb_normal != 0
wbt_done <= &wb->rq_wait[i].inflight = -1
登录 后才可以发表评论