Hi zhangjialin11, welcome to the openEuler Community.
I'm the Bot here serving you. You can find the instructions on how to interact with me here.
If you have any questions, please contact the SIG: Kernel, and any of the maintainers: @成坚 (CHENG Jian) , @Qiuuuuu , @zhengzengkai , @gogooo , @pi3orama , @Xie XiuQi , @YangYingliang
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违反国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。
pciehp: fix a race between pciehp and removing operations by sysfs
测试补丁:
diff --git a/drivers/pci/hotplug/pciehp_pci.c b/drivers/pci/hotplug/pciehp_pci.c
index d17f3bf36f709..8fa086410989b 100644
--- a/drivers/pci/hotplug/pciehp_pci.c
+++ b/drivers/pci/hotplug/pciehp_pci.c
@@ -87,7 +87,7 @@ void pciehp_unconfigure_device(struct controller *ctrl, bool presence)
struct pci_bus *parent = ctrl->pcie->port->subordinate;
u16 command;
- ctrl_dbg(ctrl, "%s: domain:bus:dev = %04x:%02x:00\n",
+ ctrl_info(ctrl, "%s: domain:bus:dev = %04x:%02x:00\n",
__func__, pci_domain_nr(parent), parent->number);
if (!presence)
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index 95dec03d9f2a9..d511b2d321547 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -2,6 +2,7 @@
#include <linux/pci.h>
#include <linux/module.h>
#include "pci.h"
+#include <linux/delay.h>
static void pci_free_resources(struct pci_dev *dev)
{
@@ -121,6 +122,9 @@ EXPORT_SYMBOL(pci_stop_and_remove_bus_device);
void pci_stop_and_remove_bus_device_locked(struct pci_dev *dev)
{
pci_lock_rescan_remove();
+ pr_info("before mdelay\n");
+ mdelay(9000);
+ pr_info("after mdelay\n");
pci_stop_and_remove_bus_device(dev);
pci_unlock_rescan_remove();
}
测试方法:先通过 sysfs 的 remove 接口删除这个设备,再另起一个窗口通过 hotplug(QEMU monitor 的 device_del)移除这个设备
config:
CONFIG_EXT4_FS=y
CONFIG_VIRTIO=y
CONFIG_VIRTIO_PCI=y
CONFIG_VIRTIO_BLK=y
CONFIG_VIRTIO_NET=y
测试结果:
修复前:
root@syzkaller:~# echo 1 > /sys/bus/pci/devices/0000\:00\:02.0/remove
[ 154.822003][ T342] before mdelay
[ 157.681556][ T160] pcieport 0000:02:00.0: pciehp: Slot(0-1): Attention button pressed
[ 157.682301][ T160] pcieport 0000:02:00.0: pciehp: Slot(0-1): Powering off due to button press
[ 163.822881][ T342] after mdelay
[ 163.823292][ T160] pcieport 0000:02:00.0: pciehp: pciehp_unconfigure_device: domain:bus:dev = 0000:03:00
[ 363.527352][ T84] INFO: task irq/54-pciehp:160 blocked for more than 120 seconds.
[ 363.531457][ T84] Not tainted 5.10.0+ #13
[ 363.533969][ T84] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 363.538592][ T84] task:irq/54-pciehp state:D stack: 0 pid: 160 ppid: 2 flags:0x00000028
[ 363.543856][ T84] Call trace:
[ 363.545532][ T84] __switch_to+0xf4/0x160
[ 363.547794][ T84] __schedule+0x2f8/0x858
[ 363.550018][ T84] schedule+0x50/0xe0
[ 363.552103][ T84] schedule_preempt_disabled+0x18/0x28
[ 363.554947][ T84] __mutex_lock.isra.1+0x210/0x5a0
[ 363.557687][ T84] __mutex_lock_slowpath+0x1c/0x28
[ 363.560341][ T84] mutex_lock+0x4c/0x68
[ 363.562576][ T84] pci_lock_rescan_remove+0x24/0x30
[ 363.565344][ T84] pciehp_unconfigure_device+0x80/0x120
[ 363.568322][ T84] pciehp_disable_slot+0x78/0x150
[ 363.570970][ T84] pciehp_handle_disable_request+0x54/0x78
[ 363.574136][ T84] pciehp_ist+0x224/0x270
[ 363.576411][ T84] irq_thread_fn+0x34/0xa8
[ 363.578731][ T84] irq_thread+0x140/0x258
[ 363.581086][ T84] kthread+0x130/0x138
[ 363.583154][ T84] ret_from_fork+0x10/0x18
[ 363.585502][ T84] INFO: task bash:342 blocked for more than 120 seconds.
[ 363.589290][ T84] Not tainted 5.10.0+ #13
[ 363.591900][ T84] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 363.596486][ T84] task:bash state:D stack: 0 pid: 342 ppid: 287 flags:0x00000008
[ 363.601357][ T84] Call trace:
[ 363.603085][ T84] __switch_to+0xf4/0x160
[ 363.605355][ T84] __schedule+0x2f8/0x858
[ 363.607648][ T84] schedule+0x50/0xe0
[ 363.609787][ T84] schedule_timeout+0x27c/0x308
[ 363.612338][ T84] wait_for_common+0x190/0x230
[ 363.614844][ T84] wait_for_completion+0x20/0x30
[ 363.617451][ T84] kthread_stop+0x80/0x208
[ 363.619808][ T84] __free_irq+0x1cc/0x430
[ 363.622077][ T84] free_irq+0x3c/0x88
[ 363.624172][ T84] pcie_shutdown_notification+0x48/0x88
[ 363.627109][ T84] pciehp_remove+0x30/0x58
[ 363.629495][ T84] pcie_port_remove_service+0x40/0x68
[ 363.632369][ T84] device_release_driver_internal+0x118/0x1f0
[ 363.635586][ T84] device_release_driver+0x20/0x30
[ 363.638272][ T84] bus_remove_device+0xdc/0x168
[ 363.640838][ T84] device_del+0x174/0x3e8
[ 363.643097][ T84] device_unregister+0x28/0x80
[ 363.645727][ T84] remove_iter+0x34/0x48
[ 363.647968][ T84] device_for_each_child+0x68/0xb0
[ 363.650657][ T84] pcie_port_device_remove+0x30/0x50
[ 363.653535][ T84] pcie_portdrv_remove+0x30/0x90
[ 363.656189][ T84] pci_device_remove+0x48/0xf0
[ 363.658683][ T84] device_release_driver_internal+0x118/0x1f0
[ 363.661971][ T84] device_release_driver+0x20/0x30
[ 363.664674][ T84] pci_stop_bus_device+0x8c/0xe8
[ 363.667236][ T84] pci_stop_bus_device+0x54/0xe8
[ 363.669890][ T84] pci_stop_bus_device+0x40/0xe8
[ 363.672457][ T84] pci_stop_and_remove_bus_device_locked+0x64/0xa0
[ 363.675918][ T84] remove_store+0x98/0xa8
[ 363.678214][ T84] dev_attr_store+0x20/0x38
[ 363.680641][ T84] sysfs_kf_write+0x4c/0x60
[ 363.682986][ T84] kernfs_fop_write_iter+0x140/0x1d0
[ 363.685877][ T84] new_sync_write+0x108/0x190
[ 363.688438][ T84] vfs_write+0x224/0x2b0
[ 363.690699][ T84] ksys_write+0x70/0xf8
[ 363.692924][ T84] __arm64_sys_write+0x24/0x30
[ 363.695437][ T84] el0_svc_common.constprop.0+0xb8/0x208
[ 363.698439][ T84] do_el0_svc+0x2c/0x98
[ 363.700627][ T84] el0_svc+0x20/0x30
[ 363.702637][ T84] el0_sync_handler+0x90/0xb8
[ 363.705109][ T84] el0_sync+0x160/0x180
[root@10 qemu2]# telnet localhost 7011
Trying ::1...
Connected to localhost.
Escape character is '^]'.
QEMU 4.1.0 monitor - type 'help' for more information
(qemu) device_del upstream2
修复后:
root@syzkaller:~# echo 1 > /sys/bus/pci/devices/0000\:00\:02.0/remove
[ 123.446512][ T341] before mdelay
[ 125.875444][ T160] pcieport 0000:02:00.0: pciehp: Slot(0-1): Attention button pressed
[ 125.876160][ T160] pcieport 0000:02:00.0: pciehp: Slot(0-1): Slot operation failed because a remove or rescan operation is under processing, please try later!
[ 132.447467][ T341] after mdelay
[ 132.459178][ T341] pci_bus 0000:05: busn_res: [bus 05] is released
[ 132.460145][ T341] pci_bus 0000:06: busn_res: [bus 06] is released
[ 132.461014][ T341] pci_bus 0000:04: busn_res: [bus 04-06] is released
[ 132.461909][ T341] pci_bus 0000:03: busn_res: [bus 03-06] is released
[ 132.464064][ T341] pci_bus 0000:07: busn_res: [bus 07] is released
[ 132.465044][ T341] pci_bus 0000:02: busn_res: [bus 02-07] is released
[ 132.465936][ T341] pci_bus 0000:01: busn_res: [bus 01-07] is released
[root@10 qemu2]# telnet localhost 7011
Trying ::1...
Connected to localhost.
Escape character is '^]'.
QEMU 4.1.0 monitor - type 'help' for more information
(qemu) device_del upstream2
pciehp: do not wake up irq_thread for sysfs operation
OLK-5.10已经有如下修复补丁:
157c1062fcd8 PCI: pciehp: Avoid returning prematurely from sysfs requests
diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
index 654c972b8ea0c..882ce82c46990 100644
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -72,6 +72,7 @@ extern int pciehp_poll_time;
* @reset_lock: prevents access to the Data Link Layer Link Active bit in the
* Link Status register and to the Presence Detect State bit in the Slot
* Status register during a slot reset which may cause them to flap
+ * @ist_running: flag to keep user request waiting while IRQ thread is running
* @request_result: result of last user request submitted to the IRQ thread
* @requester: wait queue to wake up on completion of user request,
* used for synchronous slot enable/disable request via sysfs
@@ -101,6 +102,7 @@ struct controller {
struct hotplug_slot hotplug_slot; /* hotplug core interface */
struct rw_semaphore reset_lock;
+ unsigned int ist_running;
int request_result;
wait_queue_head_t requester;
};
diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c
index 21af7b16d7a4f..dd8e4a5fb2826 100644
--- a/drivers/pci/hotplug/pciehp_ctrl.c
+++ b/drivers/pci/hotplug/pciehp_ctrl.c
@@ -375,7 +375,8 @@ int pciehp_sysfs_enable_slot(struct hotplug_slot *hotplug_slot)
ctrl->request_result = -ENODEV;
pciehp_request(ctrl, PCI_EXP_SLTSTA_PDC);
wait_event(ctrl->requester,
- !atomic_read(&ctrl->pending_events));
+ !atomic_read(&ctrl->pending_events) &&
+ !ctrl->ist_running);
return ctrl->request_result;
case POWERON_STATE:
ctrl_info(ctrl, "Slot(%s): Already in powering on state\n",
@@ -408,7 +409,8 @@ int pciehp_sysfs_disable_slot(struct hotplug_slot *hotplug_slot)
mutex_unlock(&ctrl->state_lock);
pciehp_request(ctrl, DISABLE_SLOT);
wait_event(ctrl->requester,
- !atomic_read(&ctrl->pending_events));
+ !atomic_read(&ctrl->pending_events) &&
+ !ctrl->ist_running);
return ctrl->request_result;
case POWEROFF_STATE:
ctrl_info(ctrl, "Slot(%s): Already in powering off state\n",
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 1a522c1c41772..86d97f3112f02 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -583,6 +583,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id)
irqreturn_t ret;
u32 events;
+ ctrl->ist_running = true;
pci_config_pm_runtime_get(pdev);
/* rerun pciehp_isr() if the port was inaccessible on interrupt */
@@ -629,6 +630,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id)
up_read(&ctrl->reset_lock);
pci_config_pm_runtime_put(pdev);
+ ctrl->ist_running = false;
wake_up(&ctrl->requester);
return IRQ_HANDLED;
}
因此,只合入 "pciehp: do not wake up irq_thread for sysfs operation" 中与 Fixes: 764cafd9875e ("pciehp: fix a race between pciehp and removing operations by sysfs") 有关的部分。
登录 后才可以发表评论