402 Star 1.4K Fork 1.4K

GVPopenEuler / kernel

 / 详情

[OLK-5.10] PCIE领域补丁回合

已完成
任务 成员
创建于  
2021-12-28 16:58

输入图片说明

评论 (4)

zhangjialin 创建了任务

Hi zhangjialin11, welcome to the openEuler Community.
I'm the Bot here serving you. You can find the instructions on how to interact with me here.
If you have any questions, please contact the SIG: Kernel, and any of the maintainers: @成坚 (CHENG Jian) , @Qiuuuuu , @zhengzengkai , @gogooo , @pi3orama , @Xie XiuQi , @YangYingliang

openeuler-ci-bot 添加了
 
sig/Kernel
标签
zhangjialin 修改了描述

pciehp: fix a race between pciehp and removing operations by sysfs
测试补丁:

diff --git a/drivers/pci/hotplug/pciehp_pci.c b/drivers/pci/hotplug/pciehp_pci.c
index d17f3bf36f709..8fa086410989b 100644
--- a/drivers/pci/hotplug/pciehp_pci.c
+++ b/drivers/pci/hotplug/pciehp_pci.c
@@ -87,7 +87,7 @@ void pciehp_unconfigure_device(struct controller *ctrl, bool presence)
 	struct pci_bus *parent = ctrl->pcie->port->subordinate;
 	u16 command;
 
-	ctrl_dbg(ctrl, "%s: domain:bus:dev = %04x:%02x:00\n",
+	ctrl_info(ctrl, "%s: domain:bus:dev = %04x:%02x:00\n",
 		 __func__, pci_domain_nr(parent), parent->number);
 
 	if (!presence)
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index 95dec03d9f2a9..d511b2d321547 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -2,6 +2,7 @@
 #include <linux/pci.h>
 #include <linux/module.h>
 #include "pci.h"
+#include <linux/delay.h>
 
 static void pci_free_resources(struct pci_dev *dev)
 {
@@ -121,6 +122,9 @@ EXPORT_SYMBOL(pci_stop_and_remove_bus_device);
 void pci_stop_and_remove_bus_device_locked(struct pci_dev *dev)
 {
 	pci_lock_rescan_remove();
+	pr_info("before mdelay\n");
+	mdelay(9000);
+	pr_info("after mdelay\n");
 	pci_stop_and_remove_bus_device(dev);
 	pci_unlock_rescan_remove();
 }

测试方法:先通过sysfs的remove接口删除这个设备(echo 1 > /sys/bus/pci/devices/.../remove),在mdelay等待期间再起一个窗口,通过hotplug(在QEMU monitor中执行device_del)移除这个设备
config:

CONFIG_EXT4_FS=y
CONFIG_VIRTIO=y
CONFIG_VIRTIO_PCI=y
CONFIG_VIRTIO_BLK=y
CONFIG_VIRTIO_NET=y

测试结果:
修复前:

root@syzkaller:~# echo 1 > /sys/bus/pci/devices/0000\:00\:02.0/remove 
[  154.822003][  T342] before mdelay
[  157.681556][  T160] pcieport 0000:02:00.0: pciehp: Slot(0-1): Attention button pressed
[  157.682301][  T160] pcieport 0000:02:00.0: pciehp: Slot(0-1): Powering off due to button press
[  163.822881][  T342] after mdelay
[  163.823292][  T160] pcieport 0000:02:00.0: pciehp: pciehp_unconfigure_device: domain:bus:dev = 0000:03:00

[  363.527352][   T84] INFO: task irq/54-pciehp:160 blocked for more than 120 seconds.
[  363.531457][   T84]       Not tainted 5.10.0+ #13
[  363.533969][   T84] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  363.538592][   T84] task:irq/54-pciehp   state:D stack:    0 pid:  160 ppid:     2 flags:0x00000028
[  363.543856][   T84] Call trace:
[  363.545532][   T84]  __switch_to+0xf4/0x160
[  363.547794][   T84]  __schedule+0x2f8/0x858
[  363.550018][   T84]  schedule+0x50/0xe0
[  363.552103][   T84]  schedule_preempt_disabled+0x18/0x28
[  363.554947][   T84]  __mutex_lock.isra.1+0x210/0x5a0
[  363.557687][   T84]  __mutex_lock_slowpath+0x1c/0x28
[  363.560341][   T84]  mutex_lock+0x4c/0x68
[  363.562576][   T84]  pci_lock_rescan_remove+0x24/0x30
[  363.565344][   T84]  pciehp_unconfigure_device+0x80/0x120
[  363.568322][   T84]  pciehp_disable_slot+0x78/0x150
[  363.570970][   T84]  pciehp_handle_disable_request+0x54/0x78
[  363.574136][   T84]  pciehp_ist+0x224/0x270
[  363.576411][   T84]  irq_thread_fn+0x34/0xa8
[  363.578731][   T84]  irq_thread+0x140/0x258
[  363.581086][   T84]  kthread+0x130/0x138
[  363.583154][   T84]  ret_from_fork+0x10/0x18
[  363.585502][   T84] INFO: task bash:342 blocked for more than 120 seconds.
[  363.589290][   T84]       Not tainted 5.10.0+ #13
[  363.591900][   T84] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  363.596486][   T84] task:bash            state:D stack:    0 pid:  342 ppid:   287 flags:0x00000008
[  363.601357][   T84] Call trace:
[  363.603085][   T84]  __switch_to+0xf4/0x160
[  363.605355][   T84]  __schedule+0x2f8/0x858
[  363.607648][   T84]  schedule+0x50/0xe0
[  363.609787][   T84]  schedule_timeout+0x27c/0x308
[  363.612338][   T84]  wait_for_common+0x190/0x230
[  363.614844][   T84]  wait_for_completion+0x20/0x30
[  363.617451][   T84]  kthread_stop+0x80/0x208
[  363.619808][   T84]  __free_irq+0x1cc/0x430
[  363.622077][   T84]  free_irq+0x3c/0x88
[  363.624172][   T84]  pcie_shutdown_notification+0x48/0x88
[  363.627109][   T84]  pciehp_remove+0x30/0x58
[  363.629495][   T84]  pcie_port_remove_service+0x40/0x68
[  363.632369][   T84]  device_release_driver_internal+0x118/0x1f0
[  363.635586][   T84]  device_release_driver+0x20/0x30
[  363.638272][   T84]  bus_remove_device+0xdc/0x168
[  363.640838][   T84]  device_del+0x174/0x3e8
[  363.643097][   T84]  device_unregister+0x28/0x80
[  363.645727][   T84]  remove_iter+0x34/0x48
[  363.647968][   T84]  device_for_each_child+0x68/0xb0
[  363.650657][   T84]  pcie_port_device_remove+0x30/0x50
[  363.653535][   T84]  pcie_portdrv_remove+0x30/0x90
[  363.656189][   T84]  pci_device_remove+0x48/0xf0
[  363.658683][   T84]  device_release_driver_internal+0x118/0x1f0
[  363.661971][   T84]  device_release_driver+0x20/0x30
[  363.664674][   T84]  pci_stop_bus_device+0x8c/0xe8
[  363.667236][   T84]  pci_stop_bus_device+0x54/0xe8
[  363.669890][   T84]  pci_stop_bus_device+0x40/0xe8
[  363.672457][   T84]  pci_stop_and_remove_bus_device_locked+0x64/0xa0
[  363.675918][   T84]  remove_store+0x98/0xa8
[  363.678214][   T84]  dev_attr_store+0x20/0x38
[  363.680641][   T84]  sysfs_kf_write+0x4c/0x60
[  363.682986][   T84]  kernfs_fop_write_iter+0x140/0x1d0
[  363.685877][   T84]  new_sync_write+0x108/0x190
[  363.688438][   T84]  vfs_write+0x224/0x2b0
[  363.690699][   T84]  ksys_write+0x70/0xf8
[  363.692924][   T84]  __arm64_sys_write+0x24/0x30
[  363.695437][   T84]  el0_svc_common.constprop.0+0xb8/0x208
[  363.698439][   T84]  do_el0_svc+0x2c/0x98
[  363.700627][   T84]  el0_svc+0x20/0x30
[  363.702637][   T84]  el0_sync_handler+0x90/0xb8
[  363.705109][   T84]  el0_sync+0x160/0x180
[root@10 qemu2]# telnet localhost 7011
Trying ::1...
Connected to localhost.
Escape character is '^]'.
QEMU 4.1.0 monitor - type 'help' for more information
(qemu) device_del upstream2

修复后:

root@syzkaller:~# echo 1 > /sys/bus/pci/devices/0000\:00\:02.0/remove 
[  123.446512][  T341] before mdelay
[  125.875444][  T160] pcieport 0000:02:00.0: pciehp: Slot(0-1): Attention button pressed
[  125.876160][  T160] pcieport 0000:02:00.0: pciehp: Slot(0-1): Slot operation failed because a remove or rescan operation is under processing, please try later!
[  132.447467][  T341] after mdelay
[  132.459178][  T341] pci_bus 0000:05: busn_res: [bus 05] is released
[  132.460145][  T341] pci_bus 0000:06: busn_res: [bus 06] is released
[  132.461014][  T341] pci_bus 0000:04: busn_res: [bus 04-06] is released
[  132.461909][  T341] pci_bus 0000:03: busn_res: [bus 03-06] is released
[  132.464064][  T341] pci_bus 0000:07: busn_res: [bus 07] is released
[  132.465044][  T341] pci_bus 0000:02: busn_res: [bus 02-07] is released
[  132.465936][  T341] pci_bus 0000:01: busn_res: [bus 01-07] is released
[root@10 qemu2]# telnet localhost 7011
Trying ::1...
Connected to localhost.
Escape character is '^]'.
QEMU 4.1.0 monitor - type 'help' for more information
(qemu) device_del upstream2

pciehp: do not wake up irq_thread for sysfs operation
OLK-5.10已经有如下修复补丁:
157c1062fcd8 PCI: pciehp: Avoid returning prematurely from sysfs requests

diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
index 654c972b8ea0c..882ce82c46990 100644
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -72,6 +72,7 @@ extern int pciehp_poll_time;
  * @reset_lock: prevents access to the Data Link Layer Link Active bit in the
  *     Link Status register and to the Presence Detect State bit in the Slot
  *     Status register during a slot reset which may cause them to flap
+ * @ist_running: flag to keep user request waiting while IRQ thread is running
  * @request_result: result of last user request submitted to the IRQ thread
  * @requester: wait queue to wake up on completion of user request,
  *     used for synchronous slot enable/disable request via sysfs
@@ -101,6 +102,7 @@ struct controller {
 
        struct hotplug_slot hotplug_slot;       /* hotplug core interface */
        struct rw_semaphore reset_lock;
+       unsigned int ist_running;
        int request_result;
        wait_queue_head_t requester;
 };
diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c
index 21af7b16d7a4f..dd8e4a5fb2826 100644
--- a/drivers/pci/hotplug/pciehp_ctrl.c
+++ b/drivers/pci/hotplug/pciehp_ctrl.c
@@ -375,7 +375,8 @@ int pciehp_sysfs_enable_slot(struct hotplug_slot *hotplug_slot)
                ctrl->request_result = -ENODEV;
                pciehp_request(ctrl, PCI_EXP_SLTSTA_PDC);
                wait_event(ctrl->requester,
-                          !atomic_read(&ctrl->pending_events));
+                          !atomic_read(&ctrl->pending_events) &&
+                          !ctrl->ist_running);
                return ctrl->request_result;
        case POWERON_STATE:
                ctrl_info(ctrl, "Slot(%s): Already in powering on state\n",
@@ -408,7 +409,8 @@ int pciehp_sysfs_disable_slot(struct hotplug_slot *hotplug_slot)
                mutex_unlock(&ctrl->state_lock);
                pciehp_request(ctrl, DISABLE_SLOT);
                wait_event(ctrl->requester,
-                          !atomic_read(&ctrl->pending_events));
+                          !atomic_read(&ctrl->pending_events) &&
+                          !ctrl->ist_running);
                return ctrl->request_result;
        case POWEROFF_STATE:
                ctrl_info(ctrl, "Slot(%s): Already in powering off state\n",
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 1a522c1c41772..86d97f3112f02 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -583,6 +583,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id)
        irqreturn_t ret;
        u32 events;
 
+       ctrl->ist_running = true;
        pci_config_pm_runtime_get(pdev);
 
        /* rerun pciehp_isr() if the port was inaccessible on interrupt */
@@ -629,6 +630,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id)
        up_read(&ctrl->reset_lock);
 
        pci_config_pm_runtime_put(pdev);
+       ctrl->ist_running = false;
        wake_up(&ctrl->requester);
        return IRQ_HANDLED;
 }

因此,只合入补丁"pciehp: do not wake up irq_thread for sysfs operation"中与 Fixes: 764cafd9875e ("pciehp: fix a race between pciehp and removing operations by sysfs") 有关的部分。

诚邀Issue的创建人,负责人,协作人以及评论人对此次Issue解决过程给予评价:

   0   1   2   3   4   5   6   7   8   9   10  

 不满意                        非常满意

zhangjialin 修改了描述
zhangjialin 修改了描述

登录 后才可以发表评论

状态
负责人
项目
里程碑
Pull Requests
关联的 Pull Requests 被合并后可能会关闭此 issue
分支
开始日期   -   截止日期
-
置顶选项
优先级
预计工期 (小时)
参与者(3)
5329419 openeuler ci bot 1632792936 9968373 openeuler survey bot 1637036855
C
1
https://gitee.com/openeuler/kernel.git
git@gitee.com:openeuler/kernel.git
openeuler
kernel
kernel

搜索帮助

344bd9b3 5694891 D2dac590 5694891