From b34c04fd852ecfb094ab4d13021423c92765b471 Mon Sep 17 00:00:00 2001 From: Xingui Yang Date: Tue, 12 Mar 2024 14:11:00 +0000 Subject: [PATCH 01/17] scsi: libsas: Add helper for port add ex_phy mainline inclusion from mainline-v6.10-rc1 commit 888ea1b12b06906da717b5aceabfaf0a84d1766b category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB5FF2 CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git/commit/?id=888ea1b12b06906da717b5aceabfaf0a84d1766b --------------------------------------------------------------------- This moves the process of adding ex_phy to a port into a new helper. Signed-off-by: Xingui Yang Link: https://lore.kernel.org/r/20240312141103.31358-2-yangxingui@huawei.com Reviewed-by: John Garry Signed-off-by: Martin K. Petersen --- drivers/scsi/libsas/sas_expander.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index e97f4e01a865..d767cf6148e7 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -26,6 +26,13 @@ static int sas_configure_phy(struct domain_device *dev, int phy_id, u8 *sas_addr, int include); static int sas_disable_routing(struct domain_device *dev, u8 *sas_addr); +static void sas_port_add_ex_phy(struct sas_port *port, struct ex_phy *ex_phy) +{ + sas_port_add_phy(port, ex_phy->phy); + ex_phy->port = port; + ex_phy->phy_state = PHY_DEVICE_DISCOVERED; +} + /* ---------- SMP task management ---------- */ /* Give it some long enough timeout. In seconds. */ @@ -856,9 +863,7 @@ static bool sas_ex_join_wide_port(struct domain_device *parent, int phy_id) if (!memcmp(phy->attached_sas_addr, ephy->attached_sas_addr, SAS_ADDR_SIZE) && ephy->port) { - sas_port_add_phy(ephy->port, phy->phy); - phy->port = ephy->port; - phy->phy_state = PHY_DEVICE_DISCOVERED; + sas_port_add_ex_phy(ephy->port, phy); return true; } } -- Gitee From 40777995a0b0a640d264d83fdd9d880105938288 Mon Sep 17 00:00:00 2001 From: Xingui Yang Date: Tue, 12 Mar 2024 14:11:01 +0000 Subject: [PATCH 02/17] scsi: libsas: Move sas_add_parent_port() to sas_expander.c mainline inclusion from mainline-v6.10-rc1 commit 48032c0be6c7659f5019acd1403e17928dc27f52 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB5FF2 CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git/commit/?id=48032c0be6c7659f5019acd1403e17928dc27f52 --------------------------------------------------------------------- Move sas_add_parent_port() to sas_expander.c and rename it to sas_ex_add_parent_port() as it is only used in this file. Signed-off-by: Xingui Yang Link: https://lore.kernel.org/r/20240312141103.31358-3-yangxingui@huawei.com Reviewed-by: John Garry Signed-off-by: Martin K. Petersen --- drivers/scsi/libsas/sas_expander.c | 19 +++++++++++++++++-- drivers/scsi/libsas/sas_internal.h | 15 --------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index d767cf6148e7..71d1a84ef2a1 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -33,6 +33,21 @@ static void sas_port_add_ex_phy(struct sas_port *port, struct ex_phy *ex_phy) ex_phy->phy_state = PHY_DEVICE_DISCOVERED; } +static void sas_ex_add_parent_port(struct domain_device *dev, int phy_id) +{ + struct expander_device *ex = &dev->ex_dev; + struct ex_phy *ex_phy = &ex->ex_phy[phy_id]; + + if (!ex->parent_port) { + ex->parent_port = sas_port_alloc(&dev->rphy->dev, phy_id); + /* FIXME: error handling */ + BUG_ON(!ex->parent_port); + BUG_ON(sas_port_add(ex->parent_port)); + sas_port_mark_backlink(ex->parent_port); + } + sas_port_add_phy(ex->parent_port, ex_phy->phy); +} + /* ---------- SMP task management ---------- */ /* Give it some long enough timeout. In seconds. */ @@ -967,11 +982,11 @@ static int sas_ex_discover_dev(struct domain_device *dev, int phy_id) /* Parent and domain coherency */ if (!dev->parent && sas_phy_match_port_addr(dev->port, ex_phy)) { - sas_add_parent_port(dev, phy_id); + sas_ex_add_parent_port(dev, phy_id); return 0; } if (dev->parent && sas_phy_match_dev_addr(dev->parent, ex_phy)) { - sas_add_parent_port(dev, phy_id); + sas_ex_add_parent_port(dev, phy_id); if (ex_phy->routing_attr == TABLE_ROUTING) sas_configure_phy(dev, phy_id, dev->port->sas_addr, 1); return 0; diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h index 277e45fed85d..8a7abe1cf2c2 100644 --- a/drivers/scsi/libsas/sas_internal.h +++ b/drivers/scsi/libsas/sas_internal.h @@ -191,21 +191,6 @@ static inline void sas_phy_set_target(struct asd_sas_phy *p, struct domain_devic } } -static inline void sas_add_parent_port(struct domain_device *dev, int phy_id) -{ - struct expander_device *ex = &dev->ex_dev; - struct ex_phy *ex_phy = &ex->ex_phy[phy_id]; - - if (!ex->parent_port) { - ex->parent_port = sas_port_alloc(&dev->rphy->dev, phy_id); - /* FIXME: error handling */ - BUG_ON(!ex->parent_port); - BUG_ON(sas_port_add(ex->parent_port)); - sas_port_mark_backlink(ex->parent_port); - } - sas_port_add_phy(ex->parent_port, ex_phy->phy); -} - static inline struct domain_device *sas_alloc_device(void) { struct domain_device *dev = kzalloc(sizeof(*dev), GFP_KERNEL); -- Gitee From 6df3ef8c32224ddc34b702fd711bdcbc0f1d13c9 Mon Sep 17 00:00:00 2001 From: Xingui Yang Date: Tue, 12 Mar 2024 14:11:02 +0000 Subject: [PATCH 03/17] scsi: libsas: Set port when ex_phy is added or deleted mainline inclusion from mainline-v6.10-rc1 commit 7a165a81d55faad2944c781049ef8d454a25dc03 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB5FF2 CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git/commit/?id=7a165a81d55faad2944c781049ef8d454a25dc03 --------------------------------------------------------------------- We found that when ex_phy was attached and added to the parent wide port, ex_phy->port was not set, resulting in sas_unregister_devs_sas_addr() not calling sas_port_delete_phy() when deleting the phy, and the deleted phy was still on the parent wide port's phy_list. When we use sas_port_add_ex_phy() to set ex_phy->port to solve the above problem, we find that after all the phys of the parent_port are removed and the number of phy becomes 0, the parent_port will not be set to NULL. This causes the freed parent port to be used when attaching a new ex_phy in sas_ex_add_parent_port(). Use sas_port_add_ex_phy() instead of sas_port_add_phy() to set ex_phy->port when ex_phy is added to the parent port, and set ex_dev->parent_port to NULL when the number of phy on the port becomes 0. Signed-off-by: Xingui Yang Link: https://lore.kernel.org/r/20240312141103.31358-4-yangxingui@huawei.com Signed-off-by: Martin K. Petersen --- drivers/scsi/libsas/sas_expander.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 71d1a84ef2a1..4e6bb3d0f163 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -45,7 +45,7 @@ static void sas_ex_add_parent_port(struct domain_device *dev, int phy_id) BUG_ON(sas_port_add(ex->parent_port)); sas_port_mark_backlink(ex->parent_port); } - sas_port_add_phy(ex->parent_port, ex_phy->phy); + sas_port_add_ex_phy(ex->parent_port, ex_phy); } /* ---------- SMP task management ---------- */ @@ -1868,9 +1868,12 @@ static void sas_unregister_devs_sas_addr(struct domain_device *parent, if (phy->port) { sas_port_delete_phy(phy->port, phy->phy); sas_device_set_phy(found, phy->port); - if (phy->port->num_phys == 0) + if (phy->port->num_phys == 0) { list_add_tail(&phy->port->del_list, &parent->port->sas_port_del_list); + if (ex_dev->parent_port == phy->port) + ex_dev->parent_port = NULL; + } phy->port = NULL; } } -- Gitee From 55c81df5ea58f44e48b1053e3737baf9f1ea05af Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Tue, 8 Oct 2024 10:18:10 +0800 Subject: [PATCH 04/17] scsi: hisi_sas: Adjust priority of registering and exiting debugfs for security driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB5FF2 CVE: NA --------------------------------------------------------------------- Spec says at least 5us between two H2D FIS when do soft reset, but be To be safe, we should register debugfs at the last stage of driver initialization and then unregister debugfs at the first stage of driver uninstallation. Fixes: 623a4b6d5c2a ("scsi: hisi_sas: Move debugfs code to v3 hw driver") Signed-off-by: Yihang Li Link: https://lore.kernel.org/r/20241008021822.2617339-2-liyihang9@huawei.com Reviewed-by: Xiang Chen Signed-off-by: Martin K. Petersen Signed-off-by: Bing Xia --- drivers/scsi/hisi_sas/hisi_sas_main.c | 4 ++-- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 17 ++++++----------- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index b0b76c93d732..d7da18911ef1 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -2611,10 +2611,10 @@ static __init int hisi_sas_init(void) static __exit void hisi_sas_exit(void) { - sas_release_transport(hisi_sas_stt); - if (hisi_sas_debugfs_enable) debugfs_remove(hisi_sas_debugfs_dir); + + sas_release_transport(hisi_sas_stt); } module_init(hisi_sas_init); diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 1173c767084f..72f26f8c6b63 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -4998,16 +4998,13 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id) SHOST_DIX_GUARD_CRC); } - if (hisi_sas_debugfs_enable) - debugfs_init_v3_hw(hisi_hba); - rc = interrupt_preinit_v3_hw(hisi_hba); if (rc) - goto err_out_undo_debugfs; + goto err_out_free_host; rc = scsi_add_host(shost, dev); if (rc) - goto err_out_undo_debugfs; + goto err_out_free_host; rc = sas_register_ha(sha); if (rc) @@ -5018,6 +5015,8 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_out_unregister_ha; scsi_scan_host(shost); + if (hisi_sas_debugfs_enable) + debugfs_init_v3_hw(hisi_hba); pm_runtime_set_autosuspend_delay(dev, 5000); pm_runtime_use_autosuspend(dev); @@ -5038,9 +5037,6 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id) sas_unregister_ha(sha); err_out_remove_host: scsi_remove_host(shost); -err_out_undo_debugfs: - if (hisi_sas_debugfs_enable) - debugfs_exit_v3_hw(hisi_hba); err_out_free_host: hisi_sas_free(hisi_hba); scsi_host_put(shost); @@ -5072,6 +5068,8 @@ static void hisi_sas_v3_remove(struct pci_dev *pdev) struct Scsi_Host *shost = sha->shost; pm_runtime_get_noresume(dev); + if (hisi_sas_debugfs_enable) + debugfs_exit_v3_hw(hisi_hba); sas_unregister_ha(sha); flush_workqueue(hisi_hba->wq); @@ -5079,9 +5077,6 @@ static void hisi_sas_v3_remove(struct pci_dev *pdev) hisi_sas_v3_destroy_irqs(pdev, hisi_hba); hisi_sas_free(hisi_hba); - if (hisi_sas_debugfs_enable) - debugfs_exit_v3_hw(hisi_hba); - scsi_host_put(shost); } -- Gitee From 8208d1a01ad0efe0b62d500c5f1971d3fe2e7082 Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Tue, 8 Oct 2024 10:18:11 +0800 Subject: [PATCH 05/17] scsi: hisi_sas: Create trigger_dump at the end of the debugfs initialization driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB5FF2 CVE: NA --------------------------------------------------------------------- In the current debugfs initialization process, the interface trigger_dump is created first, and then the dump directory is created to store the register dump information. The issue is that after the trigger_dump interface is created, users can access the interface to trigger dump and call debugfs_create_files_v3_hw(). In debugfs_create_files_v3_hw(), if .debugfs_dump_dentry is NULL, the file for storing dump information is created under /sys/kernel/debug, and the memory and information cannot be released after the driver is uninstalled. Therefore, the creation of the trigger_dump interface is placed at the end of debugfs initialization. Fixes: 623a4b6d5c2a ("scsi: hisi_sas: Move debugfs code to v3 hw driver") Signed-off-by: Yihang Li Link: https://lore.kernel.org/r/20241008021822.2617339-3-liyihang9@huawei.com Reviewed-by: Xiang Chen Signed-off-by: Martin K. Petersen Signed-off-by: Bing Xia --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 72f26f8c6b63..18043cb15a3e 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -4893,11 +4893,6 @@ static void debugfs_init_v3_hw(struct hisi_hba *hisi_hba) hisi_hba->debugfs_dir = debugfs_create_dir(dev_name(dev), hisi_sas_debugfs_dir); - debugfs_create_file("trigger_dump", 0200, - hisi_hba->debugfs_dir, - hisi_hba, - &debugfs_trigger_dump_v3_hw_fops); - /* create bist structures */ debugfs_bist_init_v3_hw(hisi_hba); @@ -4906,6 +4901,10 @@ static void debugfs_init_v3_hw(struct hisi_hba *hisi_hba) debugfs_phy_down_cnt_init_v3_hw(hisi_hba); debugfs_fifo_init_v3_hw(hisi_hba); + debugfs_create_file("trigger_dump", 0200, + hisi_hba->debugfs_dir, + hisi_hba, + &debugfs_trigger_dump_v3_hw_fops); } static int -- Gitee From d48cefbbde47da545e38ce91cee49e6016c8e067 Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Tue, 8 Oct 2024 10:18:12 +0800 Subject: [PATCH 06/17] scsi: hisi_sas: Add firmware information check driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB5FF2 CVE: NA --------------------------------------------------------------------- For security purposes, after information is obtained through the FW, check information to ensure data correctness. - In v1 and v2 hw, the maximum number of PHYs is 9, while in v3 it is 8. - In v2 and v3 hw, the maximum number of hardware queues is 16, while in v1 it is 32. Also add some debug logs for failure. Fixes: e21fe3a52692 ("scsi: hisi_sas: add initialisation for v3 pci-based controller") Signed-off-by: Yihang Li Link: https://lore.kernel.org/r/20241008021822.2617339-4-liyihang9@huawei.com Reviewed-by: Xiang Chen Signed-off-by: Martin K. Petersen Signed-off-by: Bing Xia --- drivers/scsi/hisi_sas/hisi_sas.h | 1 + drivers/scsi/hisi_sas/hisi_sas_main.c | 5 +++++ drivers/scsi/hisi_sas/hisi_sas_v1_hw.c | 18 ++++++++++++++++++ drivers/scsi/hisi_sas/hisi_sas_v2_hw.c | 18 ++++++++++++++++++ drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 20 ++++++++++++++++++++ 5 files changed, 62 insertions(+) diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h index 1e4550156b73..155cb289192c 100644 --- a/drivers/scsi/hisi_sas/hisi_sas.h +++ b/drivers/scsi/hisi_sas/hisi_sas.h @@ -307,6 +307,7 @@ enum { struct hisi_sas_hw { int (*hw_init)(struct hisi_hba *hisi_hba); + int (*fw_info_check)(struct hisi_hba *hisi_hba); int (*interrupt_preinit)(struct hisi_hba *hisi_hba); void (*setup_itct)(struct hisi_hba *hisi_hba, struct hisi_sas_device *device); diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index d7da18911ef1..7f6e1ac12c92 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -2431,6 +2431,11 @@ static struct Scsi_Host *hisi_sas_shost_alloc(struct platform_device *pdev, if (hisi_sas_get_fw_info(hisi_hba) < 0) goto err_out; + if (hisi_hba->hw->fw_info_check) { + if (hisi_hba->hw->fw_info_check(hisi_hba)) + goto err_out; + } + error = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); if (error) { dev_err(dev, "No usable DMA addressing method\n"); diff --git a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c index 3c555579f9a1..ee3d95b4bebf 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c @@ -1734,6 +1734,23 @@ static struct attribute *host_v1_hw_attrs[] = { ATTRIBUTE_GROUPS(host_v1_hw); +static int check_fw_info_v1_hw(struct hisi_hba *hisi_hba) +{ + struct device *dev = hisi_hba->dev; + + if (hisi_hba->n_phy < 0 || hisi_hba->n_phy > 9) { + dev_err(dev, "invalid phy number from FW\n"); + return -EINVAL; + } + + if (hisi_hba->queue_count < 0 || hisi_hba->queue_count > 32) { + dev_err(dev, "invalid queue count from FW\n"); + return -EINVAL; + } + + return 0; +} + static const struct scsi_host_template sht_v1_hw = { .name = DRV_NAME, .proc_name = DRV_NAME, @@ -1763,6 +1780,7 @@ static const struct scsi_host_template sht_v1_hw = { static const struct hisi_sas_hw hisi_sas_v1_hw = { .hw_init = hisi_sas_v1_init, + .fw_info_check = check_fw_info_v1_hw, .setup_itct = setup_itct_v1_hw, .sl_notify_ssp = sl_notify_ssp_v1_hw, .clear_itct = clear_itct_v1_hw, diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c index 73b378837da7..59e56df322db 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c @@ -3561,6 +3561,23 @@ static void map_queues_v2_hw(struct Scsi_Host *shost) } } +static int check_fw_info_v2_hw(struct hisi_hba *hisi_hba) +{ + struct device *dev = hisi_hba->dev; + + if (hisi_hba->n_phy < 0 || hisi_hba->n_phy > 9) { + dev_err(dev, "invalid phy number from FW\n"); + return -EINVAL; + } + + if (hisi_hba->queue_count < 0 || hisi_hba->queue_count > 16) { + dev_err(dev, "invalid queue count from FW\n"); + return -EINVAL; + } + + return 0; +} + static const struct scsi_host_template sht_v2_hw = { .name = DRV_NAME, .proc_name = DRV_NAME, @@ -3592,6 +3609,7 @@ static const struct scsi_host_template sht_v2_hw = { static const struct hisi_sas_hw hisi_sas_v2_hw = { .hw_init = hisi_sas_v2_init, + .fw_info_check = check_fw_info_v2_hw, .interrupt_preinit = hisi_sas_v2_interrupt_preinit, .setup_itct = setup_itct_v2_hw, .slot_index_alloc = slot_index_alloc_quirk_v2_hw, diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 18043cb15a3e..edb0c0f40a44 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -3390,6 +3390,23 @@ static const struct hisi_sas_hw hisi_sas_v3_hw = { .debugfs_snapshot_regs = debugfs_snapshot_regs_v3_hw, }; +static int check_fw_info_v3_hw(struct hisi_hba *hisi_hba) +{ + struct device *dev = hisi_hba->dev; + + if (hisi_hba->n_phy < 0 || hisi_hba->n_phy > 8) { + dev_err(dev, "invalid phy number from FW\n"); + return -EINVAL; + } + + if (hisi_hba->queue_count < 0 || hisi_hba->queue_count > 16) { + dev_err(dev, "invalid queue count from FW\n"); + return -EINVAL; + } + + return 0; +} + static struct Scsi_Host * hisi_sas_shost_alloc_pci(struct pci_dev *pdev) { @@ -3420,6 +3437,9 @@ hisi_sas_shost_alloc_pci(struct pci_dev *pdev) if (hisi_sas_get_fw_info(hisi_hba) < 0) goto err_out; + if (check_fw_info_v3_hw(hisi_hba) < 0) + goto err_out; + if (experimental_iopoll_q_cnt < 0 || experimental_iopoll_q_cnt >= hisi_hba->queue_count) dev_err(dev, "iopoll queue count %d cannot exceed or equal 16, using default 0\n", -- Gitee From 2feb38eaf700a7a30a6fa45c06046aeaadb0755e Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Tue, 8 Oct 2024 10:18:13 +0800 Subject: [PATCH 07/17] scsi: hisi_sas: Enable all PHYs that are not disabled by user during controller reset driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB5FF2 CVE: NA --------------------------------------------------------------------- For the controller reset operation(such as FLR or clear nexus ha in SCSI EH), we will disable all PHYs and then enable PHY based on the hisi_hba->phy_state obtained in hisi_sas_controller_reset_prepare(). If the device is removed before controller reset or the PHY is not attached to any device in directly attached scenario, the corresponding bit of phy_state is not set. After controller reset done, the PHY is disabled. The device cannot be identified even if user reconnect the disk. Therefore, for PHYs that are not disabled by user, hisi_sas_phy_enable() needs to be executed even if the corresponding bit of phy_state is not set. Fixes: 89954f024c3a ("scsi: hisi_sas: Ensure all enabled PHYs up during controller reset") Signed-off-by: Yihang Li Link: https://lore.kernel.org/r/20241008021822.2617339-5-liyihang9@huawei.com Reviewed-by: Xiang Chen Signed-off-by: Martin K. Petersen Signed-off-by: Bing Xia --- drivers/scsi/hisi_sas/hisi_sas_main.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 7f6e1ac12c92..cfd490bc5786 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1544,10 +1544,16 @@ void hisi_sas_controller_reset_done(struct hisi_hba *hisi_hba) /* Init and wait for PHYs to come up and all libsas event finished. */ for (phy_no = 0; phy_no < hisi_hba->n_phy; phy_no++) { struct hisi_sas_phy *phy = &hisi_hba->phy[phy_no]; + struct asd_sas_phy *sas_phy = &phy->sas_phy; - if (!(hisi_hba->phy_state & BIT(phy_no))) + if (!sas_phy->phy->enabled) continue; + if (!(hisi_hba->phy_state & BIT(phy_no))) { + hisi_sas_phy_enable(hisi_hba, phy_no, 1); + continue; + } + async_schedule_domain(hisi_sas_async_init_wait_phyup, phy, &async); } -- Gitee From 7398a36c7e9d8b5b0db4c28399fed0202b9a1336 Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Tue, 8 Oct 2024 10:18:14 +0800 Subject: [PATCH 08/17] scsi: hisi_sas: Reset PHY again if phyup timeout driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB5FF2 CVE: NA --------------------------------------------------------------------- In commit 89954f024c3a ("scsi: hisi_sas: Ensure all enabled PHYs up during controller reset"), we enable PHYs in parallel through async operations and wait for PHYs come up. However, for some directly attached SATA disks, the PHY not come up after a timeout period and the hardware is not ready. At this time, we should get the latest PHY hardware state, if the new PHY state is not ready but the old PHY state is ready, call work HISI_PHYE_LINK_RESET to give it another chance to phyup. Fixes: 89954f024c3a ("scsi: hisi_sas: Ensure all enabled PHYs up during controller reset") Signed-off-by: Yihang Li Link: https://lore.kernel.org/r/20241008021822.2617339-6-liyihang9@huawei.com Reviewed-by: Xiang Chen Signed-off-by: Martin K. Petersen Signed-off-by: Bing Xia --- drivers/scsi/hisi_sas/hisi_sas_main.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index cfd490bc5786..e071747c3c02 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1383,6 +1383,7 @@ static void hisi_sas_refresh_port_id(struct hisi_hba *hisi_hba) static void hisi_sas_rescan_topology(struct hisi_hba *hisi_hba, u32 state) { + u32 new_state = hisi_hba->hw->get_phys_state(hisi_hba); struct asd_sas_port *_sas_port = NULL; int phy_no; @@ -1396,7 +1397,7 @@ static void hisi_sas_rescan_topology(struct hisi_hba *hisi_hba, u32 state) continue; /* Report PHY state change to libsas */ - if (state & BIT(phy_no)) { + if (new_state & BIT(phy_no)) { if (do_port_check && sas_port && sas_port->port_dev) { struct domain_device *dev = sas_port->port_dev; @@ -1409,6 +1410,16 @@ static void hisi_sas_rescan_topology(struct hisi_hba *hisi_hba, u32 state) } } else { hisi_sas_phy_down(hisi_hba, phy_no, 0, GFP_KERNEL); + + /* + * The new_state is not ready but old_state is ready, + * the two possible causes: + * 1. The connected device is removed + * 2. Device exists but phyup timed out + */ + if (state & BIT(phy_no)) + hisi_sas_notify_phy_event(phy, + HISI_PHYE_LINK_RESET); } } } -- Gitee From 4b586b3ee12d1a71ff7453900fb13173ac208dbd Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Tue, 8 Oct 2024 10:18:16 +0800 Subject: [PATCH 09/17] scsi: hisi_sas: Add cond_resched() for no forced preemption model driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB5FF2 CVE: NA --------------------------------------------------------------------- For no forced preemption model kernel, in the scenario where the expander is connected to 12 high performance SAS SSDs, the following call trace may occur: [ 214.409199][ C240] watchdog: BUG: soft lockup - CPU#240 stuck for 22s! [irq/149-hisi_sa:3211] [ 214.568533][ C240] pstate: 60400009 (nZCv daif +PAN -UAO -TCO BTYPE=--) [ 214.575224][ C240] pc : fput_many+0x8c/0xdc [ 214.579480][ C240] lr : fput+0x1c/0xf0 [ 214.583302][ C240] sp : ffff80002de2b900 [ 214.587298][ C240] x29: ffff80002de2b900 x28: ffff1082aa412000 [ 214.593291][ C240] x27: ffff3062a0348c08 x26: ffff80003a9f6000 [ 214.599284][ C240] x25: ffff1062bbac5c40 x24: 0000000000001000 [ 214.605277][ C240] x23: 000000000000000a x22: 0000000000000001 [ 214.611270][ C240] x21: 0000000000001000 x20: 0000000000000000 [ 214.617262][ C240] x19: ffff3062a41ae580 x18: 0000000000010000 [ 214.623255][ C240] x17: 0000000000000001 x16: ffffdb3a6efe5fc0 [ 214.629248][ C240] x15: ffffffffffffffff x14: 0000000003ffffff [ 214.635241][ C240] x13: 000000000000ffff x12: 000000000000029c [ 214.641234][ C240] x11: 0000000000000006 x10: ffff80003a9f7fd0 [ 214.647226][ C240] x9 : ffffdb3a6f0482fc x8 : 0000000000000001 [ 214.653219][ C240] x7 : 0000000000000002 x6 : 0000000000000080 [ 214.659212][ C240] x5 : ffff55480ee9b000 x4 : fffffde7f94c6554 [ 214.665205][ C240] x3 : 0000000000000002 x2 : 0000000000000020 [ 214.671198][ C240] x1 : 0000000000000021 x0 : ffff3062a41ae5b8 [ 214.677191][ C240] Call trace: [ 214.680320][ C240] fput_many+0x8c/0xdc [ 214.684230][ C240] fput+0x1c/0xf0 [ 214.687707][ C240] aio_complete_rw+0xd8/0x1fc [ 214.692225][ C240] blkdev_bio_end_io+0x98/0x140 [ 214.696917][ C240] bio_endio+0x160/0x1bc [ 214.701001][ C240] blk_update_request+0x1c8/0x3bc [ 214.705867][ C240] scsi_end_request+0x3c/0x1f0 [ 214.710471][ C240] scsi_io_completion+0x7c/0x1a0 [ 214.715249][ C240] scsi_finish_command+0x104/0x140 [ 214.720200][ C240] scsi_softirq_done+0x90/0x180 [ 214.724892][ C240] blk_mq_complete_request+0x5c/0x70 [ 214.730016][ C240] scsi_mq_done+0x48/0xac [ 214.734194][ C240] sas_scsi_task_done+0xbc/0x16c [libsas] [ 214.739758][ C240] slot_complete_v3_hw+0x260/0x760 [hisi_sas_v3_hw] [ 214.746185][ C240] cq_thread_v3_hw+0xbc/0x190 [hisi_sas_v3_hw] [ 214.752179][ C240] irq_thread_fn+0x34/0xa4 [ 214.756435][ C240] irq_thread+0xc4/0x130 [ 214.760520][ C240] kthread+0x108/0x13c [ 214.764430][ C240] ret_from_fork+0x10/0x18 This is because in the hisi_sas driver, both the hardware interrupt handler and the interrupt thread are executed on the same CPU. In the performance test scenario, function irq_wait_for_interrupt() will always return 0 if lots of interrupts occurs and the CPU will be continuously consumed. As a result, the CPU cannot run the watchdog thread. When the watchdog time exceeds the specified time, call trace occurs. To fix it, add cond_resched() to execute the watchdog thread. Fixes: 60b4a5ee9034 ("scsi: hisi_sas: add v3 cq interrupt handler") Signed-off-by: Yihang Li Link: https://lore.kernel.org/r/20241008021822.2617339-8-liyihang9@huawei.com Reviewed-by: Xiang Chen Signed-off-by: Martin K. Petersen Signed-off-by: Bing Xia --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index edb0c0f40a44..86933aaffe7b 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -2493,6 +2493,7 @@ static int complete_v3_hw(struct hisi_sas_cq *cq) /* update rd_point */ cq->rd_point = rd_point; hisi_sas_write32(hisi_hba, COMPL_Q_0_RD_PTR + (0x14 * queue), rd_point); + cond_resched(); return completed; } -- Gitee From e92d22ee9c6aa1f2e106d62df273b69702c19509 Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Tue, 8 Oct 2024 10:18:17 +0800 Subject: [PATCH 10/17] scsi: hisi_sas: Default enable interrupt coalescing driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB5FF2 CVE: NA --------------------------------------------------------------------- In the current interrupt reporting mode, each CQ entry reports an interrupt. However, when there are a large number of I/O hardware completion interrupts, the following issue may occur: [ 4682.678657][ C129] irq 134: nobody cared (try booting with the "irqpoll" option) [ 4682.708455][ C129] Call trace: [ 4682.711589][ C129] dump_backtrace+0x0/0x1e4 [ 4682.715934][ C129] show_stack+0x20/0x2c [ 4682.719933][ C129] dump_stack+0xd8/0x140 [ 4682.724017][ C129] __report_bad_irq+0x54/0x180 [ 4682.728625][ C129] note_interrupt+0x1ec/0x2f0 [ 4682.733143][ C129] handle_irq_event+0x118/0x1ac [ 4682.737834][ C129] handle_fasteoi_irq+0xc8/0x200 [ 4682.742613][ C129] __handle_domain_irq+0x84/0xf0 [ 4682.747391][ C129] gic_handle_irq+0x88/0x2c0 [ 4682.751822][ C129] el1_irq+0xbc/0x140 [ 4682.755648][ C129] _find_next_bit.constprop.0+0x20/0x94 [ 4682.761036][ C129] cpumask_next+0x24/0x30 [ 4682.765208][ C129] gic_ipi_send_mask+0x48/0x170 [ 4682.769900][ C129] __ipi_send_mask+0x34/0x110 [ 4682.775720][ C129] smp_cross_call+0x3c/0xcc [ 4682.780064][ C129] arch_send_call_function_single_ipi+0x38/0x44 [ 4682.786146][ C129] send_call_function_single_ipi+0xd0/0xe0 [ 4682.791794][ C129] generic_exec_single+0xb4/0x170 [ 4682.796659][ C129] smp_call_function_single_async+0x2c/0x40 [ 4682.802395][ C129] blk_mq_complete_request_remote.part.0+0xec/0x100 [ 4682.808822][ C129] blk_mq_complete_request+0x30/0x70 [ 4682.813950][ C129] scsi_mq_done+0x48/0xac [ 4682.818128][ C129] sas_scsi_task_done+0xb0/0x150 [libsas] [ 4682.823692][ C129] slot_complete_v3_hw+0x230/0x710 [hisi_sas_v3_hw] [ 4682.830120][ C129] cq_thread_v3_hw+0xbc/0x190 [hisi_sas_v3_hw] [ 4682.836114][ C129] irq_thread_fn+0x34/0xa4 [ 4682.840371][ C129] irq_thread+0xc4/0x130 [ 4682.844455][ C129] kthread+0x108/0x13c [ 4682.848365][ C129] ret_from_fork+0x10/0x18 [ 4682.852621][ C129] handlers: [ 4682.855577][ C129] [<00000000949e52bf>] cq_interrupt_v3_hw [hisi_sas_v3_hw] threaded [<000000005d8e3b68>] cq_thread_v3_hw [hisi_sas_v3_hw] [ 4682.868084][ C129] Disabling IRQ #134 When the IRQ management layer processes each hardware interrupt, if the return value of the interrupt handler is IRQ_WAKE_THREAD, it will wake up the handler thread for this interrupt action and set IRQTF_RUNTHREAD flag, wait for the interrupt handling thread to clear the IRQTF_RUNTHREAD flag after execution. Later in note_interrupt(), use irq_count to count hardware interrupts and irqs_unhandled to count interrupts for which no thread handler is responsible. When irq_count reaches 100000 and irqs_unhandled reaches 99000, irq will be disabled. In the performance test scenario, I/O completion hardware interrupts are continuously and quickly generated. As a result, the interrupt processing thread is cyclically called in irq_thread() and does not exit, this affects the response of the interrupt thread to the hardware interrupt and causes irqs_unhandled to grow to 99000. Finally, the irq is disabled. Therefore, default enable interrupt coalescing to reduce the generation of hardware interrupts, this helps interrupt processing threads to stop calling in irq_thread(). For interrupt coalescing, according to the actual performance test, set the count of CQ entries to 10 and the interrupt coalescing timeout period to 10us based on the actual performance test. Before and after interrupt coalescing is enabled, the 4K read/write performance is improved by about 3%, and the 256K read/write performance is basically the same. Fixes: 60b4a5ee9034 ("scsi: hisi_sas: add v3 cq interrupt handler") Signed-off-by: Yihang Li Link: https://lore.kernel.org/r/20241008021822.2617339-9-liyihang9@huawei.com Reviewed-by: Xiang Chen Signed-off-by: Martin K. Petersen Signed-off-by: Bing Xia --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 86933aaffe7b..5b45b108e36d 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -638,9 +638,11 @@ static void init_reg_v3_hw(struct hisi_hba *hisi_hba) hisi_sas_write32(hisi_hba, TRANS_LOCK_ICT_TIME, 0x4A817C80); hisi_sas_write32(hisi_hba, HGC_SAS_TXFAIL_RETRY_CTRL, 0x108); hisi_sas_write32(hisi_hba, CFG_AGING_TIME, 0x1); - hisi_sas_write32(hisi_hba, INT_COAL_EN, 0x1); - hisi_sas_write32(hisi_hba, OQ_INT_COAL_TIME, 0x1); - hisi_sas_write32(hisi_hba, OQ_INT_COAL_CNT, 0x1); + hisi_sas_write32(hisi_hba, INT_COAL_EN, 0x3); + /* configure the interrupt coalescing timeout period 10us */ + hisi_sas_write32(hisi_hba, OQ_INT_COAL_TIME, 0xa); + /* configure the count of CQ entries 10 */ + hisi_sas_write32(hisi_hba, OQ_INT_COAL_CNT, 0xa); hisi_sas_write32(hisi_hba, CQ_INT_CONVERGE_EN, hisi_sas_intr_conv); hisi_sas_write32(hisi_hba, OQ_INT_SRC, 0xffff); @@ -2797,14 +2799,15 @@ static void config_intr_coal_v3_hw(struct hisi_hba *hisi_hba) { /* config those registers between enable and disable PHYs */ hisi_sas_stop_phys(hisi_hba); + hisi_sas_write32(hisi_hba, INT_COAL_EN, 0x3); if (hisi_hba->intr_coal_ticks == 0 || hisi_hba->intr_coal_count == 0) { - hisi_sas_write32(hisi_hba, INT_COAL_EN, 0x1); - hisi_sas_write32(hisi_hba, OQ_INT_COAL_TIME, 0x1); - hisi_sas_write32(hisi_hba, OQ_INT_COAL_CNT, 0x1); + /* configure the interrupt coalescing timeout period 10us */ + hisi_sas_write32(hisi_hba, OQ_INT_COAL_TIME, 0xa); + /* configure the count of CQ entries 10 */ + hisi_sas_write32(hisi_hba, OQ_INT_COAL_CNT, 0xa); } else { - hisi_sas_write32(hisi_hba, INT_COAL_EN, 0x3); hisi_sas_write32(hisi_hba, OQ_INT_COAL_TIME, hisi_hba->intr_coal_ticks); hisi_sas_write32(hisi_hba, OQ_INT_COAL_CNT, -- Gitee From d67181e3aed3c26d0aaf807a763bbbadc605f2c3 Mon Sep 17 00:00:00 2001 From: Xingui Yang Date: Tue, 8 Oct 2024 10:18:18 +0800 Subject: [PATCH 11/17] scsi: hisi_sas: Update disk locked timeout to 7 seconds driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB5FF2 CVE: NA --------------------------------------------------------------------- The SATA disk will be locked after the disk sends the DMA Setup frame until all data frame transmission is completed. The CFG_ICT_TIMER_STEP_TRSH register is used for sata disk to configure the step size of the timer which records the time when the disk is locked. The unit is 1us and the default step size is 150ms. If the disk is locked for more than 7 timer steps, the io to be sent to the disk will end abnormally. The current timeout is only about 1 second, it is easy to trigger IO abnormal end when the SATA hard disk returns data slowly. Adjust the timeout to 7 seconds based on ERC time of most disks. Fixes: c94d8ca2b1a8 ("scsi: hisi_sas: add v3 hw init") Signed-off-by: Xingui Yang Link: https://lore.kernel.org/r/20241008021822.2617339-10-liyihang9@huawei.com Reviewed-by: Xiang Chen Reviewed-by: Yihang Li Signed-off-by: Martin K. Petersen Signed-off-by: Bing Xia --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 5b45b108e36d..e3052e346170 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -43,6 +43,7 @@ #define CQ_INT_CONVERGE_EN 0xb0 #define CFG_AGING_TIME 0xbc #define HGC_DFX_CFG2 0xc0 +#define CFG_ICT_TIMER_STEP_TRSH 0xc8 #define CFG_ABT_SET_QUERY_IPTT 0xd4 #define CFG_SET_ABORTED_IPTT_OFF 0 #define CFG_SET_ABORTED_IPTT_MSK (0xfff << CFG_SET_ABORTED_IPTT_OFF) @@ -638,6 +639,7 @@ static void init_reg_v3_hw(struct hisi_hba *hisi_hba) hisi_sas_write32(hisi_hba, TRANS_LOCK_ICT_TIME, 0x4A817C80); hisi_sas_write32(hisi_hba, HGC_SAS_TXFAIL_RETRY_CTRL, 0x108); hisi_sas_write32(hisi_hba, CFG_AGING_TIME, 0x1); + hisi_sas_write32(hisi_hba, CFG_ICT_TIMER_STEP_TRSH, 0xf4240); hisi_sas_write32(hisi_hba, INT_COAL_EN, 0x3); /* configure the interrupt coalescing timeout period 10us */ hisi_sas_write32(hisi_hba, OQ_INT_COAL_TIME, 0xa); -- Gitee From 197d8469b845040af53bf5e8b76c997052f40d54 Mon Sep 17 00:00:00 2001 From: Xingui Yang Date: Tue, 8 Oct 2024 10:18:19 +0800 Subject: [PATCH 12/17] scsi: hisi_sas: Add time interval between two H2D FIS following soft reset spec driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB5FF2 CVE: NA --------------------------------------------------------------------- Spec says at least 5us between two H2D FIS when do soft reset, but be generous and sleep for about 1ms. Fixes: 7c594f0407de ("scsi: hisi_sas: add softreset function for SATA disk") Signed-off-by: Xingui Yang Link: https://lore.kernel.org/r/20241008021822.2617339-11-liyihang9@huawei.com Reviewed-by: Yihang Li Signed-off-by: Martin K. Petersen Signed-off-by: Bing Xia --- drivers/scsi/hisi_sas/hisi_sas_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index e071747c3c02..28d2d22c1516 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1320,6 +1320,7 @@ static int hisi_sas_softreset_ata_disk(struct domain_device *device) } if (rc == TMF_RESP_FUNC_COMPLETE) { + usleep_range(900, 1000); ata_for_each_link(link, ap, EDGE) { int pmp = sata_srst_pmp(link); -- Gitee From b18797bd01390942f9388b84cb88b2b4ec91ee91 Mon Sep 17 00:00:00 2001 From: Xingui Yang Date: Tue, 8 Oct 2024 10:18:20 +0800 Subject: [PATCH 13/17] scsi: hisi_sas: Update v3 hw STP_LINK_TIMER setting driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB5FF2 CVE: NA --------------------------------------------------------------------- At present, it is found that some SATA HDD disks may continue to return the HOLD primitive for more than 500ms when they are busy writing data, which is more likely to trigger an STP link timeout exception. Now Modify STP link timer from 500ms to the maximum value of 1.048575s. Fixes: 810367310819 ("scsi: hisi_sas: Modify v3 hw STP_LINK_TIMER setting") Signed-off-by: Xingui Yang Link: https://lore.kernel.org/r/20241008021822.2617339-12-liyihang9@huawei.com Reviewed-by: Yihang Li Signed-off-by: Martin K. Petersen Signed-off-by: Bing Xia --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index e3052e346170..81165e561d34 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -686,7 +686,7 @@ static void init_reg_v3_hw(struct hisi_hba *hisi_hba) hisi_sas_phy_write32(hisi_hba, i, PHY_CTRL_RDY_MSK, 0x0); hisi_sas_phy_write32(hisi_hba, i, PHYCTRL_DWS_RESET_MSK, 0x0); hisi_sas_phy_write32(hisi_hba, i, PHYCTRL_OOB_RESTART_MSK, 0x1); - hisi_sas_phy_write32(hisi_hba, i, STP_LINK_TIMER, 0x7f7a120); + hisi_sas_phy_write32(hisi_hba, i, STP_LINK_TIMER, 0x7ffffff); hisi_sas_phy_write32(hisi_hba, i, CON_CFG_DRIVER, 0x2a0a01); hisi_sas_phy_write32(hisi_hba, i, SAS_EC_INT_COAL_TIME, 0x30f4240); -- Gitee From cb0cc10bf4e8e46944dc7d0fbab80dfdd4c0f084 Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Tue, 8 Oct 2024 10:18:21 +0800 Subject: [PATCH 14/17] scsi: hisi_sas: Create all dump files during debugfs initialization driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB5FF2 CVE: NA --------------------------------------------------------------------- For the current debugfs of hisi_sas, after user triggers dump, the driver allocate memory space to save the register information and create debugfs files to display the saved information. In this process, the debugfs files created after each dump. Therefore, when the dump is triggered while the driver is unbind, the following hang occurs: [67840.853907] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000a0 [67840.862947] Mem abort info: [67840.865855] ESR = 0x0000000096000004 [67840.869713] EC = 0x25: DABT (current EL), IL = 32 bits [67840.875125] SET = 0, FnV = 0 [67840.878291] EA = 0, S1PTW = 0 [67840.881545] FSC = 0x04: level 0 translation fault [67840.886528] Data abort info: [67840.889524] ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000 [67840.895117] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 [67840.900284] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [67840.905709] user pgtable: 4k pages, 48-bit VAs, pgdp=0000002803a1f000 [67840.912263] [00000000000000a0] pgd=0000000000000000, p4d=0000000000000000 [67840.919177] Internal error: Oops: 0000000096000004 [#1] PREEMPT SMP [67840.996435] pstate: 80400009 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [67841.003628] pc : down_write+0x30/0x98 [67841.007546] lr : start_creating.part.0+0x60/0x198 [67841.012495] sp : ffff8000b979ba20 [67841.016046] x29: ffff8000b979ba20 x28: 0000000000000010 x27: 0000000000024b40 [67841.023412] x26: 0000000000000012 x25: ffff20202b355ae8 x24: ffff20202b35a8c8 [67841.030779] x23: ffffa36877928208 x22: ffffa368b4972240 x21: ffff8000b979bb18 [67841.038147] x20: ffff00281dc1e3c0 x19: fffffffffffffffe x18: 0000000000000020 [67841.045515] x17: 0000000000000000 x16: ffffa368b128a530 x15: ffffffffffffffff [67841.052888] x14: ffff8000b979bc18 x13: ffffffffffffffff x12: ffff8000b979bb18 [67841.060263] x11: 0000000000000000 x10: 0000000000000000 x9 : ffffa368b1289b18 [67841.067640] x8 : 0000000000000012 x7 : 0000000000000000 x6 : 00000000000003a9 [67841.075014] x5 : 0000000000000000 x4 : ffff002818c5cb00 x3 : 0000000000000001 [67841.082388] x2 : 0000000000000000 x1 : ffff002818c5cb00 x0 : 00000000000000a0 [67841.089759] Call trace: [67841.092456] down_write+0x30/0x98 [67841.096017] start_creating.part.0+0x60/0x198 [67841.100613] debugfs_create_dir+0x48/0x1f8 [67841.104950] debugfs_create_files_v3_hw+0x88/0x348 [hisi_sas_v3_hw] [67841.111447] debugfs_snapshot_regs_v3_hw+0x708/0x798 [hisi_sas_v3_hw] [67841.118111] debugfs_trigger_dump_v3_hw_write+0x9c/0x120 [hisi_sas_v3_hw] [67841.125115] full_proxy_write+0x68/0xc8 [67841.129175] vfs_write+0xd8/0x3f0 [67841.132708] ksys_write+0x70/0x108 [67841.136317] __arm64_sys_write+0x24/0x38 [67841.140440] invoke_syscall+0x50/0x128 [67841.144385] el0_svc_common.constprop.0+0xc8/0xf0 [67841.149273] do_el0_svc+0x24/0x38 [67841.152773] el0_svc+0x38/0xd8 [67841.156009] el0t_64_sync_handler+0xc0/0xc8 [67841.160361] el0t_64_sync+0x1a4/0x1a8 [67841.164189] Code: b9000882 d2800002 d2800023 f9800011 (c85ffc05) [67841.170443] ---[ end trace 0000000000000000 ]--- To fix this issue, create all directories and files during debugfs initialization. In this way, the driver only needs to allocate memory space to save information each time the user triggers dumping. Fixes: 623a4b6d5c2a ("scsi: hisi_sas: Move debugfs code to v3 hw driver") Signed-off-by: Yihang Li Link: https://lore.kernel.org/r/20241008021822.2617339-13-liyihang9@huawei.com Reviewed-by: Xingui Yang Signed-off-by: Martin K. Petersen Signed-off-by: Bing Xia --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 99 ++++++++++++++++++++------ 1 file changed, 77 insertions(+), 22 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 81165e561d34..12204a24722a 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -3595,6 +3595,11 @@ debugfs_to_reg_name_v3_hw(int off, int base_off, return NULL; } +static bool debugfs_dump_is_generated_v3_hw(void *p) +{ + return p ? true : false; +} + static void debugfs_print_reg_v3_hw(u32 *regs_val, struct seq_file *s, const struct hisi_sas_debugfs_reg *reg) { @@ -3620,6 +3625,9 @@ static int debugfs_global_v3_hw_show(struct seq_file *s, void *p) { struct hisi_sas_debugfs_regs *global = s->private; + if (!debugfs_dump_is_generated_v3_hw(global->data)) + return -EPERM; + debugfs_print_reg_v3_hw(global->data, s, &debugfs_global_reg); @@ -3631,6 +3639,9 @@ static int debugfs_axi_v3_hw_show(struct seq_file *s, void *p) { struct hisi_sas_debugfs_regs *axi = s->private; + if (!debugfs_dump_is_generated_v3_hw(axi->data)) + return -EPERM; + debugfs_print_reg_v3_hw(axi->data, s, &debugfs_axi_reg); @@ -3642,6 +3653,9 @@ static int debugfs_ras_v3_hw_show(struct seq_file *s, void *p) { struct hisi_sas_debugfs_regs *ras = s->private; + if (!debugfs_dump_is_generated_v3_hw(ras->data)) + return -EPERM; + debugfs_print_reg_v3_hw(ras->data, s, &debugfs_ras_reg); @@ -3654,6 +3668,9 @@ static int debugfs_port_v3_hw_show(struct seq_file *s, void *p) struct hisi_sas_debugfs_port *port = s->private; const struct hisi_sas_debugfs_reg *reg_port = &debugfs_port_reg; + if (!debugfs_dump_is_generated_v3_hw(port->data)) + return -EPERM; + debugfs_print_reg_v3_hw(port->data, s, reg_port); return 0; @@ -3709,6 +3726,9 @@ static int debugfs_cq_v3_hw_show(struct seq_file *s, void *p) struct hisi_sas_debugfs_cq *debugfs_cq = s->private; int slot; + if (!debugfs_dump_is_generated_v3_hw(debugfs_cq->complete_hdr)) + return -EPERM; + for (slot = 0; slot < HISI_SAS_QUEUE_SLOTS; slot++) debugfs_cq_show_slot_v3_hw(s, slot, debugfs_cq); @@ -3730,8 +3750,12 @@ static void debugfs_dq_show_slot_v3_hw(struct seq_file *s, int slot, static int debugfs_dq_v3_hw_show(struct seq_file *s, void *p) { + struct hisi_sas_debugfs_dq *debugfs_dq = s->private; int slot; + if (!debugfs_dump_is_generated_v3_hw(debugfs_dq->hdr)) + return -EPERM; + for (slot = 0; slot < HISI_SAS_QUEUE_SLOTS; slot++) debugfs_dq_show_slot_v3_hw(s, slot, s->private); @@ -3745,6 +3769,9 @@ static int debugfs_iost_v3_hw_show(struct seq_file *s, void *p) struct hisi_sas_iost *iost = debugfs_iost->iost; int i, max_command_entries = HISI_SAS_MAX_COMMANDS; + if (!debugfs_dump_is_generated_v3_hw(iost)) + return -EPERM; + for (i = 0; i < max_command_entries; i++, iost++) { __le64 *data = &iost->qw0; @@ -3764,6 +3791,9 @@ static int debugfs_iost_cache_v3_hw_show(struct seq_file *s, void *p) int i, tab_idx; __le64 *iost; + if (!debugfs_dump_is_generated_v3_hw(iost_cache)) + return -EPERM; + for (i = 0; i < HISI_SAS_IOST_ITCT_CACHE_NUM; i++, iost_cache++) { /* * Data struct of IOST cache: @@ -3787,6 +3817,9 @@ static int debugfs_itct_v3_hw_show(struct seq_file *s, void *p) struct hisi_sas_debugfs_itct *debugfs_itct = s->private; struct hisi_sas_itct *itct = debugfs_itct->itct; + if (!debugfs_dump_is_generated_v3_hw(itct)) + return -EPERM; + for (i = 0; i < HISI_SAS_MAX_ITCT_ENTRIES; i++, itct++) { __le64 *data = &itct->qw0; @@ -3806,6 +3839,9 @@ static int debugfs_itct_cache_v3_hw_show(struct seq_file *s, void *p) int i, tab_idx; __le64 *itct; + if (!debugfs_dump_is_generated_v3_hw(itct_cache)) + return -EPERM; + for (i = 0; i < HISI_SAS_IOST_ITCT_CACHE_NUM; i++, itct_cache++) { /* * Data struct of ITCT cache: @@ -3823,10 +3859,9 @@ static int debugfs_itct_cache_v3_hw_show(struct seq_file *s, void *p) } DEFINE_SHOW_ATTRIBUTE(debugfs_itct_cache_v3_hw); -static void debugfs_create_files_v3_hw(struct hisi_hba *hisi_hba) +static void debugfs_create_files_v3_hw(struct hisi_hba *hisi_hba, int index) { u64 *debugfs_timestamp; - int dump_index = hisi_hba->debugfs_dump_index; struct dentry *dump_dentry; struct dentry *dentry; char name[256]; @@ -3834,17 +3869,17 @@ static void debugfs_create_files_v3_hw(struct hisi_hba *hisi_hba) int c; int d; - snprintf(name, 256, "%d", dump_index); + snprintf(name, 256, "%d", index); dump_dentry = debugfs_create_dir(name, hisi_hba->debugfs_dump_dentry); - debugfs_timestamp = &hisi_hba->debugfs_timestamp[dump_index]; + debugfs_timestamp = &hisi_hba->debugfs_timestamp[index]; debugfs_create_u64("timestamp", 0400, dump_dentry, debugfs_timestamp); debugfs_create_file("global", 0400, dump_dentry, - &hisi_hba->debugfs_regs[dump_index][DEBUGFS_GLOBAL], + &hisi_hba->debugfs_regs[index][DEBUGFS_GLOBAL], &debugfs_global_v3_hw_fops); /* Create port dir and files */ @@ -3853,7 +3888,7 @@ static void debugfs_create_files_v3_hw(struct hisi_hba *hisi_hba) snprintf(name, 256, "%d", p); debugfs_create_file(name, 0400, dentry, - &hisi_hba->debugfs_port_reg[dump_index][p], + &hisi_hba->debugfs_port_reg[index][p], &debugfs_port_v3_hw_fops); } @@ -3863,7 +3898,7 @@ static void debugfs_create_files_v3_hw(struct hisi_hba *hisi_hba) snprintf(name, 256, "%d", c); debugfs_create_file(name, 0400, dentry, - &hisi_hba->debugfs_cq[dump_index][c], + &hisi_hba->debugfs_cq[index][c], &debugfs_cq_v3_hw_fops); } @@ -3873,32 +3908,32 @@ static void debugfs_create_files_v3_hw(struct hisi_hba *hisi_hba) snprintf(name, 256, "%d", d); debugfs_create_file(name, 0400, dentry, - &hisi_hba->debugfs_dq[dump_index][d], + &hisi_hba->debugfs_dq[index][d], &debugfs_dq_v3_hw_fops); } debugfs_create_file("iost", 0400, dump_dentry, - &hisi_hba->debugfs_iost[dump_index], + &hisi_hba->debugfs_iost[index], &debugfs_iost_v3_hw_fops); debugfs_create_file("iost_cache", 0400, dump_dentry, - &hisi_hba->debugfs_iost_cache[dump_index], + &hisi_hba->debugfs_iost_cache[index], &debugfs_iost_cache_v3_hw_fops); debugfs_create_file("itct", 0400, dump_dentry, - &hisi_hba->debugfs_itct[dump_index], + &hisi_hba->debugfs_itct[index], &debugfs_itct_v3_hw_fops); debugfs_create_file("itct_cache", 0400, dump_dentry, - &hisi_hba->debugfs_itct_cache[dump_index], + &hisi_hba->debugfs_itct_cache[index], &debugfs_itct_cache_v3_hw_fops); debugfs_create_file("axi", 0400, dump_dentry, - &hisi_hba->debugfs_regs[dump_index][DEBUGFS_AXI], + &hisi_hba->debugfs_regs[index][DEBUGFS_AXI], &debugfs_axi_v3_hw_fops); debugfs_create_file("ras", 0400, dump_dentry, - &hisi_hba->debugfs_regs[dump_index][DEBUGFS_RAS], + &hisi_hba->debugfs_regs[index][DEBUGFS_RAS], &debugfs_ras_v3_hw_fops); } @@ -4680,22 +4715,34 @@ static void debugfs_release_v3_hw(struct hisi_hba *hisi_hba, int dump_index) int i; devm_kfree(dev, hisi_hba->debugfs_iost_cache[dump_index].cache); + hisi_hba->debugfs_iost_cache[dump_index].cache = NULL; devm_kfree(dev, hisi_hba->debugfs_itct_cache[dump_index].cache); + hisi_hba->debugfs_itct_cache[dump_index].cache = NULL; devm_kfree(dev, hisi_hba->debugfs_iost[dump_index].iost); + hisi_hba->debugfs_iost[dump_index].iost = NULL; devm_kfree(dev, hisi_hba->debugfs_itct[dump_index].itct); + hisi_hba->debugfs_itct[dump_index].itct = NULL; - for (i = 0; i < hisi_hba->queue_count; i++) + for (i = 0; i < hisi_hba->queue_count; i++) { devm_kfree(dev, hisi_hba->debugfs_dq[dump_index][i].hdr); + hisi_hba->debugfs_dq[dump_index][i].hdr = NULL; + } - for (i = 0; i < hisi_hba->queue_count; i++) + for (i = 0; i < hisi_hba->queue_count; i++) { devm_kfree(dev, hisi_hba->debugfs_cq[dump_index][i].complete_hdr); + hisi_hba->debugfs_cq[dump_index][i].complete_hdr = NULL; + } - for (i = 0; i < DEBUGFS_REGS_NUM; i++) + for (i = 0; i < DEBUGFS_REGS_NUM; i++) { devm_kfree(dev, hisi_hba->debugfs_regs[dump_index][i].data); + hisi_hba->debugfs_regs[dump_index][i].data = NULL; + } - for (i = 0; i < hisi_hba->n_phy; i++) + for (i = 0; i < hisi_hba->n_phy; i++) { devm_kfree(dev, hisi_hba->debugfs_port_reg[dump_index][i].data); + hisi_hba->debugfs_port_reg[dump_index][i].data = NULL; + } } static const struct hisi_sas_debugfs_reg *debugfs_reg_array_v3_hw[DEBUGFS_REGS_NUM] = { @@ -4822,8 +4869,6 @@ static int debugfs_snapshot_regs_v3_hw(struct hisi_hba *hisi_hba) debugfs_snapshot_itct_reg_v3_hw(hisi_hba); debugfs_snapshot_iost_reg_v3_hw(hisi_hba); - debugfs_create_files_v3_hw(hisi_hba); - debugfs_snapshot_restore_v3_hw(hisi_hba); hisi_hba->debugfs_dump_index++; @@ -4907,6 +4952,17 @@ static void debugfs_bist_init_v3_hw(struct hisi_hba *hisi_hba) hisi_hba->debugfs_bist_linkrate = SAS_LINK_RATE_1_5_GBPS; } +static void debugfs_dump_init_v3_hw(struct hisi_hba *hisi_hba) +{ + int i; + + hisi_hba->debugfs_dump_dentry = + debugfs_create_dir("dump", hisi_hba->debugfs_dir); + + for (i = 0; i < hisi_sas_debugfs_dump_count; i++) + debugfs_create_files_v3_hw(hisi_hba, i); +} + static void debugfs_exit_v3_hw(struct hisi_hba *hisi_hba) { debugfs_remove_recursive(hisi_hba->debugfs_dir); @@ -4922,8 +4978,7 @@ static void debugfs_init_v3_hw(struct hisi_hba *hisi_hba) /* create bist structures */ debugfs_bist_init_v3_hw(hisi_hba); - hisi_hba->debugfs_dump_dentry = - debugfs_create_dir("dump", hisi_hba->debugfs_dir); + debugfs_dump_init_v3_hw(hisi_hba); debugfs_phy_down_cnt_init_v3_hw(hisi_hba); debugfs_fifo_init_v3_hw(hisi_hba); -- Gitee From 7bee51111a854098367c3f66ecb872a609ed95f9 Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Tue, 8 Oct 2024 10:18:22 +0800 Subject: [PATCH 15/17] scsi: hisi_sas: Add latest_dump for the debugfs dump driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB5FF2 CVE: NA --------------------------------------------------------------------- Before that, after the user triggers the dump, the latest dump information can be viewed in the directory with the maximum number in the dump directory. After this series patch, the driver creates all debugfs directories and files during initialization. Therefore, users cannot know the directory where the latest dump information is stored. So, add latest_dump file to notify users where the latest dump information is stored. Fixes: 623a4b6d5c2a ("scsi: hisi_sas: Move debugfs code to v3 hw driver") Signed-off-by: Yihang Li Link: https://lore.kernel.org/r/20241008021822.2617339-14-liyihang9@huawei.com Reviewed-by: Xingui Yang Signed-off-by: Martin K. Petersen Signed-off-by: Bing Xia --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 12204a24722a..a86cdb6dbee6 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -4952,6 +4952,19 @@ static void debugfs_bist_init_v3_hw(struct hisi_hba *hisi_hba) hisi_hba->debugfs_bist_linkrate = SAS_LINK_RATE_1_5_GBPS; } +static int debugfs_dump_index_v3_hw_show(struct seq_file *s, void *p) +{ + int *debugfs_dump_index = s->private; + + if (*debugfs_dump_index > 0) + seq_printf(s, "%d\n", *debugfs_dump_index - 1); + else + seq_puts(s, "dump not triggered\n"); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(debugfs_dump_index_v3_hw); + static void debugfs_dump_init_v3_hw(struct hisi_hba *hisi_hba) { int i; @@ -4959,6 +4972,10 @@ static void debugfs_dump_init_v3_hw(struct hisi_hba *hisi_hba) hisi_hba->debugfs_dump_dentry = debugfs_create_dir("dump", hisi_hba->debugfs_dir); + debugfs_create_file("latest_dump", 0400, hisi_hba->debugfs_dump_dentry, + &hisi_hba->debugfs_dump_index, + &debugfs_dump_index_v3_hw_fops); + for (i = 0; i < hisi_sas_debugfs_dump_count; i++) debugfs_create_files_v3_hw(hisi_hba, i); } -- Gitee From ebbc9589262c87a97e82482d7894b46113f8a8e1 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 22 Jan 2024 18:22:02 -0600 Subject: [PATCH 16/17] scsi: core: Allow passthrough to request midlayer retries mainline inclusion from mainline-v6.9-rc1 commit 994724e6b3f05fb3b6e4b1e87d7e074b65d47bf9 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB5FF2 CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git/commit/?id=994724e6b3f05fb3b6e4b1e87d7e074b65d47bf9 --------------------------------------------------------------------- For passthrough we don't retry any error which we get a check condition for. This results in a lot of callers driving their own retries for all UAs, specific UAs, NOT_READY, specific sense values or any type of failure. This adds the core code to allow passthrough users to specify what errors they want the SCSI midlayer to retry for them. We can then convert users to drop a lot of their sense parsing and retry handling. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20240123002220.129141-2-michael.christie@oracle.com Reviewed-by: John Garry Acked-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 98 ++++++++++++++++++++++++++++++++++++-- include/scsi/scsi_device.h | 48 +++++++++++++++++++ 2 files changed, 143 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 031b484eb970..bf8a6078d0fb 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -184,6 +184,92 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason) __scsi_queue_insert(cmd, reason, true); } +void scsi_failures_reset_retries(struct scsi_failures *failures) +{ + struct scsi_failure *failure; + + failures->total_retries = 0; + + for (failure = failures->failure_definitions; failure->result; + failure++) + failure->retries = 0; +} +EXPORT_SYMBOL_GPL(scsi_failures_reset_retries); + +/** + * scsi_check_passthrough - Determine if passthrough scsi_cmnd needs a retry. + * @scmd: scsi_cmnd to check. + * @failures: scsi_failures struct that lists failures to check for. + * + * Returns -EAGAIN if the caller should retry else 0. + */ +static int scsi_check_passthrough(struct scsi_cmnd *scmd, + struct scsi_failures *failures) +{ + struct scsi_failure *failure; + struct scsi_sense_hdr sshdr; + enum sam_status status; + + if (!failures) + return 0; + + for (failure = failures->failure_definitions; failure->result; + failure++) { + if (failure->result == SCMD_FAILURE_RESULT_ANY) + goto maybe_retry; + + if (host_byte(scmd->result) && + host_byte(scmd->result) == host_byte(failure->result)) + goto maybe_retry; + + status = status_byte(scmd->result); + if (!status) + continue; + + if (failure->result == SCMD_FAILURE_STAT_ANY && + !scsi_status_is_good(scmd->result)) + goto maybe_retry; + + if (status != status_byte(failure->result)) + continue; + + if (status_byte(failure->result) != SAM_STAT_CHECK_CONDITION || + failure->sense == SCMD_FAILURE_SENSE_ANY) + goto maybe_retry; + + if (!scsi_command_normalize_sense(scmd, &sshdr)) + return 0; + + if (failure->sense != sshdr.sense_key) + continue; + + if (failure->asc == SCMD_FAILURE_ASC_ANY) + goto maybe_retry; + + if (failure->asc != sshdr.asc) + continue; + + if (failure->ascq == SCMD_FAILURE_ASCQ_ANY || + failure->ascq == sshdr.ascq) + goto maybe_retry; + } + + return 0; + +maybe_retry: + if (failure->allowed) { + if (failure->allowed == SCMD_FAILURE_NO_LIMIT || + ++failure->retries <= failure->allowed) + return -EAGAIN; + } else { + if (failures->total_allowed == SCMD_FAILURE_NO_LIMIT || + ++failures->total_retries <= failures->total_allowed) + return -EAGAIN; + } + + return 0; +} + /** * scsi_execute_cmd - insert request and wait for the result * @sdev: scsi_device @@ -192,7 +278,7 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason) * @buffer: data buffer * @bufflen: len of buffer * @timeout: request timeout in HZ - * @retries: number of times to retry request + * @ml_retries: number of times SCSI midlayer will retry request * @args: Optional args. See struct definition for field descriptions * * Returns the scsi_cmnd result field if a command was executed, or a negative @@ -200,7 +286,7 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason) */ int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd, blk_opf_t opf, void *buffer, unsigned int bufflen, - int timeout, int retries, + int timeout, int ml_retries, const struct scsi_exec_args *args) { static const struct scsi_exec_args default_args; @@ -214,6 +300,7 @@ int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd, args->sense_len != SCSI_SENSE_BUFFERSIZE)) return -EINVAL; +retry: req = scsi_alloc_request(sdev->request_queue, opf, args->req_flags); if (IS_ERR(req)) return PTR_ERR(req); @@ -227,7 +314,7 @@ int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd, scmd = blk_mq_rq_to_pdu(req); scmd->cmd_len = COMMAND_SIZE(cmd[0]); memcpy(scmd->cmnd, cmd, scmd->cmd_len); - scmd->allowed = retries; + scmd->allowed = ml_retries; scmd->flags |= args->scmd_flags; req->timeout = timeout; req->rq_flags |= RQF_QUIET; @@ -237,6 +324,11 @@ int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd, */ blk_execute_rq(req, true); + if (scsi_check_passthrough(scmd, args->failures) == -EAGAIN) { + blk_mq_free_request(req); + goto retry; + } + /* * Some devices (USB mass-storage in particular) may transfer * garbage data together with a residue indicating that the data diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 257b26a001c3..1c3601ad0615 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -508,6 +508,52 @@ extern int scsi_is_sdev_device(const struct device *); extern int scsi_is_target_device(const struct device *); extern void scsi_sanitize_inquiry_string(unsigned char *s, int len); +/* + * scsi_execute_cmd users can set scsi_failure.result to have + * scsi_check_passthrough fail/retry a command. scsi_failure.result can be a + * specific host byte or message code, or SCMD_FAILURE_RESULT_ANY can be used + * to match any host or message code. + */ +#define SCMD_FAILURE_RESULT_ANY 0x7fffffff +/* + * Set scsi_failure.result to SCMD_FAILURE_STAT_ANY to fail/retry any failure + * scsi_status_is_good returns false for. + */ +#define SCMD_FAILURE_STAT_ANY 0xff +/* + * The following can be set to the scsi_failure sense, asc and ascq fields to + * match on any sense, ASC, or ASCQ value. + */ +#define SCMD_FAILURE_SENSE_ANY 0xff +#define SCMD_FAILURE_ASC_ANY 0xff +#define SCMD_FAILURE_ASCQ_ANY 0xff +/* Always retry a matching failure. */ +#define SCMD_FAILURE_NO_LIMIT -1 + +struct scsi_failure { + int result; + u8 sense; + u8 asc; + u8 ascq; + /* + * Number of times scsi_execute_cmd will retry the failure. It does + * not count for the total_allowed. + */ + s8 allowed; + /* Number of times the failure has been retried. */ + s8 retries; +}; + +struct scsi_failures { + /* + * If a scsi_failure does not have a retry limit setup this limit will + * be used. + */ + int total_allowed; + int total_retries; + struct scsi_failure *failure_definitions; +}; + /* Optional arguments to scsi_execute_cmd */ struct scsi_exec_args { unsigned char *sense; /* sense buffer */ @@ -516,6 +562,7 @@ struct scsi_exec_args { blk_mq_req_flags_t req_flags; /* BLK_MQ_REQ flags */ int scmd_flags; /* SCMD flags */ int *resid; /* residual length */ + struct scsi_failures *failures; /* failures to retry */ KABI_RESERVE(1) }; @@ -524,6 +571,7 @@ int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd, blk_opf_t opf, void *buffer, unsigned int bufflen, int timeout, int retries, const struct scsi_exec_args *args); +void scsi_failures_reset_retries(struct scsi_failures *failures); extern void sdev_disable_disk_events(struct scsi_device *sdev); extern void sdev_enable_disk_events(struct scsi_device *sdev); -- Gitee From 4a2a0c0c2f7cf823563dfc7a4b23a4757ce1c71b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 4 Sep 2024 14:03:04 -0700 Subject: [PATCH 17/17] scsi: sd: Retry START STOP UNIT commands mainline inclusion from mainline-v6.12-rc1 commit a8598aefae31f3bf27bad1a4cebf2b04a4cdf220 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB5FF2 CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git/commit/?id=a8598aefae31f3bf27bad1a4cebf2b04a4cdf220 --------------------------------------------------------------------- During system resume, sd_start_stop_device() submits a START STOP UNIT command to the SCSI device that is being resumed. That command is not retried in case of a unit attention and hence may fail. An example: [16575.983359] sd 0:0:0:3: [sdd] Starting disk [16575.983693] sd 0:0:0:3: [sdd] Start/Stop Unit failed: Result: hostbyte=0x00 driverbyte=DRIVER_OK [16575.983712] sd 0:0:0:3: [sdd] Sense Key : 0x6 [16575.983730] sd 0:0:0:3: [sdd] ASC=0x29 ASCQ=0x0 [16575.983738] sd 0:0:0:3: PM: dpm_run_callback(): scsi_bus_resume+0x0/0xa0 returns -5 [16575.983783] sd 0:0:0:3: PM: failed to resume async: error -5 Make the SCSI core retry the START STOP UNIT command if the device reports that it has been powered on or that it has been reset. Cc: Damien Le Moal Cc: Mike Christie Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240904210304.2947789-1-bvanassche@acm.org Reviewed-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 11245bf46f85..24a9b2da47b1 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3833,9 +3833,38 @@ static int sd_start_stop_device(struct scsi_disk *sdkp, int start) { unsigned char cmd[6] = { START_STOP }; /* START_VALID */ struct scsi_sense_hdr sshdr; + struct scsi_failure failure_defs[] = { + { + /* Power on, reset, or bus device reset occurred */ + .sense = UNIT_ATTENTION, + .asc = 0x29, + .ascq = 0, + .result = SAM_STAT_CHECK_CONDITION, + }, + { + /* Power on occurred */ + .sense = UNIT_ATTENTION, + .asc = 0x29, + .ascq = 1, + .result = SAM_STAT_CHECK_CONDITION, + }, + { + /* SCSI bus reset */ + .sense = UNIT_ATTENTION, + .asc = 0x29, + .ascq = 2, + .result = SAM_STAT_CHECK_CONDITION, + }, + {} + }; + struct scsi_failures failures = { + .total_allowed = 3, + .failure_definitions = failure_defs, + }; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, .req_flags = BLK_MQ_REQ_PM, + .failures = &failures, }; struct scsi_device *sdp = sdkp->device; int res; -- Gitee