From e5abc426eb1636d4bcc3e38d08fee5cb922a152f Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 15 Oct 2021 13:58:40 -0500 Subject: [PATCH 01/11] PCI/portdrv: Rename pm_iter() to pcie_port_device_iter() ANBZ: #26970 KOSBZ: #0000233 commit 3134689f98f9e09004a4727370adc46e7635b4be upstream. Rename pm_iter() to pcie_port_device_iter() and make it visible outside CONFIG_PM and portdrv_core.c so it can be used for pciehp slot reset recovery. [bhelgaas: split into its own patch] Link: https://lore.kernel.org/linux-pci/08c046b0-c9f2-3489-eeef-7e7aca435bb9@gmail.com/ Link: https://lore.kernel.org/r/251f4edcc04c14f873ff1c967bc686169cd07d2d.1627638184.git.lukas@wunner.de Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas --- drivers/pci/pcie/portdrv.h | 1 + drivers/pci/pcie/portdrv_core.c | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 2ff5724b8f13..6126ee4676a7 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -110,6 +110,7 @@ void pcie_port_service_unregister(struct pcie_port_service_driver *new); extern struct bus_type pcie_port_bus_type; int pcie_port_device_register(struct pci_dev *dev); +int pcie_port_device_iter(struct device *dev, void *data); #ifdef CONFIG_PM int pcie_port_device_suspend(struct device *dev); int pcie_port_device_resume_noirq(struct device *dev); diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index ff8213ad9889..075b0a9bf8d6 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -403,24 +403,24 @@ int pcie_port_device_register(struct pci_dev *dev) return status; } -#ifdef CONFIG_PM -typedef int (*pcie_pm_callback_t)(struct pcie_device *); +typedef int (*pcie_callback_t)(struct pcie_device *); -static int pm_iter(struct device *dev, void *data) +int pcie_port_device_iter(struct device *dev, void *data) { struct pcie_port_service_driver *service_driver; size_t offset = *(size_t *)data; - pcie_pm_callback_t cb; + pcie_callback_t cb; if ((dev->bus == &pcie_port_bus_type) && dev->driver) { service_driver = to_service_driver(dev->driver); - cb = *(pcie_pm_callback_t *)((void *)service_driver + offset); + cb = *(pcie_callback_t *)((void *)service_driver + offset); if (cb) return cb(to_pcie_device(dev)); } return 0; } +#ifdef CONFIG_PM /** * pcie_port_device_suspend - suspend port services associated with a PCIe port * @dev: PCI Express port to handle @@ -428,13 +428,13 @@ static int pm_iter(struct device *dev, void *data) int pcie_port_device_suspend(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, suspend); - return device_for_each_child(dev, &off, pm_iter); + return device_for_each_child(dev, &off, pcie_port_device_iter); } int pcie_port_device_resume_noirq(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, resume_noirq); - return device_for_each_child(dev, &off, pm_iter); + return device_for_each_child(dev, &off, pcie_port_device_iter); } /** @@ -444,7 +444,7 @@ int pcie_port_device_resume_noirq(struct device *dev) int pcie_port_device_resume(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, resume); - return device_for_each_child(dev, &off, pm_iter); + return device_for_each_child(dev, &off, pcie_port_device_iter); } /** @@ -454,7 +454,7 @@ int pcie_port_device_resume(struct device *dev) int pcie_port_device_runtime_suspend(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, runtime_suspend); - return device_for_each_child(dev, &off, pm_iter); + return device_for_each_child(dev, &off, pcie_port_device_iter); } /** @@ -464,7 +464,7 @@ int pcie_port_device_runtime_suspend(struct device *dev) int pcie_port_device_runtime_resume(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, runtime_resume); - return device_for_each_child(dev, &off, pm_iter); + return device_for_each_child(dev, &off, pcie_port_device_iter); } #endif /* PM */ -- Gitee From 821dafe1b02408ead7f1ac42feb845c041e5936e Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 31 Jul 2021 14:39:01 +0200 Subject: [PATCH 02/11] PCI: pciehp: Ignore Link Down/Up caused by error-induced Hot Reset ANBZ: #26970 KOSBZ: #0000233 commit ea401499e943c307e6d44af6c2b4e068643e7884 upstream. Stuart Hayes reports that an error handled by DPC at a Root Port results in pciehp gratuitously bringing down a subordinate hotplug port: RP -- UP -- DP -- UP -- DP (hotplug) -- EP pciehp brings the slot down because the Link to the Endpoint goes down. That is caused by a Hot Reset being propagated as a result of DPC. Per PCIe Base Spec 5.0, section 6.6.1 "Conventional Reset": For a Switch, the following must cause a hot reset to be sent on all Downstream Ports: [...] * The Data Link Layer of the Upstream Port reporting DL_Down status. In Switches that support Link speeds greater than 5.0 GT/s, the Upstream Port must direct the LTSSM of each Downstream Port to the Hot Reset state, but not hold the LTSSMs in that state. This permits each Downstream Port to begin Link training immediately after its hot reset completes. This behavior is recommended for all Switches. * Receiving a hot reset on the Upstream Port. Once DPC recovers, pcie_do_recovery() walks down the hierarchy and invokes pcie_portdrv_slot_reset() to restore each port's config space. At that point, a hotplug interrupt is signaled per PCIe Base Spec r5.0, section 6.7.3.4 "Software Notification of Hot-Plug Events": If the Port is enabled for edge-triggered interrupt signaling using MSI or MSI-X, an interrupt message must be sent every time the logical AND of the following conditions transitions from FALSE to TRUE: [...] * The Hot-Plug Interrupt Enable bit in the Slot Control register is set to 1b. * At least one hot-plug event status bit in the Slot Status register and its associated enable bit in the Slot Control register are both set to 1b. Prevent pciehp from gratuitously bringing down the slot by clearing the error-induced Data Link Layer State Changed event before restoring config space. Afterwards, check whether the link has unexpectedly failed to retrain and synthesize a DLLSC event if so. Allow each pcie_port_service_driver (one of them being pciehp) to define a slot_reset callback and re-use the existing pm_iter() function to iterate over the callbacks. Thereby, the Endpoint driver remains bound throughout error recovery and may restore the device to working state. Surprise removal during error recovery is detected through a Presence Detect Changed event. The hotplug port is expected to not signal that event as a result of a Hot Reset. The issue isn't DPC-specific, it also occurs when an error is handled by AER through aer_root_reset(). So while the issue was noticed only now, it's been around since 2006 when AER support was first introduced. [bhelgaas: drop PCI_ERROR_RECOVERY Kconfig, split pm_iter() rename to preparatory patch] Link: https://lore.kernel.org/linux-pci/08c046b0-c9f2-3489-eeef-7e7aca435bb9@gmail.com/ Fixes: 6c2b374d7485 ("PCI-Express AER implemetation: AER core and aerdriver") Link: https://lore.kernel.org/r/251f4edcc04c14f873ff1c967bc686169cd07d2d.1627638184.git.lukas@wunner.de Reported-by: Stuart Hayes Tested-by: Stuart Hayes Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # v2.6.19+: ba952824e6c1: PCI/portdrv: Report reset for frozen channel Cc: Keith Busch Signed-off-by: Chu Guangqing --- drivers/pci/hotplug/pciehp.h | 2 ++ drivers/pci/hotplug/pciehp_core.c | 2 ++ drivers/pci/hotplug/pciehp_hpc.c | 26 ++++++++++++++++++++++++++ drivers/pci/pcie/portdrv.h | 2 ++ drivers/pci/pcie/portdrv_pci.c | 3 +++ 5 files changed, 35 insertions(+) diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index f1f789fe0637..d9e6b9cf81d2 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -189,6 +189,8 @@ int pciehp_get_attention_status(struct hotplug_slot *hotplug_slot, u8 *status); int pciehp_set_raw_indicator_status(struct hotplug_slot *h_slot, u8 status); int pciehp_get_raw_indicator_status(struct hotplug_slot *h_slot, u8 *status); +int pciehp_slot_reset(struct pcie_device *dev); + static inline const char *slot_name(struct controller *ctrl) { return hotplug_slot_name(&ctrl->hotplug_slot); diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index c7af6298219d..88c30218c2ce 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -384,6 +384,8 @@ static struct pcie_port_service_driver hpdriver_portdrv = { .runtime_suspend = pciehp_runtime_suspend, .runtime_resume = pciehp_runtime_resume, #endif /* PM */ + + .slot_reset = pciehp_slot_reset, }; int __init pcie_hp_init(void) diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 75c6c72ec32a..44093b23cf85 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -859,6 +859,32 @@ void pcie_disable_interrupt(struct controller *ctrl) pcie_write_cmd(ctrl, 0, mask); } +/** + * pciehp_slot_reset() - ignore link event caused by error-induced hot reset + * @dev: PCI Express port service device + * + * Called from pcie_portdrv_slot_reset() after AER or DPC initiated a reset + * further up in the hierarchy to recover from an error. The reset was + * propagated down to this hotplug port. Ignore the resulting link flap. + * If the link failed to retrain successfully, synthesize the ignored event. + * Surprise removal during reset is detected through Presence Detect Changed. + */ +int pciehp_slot_reset(struct pcie_device *dev) +{ + struct controller *ctrl = get_service_data(dev); + + if (ctrl->state != ON_STATE) + return 0; + + pcie_capability_write_word(dev->port, PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_DLLSC); + + if (!pciehp_check_link_active(ctrl)) + pciehp_request(ctrl, PCI_EXP_SLTSTA_DLLSC); + + return 0; +} + /* * pciehp has a 1:1 bus:slot relationship so we ultimately want a secondary * bus reset of the bridge, but at the same time we want to ensure that it is diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 6126ee4676a7..41fe1ffd5907 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -85,6 +85,8 @@ struct pcie_port_service_driver { int (*runtime_suspend)(struct pcie_device *dev); int (*runtime_resume)(struct pcie_device *dev); + int (*slot_reset)(struct pcie_device *dev); + /* Device driver may resume normal operations */ void (*error_resume)(struct pci_dev *dev); diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index 8d282cdcb5f8..fa81e15f9dd7 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -172,6 +172,9 @@ static pci_ers_result_t pcie_portdrv_error_detected(struct pci_dev *dev, static pci_ers_result_t pcie_portdrv_slot_reset(struct pci_dev *dev) { + size_t off = offsetof(struct pcie_port_service_driver, slot_reset); + device_for_each_child(&dev->dev, &off, pcie_port_device_iter); + pci_restore_state(dev); pci_save_state(dev); return PCI_ERS_RESULT_RECOVERED; -- Gitee From 62900b50d89cc1c4ede423521f494c239892b851 Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Tue, 25 Jan 2022 08:18:19 +0100 Subject: [PATCH 03/11] PCI/portdrv: Don't disable AER reporting in get_port_device_capability() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #26970 KOSBZ: #0000233 commit 8795e182b02dc87e343c79e73af6b8b7f9c5e635 upstream. AER reporting is currently disabled in the DevCtl registers of all non Root Port PCIe devices on systems using pcie_ports_native || host->native_aer, disabling AER completely in such systems. This is because 2bd50dd800b5 ("PCI: PCIe: Disable PCIe port services during port initialization"), added a call to pci_disable_pcie_error_reporting() *after* the AER setup was completed for the PCIe device tree. Here a longer analysis about the current status of AER enabling / disabling upon bootup provided by Bjorn: pcie_portdrv_probe pcie_port_device_register get_port_device_capability pci_disable_pcie_error_reporting clear CERE NFERE FERE URRE # <-- disable for RP USP DSP pcie_device_init device_register # new AER service device aer_probe aer_enable_rootport # RP only set_downstream_devices_error_reporting set_device_error_reporting # self (RP) if (RP || USP || DSP) pci_enable_pcie_error_reporting set CERE NFERE FERE URRE # <-- enable for RP pci_walk_bus set_device_error_reporting if (RP || USP || DSP) pci_enable_pcie_error_reporting set CERE NFERE FERE URRE # <-- enable for USP DSP In a typical Root Port -> Endpoint hierarchy, the above: - Disables Error Reporting for the Root Port, - Enables Error Reporting for the Root Port, - Does NOT enable Error Reporting for the Endpoint because it is not a Root Port or Switch Port. In a deeper Root Port -> Upstream Switch Port -> Downstream Switch Port -> Endpoint hierarchy: - Disables Error Reporting for the Root Port, - Enables Error Reporting for the Root Port, - Enables Error Reporting for both Switch Ports, - Does NOT enable Error Reporting for the Endpoint because it is not a Root Port or Switch Port, - Disables Error Reporting for the Switch Ports when pcie_portdrv_probe() claims them. AER does not re-enable it because these are not Root Ports. Remove this call to pci_disable_pcie_error_reporting() from get_port_device_capability(), leaving the already enabled AER configuration intact. With this change, AER is enabled in the Root Port and the PCIe switch upstream and downstream ports. Only the PCIe Endpoints don't have AER enabled yet. A follow-up patch will take care of this Endpoint enabling. Fixes: 2bd50dd800b5 ("PCI: PCIe: Disable PCIe port services during port initialization") Link: https://lore.kernel.org/r/20220125071820.2247260-3-sr@denx.de Signed-off-by: Stefan Roese Signed-off-by: Bjorn Helgaas Reviewed-by: Pali Rohár Cc: Rafael J. Wysocki Cc: Bharat Kumar Gogada Cc: Michal Simek Cc: Yao Hongbo Cc: Naveen Naidu Signed-off-by: Chu Guangqing --- drivers/pci/pcie/portdrv_core.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 075b0a9bf8d6..adec625282c6 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -258,15 +258,8 @@ static int get_port_device_capability(struct pci_dev *dev) #ifdef CONFIG_PCIEAER if (dev->aer_cap && pci_aer_available() && - (pcie_ports_native || host->native_aer)) { + (pcie_ports_native || host->native_aer)) services |= PCIE_PORT_SERVICE_AER; - - /* - * Disable AER on this port in case it's been enabled by the - * BIOS (the AER service driver will enable it when necessary). - */ - pci_disable_pcie_error_reporting(dev); - } #endif /* Root Ports and Root Complex Event Collectors may generate PMEs */ -- Gitee From 628969e53ccc89222a7623b18e5791c03fa63ba8 Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Tue, 25 Jan 2022 08:18:20 +0100 Subject: [PATCH 04/11] PCI/AER: Enable error reporting when AER is native MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #26970 KOSBZ: #0000233 commit f26e58bf6f547031f91a1b0e39b9308d48a4ba8c upstream. If we have native control of AER, set the following error reporting enable bits: - Correctable Error Reporting Enable - Non-Fatal Error Reporting Enable - Fatal Error Reporting Enable - Unsupported Request Reporting Enable Note that these bits are all in the Device Control register and are not AER-specific. This affects all devices with an AER capability, including hot-added devices. Please note that this change is quite invasive, as error reporting now will be enabled for all available PCIe Endpoints, which was previously not the case. When "pci=noaer" is selected, error reporting stays disabled of course. [bhelgaas: commit log, note error reporting is not AER-specific] Link: https://lore.kernel.org/r/20220125071820.2247260-4-sr@denx.de Signed-off-by: Stefan Roese Signed-off-by: Bjorn Helgaas Reviewed-by: Pali Rohár Cc: Bharat Kumar Gogada Cc: Michal Simek Cc: Yao Hongbo Cc: Naveen Naidu Signed-off-by: Chu Guangqing --- drivers/pci/pcie/aer.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index df8089842c76..a80cb4042145 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -392,6 +392,10 @@ void pci_aer_init(struct pci_dev *dev) pci_add_ext_cap_save_buffer(dev, PCI_EXT_CAP_ID_ERR, sizeof(u32) * n); pci_aer_clear_status(dev); + + /* Enable AER if requested */ + if (pci_aer_available()) + pci_enable_pcie_error_reporting(dev); } void pci_aer_exit(struct pci_dev *dev) -- Gitee From d2fc0e9bfb3157b8f6a0daf4083fc1d1f3573ae1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 14 Feb 2022 12:41:08 +0100 Subject: [PATCH 05/11] PCI: Add defines for normal and subtractive PCI bridges MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #26970 KOSBZ: #0000233 commit 904b10fb189cc15376e9bfce1ef0282e68b0b004 upstream. Add these PCI class codes to pci_ids.h: PCI_CLASS_BRIDGE_PCI_NORMAL PCI_CLASS_BRIDGE_PCI_SUBTRACTIVE Use these defines in all kernel code for describing PCI class codes for normal and subtractive PCI bridges. [bhelgaas: similar change in pci-mvebu.c] Link: https://lore.kernel.org/r/20220214114109.26809-1-pali@kernel.org Signed-off-by: Pali Rohár Signed-off-by: Bjorn Helgaas Signed-off-by: Chu Guangqing --- .../mips/include/asm/mach-bcm63xx/bcm63xx_regs.h | 2 -- arch/mips/pci/fixup-sb1250.c | 2 +- arch/mips/pci/pci-bcm63xx.c | 2 +- arch/powerpc/platforms/powernv/pci.c | 2 +- arch/powerpc/sysdev/fsl_pci.c | 2 +- arch/sh/drivers/pci/pcie-sh7786.c | 2 +- drivers/pci/controller/dwc/pci-keystone.c | 8 ++++---- drivers/pci/controller/dwc/pci-meson.c | 16 ++++++++-------- drivers/pci/controller/dwc/pcie-qcom.c | 2 +- .../pci/controller/mobiveil/pcie-mobiveil-host.c | 2 +- drivers/pci/controller/pci-aardvark.c | 2 +- drivers/pci/controller/pci-loongson.c | 2 +- drivers/pci/controller/pci-tegra.c | 2 +- drivers/pci/controller/pcie-iproc-bcma.c | 2 +- drivers/pci/controller/pcie-iproc.c | 2 +- drivers/pci/controller/pcie-rcar-host.c | 2 +- drivers/pci/controller/pcie-rockchip-host.c | 2 +- drivers/pci/controller/pcie-rockchip.h | 1 - drivers/pci/hotplug/shpchp_core.c | 2 +- drivers/pci/pci-bridge-emul.c | 2 +- drivers/pci/pcie/portdrv_pci.c | 4 ++-- include/linux/pci_ids.h | 2 ++ 22 files changed, 32 insertions(+), 33 deletions(-) diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h index 9ceb5e72889f..d3f397dcab6e 100644 --- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h +++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h @@ -1380,8 +1380,6 @@ #define PCIE_IDVAL3_REG 0x43c #define IDVAL3_CLASS_CODE_MASK 0xffffff -#define IDVAL3_SUBCLASS_SHIFT 8 -#define IDVAL3_CLASS_SHIFT 16 #define PCIE_DLSTATUS_REG 0x1048 #define DLSTATUS_PHYLINKUP (1 << 13) diff --git a/arch/mips/pci/fixup-sb1250.c b/arch/mips/pci/fixup-sb1250.c index 40efc990cdce..3f914c33b7de 100644 --- a/arch/mips/pci/fixup-sb1250.c +++ b/arch/mips/pci/fixup-sb1250.c @@ -75,7 +75,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SIBYTE, PCI_DEVICE_ID_BCM1250_PCI, */ static void quirk_sb1250_ht(struct pci_dev *dev) { - dev->class = PCI_CLASS_BRIDGE_PCI << 8; + dev->class = PCI_CLASS_BRIDGE_PCI_NORMAL; } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SIBYTE, PCI_DEVICE_ID_BCM1250_HT, quirk_sb1250_ht); diff --git a/arch/mips/pci/pci-bcm63xx.c b/arch/mips/pci/pci-bcm63xx.c index 5548365605c0..ac83243772d2 100644 --- a/arch/mips/pci/pci-bcm63xx.c +++ b/arch/mips/pci/pci-bcm63xx.c @@ -186,7 +186,7 @@ static int __init bcm63xx_register_pcie(void) /* setup class code as bridge */ val = bcm_pcie_readl(PCIE_IDVAL3_REG); val &= ~IDVAL3_CLASS_CODE_MASK; - val |= (PCI_CLASS_BRIDGE_PCI << IDVAL3_SUBCLASS_SHIFT); + val |= PCI_CLASS_BRIDGE_PCI_NORMAL; bcm_pcie_writel(val, PCIE_IDVAL3_REG); /* disable bar1 size */ diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 9b9bca169275..7bd7678157f5 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -882,7 +882,7 @@ void pnv_pci_shutdown(void) /* Fixup wrong class code in p7ioc and p8 root complex */ static void pnv_p7ioc_rc_quirk(struct pci_dev *dev) { - dev->class = PCI_CLASS_BRIDGE_PCI << 8; + dev->class = PCI_CLASS_BRIDGE_PCI_NORMAL; } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM, 0x3b9, pnv_p7ioc_rc_quirk); diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 040b9d01c079..ae77a2aedc6a 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -55,7 +55,7 @@ static void quirk_fsl_pcie_early(struct pci_dev *dev) if ((hdr_type & 0x7f) != PCI_HEADER_TYPE_BRIDGE) return; - dev->class = PCI_CLASS_BRIDGE_PCI << 8; + dev->class = PCI_CLASS_BRIDGE_PCI_NORMAL; fsl_pcie_bus_fixup = 1; return; } diff --git a/arch/sh/drivers/pci/pcie-sh7786.c b/arch/sh/drivers/pci/pcie-sh7786.c index 4468289ab2ca..4570aea185df 100644 --- a/arch/sh/drivers/pci/pcie-sh7786.c +++ b/arch/sh/drivers/pci/pcie-sh7786.c @@ -314,7 +314,7 @@ static int __init pcie_init(struct sh7786_pcie_port *port) * class to match. Hardware takes care of propagating the IDSETR * settings, so there is no need to bother with a quirk. */ - pci_write_reg(chan, PCI_CLASS_BRIDGE_PCI << 16, SH4A_PCIEIDSETR1); + pci_write_reg(chan, PCI_CLASS_BRIDGE_PCI_NORMAL << 8, SH4A_PCIEIDSETR1); /* Initialize default capabilities. */ data = pci_read_reg(chan, SH4A_PCIEEXPCAP0); diff --git a/drivers/pci/controller/dwc/pci-keystone.c b/drivers/pci/controller/dwc/pci-keystone.c index ff92da3df76c..1d808f0a5d65 100644 --- a/drivers/pci/controller/dwc/pci-keystone.c +++ b/drivers/pci/controller/dwc/pci-keystone.c @@ -558,13 +558,13 @@ static void ks_pcie_quirk(struct pci_dev *dev) static const struct pci_device_id rc_pci_devids[] = { { PCI_DEVICE(PCI_VENDOR_ID_TI, PCIE_RC_K2HK), - .class = PCI_CLASS_BRIDGE_PCI << 8, .class_mask = ~0, }, + .class = PCI_CLASS_BRIDGE_PCI_NORMAL, .class_mask = ~0, }, { PCI_DEVICE(PCI_VENDOR_ID_TI, PCIE_RC_K2E), - .class = PCI_CLASS_BRIDGE_PCI << 8, .class_mask = ~0, }, + .class = PCI_CLASS_BRIDGE_PCI_NORMAL, .class_mask = ~0, }, { PCI_DEVICE(PCI_VENDOR_ID_TI, PCIE_RC_K2L), - .class = PCI_CLASS_BRIDGE_PCI << 8, .class_mask = ~0, }, + .class = PCI_CLASS_BRIDGE_PCI_NORMAL, .class_mask = ~0, }, { PCI_DEVICE(PCI_VENDOR_ID_TI, PCIE_RC_K2G), - .class = PCI_CLASS_BRIDGE_PCI << 8, .class_mask = ~0, }, + .class = PCI_CLASS_BRIDGE_PCI_NORMAL, .class_mask = ~0, }, { 0, }, }; static const struct pci_device_id am6_pci_devids[] = { diff --git a/drivers/pci/controller/dwc/pci-meson.c b/drivers/pci/controller/dwc/pci-meson.c index 1913dc2c8fa0..28226745fec2 100644 --- a/drivers/pci/controller/dwc/pci-meson.c +++ b/drivers/pci/controller/dwc/pci-meson.c @@ -319,14 +319,14 @@ static int meson_pcie_rd_own_conf(struct pci_bus *bus, u32 devfn, * cannot program the PCI_CLASS_DEVICE register, so we must fabricate * the return value in the config accessors. */ - if (where == PCI_CLASS_REVISION && size == 4) - *val = (PCI_CLASS_BRIDGE_PCI << 16) | (*val & 0xffff); - else if (where == PCI_CLASS_DEVICE && size == 2) - *val = PCI_CLASS_BRIDGE_PCI; - else if (where == PCI_CLASS_DEVICE && size == 1) - *val = PCI_CLASS_BRIDGE_PCI & 0xff; - else if (where == PCI_CLASS_DEVICE + 1 && size == 1) - *val = (PCI_CLASS_BRIDGE_PCI >> 8) & 0xff; + if ((where & ~3) == PCI_CLASS_REVISION) { + if (size <= 2) + *val = (*val & ((1 << (size * 8)) - 1)) << (8 * (where & 3)); + *val &= ~0xffffff00; + *val |= PCI_CLASS_BRIDGE_PCI_NORMAL << 8; + if (size <= 2) + *val = (*val >> (8 * (where & 3))) & ((1 << (size * 8)) - 1); + } return PCIBIOS_SUCCESSFUL; } diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index 1b8b3c12eece..bb902c0a248a 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -1474,7 +1474,7 @@ static const struct of_device_id qcom_pcie_match[] = { static void qcom_fixup_class(struct pci_dev *dev) { - dev->class = PCI_CLASS_BRIDGE_PCI << 8; + dev->class = PCI_CLASS_BRIDGE_PCI_NORMAL; } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x0101, qcom_fixup_class); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x0104, qcom_fixup_class); diff --git a/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c b/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c index a2632d02ce8f..b2590bee3a69 100644 --- a/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c +++ b/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c @@ -298,7 +298,7 @@ int mobiveil_host_init(struct mobiveil_pcie *pcie, bool reinit) /* fixup for PCIe class register */ value = mobiveil_csr_readl(pcie, PAB_INTP_AXI_PIO_CLASS); value &= 0xff; - value |= (PCI_CLASS_BRIDGE_PCI << 16); + value |= PCI_CLASS_BRIDGE_PCI_NORMAL << 8; mobiveil_csr_writel(pcie, value, PAB_INTP_AXI_PIO_CLASS); return 0; diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c index 0c603e069f21..fabb151c1b03 100644 --- a/drivers/pci/controller/pci-aardvark.c +++ b/drivers/pci/controller/pci-aardvark.c @@ -536,7 +536,7 @@ static void advk_pcie_setup_hw(struct advk_pcie *pcie) */ reg = advk_readl(pcie, PCIE_CORE_DEV_REV_REG); reg &= ~0xffffff00; - reg |= (PCI_CLASS_BRIDGE_PCI << 8) << 8; + reg |= PCI_CLASS_BRIDGE_PCI_NORMAL << 8; advk_writel(pcie, reg, PCIE_CORE_DEV_REV_REG); /* Disable Root Bridge I/O space, memory space and bus mastering */ diff --git a/drivers/pci/controller/pci-loongson.c b/drivers/pci/controller/pci-loongson.c index dc27dbc5ad73..68064970a7cb 100644 --- a/drivers/pci/controller/pci-loongson.c +++ b/drivers/pci/controller/pci-loongson.c @@ -58,7 +58,7 @@ struct loongson_pci { /* Fixup wrong class code in PCIe bridges */ static void bridge_class_quirk(struct pci_dev *dev) { - dev->class = PCI_CLASS_BRIDGE_PCI << 8; + dev->class = PCI_CLASS_BRIDGE_PCI_NORMAL; } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, DEV_PCIE_PORT_0, bridge_class_quirk); diff --git a/drivers/pci/controller/pci-tegra.c b/drivers/pci/controller/pci-tegra.c index b4eb75f25906..85d2a26993c0 100644 --- a/drivers/pci/controller/pci-tegra.c +++ b/drivers/pci/controller/pci-tegra.c @@ -745,7 +745,7 @@ static void tegra_pcie_port_free(struct tegra_pcie_port *port) /* Tegra PCIE root complex wrongly reports device class */ static void tegra_pcie_fixup_class(struct pci_dev *dev) { - dev->class = PCI_CLASS_BRIDGE_PCI << 8; + dev->class = PCI_CLASS_BRIDGE_PCI_NORMAL; } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0bf0, tegra_pcie_fixup_class); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0bf1, tegra_pcie_fixup_class); diff --git a/drivers/pci/controller/pcie-iproc-bcma.c b/drivers/pci/controller/pcie-iproc-bcma.c index f918c713afb0..8eb17817e5cd 100644 --- a/drivers/pci/controller/pcie-iproc-bcma.c +++ b/drivers/pci/controller/pcie-iproc-bcma.c @@ -18,7 +18,7 @@ /* NS: CLASS field is R/O, and set to wrong 0x200 value */ static void bcma_pcie2_fixup_class(struct pci_dev *dev) { - dev->class = PCI_CLASS_BRIDGE_PCI << 8; + dev->class = PCI_CLASS_BRIDGE_PCI_NORMAL; } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_BROADCOM, 0x8011, bcma_pcie2_fixup_class); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_BROADCOM, 0x8012, bcma_pcie2_fixup_class); diff --git a/drivers/pci/controller/pcie-iproc.c b/drivers/pci/controller/pcie-iproc.c index cc5b7823edeb..0aa0679ae758 100644 --- a/drivers/pci/controller/pcie-iproc.c +++ b/drivers/pci/controller/pcie-iproc.c @@ -1594,7 +1594,7 @@ static void quirk_paxc_bridge(struct pci_dev *pdev) * code that the bridge is not an Ethernet device. */ if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE) - pdev->class = PCI_CLASS_BRIDGE_PCI << 8; + pdev->class = PCI_CLASS_BRIDGE_PCI_NORMAL; /* * MPSS is not being set properly (as it is currently 0). This is diff --git a/drivers/pci/controller/pcie-rcar-host.c b/drivers/pci/controller/pcie-rcar-host.c index 2bee09b16255..94930625150d 100644 --- a/drivers/pci/controller/pcie-rcar-host.c +++ b/drivers/pci/controller/pcie-rcar-host.c @@ -362,7 +362,7 @@ static int rcar_pcie_hw_init(struct rcar_pcie *pcie) * class to match. Hardware takes care of propagating the IDSETR * settings, so there is no need to bother with a quirk. */ - rcar_pci_write_reg(pcie, PCI_CLASS_BRIDGE_PCI << 16, IDSETR1); + rcar_pci_write_reg(pcie, PCI_CLASS_BRIDGE_PCI_NORMAL << 8, IDSETR1); /* * Setup Secondary Bus Number & Subordinate Bus Number, even though diff --git a/drivers/pci/controller/pcie-rockchip-host.c b/drivers/pci/controller/pcie-rockchip-host.c index 0d6df73bb918..957b19e75044 100644 --- a/drivers/pci/controller/pcie-rockchip-host.c +++ b/drivers/pci/controller/pcie-rockchip-host.c @@ -373,7 +373,7 @@ static int rockchip_pcie_host_init_port(struct rockchip_pcie *rockchip) rockchip_pcie_write(rockchip, ROCKCHIP_VENDOR_ID, PCIE_CORE_CONFIG_VENDOR); rockchip_pcie_write(rockchip, - PCI_CLASS_BRIDGE_PCI << PCIE_RC_CONFIG_SCC_SHIFT, + PCI_CLASS_BRIDGE_PCI_NORMAL << 8, PCIE_RC_CONFIG_RID_CCR); /* Clear THP cap's next cap pointer to remove L1 substate cap */ diff --git a/drivers/pci/controller/pcie-rockchip.h b/drivers/pci/controller/pcie-rockchip.h index c7d0178fc8c2..a1cf8ac99ef8 100644 --- a/drivers/pci/controller/pcie-rockchip.h +++ b/drivers/pci/controller/pcie-rockchip.h @@ -133,7 +133,6 @@ #define PCIE_RC_CONFIG_NORMAL_BASE 0x800000 #define PCIE_RC_CONFIG_BASE 0xa00000 #define PCIE_RC_CONFIG_RID_CCR (PCIE_RC_CONFIG_BASE + 0x08) -#define PCIE_RC_CONFIG_SCC_SHIFT 16 #define PCIE_RC_CONFIG_DCR (PCIE_RC_CONFIG_BASE + 0xc4) #define PCIE_RC_CONFIG_DCR_CSPL_SHIFT 18 #define PCIE_RC_CONFIG_DCR_CSPL_LIMIT 0xff diff --git a/drivers/pci/hotplug/shpchp_core.c b/drivers/pci/hotplug/shpchp_core.c index 81a918d47895..53692b048301 100644 --- a/drivers/pci/hotplug/shpchp_core.c +++ b/drivers/pci/hotplug/shpchp_core.c @@ -312,7 +312,7 @@ static void shpc_remove(struct pci_dev *dev) } static const struct pci_device_id shpcd_pci_tbl[] = { - {PCI_DEVICE_CLASS(((PCI_CLASS_BRIDGE_PCI << 8) | 0x00), ~0)}, + {PCI_DEVICE_CLASS(PCI_CLASS_BRIDGE_PCI_NORMAL, ~0)}, { /* end: all zeroes */ } }; MODULE_DEVICE_TABLE(pci, shpcd_pci_tbl); diff --git a/drivers/pci/pci-bridge-emul.c b/drivers/pci/pci-bridge-emul.c index 37504c2cce9b..34a7729ad826 100644 --- a/drivers/pci/pci-bridge-emul.c +++ b/drivers/pci/pci-bridge-emul.c @@ -284,7 +284,7 @@ int pci_bridge_emul_init(struct pci_bridge_emul *bridge, { BUILD_BUG_ON(sizeof(bridge->conf) != PCI_BRIDGE_CONF_END); - bridge->conf.class_revision |= cpu_to_le32(PCI_CLASS_BRIDGE_PCI << 16); + bridge->conf.class_revision |= cpu_to_le32(PCI_CLASS_BRIDGE_PCI_NORMAL << 16); bridge->conf.header_type = PCI_HEADER_TYPE_BRIDGE; bridge->conf.cache_line_size = 0x10; bridge->conf.status = cpu_to_le16(PCI_STATUS_CAP_LIST); diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index fa81e15f9dd7..b73eafbe437b 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -213,9 +213,9 @@ static void pcie_portdrv_err_resume(struct pci_dev *dev) */ static const struct pci_device_id port_pci_ids[] = { /* handle any PCI-Express port */ - { PCI_DEVICE_CLASS(((PCI_CLASS_BRIDGE_PCI << 8) | 0x00), ~0) }, + { PCI_DEVICE_CLASS(PCI_CLASS_BRIDGE_PCI_NORMAL, ~0) }, /* subtractive decode PCI-to-PCI bridge, class type is 060401h */ - { PCI_DEVICE_CLASS(((PCI_CLASS_BRIDGE_PCI << 8) | 0x01), ~0) }, + { PCI_DEVICE_CLASS(PCI_CLASS_BRIDGE_PCI_SUBTRACTIVE, ~0) }, /* handle any Root Complex Event Collector */ { PCI_DEVICE_CLASS(((PCI_CLASS_SYSTEM_RCEC << 8) | 0x00), ~0) }, { }, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 9b182809e987..ada175615111 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -59,6 +59,8 @@ #define PCI_CLASS_BRIDGE_EISA 0x0602 #define PCI_CLASS_BRIDGE_MC 0x0603 #define PCI_CLASS_BRIDGE_PCI 0x0604 +#define PCI_CLASS_BRIDGE_PCI_NORMAL 0x060400 +#define PCI_CLASS_BRIDGE_PCI_SUBTRACTIVE 0x060401 #define PCI_CLASS_BRIDGE_PCMCIA 0x0605 #define PCI_CLASS_BRIDGE_NUBUS 0x0606 #define PCI_CLASS_BRIDGE_CARDBUS 0x0607 -- Gitee From 20c868e2efb6c34d4e8a5661049e867f1dc150ff Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 18 Apr 2022 08:49:51 +0800 Subject: [PATCH 06/11] driver core: Add dma_cleanup callback in bus_type ANBZ: #26970 KOSBZ: #0000233 commit 25f3bcfc54bcf7b0e45d140ec8bfbbf10ba11869 upstream. The bus_type structure defines dma_configure() callback for bus drivers to configure DMA on the devices. This adds the paired dma_cleanup() callback and calls it during driver unbinding so that bus drivers can do some cleanup work. One use case for this paired DMA callbacks is for the bus driver to check for DMA ownership conflicts during driver binding, where multiple devices belonging to a same IOMMU group (the minimum granularity of isolation and protection) may be assigned to kernel drivers or user space respectively. Without this change, for example, the vfio driver has to listen to a bus BOUND_DRIVER event and then BUG_ON() in case of dma ownership conflict. This leads to bad user experience since careless driver binding operation may crash the system if the admin overlooks the group restriction. Aside from bad design, this leads to a security problem as a root user, even with lockdown=integrity, can force the kernel to BUG. With this change, the bus driver could check and set the DMA ownership in driver binding process and fail on ownership conflicts. The DMA ownership should be released during driver unbinding. Signed-off-by: Lu Baolu Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220418005000.897664-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Chu Guangqing --- drivers/base/dd.c | 5 +++++ include/linux/device/bus.h | 3 +++ 2 files changed, 8 insertions(+) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index c37184775b26..87377e4aa157 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -651,6 +651,8 @@ static int really_probe(struct device *dev, struct device_driver *drv) if (dev->bus) blocking_notifier_call_chain(&dev->bus->p->bus_notifier, BUS_NOTIFY_DRIVER_NOT_BOUND, dev); + if (dev->bus && dev->bus->dma_cleanup) + dev->bus->dma_cleanup(dev); pinctrl_bind_failed: device_links_no_driver(dev); devres_release_all(dev); @@ -1241,6 +1243,9 @@ static void __device_release_driver(struct device *dev, struct device *parent) pm_runtime_reinit(dev); dev_pm_set_driver_flags(dev, 0); + if (dev->bus && dev->bus->dma_cleanup) + dev->bus->dma_cleanup(dev); + device_links_driver_cleanup(dev); klist_remove(&dev->p->knode_driver); diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 3e1212b6fb58..b4eecdaafde5 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -60,6 +60,8 @@ struct fwnode_handle; * bus supports. * @dma_configure: Called to setup DMA configuration on a device on * this bus. + * @dma_cleanup: Called to cleanup DMA configuration on a device on + * this bus. * @pm: Power management operations of this bus, callback the specific * device driver's pm-ops. * @iommu_ops: IOMMU specific operations for this bus, used to attach IOMMU @@ -104,6 +106,7 @@ struct bus_type { int (*num_vf)(struct device *dev); int (*dma_configure)(struct device *dev); + void (*dma_cleanup)(struct device *dev); const struct dev_pm_ops *pm; -- Gitee From 320d02575abb99d75a19bbcf8a66e2bc5eb52134 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 18 Apr 2022 08:49:52 +0800 Subject: [PATCH 07/11] amba: Stop sharing platform_dma_configure() ANBZ: #26970 KOSBZ: #0000233 commit 4a6d9dd564d0e7339fc15ecc5ce66db4ad842be2 upstream. Stop sharing platform_dma_configure() helper as they are about to have their own bus dma_configure callbacks. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220418005000.897664-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Chu Guangqing --- drivers/amba/bus.c | 19 ++++++++++++++++++- drivers/base/platform.c | 2 +- include/linux/platform_device.h | 2 -- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index 47c72447ccd5..93964b1fe352 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -21,6 +21,8 @@ #include #include +#include +#include #define to_amba_driver(d) container_of(d, struct amba_driver, drv) @@ -152,6 +154,21 @@ static struct attribute *amba_dev_attrs[] = { }; ATTRIBUTE_GROUPS(amba_dev); +static int amba_dma_configure(struct device *dev) +{ + enum dev_dma_attr attr; + int ret = 0; + + if (dev->of_node) { + ret = of_dma_configure(dev, dev->of_node, true); + } else if (has_acpi_companion(dev)) { + attr = acpi_get_dma_attr(to_acpi_device_node(dev->fwnode)); + ret = acpi_dma_configure(dev, attr); + } + + return ret; +} + #ifdef CONFIG_PM /* * Hooks to provide runtime PM of the pclk (bus clock). It is safe to @@ -217,7 +234,7 @@ struct bus_type amba_bustype = { .dev_groups = amba_dev_groups, .match = amba_match, .uevent = amba_uevent, - .dma_configure = platform_dma_configure, + .dma_configure = amba_dma_configure, .pm = &amba_pm, }; EXPORT_SYMBOL_GPL(amba_bustype); diff --git a/drivers/base/platform.c b/drivers/base/platform.c index ea8add164b89..7a64cef4ce6d 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -1457,7 +1457,7 @@ int platform_pm_restore(struct device *dev) #endif /* CONFIG_HIBERNATE_CALLBACKS */ -int platform_dma_configure(struct device *dev) +static int platform_dma_configure(struct device *dev) { enum dev_dma_attr attr; int ret = 0; diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 4323812f438a..bbbe631856a2 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -330,8 +330,6 @@ extern int platform_pm_restore(struct device *dev); #define platform_pm_restore NULL #endif -extern int platform_dma_configure(struct device *dev); - #ifdef CONFIG_PM_SLEEP #define USE_PLATFORM_PM_SLEEP_OPS \ .suspend = platform_pm_suspend, \ -- Gitee From 6010b574dc493591ffe343221cdbdc6138ce7d60 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 18 Apr 2022 08:49:50 +0800 Subject: [PATCH 08/11] iommu: Add DMA ownership management interfaces ANBZ: #26970 KOSBZ: #0000233 commit 1ea2a07a532b0e22aabe7e8483f935c672b9e7ed upstream. Multiple devices may be placed in the same IOMMU group because they cannot be isolated from each other. These devices must either be entirely under kernel control or userspace control, never a mixture. This adds dma ownership management in iommu core and exposes several interfaces for the device drivers and the device userspace assignment framework (i.e. VFIO), so that any conflict between user and kernel controlled dma could be detected at the beginning. The device driver oriented interfaces are, int iommu_device_use_default_domain(struct device *dev); void iommu_device_unuse_default_domain(struct device *dev); By calling iommu_device_use_default_domain(), the device driver tells the iommu layer that the device dma is handled through the kernel DMA APIs. The iommu layer will manage the IOVA and use the default domain for DMA address translation. The device user-space assignment framework oriented interfaces are, int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner); void iommu_group_release_dma_owner(struct iommu_group *group); bool iommu_group_dma_owner_claimed(struct iommu_group *group); The device userspace assignment must be disallowed if the DMA owner claiming interface returns failure. Signed-off-by: Jason Gunthorpe Signed-off-by: Kevin Tian Signed-off-by: Lu Baolu Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20220418005000.897664-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 153 +++++++++++++++++++++++++++++++++++++++++- include/linux/iommu.h | 31 +++++++++ 2 files changed, 181 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index ec1c8f83f2f1..e5d4662cd70c 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -63,6 +63,8 @@ struct iommu_group { struct iommu_domain *default_domain; struct iommu_domain *domain; struct list_head entry; + unsigned int owner_cnt; + void *owner; }; struct group_device { @@ -299,7 +301,11 @@ int iommu_probe_device(struct device *dev) */ iommu_alloc_default_domain(group, dev); - if (group->default_domain) { + /* + * If device joined an existing group which has been claimed, don't + * attach the default domain. + */ + if (group->default_domain && !group->owner) { ret = __iommu_attach_device(group->default_domain, dev); if (ret) { iommu_group_put(group); @@ -2642,7 +2648,7 @@ static int __iommu_attach_group(struct iommu_domain *domain, { int ret; - if (group->default_domain && group->domain != group->default_domain) + if (group->domain && group->domain != group->default_domain) return -EBUSY; ret = __iommu_group_for_each_dev(group, domain, @@ -2679,7 +2685,11 @@ static void __iommu_detach_group(struct iommu_domain *domain, { int ret; - if (!group->default_domain) { + /* + * If the group has been claimed already, do not re-attach the default + * domain. + */ + if (!group->default_domain || group->owner) { __iommu_group_for_each_dev(group, domain, iommu_group_do_detach_device); group->domain = NULL; @@ -4074,3 +4084,140 @@ ioasid_t iommu_get_pasid_from_domain(struct device *dev, struct iommu_domain *do return pasid; } + +/** + * iommu_device_use_default_domain() - Device driver wants to handle device + * DMA through the kernel DMA API. + * @dev: The device. + * + * The device driver about to bind @dev wants to do DMA through the kernel + * DMA API. Return 0 if it is allowed, otherwise an error. + */ +int iommu_device_use_default_domain(struct device *dev) +{ + struct iommu_group *group = iommu_group_get(dev); + int ret = 0; + + if (!group) + return 0; + + mutex_lock(&group->mutex); + if (group->owner_cnt) { + if (group->domain != group->default_domain || + group->owner) { + ret = -EBUSY; + goto unlock_out; + } + } + + group->owner_cnt++; + +unlock_out: + mutex_unlock(&group->mutex); + iommu_group_put(group); + + return ret; +} + +/** + * iommu_device_unuse_default_domain() - Device driver stops handling device + * DMA through the kernel DMA API. + * @dev: The device. + * + * The device driver doesn't want to do DMA through kernel DMA API anymore. + * It must be called after iommu_device_use_default_domain(). + */ +void iommu_device_unuse_default_domain(struct device *dev) +{ + struct iommu_group *group = iommu_group_get(dev); + + if (!group) + return; + + mutex_lock(&group->mutex); + if (!WARN_ON(!group->owner_cnt)) + group->owner_cnt--; + + mutex_unlock(&group->mutex); + iommu_group_put(group); +} + +/** + * iommu_group_claim_dma_owner() - Set DMA ownership of a group + * @group: The group. + * @owner: Caller specified pointer. Used for exclusive ownership. + * + * This is to support backward compatibility for vfio which manages + * the dma ownership in iommu_group level. New invocations on this + * interface should be prohibited. + */ +int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner) +{ + int ret = 0; + + mutex_lock(&group->mutex); + if (group->owner_cnt) { + ret = -EPERM; + goto unlock_out; + } else { + if (group->domain && group->domain != group->default_domain) { + ret = -EBUSY; + goto unlock_out; + } + + group->owner = owner; + if (group->domain) + __iommu_detach_group(group->domain, group); + } + + group->owner_cnt++; +unlock_out: + mutex_unlock(&group->mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner); + +/** + * iommu_group_release_dma_owner() - Release DMA ownership of a group + * @group: The group. + * + * Release the DMA ownership claimed by iommu_group_claim_dma_owner(). + */ +void iommu_group_release_dma_owner(struct iommu_group *group) +{ + mutex_lock(&group->mutex); + if (WARN_ON(!group->owner_cnt || !group->owner)) + goto unlock_out; + + group->owner_cnt = 0; + /* + * The UNMANAGED domain should be detached before all USER + * owners have been released. + */ + if (!WARN_ON(group->domain) && group->default_domain) + __iommu_attach_group(group->default_domain, group); + group->owner = NULL; +unlock_out: + mutex_unlock(&group->mutex); +} +EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner); + +/** + * iommu_group_dma_owner_claimed() - Query group dma ownership status + * @group: The group. + * + * This provides status query on a given group. It is racy and only for + * non-binding status reporting. + */ +bool iommu_group_dma_owner_claimed(struct iommu_group *group) +{ + unsigned int user; + + mutex_lock(&group->mutex); + user = group->owner_cnt; + mutex_unlock(&group->mutex); + + return user; +} +EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index b565d2f4840b..c7b4fc79f48a 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -766,6 +766,13 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, void iommu_sva_unbind_device(struct iommu_sva *handle); u32 iommu_sva_get_pasid(struct iommu_sva *handle); +int iommu_device_use_default_domain(struct device *dev); +void iommu_device_unuse_default_domain(struct device *dev); + +int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner); +void iommu_group_release_dma_owner(struct iommu_group *group); +bool iommu_group_dma_owner_claimed(struct iommu_group *group); + int iommu_attach_device_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); void iommu_detach_device_pasid(struct iommu_domain *domain, @@ -1266,6 +1273,30 @@ static ioasid_t iommu_get_pasid_from_domain(struct device *dev, { return INVALID_IOASID; } + +static inline int iommu_device_use_default_domain(struct device *dev) +{ + return 0; +} + +static inline void iommu_device_unuse_default_domain(struct device *dev) +{ +} + +static inline int +iommu_group_claim_dma_owner(struct iommu_group *group, void *owner) +{ + return -ENODEV; +} + +static inline void iommu_group_release_dma_owner(struct iommu_group *group) +{ +} + +static inline bool iommu_group_dma_owner_claimed(struct iommu_group *group) +{ + return false; +} #endif /* CONFIG_IOMMU_API */ /** -- Gitee From 8f89ff54dd782ce5a99c36a65dfa1d27c9a92a60 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 18 Apr 2022 08:49:53 +0800 Subject: [PATCH 09/11] bus: platform,amba,fsl-mc,PCI: Add device DMA ownership management ANBZ: #26970 KOSBZ: #0000233 commit 512881eacfa72c2136b27b9934b7b27504a9efc2 upstream. The devices on platform/amba/fsl-mc/PCI buses could be bound to drivers with the device DMA managed by kernel drivers or user-space applications. Unfortunately, multiple devices may be placed in the same IOMMU group because they cannot be isolated from each other. The DMA on these devices must either be entirely under kernel control or userspace control, never a mixture. Otherwise the driver integrity is not guaranteed because they could access each other through the peer-to-peer accesses which by-pass the IOMMU protection. This checks and sets the default DMA mode during driver binding, and cleanups during driver unbinding. In the default mode, the device DMA is managed by the device driver which handles DMA operations through the kernel DMA APIs (see Documentation/core-api/dma-api.rst). For cases where the devices are assigned for userspace control through the userspace driver framework(i.e. VFIO), the drivers(for example, vfio_pci/ vfio_platfrom etc.) may set a new flag (driver_managed_dma) to skip this default setting in the assumption that the drivers know what they are doing with the device DMA. Calling iommu_device_use_default_domain() before {of,acpi}_dma_configure is currently a problem. As things stand, the IOMMU driver ignored the initial iommu_probe_device() call when the device was added, since at that point it had no fwspec yet. In this situation, {of,acpi}_iommu_configure() are retriggering iommu_probe_device() after the IOMMU driver has seen the firmware data via .of_xlate to learn that it actually responsible for the given device. As the result, before that gets fixed, iommu_use_default_domain() goes at the end, and calls arch_teardown_dma_ops() if it fails. Cc: Greg Kroah-Hartman Cc: Bjorn Helgaas Cc: Stuart Yoder Cc: Laurentiu Tudor Signed-off-by: Lu Baolu Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jason Gunthorpe Reviewed-by: Robin Murphy Tested-by: Eric Auger Link: https://lore.kernel.org/r/20220418005000.897664-5-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Chu Guangqing --- drivers/amba/bus.c | 18 ++++++++++++++++++ drivers/base/platform.c | 18 ++++++++++++++++++ drivers/bus/fsl-mc/fsl-mc-bus.c | 24 ++++++++++++++++++++++-- drivers/pci/pci-driver.c | 18 ++++++++++++++++++ include/linux/amba/bus.h | 8 ++++++++ include/linux/fsl/mc.h | 8 ++++++++ include/linux/pci.h | 8 ++++++++ include/linux/platform_device.h | 8 ++++++++ 8 files changed, 108 insertions(+), 2 deletions(-) diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index 93964b1fe352..03de6223116c 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -23,6 +23,8 @@ #include #include #include +#include +#include #define to_amba_driver(d) container_of(d, struct amba_driver, drv) @@ -156,6 +158,7 @@ ATTRIBUTE_GROUPS(amba_dev); static int amba_dma_configure(struct device *dev) { + struct amba_driver *drv = to_amba_driver(dev->driver); enum dev_dma_attr attr; int ret = 0; @@ -166,9 +169,23 @@ static int amba_dma_configure(struct device *dev) ret = acpi_dma_configure(dev, attr); } + if (!ret && !drv->driver_managed_dma) { + ret = iommu_device_use_default_domain(dev); + if (ret) + arch_teardown_dma_ops(dev); + } + return ret; } +static void amba_dma_cleanup(struct device *dev) +{ + struct amba_driver *drv = to_amba_driver(dev->driver); + + if (!drv->driver_managed_dma) + iommu_device_unuse_default_domain(dev); +} + #ifdef CONFIG_PM /* * Hooks to provide runtime PM of the pclk (bus clock). It is safe to @@ -235,6 +252,7 @@ struct bus_type amba_bustype = { .match = amba_match, .uevent = amba_uevent, .dma_configure = amba_dma_configure, + .dma_cleanup = amba_dma_cleanup, .pm = &amba_pm, }; EXPORT_SYMBOL_GPL(amba_bustype); diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 7a64cef4ce6d..4087c5fa82e3 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -30,6 +30,8 @@ #include #include #include +#include +#include #include "base.h" #include "power/power.h" @@ -1459,6 +1461,7 @@ int platform_pm_restore(struct device *dev) static int platform_dma_configure(struct device *dev) { + struct platform_driver *drv = to_platform_driver(dev->driver); enum dev_dma_attr attr; int ret = 0; @@ -1469,9 +1472,23 @@ static int platform_dma_configure(struct device *dev) ret = acpi_dma_configure(dev, attr); } + if (!ret && !drv->driver_managed_dma) { + ret = iommu_device_use_default_domain(dev); + if (ret) + arch_teardown_dma_ops(dev); + } + return ret; } +static void platform_dma_cleanup(struct device *dev) +{ + struct platform_driver *drv = to_platform_driver(dev->driver); + + if (!drv->driver_managed_dma) + iommu_device_unuse_default_domain(dev); +} + static const struct dev_pm_ops platform_dev_pm_ops = { .runtime_suspend = pm_generic_runtime_suspend, .runtime_resume = pm_generic_runtime_resume, @@ -1484,6 +1501,7 @@ struct bus_type platform_bus_type = { .match = platform_match, .uevent = platform_uevent, .dma_configure = platform_dma_configure, + .dma_cleanup = platform_dma_cleanup, .pm = &platform_dev_pm_ops, }; EXPORT_SYMBOL_GPL(platform_bus_type); diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c index 5ffe7333a3c9..922d1843c745 100644 --- a/drivers/bus/fsl-mc/fsl-mc-bus.c +++ b/drivers/bus/fsl-mc/fsl-mc-bus.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "fsl-mc-private.h" @@ -135,15 +136,33 @@ static int fsl_mc_dma_configure(struct device *dev) { struct device *dma_dev = dev; struct fsl_mc_device *mc_dev = to_fsl_mc_device(dev); + struct fsl_mc_driver *mc_drv = to_fsl_mc_driver(dev->driver); u32 input_id = mc_dev->icid; + int ret; while (dev_is_fsl_mc(dma_dev)) dma_dev = dma_dev->parent; if (dev_of_node(dma_dev)) - return of_dma_configure_id(dev, dma_dev->of_node, 0, &input_id); + ret = of_dma_configure_id(dev, dma_dev->of_node, 0, &input_id); + else + ret = acpi_dma_configure_id(dev, DEV_DMA_COHERENT, &input_id); + + if (!ret && !mc_drv->driver_managed_dma) { + ret = iommu_device_use_default_domain(dev); + if (ret) + arch_teardown_dma_ops(dev); + } + + return ret; +} + +static void fsl_mc_dma_cleanup(struct device *dev) +{ + struct fsl_mc_driver *mc_drv = to_fsl_mc_driver(dev->driver); - return acpi_dma_configure_id(dev, DEV_DMA_COHERENT, &input_id); + if (!mc_drv->driver_managed_dma) + iommu_device_unuse_default_domain(dev); } static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, @@ -212,6 +231,7 @@ struct bus_type fsl_mc_bus_type = { .match = fsl_mc_bus_match, .uevent = fsl_mc_bus_uevent, .dma_configure = fsl_mc_dma_configure, + .dma_cleanup = fsl_mc_dma_cleanup, .dev_groups = fsl_mc_dev_groups, }; EXPORT_SYMBOL_GPL(fsl_mc_bus_type); diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index a4730e300060..eae1e4a031da 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "pci.h" #include "pcie/portdrv.h" @@ -1597,6 +1598,7 @@ static int pci_bus_num_vf(struct device *dev) */ static int pci_dma_configure(struct device *dev) { + struct pci_driver *driver = to_pci_driver(dev->driver); struct device *bridge; int ret = 0; @@ -1612,9 +1614,24 @@ static int pci_dma_configure(struct device *dev) } pci_put_host_bridge_device(bridge); + + if (!ret && !driver->driver_managed_dma) { + ret = iommu_device_use_default_domain(dev); + if (ret) + arch_teardown_dma_ops(dev); + } + return ret; } +static void pci_dma_cleanup(struct device *dev) +{ + struct pci_driver *driver = to_pci_driver(dev->driver); + + if (!driver->driver_managed_dma) + iommu_device_unuse_default_domain(dev); +} + struct bus_type pci_bus_type = { .name = "pci", .match = pci_bus_match, @@ -1628,6 +1645,7 @@ struct bus_type pci_bus_type = { .pm = PCI_PM_OPS_PTR, .num_vf = pci_bus_num_vf, .dma_configure = pci_dma_configure, + .dma_cleanup = pci_dma_cleanup, }; EXPORT_SYMBOL(pci_bus_type); diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 6cc93ab5b809..dcb2620e159a 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -79,6 +79,14 @@ struct amba_driver { void (*remove)(struct amba_device *); void (*shutdown)(struct amba_device *); const struct amba_id *id_table; + /* + * For most device drivers, no need to care about this flag as long as + * all DMAs are handled through the kernel DMA API. For some special + * ones, for example VFIO drivers, they know how to manage the DMA + * themselves and set this flag so that the IOMMU layer will allow them + * to setup and manage their own I/O address space. + */ + bool driver_managed_dma; }; /* diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index db244874e834..3b715be770a2 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -31,6 +31,13 @@ struct fsl_mc_io; * @shutdown: Function called at shutdown time to quiesce the device * @suspend: Function called when a device is stopped * @resume: Function called when a device is resumed + * @driver_managed_dma: Device driver doesn't use kernel DMA API for DMA. + * For most device drivers, no need to care about this flag + * as long as all DMAs are handled through the kernel DMA API. + * For some special ones, for example VFIO drivers, they know + * how to manage the DMA themselves and set this flag so that + * the IOMMU layer will allow them to setup and manage their + * own I/O address space. * * Generic DPAA device driver object for device drivers that are registered * with a DPRC bus. This structure is to be embedded in each device-specific @@ -44,6 +51,7 @@ struct fsl_mc_driver { void (*shutdown)(struct fsl_mc_device *dev); int (*suspend)(struct fsl_mc_device *dev, pm_message_t state); int (*resume)(struct fsl_mc_device *dev); + bool driver_managed_dma; }; #define to_fsl_mc_driver(_drv) \ diff --git a/include/linux/pci.h b/include/linux/pci.h index 9c79cf6a6142..d225cc5a80a4 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -906,6 +906,13 @@ struct module; * @groups: Sysfs attribute groups. * @driver: Driver model structure. * @dynids: List of dynamically added device IDs. + * @driver_managed_dma: Device driver doesn't use kernel DMA API for DMA. + * For most device drivers, no need to care about this flag + * as long as all DMAs are handled through the kernel DMA API. + * For some special ones, for example VFIO drivers, they know + * how to manage the DMA themselves and set this flag so that + * the IOMMU layer will allow them to setup and manage their + * own I/O address space. */ struct pci_driver { struct list_head node; @@ -921,6 +928,7 @@ struct pci_driver { const struct attribute_group **groups; struct device_driver driver; struct pci_dynids dynids; + bool driver_managed_dma; CK_KABI_RESERVE(1) CK_KABI_RESERVE(2) diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index bbbe631856a2..6fd857b0e296 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -212,6 +212,14 @@ struct platform_driver { struct device_driver driver; const struct platform_device_id *id_table; bool prevent_deferred_probe; + /* + * For most device drivers, no need to care about this flag as long as + * all DMAs are handled through the kernel DMA API. For some special + * ones, for example VFIO drivers, they know how to manage the DMA + * themselves and set this flag so that the IOMMU layer will allow them + * to setup and manage their own I/O address space. + */ + bool driver_managed_dma; }; #define to_platform_driver(drv) (container_of((drv), struct platform_driver, \ -- Gitee From ff67ab8a5d1fb47b6dc3f39e720b3e33f7ae1300 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 19 Oct 2022 15:41:25 -0500 Subject: [PATCH 10/11] PCI/portdrv: Squash into portdrv.c ANBZ: #26970 KOSBZ: #0000233 commit a1ccd3d911382f68753033c6adcf69663c2a9fc5 upstream. Squash portdrv_core.c and portdrv_pci.c into portdrv.c to make it easier to find things. The whole thing is less than 1000 lines, and it's a pain to bounce back and forth between two files. Several portdrv_core.c functions were non-static because they were referenced from portdrv_pci.c. Make them static since they're now all in portdrv.c. No functional change intended. Link: https://lore.kernel.org/r/20221019204127.44463-2-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Signed-off-by: Bjorn Helgaas Reviewed-by: Christoph Hellwig Reviewed-by: Keith Busch Signed-off-by: Chu Guangqing --- drivers/pci/pcie/Makefile | 2 +- .../pci/pcie/{portdrv_core.c => portdrv.c} | 252 ++++++++++++++- drivers/pci/pcie/portdrv.h | 10 - drivers/pci/pcie/portdrv_pci.c | 286 ------------------ 4 files changed, 244 insertions(+), 306 deletions(-) rename drivers/pci/pcie/{portdrv_core.c => portdrv.c} (72%) delete mode 100644 drivers/pci/pcie/portdrv_pci.c diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile index b2980db88cc0..54498e3a7703 100644 --- a/drivers/pci/pcie/Makefile +++ b/drivers/pci/pcie/Makefile @@ -2,7 +2,7 @@ # # Makefile for PCI Express features and port driver -pcieportdrv-y := portdrv_core.o portdrv_pci.o err.o rcec.o +pcieportdrv-y := portdrv.o err.o rcec.o obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv.c similarity index 72% rename from drivers/pci/pcie/portdrv_core.c rename to drivers/pci/pcie/portdrv.c index adec625282c6..d1770bb25073 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv.c @@ -1,11 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Purpose: PCI Express Port Bus Driver's Core Functions + * Purpose: PCI Express Port Bus Driver * * Copyright (C) 2004 Intel * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com) */ +#include +#include #include #include #include @@ -344,7 +346,7 @@ static int pcie_device_init(struct pci_dev *pdev, int service, int irq) * Allocate the port extension structure and register services associated with * the port. */ -int pcie_port_device_register(struct pci_dev *dev) +static int pcie_port_device_register(struct pci_dev *dev) { int status, capabilities, i, nr_service; int irqs[PCIE_PORT_DEVICE_MAXSERVICES]; @@ -398,7 +400,7 @@ int pcie_port_device_register(struct pci_dev *dev) typedef int (*pcie_callback_t)(struct pcie_device *); -int pcie_port_device_iter(struct device *dev, void *data) +static int pcie_port_device_iter(struct device *dev, void *data) { struct pcie_port_service_driver *service_driver; size_t offset = *(size_t *)data; @@ -418,13 +420,13 @@ int pcie_port_device_iter(struct device *dev, void *data) * pcie_port_device_suspend - suspend port services associated with a PCIe port * @dev: PCI Express port to handle */ -int pcie_port_device_suspend(struct device *dev) +static int pcie_port_device_suspend(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, suspend); return device_for_each_child(dev, &off, pcie_port_device_iter); } -int pcie_port_device_resume_noirq(struct device *dev) +static int pcie_port_device_resume_noirq(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, resume_noirq); return device_for_each_child(dev, &off, pcie_port_device_iter); @@ -434,7 +436,7 @@ int pcie_port_device_resume_noirq(struct device *dev) * pcie_port_device_resume - resume port services associated with a PCIe port * @dev: PCI Express port to handle */ -int pcie_port_device_resume(struct device *dev) +static int pcie_port_device_resume(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, resume); return device_for_each_child(dev, &off, pcie_port_device_iter); @@ -444,7 +446,7 @@ int pcie_port_device_resume(struct device *dev) * pcie_port_device_runtime_suspend - runtime suspend port services * @dev: PCI Express port to handle */ -int pcie_port_device_runtime_suspend(struct device *dev) +static int pcie_port_device_runtime_suspend(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, runtime_suspend); return device_for_each_child(dev, &off, pcie_port_device_iter); @@ -454,7 +456,7 @@ int pcie_port_device_runtime_suspend(struct device *dev) * pcie_port_device_runtime_resume - runtime resume port services * @dev: PCI Express port to handle */ -int pcie_port_device_runtime_resume(struct device *dev) +static int pcie_port_device_runtime_resume(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, runtime_resume); return device_for_each_child(dev, &off, pcie_port_device_iter); @@ -518,7 +520,7 @@ EXPORT_SYMBOL_GPL(pcie_port_find_device); * Remove PCI Express port service devices associated with given port and * disable MSI-X or MSI for the port. */ -void pcie_port_device_remove(struct pci_dev *dev) +static void pcie_port_device_remove(struct pci_dev *dev) { device_for_each_child(&dev->dev, NULL, remove_iter); pci_free_irq_vectors(dev); @@ -619,3 +621,235 @@ void pcie_port_service_unregister(struct pcie_port_service_driver *drv) driver_unregister(&drv->driver); } EXPORT_SYMBOL(pcie_port_service_unregister); + +/* If this switch is set, PCIe port native services should not be enabled. */ +bool pcie_ports_disabled; + +/* + * If the user specified "pcie_ports=native", use the PCIe services regardless + * of whether the platform has given us permission. On ACPI systems, this + * means we ignore _OSC. + */ +bool pcie_ports_native; + +/* + * If the user specified "pcie_ports=dpc-native", use the Linux DPC PCIe + * service even if the platform hasn't given us permission. + */ +bool pcie_ports_dpc_native; + +static int __init pcie_port_setup(char *str) +{ + if (!strncmp(str, "compat", 6)) + pcie_ports_disabled = true; + else if (!strncmp(str, "native", 6)) + pcie_ports_native = true; + else if (!strncmp(str, "dpc-native", 10)) + pcie_ports_dpc_native = true; + + return 1; +} +__setup("pcie_ports=", pcie_port_setup); + +/* global data */ + +#ifdef CONFIG_PM +static int pcie_port_runtime_suspend(struct device *dev) +{ + if (!to_pci_dev(dev)->bridge_d3) + return -EBUSY; + + return pcie_port_device_runtime_suspend(dev); +} + +static int pcie_port_runtime_idle(struct device *dev) +{ + /* + * Assume the PCI core has set bridge_d3 whenever it thinks the port + * should be good to go to D3. Everything else, including moving + * the port to D3, is handled by the PCI core. + */ + return to_pci_dev(dev)->bridge_d3 ? 0 : -EBUSY; +} + +static const struct dev_pm_ops pcie_portdrv_pm_ops = { + .suspend = pcie_port_device_suspend, + .resume_noirq = pcie_port_device_resume_noirq, + .resume = pcie_port_device_resume, + .freeze = pcie_port_device_suspend, + .thaw = pcie_port_device_resume, + .poweroff = pcie_port_device_suspend, + .restore_noirq = pcie_port_device_resume_noirq, + .restore = pcie_port_device_resume, + .runtime_suspend = pcie_port_runtime_suspend, + .runtime_resume = pcie_port_device_runtime_resume, + .runtime_idle = pcie_port_runtime_idle, +}; + +#define PCIE_PORTDRV_PM_OPS (&pcie_portdrv_pm_ops) + +#else /* !PM */ + +#define PCIE_PORTDRV_PM_OPS NULL +#endif /* !PM */ + +/* + * pcie_portdrv_probe - Probe PCI-Express port devices + * @dev: PCI-Express port device being probed + * + * If detected invokes the pcie_port_device_register() method for + * this port device. + * + */ +static int pcie_portdrv_probe(struct pci_dev *dev, + const struct pci_device_id *id) +{ + int type = pci_pcie_type(dev); + int status; + + if (!pci_is_pcie(dev) || + ((type != PCI_EXP_TYPE_ROOT_PORT) && + (type != PCI_EXP_TYPE_UPSTREAM) && + (type != PCI_EXP_TYPE_DOWNSTREAM) && + (type != PCI_EXP_TYPE_RC_EC))) + return -ENODEV; + + if (type == PCI_EXP_TYPE_RC_EC) + pcie_link_rcec(dev); + + status = pcie_port_device_register(dev); + if (status) + return status; + + pci_save_state(dev); + + dev_pm_set_driver_flags(&dev->dev, DPM_FLAG_NO_DIRECT_COMPLETE | + DPM_FLAG_SMART_SUSPEND); + + if (pci_bridge_d3_possible(dev)) { + /* + * Keep the port resumed 100ms to make sure things like + * config space accesses from userspace (lspci) will not + * cause the port to repeatedly suspend and resume. + */ + pm_runtime_set_autosuspend_delay(&dev->dev, 100); + pm_runtime_use_autosuspend(&dev->dev); + pm_runtime_mark_last_busy(&dev->dev); + pm_runtime_put_autosuspend(&dev->dev); + pm_runtime_allow(&dev->dev); + } + + return 0; +} + +static void pcie_portdrv_remove(struct pci_dev *dev) +{ + if (pci_bridge_d3_possible(dev)) { + pm_runtime_forbid(&dev->dev); + pm_runtime_get_noresume(&dev->dev); + pm_runtime_dont_use_autosuspend(&dev->dev); + } + + pcie_port_device_remove(dev); +} + +static pci_ers_result_t pcie_portdrv_error_detected(struct pci_dev *dev, + pci_channel_state_t error) +{ + if (error == pci_channel_io_frozen) + return PCI_ERS_RESULT_NEED_RESET; + return PCI_ERS_RESULT_CAN_RECOVER; +} + +static pci_ers_result_t pcie_portdrv_slot_reset(struct pci_dev *dev) +{ + size_t off = offsetof(struct pcie_port_service_driver, slot_reset); + device_for_each_child(&dev->dev, &off, pcie_port_device_iter); + + pci_restore_state(dev); + pci_save_state(dev); + return PCI_ERS_RESULT_RECOVERED; +} + +static pci_ers_result_t pcie_portdrv_mmio_enabled(struct pci_dev *dev) +{ + return PCI_ERS_RESULT_RECOVERED; +} + +/* + * LINUX Device Driver Model + */ +static const struct pci_device_id port_pci_ids[] = { + /* handle any PCI-Express port */ + { PCI_DEVICE_CLASS(PCI_CLASS_BRIDGE_PCI_NORMAL, ~0) }, + /* subtractive decode PCI-to-PCI bridge, class type is 060401h */ + { PCI_DEVICE_CLASS(PCI_CLASS_BRIDGE_PCI_SUBTRACTIVE, ~0) }, + /* handle any Root Complex Event Collector */ + { PCI_DEVICE_CLASS(((PCI_CLASS_SYSTEM_RCEC << 8) | 0x00), ~0) }, + { }, +}; + +static const struct pci_error_handlers pcie_portdrv_err_handler = { + .error_detected = pcie_portdrv_error_detected, + .slot_reset = pcie_portdrv_slot_reset, + .mmio_enabled = pcie_portdrv_mmio_enabled, +}; + +static struct pci_driver pcie_portdriver = { + .name = "pcieport", + .id_table = &port_pci_ids[0], + + .probe = pcie_portdrv_probe, + .remove = pcie_portdrv_remove, + .shutdown = pcie_portdrv_remove, + + .err_handler = &pcie_portdrv_err_handler, + + .driver_managed_dma = true, + + .driver.pm = PCIE_PORTDRV_PM_OPS, +}; + +static int __init dmi_pcie_pme_disable_msi(const struct dmi_system_id *d) +{ + pr_notice("%s detected: will not use MSI for PCIe PME signaling\n", + d->ident); + pcie_pme_disable_msi(); + return 0; +} + +static const struct dmi_system_id pcie_portdrv_dmi_table[] __initconst = { + /* + * Boxes that should not use MSI for PCIe PME signaling. + */ + { + .callback = dmi_pcie_pme_disable_msi, + .ident = "MSI Wind U-100", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, + "MICRO-STAR INTERNATIONAL CO., LTD"), + DMI_MATCH(DMI_PRODUCT_NAME, "U-100"), + }, + }, + {} +}; + +static void __init pcie_init_services(void) +{ + pcie_aer_init(); + pcie_pme_init(); + pcie_dpc_init(); + pcie_hp_init(); +} + +static int __init pcie_portdrv_init(void) +{ + if (pcie_ports_disabled) + return -EACCES; + + pcie_init_services(); + dmi_check_system(pcie_portdrv_dmi_table); + + return pci_register_driver(&pcie_portdriver); +} +device_initcall(pcie_portdrv_init); diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 41fe1ffd5907..ee041e47eec9 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -111,16 +111,6 @@ void pcie_port_service_unregister(struct pcie_port_service_driver *new); #define get_descriptor_id(type, service) (((type - 4) << 8) | service) extern struct bus_type pcie_port_bus_type; -int pcie_port_device_register(struct pci_dev *dev); -int pcie_port_device_iter(struct device *dev, void *data); -#ifdef CONFIG_PM -int pcie_port_device_suspend(struct device *dev); -int pcie_port_device_resume_noirq(struct device *dev); -int pcie_port_device_resume(struct device *dev); -int pcie_port_device_runtime_suspend(struct device *dev); -int pcie_port_device_runtime_resume(struct device *dev); -#endif -void pcie_port_device_remove(struct pci_dev *dev); int __must_check pcie_port_bus_register(void); void pcie_port_bus_unregister(void); diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c deleted file mode 100644 index b73eafbe437b..000000000000 --- a/drivers/pci/pcie/portdrv_pci.c +++ /dev/null @@ -1,286 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Purpose: PCI Express Port Bus Driver - * Author: Tom Nguyen - * - * Copyright (C) 2004 Intel - * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com) - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "../pci.h" -#include "portdrv.h" - -/* If this switch is set, PCIe port native services should not be enabled. */ -bool pcie_ports_disabled; - -/* - * If the user specified "pcie_ports=native", use the PCIe services regardless - * of whether the platform has given us permission. On ACPI systems, this - * means we ignore _OSC. - */ -bool pcie_ports_native; - -/* - * If the user specified "pcie_ports=dpc-native", use the Linux DPC PCIe - * service even if the platform hasn't given us permission. - */ -bool pcie_ports_dpc_native; - -static int __init pcie_port_setup(char *str) -{ - if (!strncmp(str, "compat", 6)) - pcie_ports_disabled = true; - else if (!strncmp(str, "native", 6)) - pcie_ports_native = true; - else if (!strncmp(str, "dpc-native", 10)) - pcie_ports_dpc_native = true; - - return 1; -} -__setup("pcie_ports=", pcie_port_setup); - -/* global data */ - -#ifdef CONFIG_PM -static int pcie_port_runtime_suspend(struct device *dev) -{ - if (!to_pci_dev(dev)->bridge_d3) - return -EBUSY; - - return pcie_port_device_runtime_suspend(dev); -} - -static int pcie_port_runtime_idle(struct device *dev) -{ - /* - * Assume the PCI core has set bridge_d3 whenever it thinks the port - * should be good to go to D3. Everything else, including moving - * the port to D3, is handled by the PCI core. - */ - return to_pci_dev(dev)->bridge_d3 ? 0 : -EBUSY; -} - -static const struct dev_pm_ops pcie_portdrv_pm_ops = { - .suspend = pcie_port_device_suspend, - .resume_noirq = pcie_port_device_resume_noirq, - .resume = pcie_port_device_resume, - .freeze = pcie_port_device_suspend, - .thaw = pcie_port_device_resume, - .poweroff = pcie_port_device_suspend, - .restore_noirq = pcie_port_device_resume_noirq, - .restore = pcie_port_device_resume, - .runtime_suspend = pcie_port_runtime_suspend, - .runtime_resume = pcie_port_device_runtime_resume, - .runtime_idle = pcie_port_runtime_idle, -}; - -#define PCIE_PORTDRV_PM_OPS (&pcie_portdrv_pm_ops) - -#else /* !PM */ - -#define PCIE_PORTDRV_PM_OPS NULL -#endif /* !PM */ - -/* - * pcie_portdrv_probe - Probe PCI-Express port devices - * @dev: PCI-Express port device being probed - * - * If detected invokes the pcie_port_device_register() method for - * this port device. - * - */ -static int pcie_portdrv_probe(struct pci_dev *dev, - const struct pci_device_id *id) -{ - int type = pci_pcie_type(dev); - int status; - - if (!pci_is_pcie(dev) || - ((type != PCI_EXP_TYPE_ROOT_PORT) && - (type != PCI_EXP_TYPE_UPSTREAM) && - (type != PCI_EXP_TYPE_DOWNSTREAM) && - (type != PCI_EXP_TYPE_RC_EC))) - return -ENODEV; - - if (type == PCI_EXP_TYPE_RC_EC) - pcie_link_rcec(dev); - - status = pcie_port_device_register(dev); - if (status) - return status; - - pci_save_state(dev); - - dev_pm_set_driver_flags(&dev->dev, DPM_FLAG_NO_DIRECT_COMPLETE | - DPM_FLAG_SMART_SUSPEND); - - if (pci_bridge_d3_possible(dev)) { - /* - * Keep the port resumed 100ms to make sure things like - * config space accesses from userspace (lspci) will not - * cause the port to repeatedly suspend and resume. - */ - pm_runtime_set_autosuspend_delay(&dev->dev, 100); - pm_runtime_use_autosuspend(&dev->dev); - pm_runtime_mark_last_busy(&dev->dev); - pm_runtime_put_autosuspend(&dev->dev); - pm_runtime_allow(&dev->dev); - } - - return 0; -} - -static void pcie_portdrv_remove(struct pci_dev *dev) -{ - if (pci_bridge_d3_possible(dev)) { - pm_runtime_forbid(&dev->dev); - pm_runtime_get_noresume(&dev->dev); - pm_runtime_dont_use_autosuspend(&dev->dev); - } - - pcie_port_device_remove(dev); - - pci_disable_device(dev); -} - -static void pcie_portdrv_shutdown(struct pci_dev *dev) -{ - if (pci_bridge_d3_possible(dev)) { - pm_runtime_forbid(&dev->dev); - pm_runtime_get_noresume(&dev->dev); - pm_runtime_dont_use_autosuspend(&dev->dev); - } - - pcie_port_device_remove(dev); -} - -static pci_ers_result_t pcie_portdrv_error_detected(struct pci_dev *dev, - pci_channel_state_t error) -{ - /* Root Port has no impact. Always recovers. */ - return PCI_ERS_RESULT_CAN_RECOVER; -} - -static pci_ers_result_t pcie_portdrv_slot_reset(struct pci_dev *dev) -{ - size_t off = offsetof(struct pcie_port_service_driver, slot_reset); - device_for_each_child(&dev->dev, &off, pcie_port_device_iter); - - pci_restore_state(dev); - pci_save_state(dev); - return PCI_ERS_RESULT_RECOVERED; -} - -static pci_ers_result_t pcie_portdrv_mmio_enabled(struct pci_dev *dev) -{ - return PCI_ERS_RESULT_RECOVERED; -} - -static int resume_iter(struct device *device, void *data) -{ - struct pcie_device *pcie_device; - struct pcie_port_service_driver *driver; - - if (device->bus == &pcie_port_bus_type && device->driver) { - driver = to_service_driver(device->driver); - if (driver && driver->error_resume) { - pcie_device = to_pcie_device(device); - - /* Forward error message to service drivers */ - driver->error_resume(pcie_device->port); - } - } - - return 0; -} - -static void pcie_portdrv_err_resume(struct pci_dev *dev) -{ - device_for_each_child(&dev->dev, NULL, resume_iter); -} - -/* - * LINUX Device Driver Model - */ -static const struct pci_device_id port_pci_ids[] = { - /* handle any PCI-Express port */ - { PCI_DEVICE_CLASS(PCI_CLASS_BRIDGE_PCI_NORMAL, ~0) }, - /* subtractive decode PCI-to-PCI bridge, class type is 060401h */ - { PCI_DEVICE_CLASS(PCI_CLASS_BRIDGE_PCI_SUBTRACTIVE, ~0) }, - /* handle any Root Complex Event Collector */ - { PCI_DEVICE_CLASS(((PCI_CLASS_SYSTEM_RCEC << 8) | 0x00), ~0) }, - { }, -}; - -static const struct pci_error_handlers pcie_portdrv_err_handler = { - .error_detected = pcie_portdrv_error_detected, - .slot_reset = pcie_portdrv_slot_reset, - .mmio_enabled = pcie_portdrv_mmio_enabled, - .resume = pcie_portdrv_err_resume, -}; - -static struct pci_driver pcie_portdriver = { - .name = "pcieport", - .id_table = &port_pci_ids[0], - - .probe = pcie_portdrv_probe, - .remove = pcie_portdrv_remove, - .shutdown = pcie_portdrv_shutdown, - - .err_handler = &pcie_portdrv_err_handler, - - .driver.pm = PCIE_PORTDRV_PM_OPS, -}; - -static int __init dmi_pcie_pme_disable_msi(const struct dmi_system_id *d) -{ - pr_notice("%s detected: will not use MSI for PCIe PME signaling\n", - d->ident); - pcie_pme_disable_msi(); - return 0; -} - -static const struct dmi_system_id pcie_portdrv_dmi_table[] __initconst = { - /* - * Boxes that should not use MSI for PCIe PME signaling. - */ - { - .callback = dmi_pcie_pme_disable_msi, - .ident = "MSI Wind U-100", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, - "MICRO-STAR INTERNATIONAL CO., LTD"), - DMI_MATCH(DMI_PRODUCT_NAME, "U-100"), - }, - }, - {} -}; - -static void __init pcie_init_services(void) -{ - pcie_aer_init(); - pcie_pme_init(); - pcie_dpc_init(); - pcie_hp_init(); -} - -static int __init pcie_portdrv_init(void) -{ - if (pcie_ports_disabled) - return -EACCES; - - pcie_init_services(); - dmi_check_system(pcie_portdrv_dmi_table); - - return pci_register_driver(&pcie_portdriver); -} -device_initcall(pcie_portdrv_init); -- Gitee From 507fd257c90ca8893a114b6a18455994de89a83a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 9 Dec 2022 11:01:00 -0600 Subject: [PATCH 11/11] PCI/portdrv: Allow AER service only for Root Ports & RCECs ANBZ: #26970 KOSBZ: #0000233 commit d8d2b65a940bb497749d66bdab59b530901d3854 upstream. Previously portdrv allowed the AER service for any device with an AER capability (assuming Linux had control of AER) even though the AER service driver only attaches to Root Port and RCECs. Because get_port_device_capability() included AER for non-RP, non-RCEC devices, we tried to initialize the AER IRQ even though these devices don't generate AER interrupts. Intel DG1 and DG2 discrete graphics cards contain a switch leading to a GPU. The switch supports AER but not MSI, so initializing an AER IRQ failed, and portdrv failed to claim the switch port at all. The GPU itself could be suspended, but the switch could not be put in a low-power state because it had no driver. Don't allow the AER service on non-Root Port, non-Root Complex Event Collector devices. This means we won't enable Bus Mastering if the device doesn't require MSI, the AER service will not appear in sysfs, and the AER service driver will not bind to the device. Link: https://lore.kernel.org/r/20221207084105.84947-1-mika.westerberg@linux.intel.com Link: https://lore.kernel.org/r/20221210002922.1749403-1-helgaas@kernel.org Based-on-patch-by: Mika Westerberg Signed-off-by: Bjorn Helgaas Reviewed-by: Kuppuswamy Sathyanarayanan Signed-off-by: Chu Guangqing --- drivers/pci/pcie/portdrv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pci/pcie/portdrv.c b/drivers/pci/pcie/portdrv.c index d1770bb25073..4cf4a4c9460d 100644 --- a/drivers/pci/pcie/portdrv.c +++ b/drivers/pci/pcie/portdrv.c @@ -259,7 +259,9 @@ static int get_port_device_capability(struct pci_dev *dev) } #ifdef CONFIG_PCIEAER - if (dev->aer_cap && pci_aer_available() && + if ((pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT || + pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC) && + dev->aer_cap && pci_aer_available() && (pcie_ports_native || host->native_aer)) services |= PCIE_PORT_SERVICE_AER; #endif -- Gitee