From 7932572ee6556422a37e20a33fec9b390885e6c9 Mon Sep 17 00:00:00 2001 From: Jay Chen Date: Mon, 1 Sep 2025 19:03:02 +0800 Subject: [PATCH 001/143] anolis: Revert "anolis: Add kh40000_iommu_dma_ops for KH-40000 platform" ANBZ: #13617 fix the revert conflict in driver/iommu/intel/iommu.c, becauase of adapting the zhaoxin in intel iommu driver This reverts commit 61c832fa983ba0bb6f372ff455bd3b47ba665474. Signed-off-by: Jay Chen --- arch/x86/kernel/zhaoxin_kh40000.c | 175 ------------------------------ drivers/iommu/intel/iommu.c | 3 - include/linux/dma-map-ops.h | 6 - 3 files changed, 184 deletions(-) diff --git a/arch/x86/kernel/zhaoxin_kh40000.c b/arch/x86/kernel/zhaoxin_kh40000.c index e8dd3bd43e72..c477b18892fa 100644 --- a/arch/x86/kernel/zhaoxin_kh40000.c +++ b/arch/x86/kernel/zhaoxin_kh40000.c @@ -174,178 +174,3 @@ const struct dma_map_ops kh40000_dma_direct_ops = { .map_sg = dma_direct_map_sg, .map_resource = dma_direct_map_resource, }; - -/* zhaoxin kh-40000 iommu dma ops */ -static const struct dma_map_ops *iommu_dma_ops; - -static void *kh40000_iommu_dma_alloc(struct device *dev, size_t size, - dma_addr_t *addr, gfp_t gfp, unsigned long attrs) -{ - gfp |= __GFP_THISNODE; - - return iommu_dma_ops->alloc(dev, size, addr, gfp, attrs); -} - -static void kh40000_iommu_dma_free(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t handle, unsigned long attrs) -{ - iommu_dma_ops->free(dev, size, cpu_addr, handle, attrs); -} - -static struct page *kh40000_dma_common_alloc_pages(struct device *dev, size_t size, - dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) -{ - return iommu_dma_ops->alloc_pages(dev, size, dma_handle, dir, gfp); -} - -static void kh40000_dma_common_free_pages(struct device *dev, size_t size, struct page *page, - dma_addr_t dma_handle, enum dma_data_direction dir) -{ - iommu_dma_ops->free_pages(dev, size, page, dma_handle, dir); -} - -static struct sg_table *kh40000_iommu_dma_alloc_noncontiguous(struct device *dev, - size_t size, enum dma_data_direction dir, gfp_t gfp, - unsigned long attrs) -{ - return iommu_dma_ops->alloc_noncontiguous(dev, size, dir, gfp, attrs); -} - -static void kh40000_iommu_dma_free_noncontiguous(struct device *dev, size_t size, - struct sg_table *sgt, enum dma_data_direction dir) -{ - return iommu_dma_ops->free_noncontiguous(dev, size, sgt, dir); -} - -static int kh40000_iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs) -{ - return iommu_dma_ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); -} - -static void kh40000_iommu_dma_unmap_page(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir, unsigned long attrs) -{ - kh40000_sync_single_dma_for_cpu(dev, addr, dir, 1); - iommu_dma_ops->unmap_page(dev, addr, size, dir, attrs); -} - -static int kh40000_iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs) -{ - return iommu_dma_ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size, attrs); -} - -static dma_addr_t kh40000_iommu_dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - return iommu_dma_ops->map_page(dev, page, offset, size, dir, attrs); -} - -static int kh40000_iommu_dma_map_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - return iommu_dma_ops->map_sg(dev, sgl, nents, dir, attrs); -} - -static void kh40000_iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sgl, - int nelems, enum dma_data_direction dir, unsigned long attrs) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nelems, i) - kh40000_sync_single_dma_for_cpu(dev, sg_dma_address(sg), dir, 1); - iommu_dma_ops->unmap_sg(dev, sgl, nelems, dir, attrs); -} - -static void kh40000_iommu_dma_sync_single_for_cpu(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ - kh40000_sync_single_dma_for_cpu(dev, addr, dir, 1); - iommu_dma_ops->sync_single_for_cpu(dev, addr, size, dir); -} - -static void kh40000_iommu_dma_sync_single_for_device(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ - iommu_dma_ops->sync_single_for_device(dev, addr, size, dir); -} - -static void kh40000_iommu_dma_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nelems, - enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nelems, i) - kh40000_sync_single_dma_for_cpu(dev, sg_dma_address(sg), dir, 1); - iommu_dma_ops->sync_sg_for_cpu(dev, sgl, nelems, dir); -} - -static void kh40000_iommu_dma_sync_sg_for_device(struct device *dev, - struct scatterlist *sgl, int nelems, - enum dma_data_direction dir) -{ - iommu_dma_ops->sync_sg_for_device(dev, sgl, nelems, dir); -} - -static dma_addr_t kh40000_iommu_dma_map_resource(struct device *dev, phys_addr_t phys, - size_t size, enum dma_data_direction dir, unsigned long attrs) -{ - return iommu_dma_ops->map_resource(dev, phys, size, dir, attrs); -} - -static void kh40000_iommu_dma_unmap_resource(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir, unsigned long attrs) -{ - kh40000_sync_single_dma_for_cpu(dev, addr, dir, 1); - iommu_dma_ops->unmap_resource(dev, addr, size, dir, attrs); -} - -static unsigned long kh40000_iommu_dma_get_merge_boundary(struct device *dev) -{ - return iommu_dma_ops->get_merge_boundary(dev); -} - -static size_t kh40000_iommu_dma_opt_mapping_size(void) -{ - return iommu_dma_ops->opt_mapping_size(); -} - -const struct dma_map_ops kh40000_dma_iommu_ops = { - .flags = DMA_F_PCI_P2PDMA_SUPPORTED, - .alloc = kh40000_iommu_dma_alloc, - .free = kh40000_iommu_dma_free, - .unmap_page = kh40000_iommu_dma_unmap_page, - .alloc_pages = kh40000_dma_common_alloc_pages, - .free_pages = kh40000_dma_common_free_pages, - .alloc_noncontiguous = kh40000_iommu_dma_alloc_noncontiguous, - .free_noncontiguous = kh40000_iommu_dma_free_noncontiguous, - .mmap = kh40000_iommu_dma_mmap, - .get_sgtable = kh40000_iommu_dma_get_sgtable, - .map_page = kh40000_iommu_dma_map_page, - .map_sg = kh40000_iommu_dma_map_sg, - .unmap_sg = kh40000_iommu_dma_unmap_sg, - .sync_single_for_cpu = kh40000_iommu_dma_sync_single_for_cpu, - .sync_single_for_device = kh40000_iommu_dma_sync_single_for_device, - .sync_sg_for_cpu = kh40000_iommu_dma_sync_sg_for_cpu, - .sync_sg_for_device = kh40000_iommu_dma_sync_sg_for_device, - .map_resource = kh40000_iommu_dma_map_resource, - .unmap_resource = kh40000_iommu_dma_unmap_resource, - .get_merge_boundary = kh40000_iommu_dma_get_merge_boundary, - .opt_mapping_size = kh40000_iommu_dma_opt_mapping_size, -}; - -void kh40000_set_iommu_dma_ops(struct device *dev) -{ - if (dev->dma_ops) { - iommu_dma_ops = dev->dma_ops; - set_dma_ops(dev, &kh40000_dma_iommu_ops); - pr_info_once("zhaoxin iommu dma patch enabled\n"); - } -} diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index fa0d1f19699e..7a8b9bb56835 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4196,9 +4196,6 @@ static void intel_iommu_probe_finalize(struct device *dev) { set_dma_ops(dev, NULL); iommu_setup_dma_ops(dev, 0, U64_MAX); - - if (is_zhaoxin_kh40000) - kh40000_set_iommu_dma_ops(dev); } static void intel_iommu_get_resv_regions(struct device *device, diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 3888a21cf4ff..3957edcb6fe0 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -521,16 +521,10 @@ pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev, extern bool is_zhaoxin_kh40000; extern const struct dma_map_ops kh40000_dma_direct_ops; -void kh40000_set_iommu_dma_ops(struct device *dev); #else bool __weak is_zhaoxin_kh40000; -static inline void kh40000_set_iommu_dma_ops(struct device *dev) -{ - -} - #endif -- Gitee From 636d739f9ec3c7f843f7b48acad0a29136ed1708 Mon Sep 17 00:00:00 2001 From: Jay Chen Date: Mon, 1 Sep 2025 19:05:05 +0800 Subject: [PATCH 002/143] anolis: Revert "anolis: Add kh40000_direct_dma_ops for KH-40000 platform" ANBZ: #13617 This reverts commit f2215c8614dcb51a1d4da72a25ef4370891ece56. Signed-off-by: Jay Chen --- .../admin-guide/kernel-parameters.txt | 5 - arch/x86/kernel/Makefile | 1 - arch/x86/kernel/early-quirks.c | 1 - arch/x86/kernel/zhaoxin_kh40000.c | 176 ------------------ include/linux/dma-map-ops.h | 1 - kernel/dma/contiguous.c | 3 - 6 files changed, 187 deletions(-) delete mode 100644 arch/x86/kernel/zhaoxin_kh40000.c diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 7e54b6e91071..dccea0e74159 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2333,11 +2333,6 @@ isapnp= [ISAPNP] Format: ,,, - zhaoxin_patch_bitmask= - [X86] Bitmask for Zhaoxin Platform's patch. - bit 0: enable KH-40000 dma patch's node check function - - isolcpus= [KNL,SMP,ISOL] Isolate a given set of CPUs from disturbance. [Deprecated - use cpusets instead] Format: [flag-list,] diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 2b433325ca8f..c25d40cbbdbe 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -159,7 +159,6 @@ ifeq ($(CONFIG_X86_64),y) obj-$(CONFIG_MMCONF_FAM10H) += mmconf-fam10h_64.o obj-y += vsmp_64.o - obj-$(CONFIG_PCI) += zhaoxin_kh40000.o endif obj-$(CONFIG_HYGON_CSV) += csv.o diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index b5f5e0916894..ddb857d94ed9 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -696,7 +696,6 @@ static void quirk_zhaoxin_dma_patch(int num, int slot, int func) revision = read_pci_config_byte(num, slot, func, PCI_REVISION_ID); if (revision == 0x10) { is_zhaoxin_kh40000 = true; - dma_ops = &kh40000_dma_direct_ops; pr_info("zhaoxin direct dma patch enabled\n"); } } diff --git a/arch/x86/kernel/zhaoxin_kh40000.c b/arch/x86/kernel/zhaoxin_kh40000.c deleted file mode 100644 index c477b18892fa..000000000000 --- a/arch/x86/kernel/zhaoxin_kh40000.c +++ /dev/null @@ -1,176 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "../../../kernel/dma/direct.h" - -/*** - * usage: - * set "zhaoxin_patch_bitmask=" in cmdline - * value description: - * bit 0: enable(1) node check or not(0). default 1 - */ -enum { - ZHAOXIN_P2CW_NODE_CHECK = BIT(0), - ZHAOXIN_PATCH_CODE_MAX = ZHAOXIN_P2CW_NODE_CHECK, -}; - -#define ZHAOXIN_PATCH_CODE_DEFAULT ZHAOXIN_P2CW_NODE_CHECK - -unsigned long zhaoxin_patch_code = ZHAOXIN_PATCH_CODE_DEFAULT; - -static int __init zhaoxin_patch_code_setup(char *str) -{ - int err = kstrtoul(str, 0, &zhaoxin_patch_code); - - if (err || (zhaoxin_patch_code > ZHAOXIN_PATCH_CODE_MAX)) { - pr_err("cmdline 'zhaoxin_patch_bitmask=%s' inappropriate\n", - str); - return err; - } - - if (ZHAOXIN_P2CW_NODE_CHECK | zhaoxin_patch_code) - pr_info("zhaoxin dma patch node check is enabled\n"); - - return 0; -} -__setup("zhaoxin_patch_bitmask=", zhaoxin_patch_code_setup); - -static struct pci_dev *kh40000_get_pci_dev(struct device *dev) -{ - if (dev_is_pci(dev)) - return to_pci_dev(dev); - - if (dev->parent) - return kh40000_get_pci_dev(dev->parent); - - return NULL; -} - -static void kh40000_sync_single_dma_for_cpu(struct device *dev, dma_addr_t paddr, - enum dma_data_direction dir, bool is_iommu) -{ - u8 vid; - struct pci_dev *pci; - u64 dma_mask = *dev->dma_mask; - - /* check direction */ - if ((dir != DMA_FROM_DEVICE) && (dir != DMA_BIDIRECTIONAL)) - return; - - /* check dma capability */ - if (dma_mask <= DMA_BIT_MASK(32)) - return; - - /* check device type */ - pci = kh40000_get_pci_dev(dev); - if (pci == NULL) - return; - - /* get real physical address */ - if (is_iommu) { - struct iommu_domain *domain = iommu_get_dma_domain(dev); - - paddr = iommu_iova_to_phys(domain, paddr); - if (!paddr) - return; - } - - /* check node or not */ - if ((zhaoxin_patch_code & ZHAOXIN_P2CW_NODE_CHECK) - && pfn_to_nid(PFN_DOWN(paddr)) == dev_to_node(dev)) - return; - - /* flush data by one pci read cycle */ - pci_read_config_byte(pci, PCI_VENDOR_ID, &vid); -} - -/* zhaoxin kh-40000 direct dma ops */ -static void *kh40000_dma_direct_alloc(struct device *dev, size_t size, - dma_addr_t *addr, gfp_t gfp, unsigned long attrs) -{ - if (dev->coherent_dma_mask > DMA_BIT_MASK(32)) - gfp |= __GFP_THISNODE; - - return dma_direct_alloc(dev, size, addr, gfp, attrs); -} - -static void kh40000_dma_direct_unmap_page(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir, unsigned long attrs) -{ - kh40000_sync_single_dma_for_cpu(dev, addr, dir, 0); - dma_direct_unmap_page(dev, addr, size, dir, attrs); -} - -static void kh40000_dma_direct_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nents, i) - kh40000_sync_single_dma_for_cpu(dev, sg_dma_address(sg), dir, 0); - - dma_direct_sync_sg_for_cpu(dev, sgl, nents, dir); -} - -static void kh40000_dma_direct_sync_single_for_cpu(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ - kh40000_sync_single_dma_for_cpu(dev, addr, dir, 0); - dma_direct_sync_single_for_cpu(dev, addr, size, dir); -} - -static void kh40000_dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nents, i) - kh40000_sync_single_dma_for_cpu(dev, sg_dma_address(sg), dir, 0); - - dma_direct_unmap_sg(dev, sgl, nents, dir, attrs); -} - -static void kh40000_dma_direct_unmap_resource(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir, unsigned long attrs) -{ - kh40000_sync_single_dma_for_cpu(dev, addr, dir, 0); -} - -const struct dma_map_ops kh40000_dma_direct_ops = { - .flags = DMA_F_PCI_P2PDMA_SUPPORTED, - .alloc = kh40000_dma_direct_alloc, - .sync_sg_for_cpu = kh40000_dma_direct_sync_sg_for_cpu, - .unmap_page = kh40000_dma_direct_unmap_page, - .sync_single_for_cpu = kh40000_dma_direct_sync_single_for_cpu, - .unmap_sg = kh40000_dma_direct_unmap_sg, - .unmap_resource = kh40000_dma_direct_unmap_resource, - .dma_supported = dma_direct_supported, - .free = dma_direct_free, - .alloc_pages = dma_direct_alloc_pages, - .free_pages = dma_direct_free_pages, - .sync_single_for_device = dma_direct_sync_single_for_device, - .sync_sg_for_device = dma_direct_sync_sg_for_device, - .get_required_mask = dma_direct_get_required_mask, - .max_mapping_size = dma_direct_max_mapping_size, - .mmap = dma_direct_mmap, - .get_sgtable = dma_direct_get_sgtable, - .map_page = dma_direct_map_page, - .map_sg = dma_direct_map_sg, - .map_resource = dma_direct_map_resource, -}; diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 3957edcb6fe0..80b8f56e57be 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -520,7 +520,6 @@ pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev, #if defined CONFIG_PCI && defined CONFIG_X86 extern bool is_zhaoxin_kh40000; -extern const struct dma_map_ops kh40000_dma_direct_ops; #else diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index 0ff132dd86a8..5bc41e33fa5c 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -227,9 +227,6 @@ void __init dma_contiguous_reserve(phys_addr_t limit) dma_numa_cma_reserve(); - if (is_zhaoxin_kh40000) - return; - pr_debug("%s(limit %08lx)\n", __func__, (unsigned long)limit); if (size_cmdline != -1) { -- Gitee From dbcdb6d4e2b0ec23db41fc21166aec3c99fe5043 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 19 Apr 2024 17:54:40 +0100 Subject: [PATCH 003/143] OF: Retire dma-ranges mask workaround ANBZ: #13617 commit 0c3457926e7e65710f32e02920c7d423417c93a2 upstream. The fixup adding 1 to the dma-ranges size may have been for the benefit of some early AMD Seattle DTs, or may have merely been a just-in-case, but either way anyone who might have deserved to get the message has hopefully seen the warning in the 9 years we've had it there. The modern dma_range_map mechanism should happily handle odd-sized ranges with no ill effect, so there's little need to care anyway now. Clean it up. Acked-by: Rob Herring Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/26620039901fdae52079ec1c8a4b2b324964a13e.1713523152.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/of/device.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/drivers/of/device.c b/drivers/of/device.c index 873d933e8e6d..7b149741f45f 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -129,22 +129,6 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, dma_end = r->dma_start + r->size; } size = dma_end - dma_start; - - /* - * Add a work around to treat the size as mask + 1 in case - * it is defined in DT as a mask. - */ - if (size & 1) { - dev_warn(dev, "Invalid size 0x%llx for dma-range(s)\n", - size); - size = size + 1; - } - - if (!size) { - dev_err(dev, "Adjusted size 0x%llx invalid\n", size); - kfree(map); - return -EINVAL; - } } /* -- Gitee From b25166998f3d99e0e5c7c10458bad458c7b08165 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 19 Apr 2024 17:54:41 +0100 Subject: [PATCH 004/143] OF: Simplify DMA range calculations ANBZ: #13617 commit ba503cf41c90e173f9421f7882d84b228bada97a upstream. Juggling start, end, and size values for a range is somewhat redundant and a little hard to follow. Consolidate down to just using inclusive start and end, which saves us worrying about size overflows for full 64-bit ranges (note that passing a potentially-overflowed value through to arch_setup_dma_ops() is benign for all current implementations, and this is working towards removing that anyway). Acked-by: Rob Herring Reviewed-by: Jason Gunthorpe Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/3e0a72fe3d79eae660e4284bb32f2cb39868ccd7.1713523152.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/of/device.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/of/device.c b/drivers/of/device.c index 7b149741f45f..460eaaec7f9b 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -96,7 +96,7 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, const struct bus_dma_region *map = NULL; struct device_node *bus_np; u64 dma_start = 0; - u64 mask, end, size = 0; + u64 mask, end = 0; bool coherent; int iommu_ret; int ret; @@ -118,17 +118,15 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, return ret == -ENODEV ? 0 : ret; } else { const struct bus_dma_region *r = map; - u64 dma_end = 0; /* Determine the overall bounds of all DMA regions */ for (dma_start = ~0; r->size; r++) { /* Take lower and upper limits */ if (r->dma_start < dma_start) dma_start = r->dma_start; - if (r->dma_start + r->size > dma_end) - dma_end = r->dma_start + r->size; + if (r->dma_start + r->size > end) + end = r->dma_start + r->size; } - size = dma_end - dma_start; } /* @@ -142,16 +140,15 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, dev->dma_mask = &dev->coherent_dma_mask; } - if (!size && dev->coherent_dma_mask) - size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1); - else if (!size) - size = 1ULL << 32; + if (!end && dev->coherent_dma_mask) + end = dev->coherent_dma_mask; + else if (!end) + end = (1ULL << 32) - 1; /* * Limit coherent and dma mask based on size and default mask * set by the driver. */ - end = dma_start + size - 1; mask = DMA_BIT_MASK(ilog2(end) + 1); dev->coherent_dma_mask &= mask; *dev->dma_mask &= mask; @@ -185,7 +182,7 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, } else dev_dbg(dev, "device is behind an iommu\n"); - arch_setup_dma_ops(dev, dma_start, size, coherent); + arch_setup_dma_ops(dev, dma_start, end - dma_start + 1, coherent); if (iommu_ret) of_dma_set_restricted_buffer(dev, np); -- Gitee From b7ea78b5b65512bf770d6264698c2c1ed6d291ba Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 19 Apr 2024 17:54:42 +0100 Subject: [PATCH 005/143] ACPI/IORT: Handle memory address size limits as limits ANBZ: #13617 commit 91cfd679f9e8b9a7bf2f26adf66eff99dbe2026b upstream. Return the Root Complex/Named Component memory address size limit as an inclusive limit value, rather than an exclusive size. This saves having to fudge an off-by-one for the 64-bit case, and simplifies our caller. Acked-by: Hanjun Guo Reviewed-by: Jason Gunthorpe Tested-by: Hanjun Guo Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/284ae9fbadb12f2e3b5a30cd4d037d0e6843a8f4.1713523152.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/acpi/arm64/dma.c | 9 +++------ drivers/acpi/arm64/iort.c | 20 ++++++++++---------- include/linux/acpi_iort.h | 4 ++-- 3 files changed, 15 insertions(+), 18 deletions(-) diff --git a/drivers/acpi/arm64/dma.c b/drivers/acpi/arm64/dma.c index 93d796531af3..b98a149f8d50 100644 --- a/drivers/acpi/arm64/dma.c +++ b/drivers/acpi/arm64/dma.c @@ -8,7 +8,6 @@ void acpi_arch_dma_setup(struct device *dev) { int ret; u64 end, mask; - u64 size = 0; const struct bus_dma_region *map = NULL; /* @@ -23,9 +22,9 @@ void acpi_arch_dma_setup(struct device *dev) } if (dev->coherent_dma_mask) - size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1); + end = dev->coherent_dma_mask; else - size = 1ULL << 32; + end = (1ULL << 32) - 1; ret = acpi_dma_get_range(dev, &map); if (!ret && map) { @@ -36,18 +35,16 @@ void acpi_arch_dma_setup(struct device *dev) end = r->dma_start + r->size - 1; } - size = end + 1; dev->dma_range_map = map; } if (ret == -ENODEV) - ret = iort_dma_get_ranges(dev, &size); + ret = iort_dma_get_ranges(dev, &end); if (!ret) { /* * Limit coherent and dma mask based on size retrieved from * firmware. */ - end = size - 1; mask = DMA_BIT_MASK(ilog2(end) + 1); dev->bus_dma_limit = end; dev->coherent_dma_mask = min(dev->coherent_dma_mask, mask); diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 1a31106a14e4..fd567b5efa5f 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1367,7 +1367,7 @@ int iort_iommu_configure_id(struct device *dev, const u32 *input_id) { return -ENODEV; } #endif -static int nc_dma_get_range(struct device *dev, u64 *size) +static int nc_dma_get_range(struct device *dev, u64 *limit) { struct acpi_iort_node *node; struct acpi_iort_named_component *ncomp; @@ -1384,13 +1384,13 @@ static int nc_dma_get_range(struct device *dev, u64 *size) return -EINVAL; } - *size = ncomp->memory_address_limit >= 64 ? U64_MAX : - 1ULL<memory_address_limit; + *limit = ncomp->memory_address_limit >= 64 ? U64_MAX : + (1ULL << ncomp->memory_address_limit) - 1; return 0; } -static int rc_dma_get_range(struct device *dev, u64 *size) +static int rc_dma_get_range(struct device *dev, u64 *limit) { struct acpi_iort_node *node; struct acpi_iort_root_complex *rc; @@ -1408,8 +1408,8 @@ static int rc_dma_get_range(struct device *dev, u64 *size) return -EINVAL; } - *size = rc->memory_address_limit >= 64 ? U64_MAX : - 1ULL<memory_address_limit; + *limit = rc->memory_address_limit >= 64 ? U64_MAX : + (1ULL << rc->memory_address_limit) - 1; return 0; } @@ -1417,16 +1417,16 @@ static int rc_dma_get_range(struct device *dev, u64 *size) /** * iort_dma_get_ranges() - Look up DMA addressing limit for the device * @dev: device to lookup - * @size: DMA range size result pointer + * @limit: DMA limit result pointer * * Return: 0 on success, an error otherwise. */ -int iort_dma_get_ranges(struct device *dev, u64 *size) +int iort_dma_get_ranges(struct device *dev, u64 *limit) { if (dev_is_pci(dev)) - return rc_dma_get_range(dev, size); + return rc_dma_get_range(dev, limit); else - return nc_dma_get_range(dev, size); + return nc_dma_get_range(dev, limit); } static void __init acpi_iort_register_irq(int hwirq, const char *name, diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index 1cb65592c95d..d4ed5622cf2b 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -39,7 +39,7 @@ void iort_get_rmr_sids(struct fwnode_handle *iommu_fwnode, void iort_put_rmr_sids(struct fwnode_handle *iommu_fwnode, struct list_head *head); /* IOMMU interface */ -int iort_dma_get_ranges(struct device *dev, u64 *size); +int iort_dma_get_ranges(struct device *dev, u64 *limit); int iort_iommu_configure_id(struct device *dev, const u32 *id_in); void iort_iommu_get_resv_regions(struct device *dev, struct list_head *head); phys_addr_t acpi_iort_dma_get_max_cpu_address(void); @@ -55,7 +55,7 @@ void iort_get_rmr_sids(struct fwnode_handle *iommu_fwnode, struct list_head *hea static inline void iort_put_rmr_sids(struct fwnode_handle *iommu_fwnode, struct list_head *head) { } /* IOMMU interface */ -static inline int iort_dma_get_ranges(struct device *dev, u64 *size) +static inline int iort_dma_get_ranges(struct device *dev, u64 *limit) { return -ENODEV; } static inline int iort_iommu_configure_id(struct device *dev, const u32 *id_in) { return -ENODEV; } -- Gitee From de514d6fb9ce41baa021423f2c0a3a50626c74aa Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 19 Apr 2024 17:54:43 +0100 Subject: [PATCH 006/143] dma-mapping: Add helpers for dma_range_map bounds ANBZ: #13617 commit fece6530bf4b59b01a476a12851e07751e73d69f upstream. Several places want to compute the lower and/or upper bounds of a dma_range_map, so let's factor that out into reusable helpers. Acked-by: Rob Herring Reviewed-by: Christoph Hellwig Reviewed-by: Hanjun Guo # For arm64 Reviewed-by: Jason Gunthorpe Tested-by: Hanjun Guo Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/45ec52f033ec4dfb364e23f48abaf787f612fa53.1713523152.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- arch/loongarch/kernel/dma.c | 9 ++------- drivers/acpi/arm64/dma.c | 8 +------- drivers/of/device.c | 11 ++--------- include/linux/dma-direct.h | 18 ++++++++++++++++++ 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/arch/loongarch/kernel/dma.c b/arch/loongarch/kernel/dma.c index cc0ccde58db8..3b6c16dc90b3 100644 --- a/arch/loongarch/kernel/dma.c +++ b/arch/loongarch/kernel/dma.c @@ -30,7 +30,7 @@ phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) void acpi_arch_dma_setup(struct device *dev) { int ret; - u64 mask, end = 0; + u64 mask, end; const struct bus_dma_region *map = NULL; if (node_id_offset == 0) { @@ -40,12 +40,7 @@ void acpi_arch_dma_setup(struct device *dev) ret = acpi_dma_get_range(dev, &map); if (!ret && map) { - const struct bus_dma_region *r = map; - - for (end = 0; r->size; r++) { - if (r->dma_start + r->size - 1 > end) - end = r->dma_start + r->size - 1; - } + end = dma_range_map_max(map); mask = DMA_BIT_MASK(ilog2(end) + 1); dev->bus_dma_limit = end; diff --git a/drivers/acpi/arm64/dma.c b/drivers/acpi/arm64/dma.c index b98a149f8d50..52b2abf88689 100644 --- a/drivers/acpi/arm64/dma.c +++ b/drivers/acpi/arm64/dma.c @@ -28,13 +28,7 @@ void acpi_arch_dma_setup(struct device *dev) ret = acpi_dma_get_range(dev, &map); if (!ret && map) { - const struct bus_dma_region *r = map; - - for (end = 0; r->size; r++) { - if (r->dma_start + r->size - 1 > end) - end = r->dma_start + r->size - 1; - } - + end = dma_range_map_max(map); dev->dma_range_map = map; } diff --git a/drivers/of/device.c b/drivers/of/device.c index 460eaaec7f9b..0681c220d114 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -117,16 +117,9 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, if (!force_dma) return ret == -ENODEV ? 0 : ret; } else { - const struct bus_dma_region *r = map; - /* Determine the overall bounds of all DMA regions */ - for (dma_start = ~0; r->size; r++) { - /* Take lower and upper limits */ - if (r->dma_start < dma_start) - dma_start = r->dma_start; - if (r->dma_start + r->size > end) - end = r->dma_start + r->size; - } + dma_start = dma_range_map_min(map); + end = dma_range_map_max(map); } /* diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 18aade195884..185ef49b2a91 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -49,6 +49,24 @@ static inline phys_addr_t translate_dma_to_phys(struct device *dev, return (phys_addr_t)-1; } +static inline dma_addr_t dma_range_map_min(const struct bus_dma_region *map) +{ + dma_addr_t ret = (dma_addr_t)U64_MAX; + + for (; map->size; map++) + ret = min(ret, map->dma_start); + return ret; +} + +static inline dma_addr_t dma_range_map_max(const struct bus_dma_region *map) +{ + dma_addr_t ret = 0; + + for (; map->size; map++) + ret = max(ret, map->dma_start + map->size - 1); + return ret; +} + #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA #include #ifndef phys_to_dma_unencrypted -- Gitee From 8fa529d329c27edeb03a2ebd5454dd1aa541c748 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 19 Apr 2024 17:54:44 +0100 Subject: [PATCH 007/143] iommu/dma: Make limit checks self-contained ANBZ: #13617 commit ad4750b07d3462ce29a0c9b1e88b2a1f9795290e upstream. It's now easy to retrieve the device's DMA limits if we want to check them against the domain aperture, so do that ourselves instead of relying on them being passed through the callchain. Reviewed-by: Jason Gunthorpe Tested-by: Hanjun Guo Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/e28a114243d1e79eb3609aded034f8529521333f.1713523152.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/dma-iommu.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 3020b3b295d2..c53cb9bffaf2 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -680,19 +680,16 @@ static void iommu_dma_init_options(struct iommu_dma_options *options, /** * iommu_dma_init_domain - Initialise a DMA mapping domain * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() - * @base: IOVA at which the mappable address space starts - * @limit: Last address of the IOVA space * @dev: Device the domain is being initialised for * - * @base and @limit + 1 should be exact multiples of IOMMU page granularity to - * avoid rounding surprises. If necessary, we reserve the page at address 0 + * If the geometry and dma_range_map include address 0, we reserve that page * to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but * any change which could make prior IOVAs invalid will fail. */ -static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, - dma_addr_t limit, struct device *dev) +static int iommu_dma_init_domain(struct iommu_domain *domain, struct device *dev) { struct iommu_dma_cookie *cookie = domain->iova_cookie; + const struct bus_dma_region *map = dev->dma_range_map; unsigned long order, base_pfn; struct iova_domain *iovad; int ret; @@ -704,18 +701,18 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, /* Use the smallest supported page size for IOVA granularity */ order = __ffs(domain->pgsize_bitmap); - base_pfn = max_t(unsigned long, 1, base >> order); + base_pfn = 1; /* Check the domain allows at least some access to the device... */ - if (domain->geometry.force_aperture) { + if (map) { + dma_addr_t base = dma_range_map_min(map); if (base > domain->geometry.aperture_end || - limit < domain->geometry.aperture_start) { + dma_range_map_max(map) < domain->geometry.aperture_start) { pr_warn("specified DMA range outside IOMMU capability\n"); return -EFAULT; } /* ...then finally give it a kicking to make sure it fits */ - base_pfn = max_t(unsigned long, base_pfn, - domain->geometry.aperture_start >> order); + base_pfn = max(base, domain->geometry.aperture_start) >> order; } /* start_pfn is always nonzero for an already-initialised domain */ @@ -1781,7 +1778,7 @@ void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit) * underlying IOMMU driver needs to support via the dma-iommu layer. */ if (iommu_is_dma_domain(domain)) { - if (iommu_dma_init_domain(domain, dma_base, dma_limit, dev)) + if (iommu_dma_init_domain(domain, dev)) goto out_err; dev->dma_ops = &iommu_dmafops; } -- Gitee From 1131063bebef2ca62b14e917ec71a352a8f2b6cf Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 19 Apr 2024 17:54:45 +0100 Subject: [PATCH 008/143] iommu/dma: Centralise iommu_setup_dma_ops() ANBZ: #13617 commit b67483b3c44eaef2f771fa4c712e13f452675a67 upstream. It's somewhat hard to see, but arm64's arch_setup_dma_ops() should only ever call iommu_setup_dma_ops() after a successful iommu_probe_device(), which means there should be no harm in achieving the same order of operations by running it off the back of iommu_probe_device() itself. This then puts it in line with the x86 and s390 .probe_finalize bodges, letting us pull it all into the main flow properly. As a bonus this lets us fold in and de-scope the PCI workaround setup as well. At this point we can also then pull the call up inside the group mutex, and avoid having to think about whether iommu_group_store_type() could theoretically race and free the domain if iommu_setup_dma_ops() ran just *before* iommu_device_use_default_domain() claims it... Furthermore we replace one .probe_finalize call completely, since the only remaining implementations are now one which only needs to run once for the initial boot-time probe, and two which themselves render that path unreachable. This leaves us a big step closer to realistically being able to unpick the variety of different things that iommu_setup_dma_ops() has been muddling together, and further streamline iommu-dma into core API flows in future. Reviewed-by: Lu Baolu # For Intel IOMMU Reviewed-by: Jason Gunthorpe Tested-by: Hanjun Guo Signed-off-by: Robin Murphy Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/bebea331c1d688b34d9862eefd5ede47503961b8.1713523152.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel [ Shuai Xue: Conflicts: drivers/iommu/dma-iommu.c 47bd33d47225 rename iommu_dma_ops to iommu_dmafops, which is conflict with upstream, rename back to avoid future conflict ] Signed-off-by: Shuai Xue Signed-off-by: Jay Chen --- arch/arm64/mm/dma-mapping.c | 2 -- drivers/iommu/amd/iommu.c | 8 -------- drivers/iommu/dma-iommu.c | 22 ++++++++-------------- drivers/iommu/dma-iommu.h | 14 ++++++-------- drivers/iommu/intel/iommu.c | 7 ------- drivers/iommu/iommu.c | 20 +++++++------------- drivers/iommu/s390-iommu.c | 6 ------ drivers/iommu/virtio-iommu.c | 10 ---------- include/linux/iommu.h | 7 ------- 9 files changed, 21 insertions(+), 75 deletions(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 61886e43e3a1..313d8938a2f0 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -58,8 +58,6 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, ARCH_DMA_MINALIGN, cls); dev->dma_coherent = coherent; - if (device_iommu_mapped(dev)) - iommu_setup_dma_ops(dev, dma_base, dma_base + size - 1); xen_setup_dma_ops(dev); } diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 654a75041ea6..ba331bb174ce 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2188,13 +2188,6 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev) return iommu_dev; } -static void amd_iommu_probe_finalize(struct device *dev) -{ - /* Domains are initialized for this device - have a look what we ended up with */ - set_dma_ops(dev, NULL); - iommu_setup_dma_ops(dev, 0, U64_MAX); -} - static void amd_iommu_release_device(struct device *dev) { struct amd_iommu *iommu; @@ -2810,7 +2803,6 @@ const struct iommu_ops amd_iommu_ops = { .domain_alloc_user = amd_iommu_domain_alloc_user, .probe_device = amd_iommu_probe_device, .release_device = amd_iommu_release_device, - .probe_finalize = amd_iommu_probe_finalize, .device_group = amd_iommu_device_group, .get_resv_regions = amd_iommu_get_resv_regions, .is_attach_deferred = amd_iommu_is_attach_deferred, diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index c53cb9bffaf2..4b0cd53f93b1 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1737,7 +1737,7 @@ static size_t iommu_dma_max_mapping_size(struct device *dev) return SIZE_MAX; } -static const struct dma_map_ops iommu_dmafops = { +static const struct dma_map_ops iommu_dma_ops = { .flags = DMA_F_PCI_P2PDMA_SUPPORTED, .alloc = iommu_dma_alloc, .free = iommu_dma_free, @@ -1762,25 +1762,20 @@ static const struct dma_map_ops iommu_dmafops = { .max_mapping_size = iommu_dma_max_mapping_size, }; -/* - * The IOMMU core code allocates the default DMA domain, which the underlying - * IOMMU driver needs to support via the dma-iommu layer. - */ -void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit) +void iommu_setup_dma_ops(struct device *dev) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - if (!domain) - goto out_err; + if (dev_is_pci(dev)) + dev->iommu->pci_32bit_workaround = !iommu_dma_forcedac; - /* - * The IOMMU core code allocates the default DMA domain, which the - * underlying IOMMU driver needs to support via the dma-iommu layer. - */ if (iommu_is_dma_domain(domain)) { if (iommu_dma_init_domain(domain, dev)) goto out_err; - dev->dma_ops = &iommu_dmafops; + dev->dma_ops = &iommu_dma_ops; + } else if (dev->dma_ops == &iommu_dma_ops) { + /* Clean up if we've switched *from* a DMA domain */ + dev->dma_ops = NULL; } return; @@ -1788,7 +1783,6 @@ void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit) pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n", dev_name(dev)); } -EXPORT_SYMBOL_GPL(iommu_setup_dma_ops); static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, phys_addr_t msi_addr, struct iommu_domain *domain) diff --git a/drivers/iommu/dma-iommu.h b/drivers/iommu/dma-iommu.h index c829f1f82a99..c12d63457c76 100644 --- a/drivers/iommu/dma-iommu.h +++ b/drivers/iommu/dma-iommu.h @@ -9,6 +9,8 @@ #ifdef CONFIG_IOMMU_DMA +void iommu_setup_dma_ops(struct device *dev); + int iommu_get_dma_cookie(struct iommu_domain *domain); void iommu_put_dma_cookie(struct iommu_domain *domain); @@ -17,13 +19,13 @@ int iommu_dma_init_fq(struct iommu_domain *domain); void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list); extern bool iommu_dma_forcedac; -static inline void iommu_dma_set_pci_32bit_workaround(struct device *dev) -{ - dev->iommu->pci_32bit_workaround = !iommu_dma_forcedac; -} #else /* CONFIG_IOMMU_DMA */ +static inline void iommu_setup_dma_ops(struct device *dev) +{ +} + static inline int iommu_dma_init_fq(struct iommu_domain *domain) { return -EINVAL; @@ -42,9 +44,5 @@ static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_he { } -static inline void iommu_dma_set_pci_32bit_workaround(struct device *dev) -{ -} - #endif /* CONFIG_IOMMU_DMA */ #endif /* __DMA_IOMMU_H */ diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 7a8b9bb56835..239a47983517 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4192,12 +4192,6 @@ static void intel_iommu_release_device(struct device *dev) set_dma_ops(dev, NULL); } -static void intel_iommu_probe_finalize(struct device *dev) -{ - set_dma_ops(dev, NULL); - iommu_setup_dma_ops(dev, 0, U64_MAX); -} - static void intel_iommu_get_resv_regions(struct device *device, struct list_head *head) { @@ -4668,7 +4662,6 @@ const struct iommu_ops intel_iommu_ops = { .domain_alloc_user = intel_iommu_domain_alloc_user, .domain_alloc_sva = intel_svm_domain_alloc, .probe_device = intel_iommu_probe_device, - .probe_finalize = intel_iommu_probe_finalize, .release_device = intel_iommu_release_device, .get_resv_regions = intel_iommu_get_resv_regions, .device_group = intel_iommu_device_group, diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 9265670edf0b..c41ebd98410c 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -581,10 +581,11 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list if (list_empty(&group->entry)) list_add_tail(&group->entry, group_list); } - mutex_unlock(&group->mutex); - if (dev_is_pci(dev)) - iommu_dma_set_pci_32bit_workaround(dev); + if (group->default_domain) + iommu_setup_dma_ops(dev); + + mutex_unlock(&group->mutex); return 0; @@ -1839,6 +1840,8 @@ int bus_iommu_probe(const struct bus_type *bus) mutex_unlock(&group->mutex); return ret; } + for_each_group_device(group, gdev) + iommu_setup_dma_ops(gdev->dev); mutex_unlock(&group->mutex); /* @@ -3079,18 +3082,9 @@ static ssize_t iommu_group_store_type(struct iommu_group *group, if (ret) goto out_unlock; - /* - * Release the mutex here because ops->probe_finalize() call-back of - * some vendor IOMMU drivers calls arm_iommu_attach_device() which - * in-turn might call back into IOMMU core code, where it tries to take - * group->mutex, resulting in a deadlock. - */ - mutex_unlock(&group->mutex); - /* Make sure dma_ops is appropriatley set */ for_each_group_device(group, gdev) - iommu_group_do_probe_finalize(gdev->dev); - return count; + iommu_setup_dma_ops(gdev->dev); out_unlock: mutex_unlock(&group->mutex); diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 9a5196f523de..d8eaa7ea380b 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -695,11 +695,6 @@ static size_t s390_iommu_unmap_pages(struct iommu_domain *domain, return size; } -static void s390_iommu_probe_finalize(struct device *dev) -{ - iommu_setup_dma_ops(dev, 0, U64_MAX); -} - struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev) { if (!zdev || !zdev->s390_domain) @@ -785,7 +780,6 @@ static const struct iommu_ops s390_iommu_ops = { .capable = s390_iommu_capable, .domain_alloc_paging = s390_domain_alloc_paging, .probe_device = s390_iommu_probe_device, - .probe_finalize = s390_iommu_probe_finalize, .release_device = s390_iommu_release_device, .device_group = generic_device_group, .pgsize_bitmap = SZ_4K, diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 04048f64a2c0..8e776f6c6e35 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -1025,15 +1025,6 @@ static struct iommu_device *viommu_probe_device(struct device *dev) return ERR_PTR(ret); } -static void viommu_probe_finalize(struct device *dev) -{ -#ifndef CONFIG_ARCH_HAS_SETUP_DMA_OPS - /* First clear the DMA ops in case we're switching from a DMA domain */ - set_dma_ops(dev, NULL); - iommu_setup_dma_ops(dev, 0, U64_MAX); -#endif -} - static void viommu_release_device(struct device *dev) { struct viommu_endpoint *vdev = dev_iommu_priv_get(dev); @@ -1073,7 +1064,6 @@ static struct iommu_ops viommu_ops = { .capable = viommu_capable, .domain_alloc = viommu_domain_alloc, .probe_device = viommu_probe_device, - .probe_finalize = viommu_probe_finalize, .release_device = viommu_release_device, .device_group = viommu_device_group, .get_resv_regions = viommu_get_resv_regions, diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 4edcc5a979b2..b5a431eb8c4d 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1480,9 +1480,6 @@ static inline void iommu_debugfs_setup(void) {} #ifdef CONFIG_IOMMU_DMA #include -/* Setup call for arch DMA mapping code */ -void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit); - int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base); int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr); @@ -1493,10 +1490,6 @@ void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg); struct msi_desc; struct msi_msg; -static inline void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit) -{ -} - static inline int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base) { return -ENODEV; -- Gitee From 38358b52c0120f3da4b571ea4dec90c7dd19f8a4 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Tue, 23 Apr 2024 11:17:25 +0000 Subject: [PATCH 009/143] iommu/amd: Enhance def_domain_type to handle untrusted device ANBZ: #13617 commit 0f91d0795741c12cee200667648669a91b568735 upstream. Previously, IOMMU core layer was forcing IOMMU_DOMAIN_DMA domain for untrusted device. This always took precedence over driver's def_domain_type(). Commit 59ddce4418da ("iommu: Reorganize iommu_get_default_domain_type() to respect def_domain_type()") changed the behaviour. Current code calls def_domain_type() but if it doesn't return IOMMU_DOMAIN_DMA for untrusted device it throws error. This results in IOMMU group (and potentially IOMMU itself) in undetermined state. This patch adds untrusted check in AMD IOMMU driver code. So that it allows eGPUs behind Thunderbolt work again. Fine tuning amd_iommu_def_domain_type() will be done later. Reported-by: Eric Wagner Link: https://lore.kernel.org/linux-iommu/CAHudX3zLH6CsRmLE-yb+gRjhh-v4bU5_1jW_xCcxOo_oUUZKYg@mail.gmail.com Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3182 Fixes: 59ddce4418da ("iommu: Reorganize iommu_get_default_domain_type() to respect def_domain_type()") Cc: Robin Murphy Cc: Jason Gunthorpe Cc: stable@kernel.org # v6.7+ Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20240423111725.5813-1-vasant.hegde@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/iommu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index ba331bb174ce..a9ccb205854f 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2770,6 +2770,10 @@ static int amd_iommu_def_domain_type(struct device *dev) if (!dev_data) return 0; + /* Always use DMA domain for untrusted device */ + if (dev_is_pci(dev) && to_pci_dev(dev)->untrusted) + return IOMMU_DOMAIN_DMA; + /* * Do not identity map IOMMUv2 capable devices when: * - memory encryption is active, because some of those devices -- Gitee From 35d33026fbf8c1cbc422bc47b54c4c27ca472c4f Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Thu, 18 Apr 2024 10:33:46 +0000 Subject: [PATCH 010/143] iommu/amd: Rename amd_iommu_v2_supported() as amd_iommu_pasid_supported() ANBZ: #13617 commit 9433d5b2ace529d8a66863d4b5127c6c4ca6e4c2 upstream. To reflect its usage. No functional changes intended. Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240418103400.6229-2-vasant.hegde@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/amd_iommu.h | 2 +- drivers/iommu/amd/init.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 325f63923257..4b87b093c51d 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -38,7 +38,7 @@ extern int amd_iommu_guest_ir; extern enum io_pgtable_fmt amd_iommu_pgtable; extern int amd_iommu_gpt_level; -bool amd_iommu_v2_supported(void); +bool amd_iommu_pasid_supported(void); /* Device capabilities */ int amd_iommu_pdev_enable_cap_pri(struct pci_dev *pdev); diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 62317af56d17..f37d158b1a80 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3673,7 +3673,7 @@ __setup("ivrs_ioapic", parse_ivrs_ioapic); __setup("ivrs_hpet", parse_ivrs_hpet); __setup("ivrs_acpihid", parse_ivrs_acpihid); -bool amd_iommu_v2_supported(void) +bool amd_iommu_pasid_supported(void) { /* CPU page table size should match IOMMU guest page table size */ if (cpu_feature_enabled(X86_FEATURE_LA57) && -- Gitee From 25ce455204d1dfc694a75270c052ee7197e61bbd Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Thu, 18 Apr 2024 10:33:47 +0000 Subject: [PATCH 011/143] iommu/amd: Introduce per device DTE update function ANBZ: #13617 commit c5ebd09625391000026b0860952e05d0f7fc4519 upstream. Consolidate per device update and flush logic into separate function. Also make it as global function as it will be used in subsequent series to update the DTE. Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240418103400.6229-3-vasant.hegde@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/amd_iommu.h | 1 + drivers/iommu/amd/iommu.c | 26 ++++++++++++++++++-------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 4b87b093c51d..5fb2a59e4dad 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -56,6 +56,7 @@ int amd_iommu_clear_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid); void amd_iommu_flush_all_caches(struct amd_iommu *iommu); void amd_iommu_update_and_flush_device_table(struct protection_domain *domain); void amd_iommu_domain_update(struct protection_domain *domain); +void amd_iommu_dev_update_dte(struct iommu_dev_data *dev_data, bool set); void amd_iommu_domain_flush_complete(struct protection_domain *domain); void amd_iommu_domain_flush_pages(struct protection_domain *domain, u64 address, size_t size); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index a9ccb205854f..21a881d2afe4 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2012,6 +2012,21 @@ static void clear_dte_entry(struct amd_iommu *iommu, u16 devid) amd_iommu_apply_erratum_63(iommu, devid); } +/* Update and flush DTE for the given device */ +void amd_iommu_dev_update_dte(struct iommu_dev_data *dev_data, bool set) +{ + struct amd_iommu *iommu = get_amd_iommu_from_dev(dev_data->dev); + + if (set) + set_dte_entry(iommu, dev_data); + else + clear_dte_entry(iommu, dev_data->devid); + + clone_aliases(iommu, dev_data->dev); + device_flush_dte(dev_data); + iommu_completion_wait(iommu); +} + static int do_attach(struct iommu_dev_data *dev_data, struct protection_domain *domain) { @@ -2046,10 +2061,7 @@ static int do_attach(struct iommu_dev_data *dev_data, } /* Update device table */ - set_dte_entry(iommu, dev_data); - clone_aliases(iommu, dev_data->dev); - - device_flush_dte(dev_data); + amd_iommu_dev_update_dte(dev_data, true); return ret; } @@ -2068,11 +2080,9 @@ static void do_detach(struct iommu_dev_data *dev_data) /* Update data structures */ dev_data->domain = NULL; list_del(&dev_data->list); - clear_dte_entry(iommu, dev_data->devid); - clone_aliases(iommu, dev_data->dev); - /* Flush the DTE entry */ - device_flush_dte(dev_data); + /* Clear DTE and flush the entry */ + amd_iommu_dev_update_dte(dev_data, false); /* Flush IOTLB and wait for the flushes to finish */ amd_iommu_domain_flush_all(domain); -- Gitee From cfb48bd2ce2d1ebc136f09081d1de94ddef7d0ff Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 18 Apr 2024 10:33:48 +0000 Subject: [PATCH 012/143] iommu/amd: Add support for enabling/disabling IOMMU features ANBZ: #13617 commit db44bd517ff2de4224fdb0e9c0db426572f9fa11 upstream. Add support for struct iommu_ops.dev_{enable/disable}_feat. Please note that the empty feature switches will be populated by subsequent patches. Signed-off-by: Wei Huang Co-developed-by: Suravee Suthikulpanit Signed-off-by: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240418103400.6229-4-vasant.hegde@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/iommu.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 21a881d2afe4..983a12c239f9 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2811,6 +2811,32 @@ static const struct iommu_dirty_ops amd_dirty_ops = { .read_and_clear_dirty = amd_iommu_read_and_clear_dirty, }; +static int amd_iommu_dev_enable_feature(struct device *dev, + enum iommu_dev_features feat) +{ + int ret; + + switch (feat) { + default: + ret = -EINVAL; + break; + } + return ret; +} + +static int amd_iommu_dev_disable_feature(struct device *dev, + enum iommu_dev_features feat) +{ + int ret; + + switch (feat) { + default: + ret = -EINVAL; + break; + } + return ret; +} + const struct iommu_ops amd_iommu_ops = { .capable = amd_iommu_capable, .domain_alloc = amd_iommu_domain_alloc, @@ -2822,6 +2848,8 @@ const struct iommu_ops amd_iommu_ops = { .is_attach_deferred = amd_iommu_is_attach_deferred, .pgsize_bitmap = AMD_IOMMU_PGSIZES, .def_domain_type = amd_iommu_def_domain_type, + .dev_enable_feat = amd_iommu_dev_enable_feature, + .dev_disable_feat = amd_iommu_dev_disable_feature, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = amd_iommu_attach_device, .map_pages = amd_iommu_map_pages, -- Gitee From d22b3b5d429c2b44cdfacf35350814b1a9df772c Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Thu, 18 Apr 2024 10:33:49 +0000 Subject: [PATCH 013/143] iommu/amd: Move PPR-related functions into ppr.c ANBZ: #13617 commit e08fcd901c4301c150a8212df28df7f4d4811988 upstream. In preparation to subsequent PPR-related patches, and also remove static declaration for certain helper functions so that it can be reused in other files. Also rename below functions: alloc_ppr_log -> amd_iommu_alloc_ppr_log iommu_enable_ppr_log -> amd_iommu_enable_ppr_log free_ppr_log -> amd_iommu_free_ppr_log iommu_poll_ppr_log -> amd_iommu_poll_ppr_log Signed-off-by: Suravee Suthikulpanit Co-developed-by: Vasant Hegde Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240418103400.6229-5-vasant.hegde@amd.com Signed-off-by: Joerg Roedel [ Shuai Xue: do not know why conflict Conflicts: drivers/iommu/amd/init.c commit 43130880df8a change flags of iommu_alloc_4k_pages in amd_iommu_alloc_ppr_log() ] Signed-off-by: Shuai Xue --- drivers/iommu/amd/Makefile | 2 +- drivers/iommu/amd/amd_iommu.h | 17 ++++- drivers/iommu/amd/init.c | 64 +++---------------- drivers/iommu/amd/iommu.c | 55 +--------------- drivers/iommu/amd/ppr.c | 114 ++++++++++++++++++++++++++++++++++ 5 files changed, 139 insertions(+), 113 deletions(-) create mode 100644 drivers/iommu/amd/ppr.c diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile index f454fbb1569e..93b11b6d764f 100644 --- a/drivers/iommu/amd/Makefile +++ b/drivers/iommu/amd/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o +obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o ppr.o obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 5fb2a59e4dad..712c8782b99f 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -17,10 +17,16 @@ irqreturn_t amd_iommu_int_thread_pprlog(int irq, void *data); irqreturn_t amd_iommu_int_thread_galog(int irq, void *data); irqreturn_t amd_iommu_int_handler(int irq, void *data); void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid); +void amd_iommu_restart_log(struct amd_iommu *iommu, const char *evt_type, + u8 cntrl_intr, u8 cntrl_log, + u32 status_run_mask, u32 status_overflow_mask); void amd_iommu_restart_event_logging(struct amd_iommu *iommu); void amd_iommu_restart_ga_log(struct amd_iommu *iommu); void amd_iommu_restart_ppr_log(struct amd_iommu *iommu); void amd_iommu_set_rlookup_table(struct amd_iommu *iommu, u16 devid); +void iommu_feature_enable(struct amd_iommu *iommu, u8 bit); +void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, + gfp_t gfp, size_t size); #ifdef CONFIG_AMD_IOMMU_DEBUGFS void amd_iommu_debugfs_setup(struct amd_iommu *iommu); @@ -49,6 +55,14 @@ int amd_iommu_set_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid, unsigned long gcr3); int amd_iommu_clear_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid); +/* PPR */ +int __init amd_iommu_alloc_ppr_log(struct amd_iommu *iommu); +void __init amd_iommu_free_ppr_log(struct amd_iommu *iommu); +void amd_iommu_enable_ppr_log(struct amd_iommu *iommu); +void amd_iommu_poll_ppr_log(struct amd_iommu *iommu); +int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid, + int status, int tag); + /* * This function flushes all internal caches of * the IOMMU used by this driver. @@ -74,9 +88,6 @@ static inline int amd_iommu_create_irq_domain(struct amd_iommu *iommu) } #endif -int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid, - int status, int tag); - static inline bool is_rd890_iommu(struct pci_dev *pdev) { return (pdev->vendor == PCI_VENDOR_ID_ATI) && diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index f37d158b1a80..2457d3d321ab 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -419,7 +419,7 @@ static void iommu_set_device_table(struct amd_iommu *iommu) } /* Generic functions to enable/disable certain features of the IOMMU. */ -static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit) +void iommu_feature_enable(struct amd_iommu *iommu, u8 bit) { u64 ctrl; @@ -744,9 +744,9 @@ static int __init alloc_command_buffer(struct amd_iommu *iommu) * Interrupt handler has processed all pending events and adjusted head * and tail pointer. Reset overflow mask and restart logging again. */ -static void amd_iommu_restart_log(struct amd_iommu *iommu, const char *evt_type, - u8 cntrl_intr, u8 cntrl_log, - u32 status_run_mask, u32 status_overflow_mask) +void amd_iommu_restart_log(struct amd_iommu *iommu, const char *evt_type, + u8 cntrl_intr, u8 cntrl_log, + u32 status_run_mask, u32 status_overflow_mask) { u32 status; @@ -787,17 +787,6 @@ void amd_iommu_restart_ga_log(struct amd_iommu *iommu) MMIO_STATUS_GALOG_OVERFLOW_MASK); } -/* - * This function restarts ppr logging in case the IOMMU experienced - * PPR log overflow. - */ -void amd_iommu_restart_ppr_log(struct amd_iommu *iommu) -{ - amd_iommu_restart_log(iommu, "PPR", CONTROL_PPRINT_EN, - CONTROL_PPRLOG_EN, MMIO_STATUS_PPR_RUN_MASK, - MMIO_STATUS_PPR_OVERFLOW_MASK); -} - /* * This function resets the command buffer if the IOMMU stopped fetching * commands from it. @@ -846,8 +835,8 @@ static void __init free_command_buffer(struct amd_iommu *iommu) iommu_free_pages(iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); } -static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, - gfp_t gfp, size_t size) +void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, gfp_t gfp, + size_t size) { int order = get_order(size); void *buf = iommu_alloc_pages(gfp, order); @@ -902,41 +891,6 @@ static void __init free_event_buffer(struct amd_iommu *iommu) iommu_free_pages(iommu->evt_buf, get_order(EVT_BUFFER_SIZE)); } -/* allocates the memory where the IOMMU will log its events to */ -static int __init alloc_ppr_log(struct amd_iommu *iommu) -{ - iommu->ppr_log = iommu_alloc_4k_pages(iommu, GFP_KERNEL, PPR_LOG_SIZE); - - return iommu->ppr_log ? 0 : -ENOMEM; -} - -static void iommu_enable_ppr_log(struct amd_iommu *iommu) -{ - u64 entry; - - if (iommu->ppr_log == NULL) - return; - - iommu_feature_enable(iommu, CONTROL_PPR_EN); - - entry = iommu_virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512; - - memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET, - &entry, sizeof(entry)); - - /* set head and tail to zero manually */ - writel(0x00, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); - writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); - - iommu_feature_enable(iommu, CONTROL_PPRLOG_EN); - iommu_feature_enable(iommu, CONTROL_PPRINT_EN); -} - -static void __init free_ppr_log(struct amd_iommu *iommu) -{ - iommu_free_pages(iommu->ppr_log, get_order(PPR_LOG_SIZE)); -} - static void free_ga_log(struct amd_iommu *iommu) { #ifdef CONFIG_IRQ_REMAP @@ -1685,7 +1639,7 @@ static void __init free_iommu_one(struct amd_iommu *iommu) free_cwwb_sem(iommu); free_command_buffer(iommu); free_event_buffer(iommu); - free_ppr_log(iommu); + amd_iommu_free_ppr_log(iommu); free_ga_log(iommu); iommu_unmap_mmio_space(iommu); } @@ -2101,7 +2055,7 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval); } - if (check_feature(FEATURE_PPR) && alloc_ppr_log(iommu)) + if (check_feature(FEATURE_PPR) && amd_iommu_alloc_ppr_log(iommu)) return -ENOMEM; if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) { @@ -2847,7 +2801,7 @@ static void enable_iommus_v2(void) struct amd_iommu *iommu; for_each_iommu(iommu) - iommu_enable_ppr_log(iommu); + amd_iommu_enable_ppr_log(iommu); } static void enable_iommus_vapic(void) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 983a12c239f9..cdd4a58d9155 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -819,59 +819,6 @@ static void iommu_poll_events(struct amd_iommu *iommu) writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); } -static void iommu_poll_ppr_log(struct amd_iommu *iommu) -{ - u32 head, tail; - - if (iommu->ppr_log == NULL) - return; - - head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); - tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); - - while (head != tail) { - volatile u64 *raw; - u64 entry[2]; - int i; - - raw = (u64 *)(iommu->ppr_log + head); - - /* - * Hardware bug: Interrupt may arrive before the entry is - * written to memory. If this happens we need to wait for the - * entry to arrive. - */ - for (i = 0; i < LOOP_TIMEOUT; ++i) { - if (PPR_REQ_TYPE(raw[0]) != 0) - break; - udelay(1); - } - - /* Avoid memcpy function-call overhead */ - entry[0] = raw[0]; - entry[1] = raw[1]; - - /* - * To detect the hardware errata 733 we need to clear the - * entry back to zero. This issue does not exist on SNP - * enabled system. Also this buffer is not writeable on - * SNP enabled system. - */ - if (!amd_iommu_snp_en) - raw[0] = raw[1] = 0UL; - - /* Update head pointer of hardware ring-buffer */ - head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE; - writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); - - /* TODO: PPR Handler will be added when we add IOPF support */ - - /* Refresh ring-buffer information */ - head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); - tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); - } -} - #ifdef CONFIG_IRQ_REMAP static int (*iommu_ga_log_notifier)(u32); @@ -1000,7 +947,7 @@ irqreturn_t amd_iommu_int_thread_pprlog(int irq, void *data) { amd_iommu_handle_irq(data, "PPR", MMIO_STATUS_PPR_INT_MASK, MMIO_STATUS_PPR_OVERFLOW_MASK, - iommu_poll_ppr_log, amd_iommu_restart_ppr_log); + amd_iommu_poll_ppr_log, amd_iommu_restart_ppr_log); return IRQ_HANDLED; } diff --git a/drivers/iommu/amd/ppr.c b/drivers/iommu/amd/ppr.c new file mode 100644 index 000000000000..1f76ed549ec1 --- /dev/null +++ b/drivers/iommu/amd/ppr.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023 Advanced Micro Devices, Inc. + */ + +#define pr_fmt(fmt) "AMD-Vi: " fmt +#define dev_fmt(fmt) pr_fmt(fmt) + +#include +#include +#include + +#include + +#include "amd_iommu.h" +#include "amd_iommu_types.h" + +int __init amd_iommu_alloc_ppr_log(struct amd_iommu *iommu) +{ + iommu->ppr_log = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, + PPR_LOG_SIZE); + return iommu->ppr_log ? 0 : -ENOMEM; +} + +void amd_iommu_enable_ppr_log(struct amd_iommu *iommu) +{ + u64 entry; + + if (iommu->ppr_log == NULL) + return; + + iommu_feature_enable(iommu, CONTROL_PPR_EN); + + entry = iommu_virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512; + + memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET, + &entry, sizeof(entry)); + + /* set head and tail to zero manually */ + writel(0x00, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); + writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); + + iommu_feature_enable(iommu, CONTROL_PPRINT_EN); + iommu_feature_enable(iommu, CONTROL_PPRLOG_EN); +} + +void __init amd_iommu_free_ppr_log(struct amd_iommu *iommu) +{ + free_pages((unsigned long)iommu->ppr_log, get_order(PPR_LOG_SIZE)); +} + +/* + * This function restarts ppr logging in case the IOMMU experienced + * PPR log overflow. + */ +void amd_iommu_restart_ppr_log(struct amd_iommu *iommu) +{ + amd_iommu_restart_log(iommu, "PPR", CONTROL_PPRINT_EN, + CONTROL_PPRLOG_EN, MMIO_STATUS_PPR_RUN_MASK, + MMIO_STATUS_PPR_OVERFLOW_MASK); +} + +void amd_iommu_poll_ppr_log(struct amd_iommu *iommu) +{ + u32 head, tail; + + if (iommu->ppr_log == NULL) + return; + + head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); + tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); + + while (head != tail) { + volatile u64 *raw; + u64 entry[2]; + int i; + + raw = (u64 *)(iommu->ppr_log + head); + + /* + * Hardware bug: Interrupt may arrive before the entry is + * written to memory. If this happens we need to wait for the + * entry to arrive. + */ + for (i = 0; i < LOOP_TIMEOUT; ++i) { + if (PPR_REQ_TYPE(raw[0]) != 0) + break; + udelay(1); + } + + /* Avoid memcpy function-call overhead */ + entry[0] = raw[0]; + entry[1] = raw[1]; + + /* + * To detect the hardware errata 733 we need to clear the + * entry back to zero. This issue does not exist on SNP + * enabled system. Also this buffer is not writeable on + * SNP enabled system. + */ + if (!amd_iommu_snp_en) + raw[0] = raw[1] = 0UL; + + /* Update head pointer of hardware ring-buffer */ + head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE; + writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); + + /* TODO: PPR Handler will be added when we add IOPF support */ + + /* Refresh ring-buffer information */ + head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); + tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); + } +} -- Gitee From f873f6ca1591bb37a4a001764761c32d162b80b2 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Thu, 18 Apr 2024 10:33:50 +0000 Subject: [PATCH 014/143] iommu/amd: Fix PPR interrupt processing logic ANBZ: #13617 commit 7c5b7176f0c3996adbe853adb1f857bd4f82b1e2 upstream. * Do not re-read ppr head pointer as its just updated by the driver. * Do not read PPR buffer tail pointer inside while loop. If IOMMU generates PPR events continuously then completing interrupt processing takes long time. In worst case it may cause infinite loop. Suggested-by: Tom Lendacky Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240418103400.6229-6-vasant.hegde@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/ppr.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/iommu/amd/ppr.c b/drivers/iommu/amd/ppr.c index 1f76ed549ec1..65db1745f40c 100644 --- a/drivers/iommu/amd/ppr.c +++ b/drivers/iommu/amd/ppr.c @@ -106,9 +106,5 @@ void amd_iommu_poll_ppr_log(struct amd_iommu *iommu) writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); /* TODO: PPR Handler will be added when we add IOPF support */ - - /* Refresh ring-buffer information */ - head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); - tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); } } -- Gitee From 9500f35a2359e4e08a765dafbbf739727647e4ff Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Thu, 18 Apr 2024 10:33:51 +0000 Subject: [PATCH 015/143] iommu/amd: Introduce iommu_dev_data.max_pasids ANBZ: #13617 commit a0c47f233e683e6a81fced5c9c9cef6fa0da9446 upstream. This variable will track the number of PASIDs supported by the device. If IOMMU or device doesn't support PASID then it will be zero. This will be used while allocating GCR3 table to decide required number of PASID table levels. Also in PASID bind path it will use this variable to check whether device supports PASID or not. Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240418103400.6229-7-vasant.hegde@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/amd_iommu_types.h | 1 + drivers/iommu/amd/iommu.c | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index d1fed5fc219b..03442e86ae44 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -813,6 +813,7 @@ struct iommu_dev_data { struct device *dev; u16 devid; /* PCI Device ID */ + u32 max_pasids; /* Max supported PASIDs */ u32 flags; /* Holds AMD_IOMMU_DEVICE_FLAG_<*> */ int ats_qdep; u8 ats_enabled :1; /* ATS state */ diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index cdd4a58d9155..096c4b26d0ca 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2114,6 +2114,7 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev) { struct iommu_device *iommu_dev; struct amd_iommu *iommu; + struct iommu_dev_data *dev_data; int ret; if (!check_device(dev)) @@ -2140,6 +2141,17 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev) iommu_dev = &iommu->iommu; } + /* + * If IOMMU and device supports PASID then it will contain max + * supported PASIDs, else it will be zero. + */ + dev_data = dev_iommu_priv_get(dev); + if (amd_iommu_pasid_supported() && dev_is_pci(dev) && + pdev_pasid_supported(dev_data)) { + dev_data->max_pasids = min_t(u32, iommu->iommu.max_pasids, + pci_max_pasids(to_pci_dev(dev))); + } + iommu_completion_wait(iommu); return iommu_dev; -- Gitee From dbf16ae3e4698ba192d9b8225c2c4abedd849949 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Thu, 18 Apr 2024 10:33:52 +0000 Subject: [PATCH 016/143] iommu/amd: Setup GCR3 table in advance if domain is SVA capable ANBZ: #13617 commit c9e8701132e6cc162d082e7dad8a2e9110f5f8fd upstream. SVA can be supported if domain is in passthrough mode or paging domain with v2 page table. Current code sets up GCR3 table for domain with v2 page table only. Setup GCR3 table for all SVA capable domains. - Move GCR3 init/destroy to separate function. - Change default GCR3 table to use MAX supported PASIDs. Ideally it should use 1 level PASID table as its using PASID zero only. But we don't have support to extend PASID table yet. We will fix this later. - When domain is configured with passthrough mode, allocate default GCR3 table only if device is SVA capable. Note that in attach_device() path it will not know whether device will use SVA or not. If device is attached to passthrough domain and if it doesn't use SVA then GCR3 table will never be used. We will endup wasting memory allocated for GCR3 table. This is done to avoid DTE update when attaching PASID to device. Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240418103400.6229-8-vasant.hegde@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/iommu.c | 86 ++++++++++++++++++++++++++++++++------- 1 file changed, 71 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 096c4b26d0ca..ca38f59ac1bc 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -90,6 +90,21 @@ static inline bool pdom_is_v2_pgtbl_mode(struct protection_domain *pdom) return (pdom && (pdom->pd_mode == PD_MODE_V2)); } +static inline bool pdom_is_in_pt_mode(struct protection_domain *pdom) +{ + return (pdom->domain.type == IOMMU_DOMAIN_IDENTITY); +} + +/* + * We cannot support PASID w/ existing v1 page table in the same domain + * since it will be nested. However, existing domain w/ v2 page table + * or passthrough mode can be used for PASID. + */ +static inline bool pdom_is_sva_capable(struct protection_domain *pdom) +{ + return pdom_is_v2_pgtbl_mode(pdom) || pdom_is_in_pt_mode(pdom); +} + static inline int get_acpihid_device_id(struct device *dev, struct acpihid_map_entry **entry) { @@ -1974,6 +1989,58 @@ void amd_iommu_dev_update_dte(struct iommu_dev_data *dev_data, bool set) iommu_completion_wait(iommu); } +/* + * If domain is SVA capable then initialize GCR3 table. Also if domain is + * in v2 page table mode then update GCR3[0]. + */ +static int init_gcr3_table(struct iommu_dev_data *dev_data, + struct protection_domain *pdom) +{ + struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); + int max_pasids = dev_data->max_pasids; + int ret = 0; + + /* + * If domain is in pt mode then setup GCR3 table only if device + * is PASID capable + */ + if (pdom_is_in_pt_mode(pdom) && !pdev_pasid_supported(dev_data)) + return ret; + + /* + * By default, setup GCR3 table to support MAX PASIDs + * supported by the device/IOMMU. + */ + ret = setup_gcr3_table(&dev_data->gcr3_info, iommu, + max_pasids > 0 ? max_pasids : 1); + if (ret) + return ret; + + /* Setup GCR3[0] only if domain is setup with v2 page table mode */ + if (!pdom_is_v2_pgtbl_mode(pdom)) + return ret; + + ret = update_gcr3(dev_data, 0, iommu_virt_to_phys(pdom->iop.pgd), true); + if (ret) + free_gcr3_table(&dev_data->gcr3_info); + + return ret; +} + +static void destroy_gcr3_table(struct iommu_dev_data *dev_data, + struct protection_domain *pdom) +{ + struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info; + + if (pdom_is_v2_pgtbl_mode(pdom)) + update_gcr3(dev_data, 0, 0, false); + + if (gcr3_info->gcr3_tbl == NULL) + return; + + free_gcr3_table(gcr3_info); +} + static int do_attach(struct iommu_dev_data *dev_data, struct protection_domain *domain) { @@ -1992,19 +2059,10 @@ static int do_attach(struct iommu_dev_data *dev_data, domain->dev_iommu[iommu->index] += 1; domain->dev_cnt += 1; - /* Init GCR3 table and update device table */ - if (domain->pd_mode == PD_MODE_V2) { - /* By default, setup GCR3 table to support single PASID */ - ret = setup_gcr3_table(&dev_data->gcr3_info, iommu, 1); + if (pdom_is_sva_capable(domain)) { + ret = init_gcr3_table(dev_data, domain); if (ret) return ret; - - ret = update_gcr3(dev_data, 0, - iommu_virt_to_phys(domain->iop.pgd), true); - if (ret) { - free_gcr3_table(&dev_data->gcr3_info); - return ret; - } } /* Update device table */ @@ -2019,10 +2077,8 @@ static void do_detach(struct iommu_dev_data *dev_data) struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); /* Clear GCR3 table */ - if (domain->pd_mode == PD_MODE_V2) { - update_gcr3(dev_data, 0, 0, false); - free_gcr3_table(&dev_data->gcr3_info); - } + if (pdom_is_sva_capable(domain)) + destroy_gcr3_table(dev_data, domain); /* Update data structures */ dev_data->domain = NULL; -- Gitee From 742c46190647614181b1f9c0e8880e8bccb9e0f5 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Thu, 18 Apr 2024 10:33:53 +0000 Subject: [PATCH 017/143] iommu/amd: Enable PCI features based on attached domain capability ANBZ: #13617 commit 25efbb055863079bbd47b32fc6d3c4a6f082daf1 upstream. Commit eda8c2860ab6 ("iommu/amd: Enable device ATS/PASID/PRI capabilities independently") changed the way it enables device capability while attaching devices. I missed to account the attached domain capability. Meaning if domain is not capable of handling PASID/PRI (ex: paging domain with v1 page table) then enabling device feature is not required. This patch enables PASID/PRI only if domain is capable of handling SVA. Also move pci feature enablement to do_attach() function so that we make SVA capability in one place. Finally make PRI enable/disable functions as static functions. Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240418103400.6229-9-vasant.hegde@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/amd_iommu.h | 4 ---- drivers/iommu/amd/iommu.c | 22 ++++++++++++++-------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 712c8782b99f..b24d5fed6b37 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -46,10 +46,6 @@ extern int amd_iommu_gpt_level; bool amd_iommu_pasid_supported(void); -/* Device capabilities */ -int amd_iommu_pdev_enable_cap_pri(struct pci_dev *pdev); -void amd_iommu_pdev_disable_cap_pri(struct pci_dev *pdev); - /* GCR3 setup */ int amd_iommu_set_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid, unsigned long gcr3); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index ca38f59ac1bc..0edf1657c568 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -400,7 +400,7 @@ static inline void pdev_disable_cap_ats(struct pci_dev *pdev) } } -int amd_iommu_pdev_enable_cap_pri(struct pci_dev *pdev) +static inline int pdev_enable_cap_pri(struct pci_dev *pdev) { struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev); int ret = -EINVAL; @@ -408,6 +408,9 @@ int amd_iommu_pdev_enable_cap_pri(struct pci_dev *pdev) if (dev_data->pri_enabled) return 0; + if (!dev_data->ats_enabled) + return 0; + if (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) { /* * First reset the PRI state of the device. @@ -424,7 +427,7 @@ int amd_iommu_pdev_enable_cap_pri(struct pci_dev *pdev) return ret; } -void amd_iommu_pdev_disable_cap_pri(struct pci_dev *pdev) +static inline void pdev_disable_cap_pri(struct pci_dev *pdev) { struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev); @@ -466,15 +469,14 @@ static void pdev_enable_caps(struct pci_dev *pdev) { pdev_enable_cap_ats(pdev); pdev_enable_cap_pasid(pdev); - amd_iommu_pdev_enable_cap_pri(pdev); - + pdev_enable_cap_pri(pdev); } static void pdev_disable_caps(struct pci_dev *pdev) { pdev_disable_cap_ats(pdev); pdev_disable_cap_pasid(pdev); - amd_iommu_pdev_disable_cap_pri(pdev); + pdev_disable_cap_pri(pdev); } /* @@ -2045,6 +2047,7 @@ static int do_attach(struct iommu_dev_data *dev_data, struct protection_domain *domain) { struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); + struct pci_dev *pdev; int ret = 0; /* Update data structures */ @@ -2059,10 +2062,16 @@ static int do_attach(struct iommu_dev_data *dev_data, domain->dev_iommu[iommu->index] += 1; domain->dev_cnt += 1; + pdev = dev_is_pci(dev_data->dev) ? to_pci_dev(dev_data->dev) : NULL; if (pdom_is_sva_capable(domain)) { ret = init_gcr3_table(dev_data, domain); if (ret) return ret; + + if (pdev) + pdev_enable_caps(pdev); + } else if (pdev) { + pdev_enable_cap_ats(pdev); } /* Update device table */ @@ -2117,9 +2126,6 @@ static int attach_device(struct device *dev, goto out; } - if (dev_is_pci(dev)) - pdev_enable_caps(to_pci_dev(dev)); - ret = do_attach(dev_data, domain); out: -- Gitee From b07d0b6faf630cad3e63cbf101a201c8a7297924 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Thu, 18 Apr 2024 10:33:54 +0000 Subject: [PATCH 018/143] iommu/amd: Define per-IOMMU iopf_queue ANBZ: #13617 commit 61928bab9d26b147d41d9b9a4d2328d0e1d6c0c0 upstream. AMD IOMMU hardware supports PCI Peripheral Paging Request (PPR) using a PPR log, which is a circular buffer containing requests from downstream end-point devices. There is one PPR log per IOMMU instance. Therefore, allocate an iopf_queue per IOMMU instance during driver initialization, and free the queue during driver deinitialization. Also rename enable_iommus_v2() -> enable_iommus_ppr() to reflect its usage. And add amd_iommu_gt_ppr_supported() check before enabling PPR log. Signed-off-by: Suravee Suthikulpanit Co-developed-by: Vasant Hegde Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240418103400.6229-10-vasant.hegde@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/amd_iommu.h | 4 ++++ drivers/iommu/amd/amd_iommu_types.h | 4 ++++ drivers/iommu/amd/init.c | 18 +++++++++++++++-- drivers/iommu/amd/ppr.c | 31 +++++++++++++++++++++++++++++ 4 files changed, 55 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index b24d5fed6b37..55d16717c371 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -46,6 +46,10 @@ extern int amd_iommu_gpt_level; bool amd_iommu_pasid_supported(void); +/* IOPF */ +int amd_iommu_iopf_init(struct amd_iommu *iommu); +void amd_iommu_iopf_uninit(struct amd_iommu *iommu); + /* GCR3 setup */ int amd_iommu_set_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid, unsigned long gcr3); diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 03442e86ae44..1a014dca7c2e 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -762,6 +762,10 @@ struct amd_iommu { /* DebugFS Info */ struct dentry *debugfs; #endif + + /* IOPF support */ + struct iopf_queue *iopf_queue; + unsigned char iopfq_name[32]; }; static inline struct amd_iommu *dev_to_amd_iommu(struct device *dev) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 2457d3d321ab..791f289a275d 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -1642,6 +1642,7 @@ static void __init free_iommu_one(struct amd_iommu *iommu) amd_iommu_free_ppr_log(iommu); free_ga_log(iommu); iommu_unmap_mmio_space(iommu); + amd_iommu_iopf_uninit(iommu); } static void __init free_iommu_all(void) @@ -2111,6 +2112,16 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) if (ret) return ret; + /* + * Allocate per IOMMU IOPF queue here so that in attach device path, + * PRI capable device can be added to IOPF queue + */ + if (amd_iommu_gt_ppr_supported()) { + ret = amd_iommu_iopf_init(iommu); + if (ret) + return ret; + } + iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL); return pci_enable_device(iommu->dev); @@ -2796,10 +2807,13 @@ static void early_enable_iommus(void) } } -static void enable_iommus_v2(void) +static void enable_iommus_ppr(void) { struct amd_iommu *iommu; + if (!amd_iommu_gt_ppr_supported()) + return; + for_each_iommu(iommu) amd_iommu_enable_ppr_log(iommu); } @@ -3144,7 +3158,7 @@ static int amd_iommu_enable_interrupts(void) * PPR and GA log interrupt for all IOMMUs. */ enable_iommus_vapic(); - enable_iommus_v2(); + enable_iommus_ppr(); out: return ret; diff --git a/drivers/iommu/amd/ppr.c b/drivers/iommu/amd/ppr.c index 65db1745f40c..d4e08800beac 100644 --- a/drivers/iommu/amd/ppr.c +++ b/drivers/iommu/amd/ppr.c @@ -108,3 +108,34 @@ void amd_iommu_poll_ppr_log(struct amd_iommu *iommu) /* TODO: PPR Handler will be added when we add IOPF support */ } } + +/************************************************************** + * + * IOPF handling stuff + */ + +/* Setup per-IOMMU IOPF queue if not exist. */ +int amd_iommu_iopf_init(struct amd_iommu *iommu) +{ + int ret = 0; + + if (iommu->iopf_queue) + return ret; + + snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name), + "amdiommu-%#x-iopfq", + PCI_SEG_DEVID_TO_SBDF(iommu->pci_seg->id, iommu->devid)); + + iommu->iopf_queue = iopf_queue_alloc(iommu->iopfq_name); + if (!iommu->iopf_queue) + ret = -ENOMEM; + + return ret; +} + +/* Destroy per-IOMMU IOPF queue if no longer needed. */ +void amd_iommu_iopf_uninit(struct amd_iommu *iommu) +{ + iopf_queue_free(iommu->iopf_queue); + iommu->iopf_queue = NULL; +} -- Gitee From 049c636820835ae9ddc491261673812520197907 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Thu, 18 Apr 2024 10:33:55 +0000 Subject: [PATCH 019/143] iommu/amd: Add support for page response ANBZ: #13617 commit 405e2f122b83363f6793c5cb086cede536495b4f upstream. This generates AMD IOMMU COMPLETE_PPR_REQUEST for the specified device with the specified PRI Response Code. Also update amd_iommu_complete_ppr() to accept 'struct device' instead of pdev as it just need device reference. Signed-off-by: Suravee Suthikulpanit Signed-off-by: Wei Huang Co-developed-by: Vasant Hegde Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240418103400.6229-11-vasant.hegde@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/amd_iommu.h | 5 +++-- drivers/iommu/amd/iommu.c | 8 ++++---- drivers/iommu/amd/ppr.c | 6 ++++++ 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 55d16717c371..ee8cb741d24e 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -49,6 +49,8 @@ bool amd_iommu_pasid_supported(void); /* IOPF */ int amd_iommu_iopf_init(struct amd_iommu *iommu); void amd_iommu_iopf_uninit(struct amd_iommu *iommu); +void amd_iommu_page_response(struct device *dev, struct iopf_fault *evt, + struct iommu_page_response *resp); /* GCR3 setup */ int amd_iommu_set_gcr3(struct iommu_dev_data *dev_data, @@ -60,8 +62,7 @@ int __init amd_iommu_alloc_ppr_log(struct amd_iommu *iommu); void __init amd_iommu_free_ppr_log(struct amd_iommu *iommu); void amd_iommu_enable_ppr_log(struct amd_iommu *iommu); void amd_iommu_poll_ppr_log(struct amd_iommu *iommu); -int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid, - int status, int tag); +int amd_iommu_complete_ppr(struct device *dev, u32 pasid, int status, int tag); /* * This function flushes all internal caches of diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 0edf1657c568..4004e525b64e 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1638,15 +1638,14 @@ void amd_iommu_domain_update(struct protection_domain *domain) amd_iommu_domain_flush_all(domain); } -int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid, - int status, int tag) +int amd_iommu_complete_ppr(struct device *dev, u32 pasid, int status, int tag) { struct iommu_dev_data *dev_data; struct amd_iommu *iommu; struct iommu_cmd cmd; - dev_data = dev_iommu_priv_get(&pdev->dev); - iommu = get_amd_iommu_from_dev(&pdev->dev); + dev_data = dev_iommu_priv_get(dev); + iommu = get_amd_iommu_from_dev(dev); build_complete_ppr(&cmd, dev_data->devid, pasid, status, tag, dev_data->pri_tlp); @@ -2871,6 +2870,7 @@ const struct iommu_ops amd_iommu_ops = { .def_domain_type = amd_iommu_def_domain_type, .dev_enable_feat = amd_iommu_dev_enable_feature, .dev_disable_feat = amd_iommu_dev_disable_feature, + .page_response = amd_iommu_page_response, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = amd_iommu_attach_device, .map_pages = amd_iommu_map_pages, diff --git a/drivers/iommu/amd/ppr.c b/drivers/iommu/amd/ppr.c index d4e08800beac..a33ce537b76e 100644 --- a/drivers/iommu/amd/ppr.c +++ b/drivers/iommu/amd/ppr.c @@ -139,3 +139,9 @@ void amd_iommu_iopf_uninit(struct amd_iommu *iommu) iopf_queue_free(iommu->iopf_queue); iommu->iopf_queue = NULL; } + +void amd_iommu_page_response(struct device *dev, struct iopf_fault *evt, + struct iommu_page_response *resp) +{ + amd_iommu_complete_ppr(dev, resp->pasid, resp->code, resp->grpid); +} -- Gitee From 8693f3d081b2efc199ec095f59ad3d2029a31f7a Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 18 Apr 2024 10:33:56 +0000 Subject: [PATCH 020/143] iommu/amd: Add IO page fault notifier handler ANBZ: #13617 commit 978d626b8f1a239acc635323d731c77eae54eb61 upstream. Whenever there is a page fault IOMMU logs entry to ppr log and sends interrupt to host. We have to handle the page fault and respond to IOMMU. Add support to validate page fault request and hook it to core iommu page fault handler. Signed-off-by: Wei Huang Co-developed-by: Suravee Suthikulpanit Signed-off-by: Suravee Suthikulpanit Co-developed-by: Vasant Hegde Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240418103400.6229-12-vasant.hegde@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/amd_iommu_types.h | 8 +++ drivers/iommu/amd/ppr.c | 100 +++++++++++++++++++++++++++- 2 files changed, 107 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 1a014dca7c2e..61ca32b7bb07 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -251,6 +251,14 @@ #define PPR_ENTRY_SIZE 16 #define PPR_LOG_SIZE (PPR_ENTRY_SIZE * PPR_LOG_ENTRIES) +/* PAGE_SERVICE_REQUEST PPR Log Buffer Entry flags */ +#define PPR_FLAG_EXEC 0x002 /* Execute permission requested */ +#define PPR_FLAG_READ 0x004 /* Read permission requested */ +#define PPR_FLAG_WRITE 0x020 /* Write permission requested */ +#define PPR_FLAG_US 0x040 /* 1: User, 0: Supervisor */ +#define PPR_FLAG_RVSD 0x080 /* Reserved bit not zero */ +#define PPR_FLAG_GN 0x100 /* GVA and PASID is valid */ + #define PPR_REQ_TYPE(x) (((x) >> 60) & 0xfULL) #define PPR_FLAGS(x) (((x) >> 48) & 0xfffULL) #define PPR_DEVID(x) ((x) & 0xffffULL) diff --git a/drivers/iommu/amd/ppr.c b/drivers/iommu/amd/ppr.c index a33ce537b76e..9bdd1db5f60a 100644 --- a/drivers/iommu/amd/ppr.c +++ b/drivers/iommu/amd/ppr.c @@ -60,6 +60,103 @@ void amd_iommu_restart_ppr_log(struct amd_iommu *iommu) MMIO_STATUS_PPR_OVERFLOW_MASK); } +static inline u32 ppr_flag_to_fault_perm(u16 flag) +{ + int perm = 0; + + if (flag & PPR_FLAG_READ) + perm |= IOMMU_FAULT_PERM_READ; + if (flag & PPR_FLAG_WRITE) + perm |= IOMMU_FAULT_PERM_WRITE; + if (flag & PPR_FLAG_EXEC) + perm |= IOMMU_FAULT_PERM_EXEC; + if (!(flag & PPR_FLAG_US)) + perm |= IOMMU_FAULT_PERM_PRIV; + + return perm; +} + +static bool ppr_is_valid(struct amd_iommu *iommu, u64 *raw) +{ + struct device *dev = iommu->iommu.dev; + u16 devid = PPR_DEVID(raw[0]); + + if (!(PPR_FLAGS(raw[0]) & PPR_FLAG_GN)) { + dev_dbg(dev, "PPR logged [Request ignored due to GN=0 (device=%04x:%02x:%02x.%x " + "pasid=0x%05llx address=0x%llx flags=0x%04llx tag=0x%03llx]\n", + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + PPR_PASID(raw[0]), raw[1], PPR_FLAGS(raw[0]), PPR_TAG(raw[0])); + return false; + } + + if (PPR_FLAGS(raw[0]) & PPR_FLAG_RVSD) { + dev_dbg(dev, "PPR logged [Invalid request format (device=%04x:%02x:%02x.%x " + "pasid=0x%05llx address=0x%llx flags=0x%04llx tag=0x%03llx]\n", + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + PPR_PASID(raw[0]), raw[1], PPR_FLAGS(raw[0]), PPR_TAG(raw[0])); + return false; + } + + return true; +} + +static void iommu_call_iopf_notifier(struct amd_iommu *iommu, u64 *raw) +{ + struct iommu_dev_data *dev_data; + struct iopf_fault event; + struct pci_dev *pdev; + u16 devid = PPR_DEVID(raw[0]); + + if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) { + pr_info_ratelimited("Unknown PPR request received\n"); + return; + } + + pdev = pci_get_domain_bus_and_slot(iommu->pci_seg->id, + PCI_BUS_NUM(devid), devid & 0xff); + if (!pdev) + return; + + if (!ppr_is_valid(iommu, raw)) + goto out; + + memset(&event, 0, sizeof(struct iopf_fault)); + + event.fault.type = IOMMU_FAULT_PAGE_REQ; + event.fault.prm.perm = ppr_flag_to_fault_perm(PPR_FLAGS(raw[0])); + event.fault.prm.addr = (u64)(raw[1] & PAGE_MASK); + event.fault.prm.pasid = PPR_PASID(raw[0]); + event.fault.prm.grpid = PPR_TAG(raw[0]) & 0x1FF; + + /* + * PASID zero is used for requests from the I/O device without + * a PASID + */ + dev_data = dev_iommu_priv_get(&pdev->dev); + if (event.fault.prm.pasid == 0 || + event.fault.prm.pasid >= dev_data->max_pasids) { + pr_info_ratelimited("Invalid PASID : 0x%x, device : 0x%x\n", + event.fault.prm.pasid, pdev->dev.id); + goto out; + } + + event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID; + event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; + if (PPR_TAG(raw[0]) & 0x200) + event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE; + + /* Submit event */ + iommu_report_device_fault(&pdev->dev, &event); + + return; + +out: + /* Nobody cared, abort */ + amd_iommu_complete_ppr(&pdev->dev, PPR_PASID(raw[0]), + IOMMU_PAGE_RESP_FAILURE, + PPR_TAG(raw[0]) & 0x1FF); +} + void amd_iommu_poll_ppr_log(struct amd_iommu *iommu) { u32 head, tail; @@ -105,7 +202,8 @@ void amd_iommu_poll_ppr_log(struct amd_iommu *iommu) head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE; writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); - /* TODO: PPR Handler will be added when we add IOPF support */ + /* Handle PPR entry */ + iommu_call_iopf_notifier(iommu, entry); } } -- Gitee From b8aaa0101c56e94619546e651b68c161b49deac2 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Thu, 18 Apr 2024 10:33:57 +0000 Subject: [PATCH 021/143] iommu/amd: Add support for enable/disable IOPF ANBZ: #13617 commit c4cb23111103a841c2df30058597398443bcad5f upstream. Return success from enable_feature(IOPF) path as this interface is going away. Instead we will enable/disable IOPF support in attach/detach device path. In attach device path, if device is capable of PRI, then we will add it to per IOMMU IOPF queue and enable PPR support in IOMMU. Also it will attach device to domain even if it fails to enable PRI or add device to IOPF queue as device can continue to work without PRI support. In detach device patch it follows following sequence: - Flush the queue for the given device - Disable PPR support in DTE[devid] - Remove device from IOPF queue - Disable device PRI Also add IOMMU_IOPF as dependency to AMD_IOMMU driver. Co-developed-by: Suravee Suthikulpanit Signed-off-by: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240418103400.6229-13-vasant.hegde@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/Kconfig | 1 + drivers/iommu/amd/amd_iommu.h | 4 ++++ drivers/iommu/amd/iommu.c | 39 ++++++++++++++++++++++++++------- drivers/iommu/amd/ppr.c | 41 +++++++++++++++++++++++++++++++++++ 4 files changed, 77 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig index 443b2c13c37b..d563f6d496ca 100644 --- a/drivers/iommu/amd/Kconfig +++ b/drivers/iommu/amd/Kconfig @@ -10,6 +10,7 @@ config AMD_IOMMU select IOMMU_API select IOMMU_IOVA select IOMMU_IO_PGTABLE + select IOMMU_IOPF select IOMMUFD_DRIVER if IOMMUFD depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE help diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index ee8cb741d24e..14fcec846b24 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -51,6 +51,10 @@ int amd_iommu_iopf_init(struct amd_iommu *iommu); void amd_iommu_iopf_uninit(struct amd_iommu *iommu); void amd_iommu_page_response(struct device *dev, struct iopf_fault *evt, struct iommu_page_response *resp); +int amd_iommu_iopf_add_device(struct amd_iommu *iommu, + struct iommu_dev_data *dev_data); +void amd_iommu_iopf_remove_device(struct amd_iommu *iommu, + struct iommu_dev_data *dev_data); /* GCR3 setup */ int amd_iommu_set_gcr3(struct iommu_dev_data *dev_data, diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 4004e525b64e..74cbbf391c4c 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2067,8 +2067,17 @@ static int do_attach(struct iommu_dev_data *dev_data, if (ret) return ret; - if (pdev) + if (pdev) { pdev_enable_caps(pdev); + + /* + * Device can continue to function even if IOPF + * enablement failed. Hence in error path just + * disable device PRI support. + */ + if (amd_iommu_iopf_add_device(iommu, dev_data)) + pdev_disable_cap_pri(pdev); + } } else if (pdev) { pdev_enable_cap_ats(pdev); } @@ -2140,12 +2149,11 @@ static int attach_device(struct device *dev, */ static void detach_device(struct device *dev) { - struct protection_domain *domain; - struct iommu_dev_data *dev_data; + struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev); + struct protection_domain *domain = dev_data->domain; + struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); unsigned long flags; - - dev_data = dev_iommu_priv_get(dev); - domain = dev_data->domain; + bool ppr = dev_data->ppr; spin_lock_irqsave(&domain->lock, flags); @@ -2160,8 +2168,19 @@ static void detach_device(struct device *dev) if (WARN_ON(!dev_data->domain)) goto out; + if (ppr) { + iopf_queue_flush_dev(dev); + + /* Updated here so that it gets reflected in DTE */ + dev_data->ppr = false; + } + do_detach(dev_data); + /* Remove IOPF handler */ + if (ppr) + amd_iommu_iopf_remove_device(iommu, dev_data); + if (dev_is_pci(dev)) pdev_disable_caps(to_pci_dev(dev)); @@ -2834,9 +2853,11 @@ static const struct iommu_dirty_ops amd_dirty_ops = { static int amd_iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) { - int ret; + int ret = 0; switch (feat) { + case IOMMU_DEV_FEAT_IOPF: + break; default: ret = -EINVAL; break; @@ -2847,9 +2868,11 @@ static int amd_iommu_dev_enable_feature(struct device *dev, static int amd_iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) { - int ret; + int ret = 0; switch (feat) { + case IOMMU_DEV_FEAT_IOPF: + break; default: ret = -EINVAL; break; diff --git a/drivers/iommu/amd/ppr.c b/drivers/iommu/amd/ppr.c index 9bdd1db5f60a..0463daa2d46b 100644 --- a/drivers/iommu/amd/ppr.c +++ b/drivers/iommu/amd/ppr.c @@ -243,3 +243,44 @@ void amd_iommu_page_response(struct device *dev, struct iopf_fault *evt, { amd_iommu_complete_ppr(dev, resp->pasid, resp->code, resp->grpid); } + +int amd_iommu_iopf_add_device(struct amd_iommu *iommu, + struct iommu_dev_data *dev_data) +{ + unsigned long flags; + int ret = 0; + + if (!dev_data->pri_enabled) + return ret; + + raw_spin_lock_irqsave(&iommu->lock, flags); + + if (!iommu->iopf_queue) { + ret = -EINVAL; + goto out_unlock; + } + + ret = iopf_queue_add_device(iommu->iopf_queue, dev_data->dev); + if (ret) + goto out_unlock; + + dev_data->ppr = true; + +out_unlock: + raw_spin_unlock_irqrestore(&iommu->lock, flags); + return ret; +} + +/* Its assumed that caller has verified that device was added to iopf queue */ +void amd_iommu_iopf_remove_device(struct amd_iommu *iommu, + struct iommu_dev_data *dev_data) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&iommu->lock, flags); + + iopf_queue_remove_device(iommu->iopf_queue, dev_data->dev); + dev_data->ppr = false; + + raw_spin_unlock_irqrestore(&iommu->lock, flags); +} -- Gitee From 74fd4a26ee9425c4471e9413467dd451179b8176 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Thu, 18 Apr 2024 10:33:58 +0000 Subject: [PATCH 022/143] iommu/amd: Initial SVA support for AMD IOMMU ANBZ: #13617 commit 1af95763e0a33e854b4c0a961756719eb8a2b74d upstream. This includes : - Add data structure to track per protection domain dev/pasid binding details protection_domain->dev_data_list will track attached list of dev_data/PASIDs. - Move 'to_pdomain()' to header file - Add iommu_sva_set_dev_pasid(). It will check whether PASID is supported or not. Also adds PASID to SVA protection domain list as well as to device GCR3 table. - Add iommu_ops.remove_dev_pasid support. It will unbind PASID from device. Also remove pasid data from protection domain device list. - Add IOMMU_SVA as dependency to AMD_IOMMU driver For a given PASID, iommu_set_dev_pasid() will bind all devices to same SVA protection domain (1 PASID : 1 SVA protection domain : N devices). This protection domain is different from device protection domain (one that's mapped in attach_device() path). IOMMU uses domain ID for caching, invalidation, etc. In SVA mode it will use per-device-domain-ID. Hence in invalidation path we retrieve domain ID from gcr3_info_table structure and use that for invalidation. Co-developed-by: Wei Huang Signed-off-by: Wei Huang Co-developed-by: Suravee Suthikulpanit Signed-off-by: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240418103400.6229-14-vasant.hegde@amd.com Signed-off-by: Joerg Roedel [ Shuai Xue: fix merge conflict to add iommu_domain in amd_iommu_remove_dev_pasid() as upstream merge commit 2bd5059c6 does ] Signed-off-by: Shuai Xue --- drivers/iommu/amd/Kconfig | 1 + drivers/iommu/amd/Makefile | 2 +- drivers/iommu/amd/amd_iommu.h | 12 +++ drivers/iommu/amd/amd_iommu_types.h | 19 +++++ drivers/iommu/amd/iommu.c | 7 +- drivers/iommu/amd/pasid.c | 118 ++++++++++++++++++++++++++++ 6 files changed, 153 insertions(+), 6 deletions(-) create mode 100644 drivers/iommu/amd/pasid.c diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig index d563f6d496ca..68d8fc107cb9 100644 --- a/drivers/iommu/amd/Kconfig +++ b/drivers/iommu/amd/Kconfig @@ -10,6 +10,7 @@ config AMD_IOMMU select IOMMU_API select IOMMU_IOVA select IOMMU_IO_PGTABLE + select IOMMU_SVA select IOMMU_IOPF select IOMMUFD_DRIVER if IOMMUFD depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile index 93b11b6d764f..9de33b2d42f5 100644 --- a/drivers/iommu/amd/Makefile +++ b/drivers/iommu/amd/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o ppr.o +obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o ppr.o pasid.o obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 14fcec846b24..a145395677dc 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -44,6 +44,13 @@ extern int amd_iommu_guest_ir; extern enum io_pgtable_fmt amd_iommu_pgtable; extern int amd_iommu_gpt_level; +/* Protection domain ops */ +int iommu_sva_set_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid); +void amd_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid, + struct iommu_domain *domain); + +/* SVA/PASID */ bool amd_iommu_pasid_supported(void); /* IOPF */ @@ -166,6 +173,11 @@ static inline struct amd_iommu *get_amd_iommu_from_dev_data(struct iommu_dev_dat return iommu_get_iommu_dev(dev_data->dev, struct amd_iommu, iommu); } +static inline struct protection_domain *to_pdomain(struct iommu_domain *dom) +{ + return container_of(dom, struct protection_domain, domain); +} + bool translation_pre_enabled(struct amd_iommu *iommu); bool amd_iommu_is_attach_deferred(struct device *dev); int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line); diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 61ca32b7bb07..ed9971cd3f0e 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -8,7 +8,9 @@ #ifndef _ASM_X86_AMD_IOMMU_TYPES_H #define _ASM_X86_AMD_IOMMU_TYPES_H +#include #include +#include #include #include #include @@ -511,6 +513,11 @@ extern struct kmem_cache *amd_iommu_irq_cache; list_for_each_entry((iommu), &amd_iommu_list, list) #define for_each_iommu_safe(iommu, next) \ list_for_each_entry_safe((iommu), (next), &amd_iommu_list, list) +/* Making iterating over protection_domain->dev_data_list easier */ +#define for_each_pdom_dev_data(pdom_dev_data, pdom) \ + list_for_each_entry(pdom_dev_data, &pdom->dev_data_list, list) +#define for_each_pdom_dev_data_safe(pdom_dev_data, next, pdom) \ + list_for_each_entry_safe((pdom_dev_data), (next), &pdom->dev_data_list, list) struct amd_iommu; struct iommu_domain; @@ -552,6 +559,16 @@ enum protection_domain_mode { PD_MODE_V2, }; +/* Track dev_data/PASID list for the protection domain */ +struct pdom_dev_data { + /* Points to attached device data */ + struct iommu_dev_data *dev_data; + /* PASID attached to the protection domain */ + ioasid_t pasid; + /* For protection_domain->dev_data_list */ + struct list_head list; +}; + /* * This structure contains generic data for IOMMU protection domains * independent of their use. @@ -568,6 +585,8 @@ struct protection_domain { bool dirty_tracking; /* dirty tracking is enabled in the domain */ unsigned dev_cnt; /* devices assigned to this domain */ unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ + + struct list_head dev_data_list; /* List of pdom_dev_data */ }; /* diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 74cbbf391c4c..32e5edaec5e1 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -195,11 +195,6 @@ static struct amd_iommu *rlookup_amd_iommu(struct device *dev) return __rlookup_amd_iommu(seg, PCI_SBDF_TO_DEVID(devid)); } -static struct protection_domain *to_pdomain(struct iommu_domain *dom) -{ - return container_of(dom, struct protection_domain, domain); -} - static struct iommu_dev_data *alloc_dev_data(struct amd_iommu *iommu, u16 devid) { struct iommu_dev_data *dev_data; @@ -2348,6 +2343,7 @@ static struct protection_domain *protection_domain_alloc(unsigned int type) spin_lock_init(&domain->lock); INIT_LIST_HEAD(&domain->dev_list); + INIT_LIST_HEAD(&domain->dev_data_list); domain->nid = NUMA_NO_NODE; switch (type) { @@ -2893,6 +2889,7 @@ const struct iommu_ops amd_iommu_ops = { .def_domain_type = amd_iommu_def_domain_type, .dev_enable_feat = amd_iommu_dev_enable_feature, .dev_disable_feat = amd_iommu_dev_disable_feature, + .remove_dev_pasid = amd_iommu_remove_dev_pasid, .page_response = amd_iommu_page_response, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = amd_iommu_attach_device, diff --git a/drivers/iommu/amd/pasid.c b/drivers/iommu/amd/pasid.c new file mode 100644 index 000000000000..35c7678f9444 --- /dev/null +++ b/drivers/iommu/amd/pasid.c @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024 Advanced Micro Devices, Inc. + */ + +#define pr_fmt(fmt) "AMD-Vi: " fmt +#define dev_fmt(fmt) pr_fmt(fmt) + +#include +#include + +#include "amd_iommu.h" + +static inline bool is_pasid_enabled(struct iommu_dev_data *dev_data) +{ + if (dev_data->pasid_enabled && dev_data->max_pasids && + dev_data->gcr3_info.gcr3_tbl != NULL) + return true; + + return false; +} + +static inline bool is_pasid_valid(struct iommu_dev_data *dev_data, + ioasid_t pasid) +{ + if (pasid > 0 && pasid < dev_data->max_pasids) + return true; + + return false; +} + +static void remove_dev_pasid(struct pdom_dev_data *pdom_dev_data) +{ + /* Update GCR3 table and flush IOTLB */ + amd_iommu_clear_gcr3(pdom_dev_data->dev_data, pdom_dev_data->pasid); + + list_del(&pdom_dev_data->list); + kfree(pdom_dev_data); +} + +/* Clear PASID from device GCR3 table and remove pdom_dev_data from list */ +static void remove_pdom_dev_pasid(struct protection_domain *pdom, + struct device *dev, ioasid_t pasid) +{ + struct pdom_dev_data *pdom_dev_data; + struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev); + + lockdep_assert_held(&pdom->lock); + + for_each_pdom_dev_data(pdom_dev_data, pdom) { + if (pdom_dev_data->dev_data == dev_data && + pdom_dev_data->pasid == pasid) { + remove_dev_pasid(pdom_dev_data); + break; + } + } +} + +int iommu_sva_set_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid) +{ + struct pdom_dev_data *pdom_dev_data; + struct protection_domain *sva_pdom = to_pdomain(domain); + struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev); + unsigned long flags; + int ret = -EINVAL; + + /* PASID zero is used for requests from the I/O device without PASID */ + if (!is_pasid_valid(dev_data, pasid)) + return ret; + + /* Make sure PASID is enabled */ + if (!is_pasid_enabled(dev_data)) + return ret; + + /* Add PASID to protection domain pasid list */ + pdom_dev_data = kzalloc(sizeof(*pdom_dev_data), GFP_KERNEL); + if (pdom_dev_data == NULL) + return ret; + + pdom_dev_data->pasid = pasid; + pdom_dev_data->dev_data = dev_data; + + spin_lock_irqsave(&sva_pdom->lock, flags); + + /* Setup GCR3 table */ + ret = amd_iommu_set_gcr3(dev_data, pasid, + iommu_virt_to_phys(domain->mm->pgd)); + if (ret) { + kfree(pdom_dev_data); + goto out_unlock; + } + + list_add(&pdom_dev_data->list, &sva_pdom->dev_data_list); + +out_unlock: + spin_unlock_irqrestore(&sva_pdom->lock, flags); + return ret; +} + +void amd_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid, + struct iommu_domain *domain) +{ + struct protection_domain *sva_pdom; + unsigned long flags; + + if (!is_pasid_valid(dev_iommu_priv_get(dev), pasid)) + return; + + sva_pdom = to_pdomain(domain); + + spin_lock_irqsave(&sva_pdom->lock, flags); + + /* Remove PASID from dev_data_list */ + remove_pdom_dev_pasid(sva_pdom, dev, pasid); + + spin_unlock_irqrestore(&sva_pdom->lock, flags); +} -- Gitee From d9c628b1a6b5843acde1853579160dec5ca9a2e9 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Thu, 18 Apr 2024 10:34:00 +0000 Subject: [PATCH 023/143] iommu/amd: Add SVA domain support ANBZ: #13617 commit a5a91e54846d35c234939fb9170e035805353049 upstream. - Allocate SVA domain and setup mmu notifier. In free path unregister mmu notifier and free protection domain. - Add mmu notifier callback function. It will retrieve SVA protection domain and invalidates IO/TLB. Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240418103400.6229-16-vasant.hegde@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/amd_iommu.h | 5 ++ drivers/iommu/amd/amd_iommu_types.h | 1 + drivers/iommu/amd/iommu.c | 10 ++-- drivers/iommu/amd/pasid.c | 80 +++++++++++++++++++++++++++++ 4 files changed, 93 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index a145395677dc..5f761bb34b0c 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -45,6 +45,11 @@ extern enum io_pgtable_fmt amd_iommu_pgtable; extern int amd_iommu_gpt_level; /* Protection domain ops */ +struct protection_domain *protection_domain_alloc(unsigned int type); +void protection_domain_free(struct protection_domain *domain); +struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev, + struct mm_struct *mm); +void amd_iommu_domain_free(struct iommu_domain *dom); int iommu_sva_set_dev_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); void amd_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid, diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index ed9971cd3f0e..2b76b5dedc1d 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -586,6 +586,7 @@ struct protection_domain { unsigned dev_cnt; /* devices assigned to this domain */ unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ + struct mmu_notifier mn; /* mmu notifier for the SVA domain */ struct list_head dev_data_list; /* List of pdom_dev_data */ }; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 32e5edaec5e1..71fcd22a0256 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2283,7 +2283,7 @@ static void cleanup_domain(struct protection_domain *domain) WARN_ON(domain->dev_cnt != 0); } -static void protection_domain_free(struct protection_domain *domain) +void protection_domain_free(struct protection_domain *domain) { if (!domain) return; @@ -2326,7 +2326,7 @@ static int protection_domain_init_v2(struct protection_domain *pdom) return 0; } -static struct protection_domain *protection_domain_alloc(unsigned int type) +struct protection_domain *protection_domain_alloc(unsigned int type) { struct io_pgtable_ops *pgtbl_ops; struct protection_domain *domain; @@ -2349,6 +2349,7 @@ static struct protection_domain *protection_domain_alloc(unsigned int type) switch (type) { /* No need to allocate io pgtable ops in passthrough mode */ case IOMMU_DOMAIN_IDENTITY: + case IOMMU_DOMAIN_SVA: return domain; case IOMMU_DOMAIN_DMA: pgtable = amd_iommu_pgtable; @@ -2481,7 +2482,7 @@ amd_iommu_domain_alloc_user(struct device *dev, u32 flags, return do_iommu_domain_alloc(type, dev, flags); } -static void amd_iommu_domain_free(struct iommu_domain *dom) +void amd_iommu_domain_free(struct iommu_domain *dom) { struct protection_domain *domain; unsigned long flags; @@ -2853,6 +2854,7 @@ static int amd_iommu_dev_enable_feature(struct device *dev, switch (feat) { case IOMMU_DEV_FEAT_IOPF: + case IOMMU_DEV_FEAT_SVA: break; default: ret = -EINVAL; @@ -2868,6 +2870,7 @@ static int amd_iommu_dev_disable_feature(struct device *dev, switch (feat) { case IOMMU_DEV_FEAT_IOPF: + case IOMMU_DEV_FEAT_SVA: break; default: ret = -EINVAL; @@ -2880,6 +2883,7 @@ const struct iommu_ops amd_iommu_ops = { .capable = amd_iommu_capable, .domain_alloc = amd_iommu_domain_alloc, .domain_alloc_user = amd_iommu_domain_alloc_user, + .domain_alloc_sva = amd_iommu_domain_alloc_sva, .probe_device = amd_iommu_probe_device, .release_device = amd_iommu_release_device, .device_group = amd_iommu_device_group, diff --git a/drivers/iommu/amd/pasid.c b/drivers/iommu/amd/pasid.c index 35c7678f9444..a68215f2b3e1 100644 --- a/drivers/iommu/amd/pasid.c +++ b/drivers/iommu/amd/pasid.c @@ -56,6 +56,49 @@ static void remove_pdom_dev_pasid(struct protection_domain *pdom, } } +static void sva_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + struct pdom_dev_data *pdom_dev_data; + struct protection_domain *sva_pdom; + unsigned long flags; + + sva_pdom = container_of(mn, struct protection_domain, mn); + + spin_lock_irqsave(&sva_pdom->lock, flags); + + for_each_pdom_dev_data(pdom_dev_data, sva_pdom) { + amd_iommu_dev_flush_pasid_pages(pdom_dev_data->dev_data, + pdom_dev_data->pasid, + start, end - start); + } + + spin_unlock_irqrestore(&sva_pdom->lock, flags); +} + +static void sva_mn_release(struct mmu_notifier *mn, struct mm_struct *mm) +{ + struct pdom_dev_data *pdom_dev_data, *next; + struct protection_domain *sva_pdom; + unsigned long flags; + + sva_pdom = container_of(mn, struct protection_domain, mn); + + spin_lock_irqsave(&sva_pdom->lock, flags); + + /* Assume dev_data_list contains same PASID with different devices */ + for_each_pdom_dev_data_safe(pdom_dev_data, next, sva_pdom) + remove_dev_pasid(pdom_dev_data); + + spin_unlock_irqrestore(&sva_pdom->lock, flags); +} + +static const struct mmu_notifier_ops sva_mn = { + .arch_invalidate_secondary_tlbs = sva_arch_invalidate_secondary_tlbs, + .release = sva_mn_release, +}; + int iommu_sva_set_dev_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid) { @@ -116,3 +159,40 @@ void amd_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid, spin_unlock_irqrestore(&sva_pdom->lock, flags); } + +static void iommu_sva_domain_free(struct iommu_domain *domain) +{ + struct protection_domain *sva_pdom = to_pdomain(domain); + + if (sva_pdom->mn.ops) + mmu_notifier_unregister(&sva_pdom->mn, domain->mm); + + amd_iommu_domain_free(domain); +} + +static const struct iommu_domain_ops amd_sva_domain_ops = { + .set_dev_pasid = iommu_sva_set_dev_pasid, + .free = iommu_sva_domain_free +}; + +struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev, + struct mm_struct *mm) +{ + struct protection_domain *pdom; + int ret; + + pdom = protection_domain_alloc(IOMMU_DOMAIN_SVA); + if (!pdom) + return ERR_PTR(-ENOMEM); + + pdom->domain.ops = &amd_sva_domain_ops; + pdom->mn.ops = &sva_mn; + + ret = mmu_notifier_register(&pdom->mn, mm); + if (ret) { + protection_domain_free(pdom); + return ERR_PTR(ret); + } + + return &pdom->domain; +} -- Gitee From ea234c039f9bafd5abae6445014f03e52f2f6f57 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 19 Apr 2024 21:59:13 +1000 Subject: [PATCH 024/143] powerpc/dart: Drop unnecessary call to kmemleak_no_scan() ANBZ: #13617 commit 4ccae23609f589dd69a593f457f76ee8b0e2d4e0 upstream. Erhard reported that kmemleak was showing a warning at boot: kmemleak: Not scanning unknown object at 0xc00000007f000000 CPU: 0 PID: 0 Comm: swapper Not tainted 5.19.0-rc3-PMacG5+ #2 Call Trace: .dump_stack_lvl+0x7c/0xc4 (unreliable) .kmemleak_no_scan+0xe0/0x100 .iommu_init_early_dart+0x2f0/0x924 .pmac_probe+0x1b0/0x20c .setup_arch+0x1b8/0x674 .start_kernel+0xdc/0xb74 start_here_common+0x1c/0x44 DART table allocated at: (____ptrval____) Which he bisected to a change in kmemleak, commit 23c2d497de21 ("mm: kmemleak: take a full lowmem check in kmemleak_*_phys()"). Because pmac_probe() is called before mem_topology_setup(), the min/ max PFN variables are still zero. That causes kmemleak_alloc_phys() to ignore the allocation, because the checks against the PFN fail. Then kmemleak_no_scan() can't find the allocation and prints warning. Given that kmemleak_alloc_phys() is ignoring the allocation to begin with, there's no need to call kmemleak_no_scan() at all, which avoids the warning. Reported-by: Erhard Furtner Closes: https://lore.kernel.org/all/bug-216156-206035@https.bugzilla.kernel.org%2F/ Signed-off-by: Michael Ellerman Link: https://msgid.link/20240419115913.3317575-1-mpe@ellerman.id.au Signed-off-by: Shuai Xue --- arch/powerpc/sysdev/dart_iommu.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index 98096bbfd62e..c0d10c149661 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -243,9 +242,6 @@ static void __init allocate_dart(void) if (!dart_tablebase) panic("Failed to allocate 16MB below 2GB for DART table\n"); - /* There is no point scanning the DART space for leaks*/ - kmemleak_no_scan((void *)dart_tablebase); - /* Allocate a spare page to map all invalid DART pages. We need to do * that to work around what looks like a problem with the HT bridge * prefetching into invalid pages and corrupting data -- Gitee From b071846d5fd6f4613b980d2ba04c09fb6fce750e Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Tue, 23 Apr 2024 10:41:13 -0700 Subject: [PATCH 025/143] iommu/vt-d: Make posted MSI an opt-in command line option ANBZ: #13617 commit be9be07b22c96dc03d0ecc76b5a5f21c2dcb05a1 upstream. Add a command line opt-in option for posted MSI if CONFIG_X86_POSTED_MSI=y. Also introduce a helper function for testing if posted MSI is supported on the platform. Signed-off-by: Jacob Pan Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240423174114.526704-12-jacob.jun.pan@linux.intel.com Signed-off-by: Shuai Xue --- Documentation/admin-guide/kernel-parameters.txt | 2 ++ arch/x86/include/asm/irq_remapping.h | 7 +++++++ drivers/iommu/irq_remapping.c | 5 ++++- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index dccea0e74159..2f6ce5dbb77d 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2226,6 +2226,8 @@ no_x2apic_optout BIOS x2APIC opt-out request will be ignored nopost disable Interrupt Posting + posted_msi + enable MSIs delivered as posted interrupts iomem= Disable strict checking of access to MMIO memory strict regions from userspace. diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 7a2ed154a5e1..5036f13ab69f 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -50,6 +50,13 @@ static inline struct irq_domain *arch_get_ir_parent_domain(void) return x86_vector_domain; } +extern bool enable_posted_msi; + +static inline bool posted_msi_supported(void) +{ + return enable_posted_msi && irq_remapping_cap(IRQ_POSTING_CAP); +} + #else /* CONFIG_IRQ_REMAP */ static inline bool irq_remapping_cap(enum irq_remap_cap cap) { return 0; } diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 2f7281ccc05f..c2443659812a 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -24,6 +24,8 @@ int no_x2apic_optout; int disable_irq_post = 0; +bool enable_posted_msi __ro_after_init; + static int disable_irq_remap; static struct irq_remap_ops *remap_ops; @@ -70,7 +72,8 @@ static __init int setup_irqremap(char *str) no_x2apic_optout = 1; else if (!strncmp(str, "nopost", 6)) disable_irq_post = 1; - + else if (IS_ENABLED(CONFIG_X86_POSTED_MSI) && !strncmp(str, "posted_msi", 10)) + enable_posted_msi = true; str += strcspn(str, ","); while (*str == ',') str++; -- Gitee From 4edcf9fb4ec70fdb6468e29dd82dda6fcfdc26c0 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Tue, 23 Apr 2024 10:41:14 -0700 Subject: [PATCH 026/143] iommu/vt-d: Enable posted mode for device MSIs ANBZ: #13617 commit ed1e48ea43703002dc202ac7f3b0b0b9981ec2f0 upstream. With posted MSI feature enabled on the CPU side, iommu interrupt remapping table entries (IRTEs) for device MSI/x can be allocated, activated, and programed in posted mode. This means that IRTEs are linked with their respective PIDs of the target CPU. Handlers for the posted MSI notification vector will de-multiplex device MSI handlers. CPU notifications are coalesced if interrupts arrive at a high frequency. Posted interrupts are only used for device MSI and not for legacy devices (IO/APIC, HPET). Introduce a new irq_chip for posted MSIs, which has a dummy irq_ack() callback as EOI is performed in the notification handler once. When posted MSI is enabled, MSI domain/chip hierarchy will look like this example: domain: IR-PCI-MSIX-0000:50:00.0-12 hwirq: 0x29 chip: IR-PCI-MSIX-0000:50:00.0 flags: 0x430 IRQCHIP_SKIP_SET_WAKE IRQCHIP_ONESHOT_SAFE parent: domain: INTEL-IR-10-13 hwirq: 0x2d0000 chip: INTEL-IR-POST flags: 0x0 parent: domain: VECTOR hwirq: 0x77 chip: APIC Suggested-by: Thomas Gleixner Signed-off-by: Jacob Pan Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240423174114.526704-13-jacob.jun.pan@linux.intel.com Signed-off-by: Shuai Xue --- drivers/iommu/intel/irq_remapping.c | 113 +++++++++++++++++++++++++++- 1 file changed, 109 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index f4ddd4d9a825..2d6e6cbba680 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "iommu.h" #include "../irq_remapping.h" @@ -50,6 +51,7 @@ struct irq_2_iommu { u16 sub_handle; u8 irte_mask; enum irq_mode mode; + bool posted_msi; }; struct intel_ir_data { @@ -1118,6 +1120,14 @@ static void prepare_irte(struct irte *irte, int vector, unsigned int dest) irte->redir_hint = 1; } +static void prepare_irte_posted(struct irte *irte) +{ + memset(irte, 0, sizeof(*irte)); + + irte->present = 1; + irte->p_pst = 1; +} + struct irq_remap_ops intel_irq_remap_ops = { .prepare = intel_prepare_irq_remapping, .enable = intel_enable_irq_remapping, @@ -1126,6 +1136,47 @@ struct irq_remap_ops intel_irq_remap_ops = { .enable_faulting = enable_drhd_fault_handling, }; +#ifdef CONFIG_X86_POSTED_MSI + +static phys_addr_t get_pi_desc_addr(struct irq_data *irqd) +{ + int cpu = cpumask_first(irq_data_get_effective_affinity_mask(irqd)); + + if (WARN_ON(cpu >= nr_cpu_ids)) + return 0; + + return __pa(per_cpu_ptr(&posted_msi_pi_desc, cpu)); +} + +static void intel_ir_reconfigure_irte_posted(struct irq_data *irqd) +{ + struct intel_ir_data *ir_data = irqd->chip_data; + struct irte *irte = &ir_data->irte_entry; + struct irte irte_pi; + u64 pid_addr; + + pid_addr = get_pi_desc_addr(irqd); + + if (!pid_addr) { + pr_warn("Failed to setup IRQ %d for posted mode", irqd->irq); + return; + } + + memset(&irte_pi, 0, sizeof(irte_pi)); + + /* The shared IRTE already be set up as posted during alloc_irte */ + dmar_copy_shared_irte(&irte_pi, irte); + + irte_pi.pda_l = (pid_addr >> (32 - PDA_LOW_BIT)) & ~(-1UL << PDA_LOW_BIT); + irte_pi.pda_h = (pid_addr >> 32) & ~(-1UL << PDA_HIGH_BIT); + + modify_irte(&ir_data->irq_2_iommu, &irte_pi); +} + +#else +static inline void intel_ir_reconfigure_irte_posted(struct irq_data *irqd) {} +#endif + static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force) { struct intel_ir_data *ir_data = irqd->chip_data; @@ -1139,8 +1190,9 @@ static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force) irte->vector = cfg->vector; irte->dest_id = IRTE_DEST(cfg->dest_apicid); - /* Update the hardware only if the interrupt is in remapped mode. */ - if (force || ir_data->irq_2_iommu.mode == IRQ_REMAPPING) + if (ir_data->irq_2_iommu.posted_msi) + intel_ir_reconfigure_irte_posted(irqd); + else if (force || ir_data->irq_2_iommu.mode == IRQ_REMAPPING) modify_irte(&ir_data->irq_2_iommu, irte); } @@ -1194,7 +1246,7 @@ static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info) struct intel_ir_data *ir_data = data->chip_data; struct vcpu_data *vcpu_pi_info = info; - /* stop posting interrupts, back to remapping mode */ + /* stop posting interrupts, back to the default mode */ if (!vcpu_pi_info) { modify_irte(&ir_data->irq_2_iommu, &ir_data->irte_entry); } else { @@ -1233,6 +1285,49 @@ static struct irq_chip intel_ir_chip = { .irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity, }; +/* + * With posted MSIs, all vectors are multiplexed into a single notification + * vector. Devices MSIs are then dispatched in a demux loop where + * EOIs can be coalesced as well. + * + * "INTEL-IR-POST" IRQ chip does not do EOI on ACK, thus the dummy irq_ack() + * function. Instead EOI is performed by the posted interrupt notification + * handler. + * + * For the example below, 3 MSIs are coalesced into one CPU notification. Only + * one apic_eoi() is needed. + * + * __sysvec_posted_msi_notification() + * irq_enter(); + * handle_edge_irq() + * irq_chip_ack_parent() + * dummy(); // No EOI + * handle_irq_event() + * driver_handler() + * handle_edge_irq() + * irq_chip_ack_parent() + * dummy(); // No EOI + * handle_irq_event() + * driver_handler() + * handle_edge_irq() + * irq_chip_ack_parent() + * dummy(); // No EOI + * handle_irq_event() + * driver_handler() + * apic_eoi() + * irq_exit() + */ + +static void dummy_ack(struct irq_data *d) { } + +static struct irq_chip intel_ir_chip_post_msi = { + .name = "INTEL-IR-POST", + .irq_ack = dummy_ack, + .irq_set_affinity = intel_ir_set_affinity, + .irq_compose_msi_msg = intel_ir_compose_msi_msg, + .irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity, +}; + static void fill_msi_msg(struct msi_msg *msg, u32 index, u32 subhandle) { memset(msg, 0, sizeof(*msg)); @@ -1274,6 +1369,11 @@ static void intel_irq_remapping_prepare_irte(struct intel_ir_data *data, break; case X86_IRQ_ALLOC_TYPE_PCI_MSI: case X86_IRQ_ALLOC_TYPE_PCI_MSIX: + if (posted_msi_supported()) { + prepare_irte_posted(irte); + data->irq_2_iommu.posted_msi = 1; + } + set_msi_sid(irte, pci_real_dma_dev(msi_desc_to_pci_dev(info->desc))); break; @@ -1361,7 +1461,12 @@ static int intel_irq_remapping_alloc(struct irq_domain *domain, irq_data->hwirq = (index << 16) + i; irq_data->chip_data = ird; - irq_data->chip = &intel_ir_chip; + if (posted_msi_supported() && + ((info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI) || + (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSIX))) + irq_data->chip = &intel_ir_chip_post_msi; + else + irq_data->chip = &intel_ir_chip; intel_irq_remapping_prepare_irte(ird, irq_cfg, info, index, i); irq_set_status_flags(virq + i, IRQ_MOVE_PCNTXT); } -- Gitee From 891e8a959b7d309e51543c4fa9f0e5dd92669ef0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 1 May 2024 15:27:29 +0100 Subject: [PATCH 027/143] iommu/arm-smmu-qcom: Don't build debug features as a kernel module ANBZ: #13617 commit 0928fc15f31553c7acb8117b0609799fc0f22fa5 upstream. The Qualcomm TBU debug support introduced by 414ecb030870 ("iommu/arm-smmu-qcom-debug: Add support for TBUs") provides its own driver initialisation function, which breaks the link when the core SMMU driver is built as a module: ld.lld: error: duplicate symbol: init_module >>> defined at arm-smmu.c >>> drivers/iommu/arm/arm-smmu/arm-smmu.o:(init_module) >>> defined at arm-smmu-qcom-debug.c >>> drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.o:(.init.text+0x4) Since we're late in the cycle, just make the debug features depend on a non-modular SMMU driver for now while the initialisation is reworked to hang off qcom_smmu_impl_init(). Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 9cb6193b3aaf..b76437d09ac7 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -377,7 +377,7 @@ config ARM_SMMU_QCOM config ARM_SMMU_QCOM_DEBUG bool "ARM SMMU QCOM implementation defined debug support" - depends on ARM_SMMU_QCOM + depends on ARM_SMMU_QCOM=y help Support for implementation specific debug features in ARM SMMU hardware found in QTI platforms. This include support for -- Gitee From ff81a3318a8a19372ad693fe3e52981fed0fd7a2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 30 Apr 2024 14:21:33 -0300 Subject: [PATCH 028/143] iommu/arm-smmu-v3: Add an ops indirection to the STE code ANBZ: #13617 commit de31c355541286aa4c938c982dfcafbf062fcb93 upstream. Prepare to put the CD code into the same mechanism. Add an ops indirection around all the STE specific code and make the worker functions independent of the entry content being processed. get_used and sync ops are provided to hook the correct code. Signed-off-by: Michael Shavit Reviewed-by: Michael Shavit Reviewed-by: Nicolin Chen Tested-by: Nicolin Chen Tested-by: Shameer Kolothum Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/1-v9-5040dc602008+177d7-smmuv3_newapi_p2_jgg@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 176 ++++++++++++-------- 1 file changed, 104 insertions(+), 72 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index e064fd8bec6d..d152197c0250 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -42,8 +42,19 @@ enum arm_smmu_msi_index { ARM_SMMU_MAX_MSIS, }; -static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, - ioasid_t sid); +struct arm_smmu_entry_writer_ops; +struct arm_smmu_entry_writer { + const struct arm_smmu_entry_writer_ops *ops; + struct arm_smmu_master *master; +}; + +struct arm_smmu_entry_writer_ops { + void (*get_used)(const __le64 *entry, __le64 *used); + void (*sync)(struct arm_smmu_entry_writer *writer); +}; + +#define NUM_ENTRY_QWORDS 8 +static_assert(sizeof(struct arm_smmu_ste) == NUM_ENTRY_QWORDS * sizeof(u64)); static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = { [EVTQ_MSI_INDEX] = { @@ -972,43 +983,42 @@ void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid) * would be nice if this was complete according to the spec, but minimally it * has to capture the bits this driver uses. */ -static void arm_smmu_get_ste_used(const struct arm_smmu_ste *ent, - struct arm_smmu_ste *used_bits) +static void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits) { - unsigned int cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent->data[0])); + unsigned int cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent[0])); - used_bits->data[0] = cpu_to_le64(STRTAB_STE_0_V); - if (!(ent->data[0] & cpu_to_le64(STRTAB_STE_0_V))) + used_bits[0] = cpu_to_le64(STRTAB_STE_0_V); + if (!(ent[0] & cpu_to_le64(STRTAB_STE_0_V))) return; - used_bits->data[0] |= cpu_to_le64(STRTAB_STE_0_CFG); + used_bits[0] |= cpu_to_le64(STRTAB_STE_0_CFG); /* S1 translates */ if (cfg & BIT(0)) { - used_bits->data[0] |= cpu_to_le64(STRTAB_STE_0_S1FMT | - STRTAB_STE_0_S1CTXPTR_MASK | - STRTAB_STE_0_S1CDMAX); - used_bits->data[1] |= + used_bits[0] |= cpu_to_le64(STRTAB_STE_0_S1FMT | + STRTAB_STE_0_S1CTXPTR_MASK | + STRTAB_STE_0_S1CDMAX); + used_bits[1] |= cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR | STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH | STRTAB_STE_1_S1STALLD | STRTAB_STE_1_STRW | STRTAB_STE_1_EATS); - used_bits->data[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID); + used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID); } /* S2 translates */ if (cfg & BIT(1)) { - used_bits->data[1] |= + used_bits[1] |= cpu_to_le64(STRTAB_STE_1_EATS | STRTAB_STE_1_SHCFG); - used_bits->data[2] |= + used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR | STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI | STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2R); - used_bits->data[3] |= cpu_to_le64(STRTAB_STE_3_S2TTB_MASK); + used_bits[3] |= cpu_to_le64(STRTAB_STE_3_S2TTB_MASK); } if (cfg == STRTAB_STE_0_CFG_BYPASS) - used_bits->data[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG); + used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG); } /* @@ -1017,57 +1027,55 @@ static void arm_smmu_get_ste_used(const struct arm_smmu_ste *ent, * unused_update is an intermediate value of entry that has unused bits set to * their new values. */ -static u8 arm_smmu_entry_qword_diff(const struct arm_smmu_ste *entry, - const struct arm_smmu_ste *target, - struct arm_smmu_ste *unused_update) +static u8 arm_smmu_entry_qword_diff(struct arm_smmu_entry_writer *writer, + const __le64 *entry, const __le64 *target, + __le64 *unused_update) { - struct arm_smmu_ste target_used = {}; - struct arm_smmu_ste cur_used = {}; + __le64 target_used[NUM_ENTRY_QWORDS] = {}; + __le64 cur_used[NUM_ENTRY_QWORDS] = {}; u8 used_qword_diff = 0; unsigned int i; - arm_smmu_get_ste_used(entry, &cur_used); - arm_smmu_get_ste_used(target, &target_used); + writer->ops->get_used(entry, cur_used); + writer->ops->get_used(target, target_used); - for (i = 0; i != ARRAY_SIZE(target_used.data); i++) { + for (i = 0; i != NUM_ENTRY_QWORDS; i++) { /* * Check that masks are up to date, the make functions are not * allowed to set a bit to 1 if the used function doesn't say it * is used. */ - WARN_ON_ONCE(target->data[i] & ~target_used.data[i]); + WARN_ON_ONCE(target[i] & ~target_used[i]); /* Bits can change because they are not currently being used */ - unused_update->data[i] = (entry->data[i] & cur_used.data[i]) | - (target->data[i] & ~cur_used.data[i]); + unused_update[i] = (entry[i] & cur_used[i]) | + (target[i] & ~cur_used[i]); /* * Each bit indicates that a used bit in a qword needs to be * changed after unused_update is applied. */ - if ((unused_update->data[i] & target_used.data[i]) != - target->data[i]) + if ((unused_update[i] & target_used[i]) != target[i]) used_qword_diff |= 1 << i; } return used_qword_diff; } -static bool entry_set(struct arm_smmu_device *smmu, ioasid_t sid, - struct arm_smmu_ste *entry, - const struct arm_smmu_ste *target, unsigned int start, +static bool entry_set(struct arm_smmu_entry_writer *writer, __le64 *entry, + const __le64 *target, unsigned int start, unsigned int len) { bool changed = false; unsigned int i; for (i = start; len != 0; len--, i++) { - if (entry->data[i] != target->data[i]) { - WRITE_ONCE(entry->data[i], target->data[i]); + if (entry[i] != target[i]) { + WRITE_ONCE(entry[i], target[i]); changed = true; } } if (changed) - arm_smmu_sync_ste_for_sid(smmu, sid); + writer->ops->sync(writer); return changed; } @@ -1097,24 +1105,21 @@ static bool entry_set(struct arm_smmu_device *smmu, ioasid_t sid, * V=0 process. This relies on the IGNORED behavior described in the * specification. */ -static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid, - struct arm_smmu_ste *entry, - const struct arm_smmu_ste *target) +static void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer, + __le64 *entry, const __le64 *target) { - unsigned int num_entry_qwords = ARRAY_SIZE(target->data); - struct arm_smmu_device *smmu = master->smmu; - struct arm_smmu_ste unused_update; + __le64 unused_update[NUM_ENTRY_QWORDS]; u8 used_qword_diff; used_qword_diff = - arm_smmu_entry_qword_diff(entry, target, &unused_update); + arm_smmu_entry_qword_diff(writer, entry, target, unused_update); if (hweight8(used_qword_diff) == 1) { /* * Only one qword needs its used bits to be changed. This is a - * hitless update, update all bits the current STE is ignoring - * to their new values, then update a single "critical qword" to - * change the STE and finally 0 out any bits that are now unused - * in the target configuration. + * hitless update, update all bits the current STE/CD is + * ignoring to their new values, then update a single "critical + * qword" to change the STE/CD and finally 0 out any bits that + * are now unused in the target configuration. */ unsigned int critical_qword_index = ffs(used_qword_diff) - 1; @@ -1123,22 +1128,21 @@ static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid, * writing it in the next step anyways. This can save a sync * when the only change is in that qword. */ - unused_update.data[critical_qword_index] = - entry->data[critical_qword_index]; - entry_set(smmu, sid, entry, &unused_update, 0, num_entry_qwords); - entry_set(smmu, sid, entry, target, critical_qword_index, 1); - entry_set(smmu, sid, entry, target, 0, num_entry_qwords); + unused_update[critical_qword_index] = + entry[critical_qword_index]; + entry_set(writer, entry, unused_update, 0, NUM_ENTRY_QWORDS); + entry_set(writer, entry, target, critical_qword_index, 1); + entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS); } else if (used_qword_diff) { /* * At least two qwords need their inuse bits to be changed. This * requires a breaking update, zero the V bit, write all qwords * but 0, then set qword 0 */ - unused_update.data[0] = entry->data[0] & - cpu_to_le64(~STRTAB_STE_0_V); - entry_set(smmu, sid, entry, &unused_update, 0, 1); - entry_set(smmu, sid, entry, target, 1, num_entry_qwords - 1); - entry_set(smmu, sid, entry, target, 0, 1); + unused_update[0] = 0; + entry_set(writer, entry, unused_update, 0, 1); + entry_set(writer, entry, target, 1, NUM_ENTRY_QWORDS - 1); + entry_set(writer, entry, target, 0, 1); } else { /* * No inuse bit changed. Sanity check that all unused bits are 0 @@ -1146,18 +1150,7 @@ static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid, * compute_qword_diff(). */ WARN_ON_ONCE( - entry_set(smmu, sid, entry, target, 0, num_entry_qwords)); - } - - /* It's likely that we'll want to use the new STE soon */ - if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)) { - struct arm_smmu_cmdq_ent - prefetch_cmd = { .opcode = CMDQ_OP_PREFETCH_CFG, - .prefetch = { - .sid = sid, - } }; - - arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd); + entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS)); } } @@ -1430,17 +1423,56 @@ arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc) WRITE_ONCE(*dst, cpu_to_le64(val)); } -static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid) +struct arm_smmu_ste_writer { + struct arm_smmu_entry_writer writer; + u32 sid; +}; + +static void arm_smmu_ste_writer_sync_entry(struct arm_smmu_entry_writer *writer) { + struct arm_smmu_ste_writer *ste_writer = + container_of(writer, struct arm_smmu_ste_writer, writer); struct arm_smmu_cmdq_ent cmd = { .opcode = CMDQ_OP_CFGI_STE, .cfgi = { - .sid = sid, + .sid = ste_writer->sid, .leaf = true, }, }; - arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); + arm_smmu_cmdq_issue_cmd_with_sync(writer->master->smmu, &cmd); +} + +static const struct arm_smmu_entry_writer_ops arm_smmu_ste_writer_ops = { + .sync = arm_smmu_ste_writer_sync_entry, + .get_used = arm_smmu_get_ste_used, +}; + +static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid, + struct arm_smmu_ste *ste, + const struct arm_smmu_ste *target) +{ + struct arm_smmu_device *smmu = master->smmu; + struct arm_smmu_ste_writer ste_writer = { + .writer = { + .ops = &arm_smmu_ste_writer_ops, + .master = master, + }, + .sid = sid, + }; + + arm_smmu_write_entry(&ste_writer.writer, ste->data, target->data); + + /* It's likely that we'll want to use the new STE soon */ + if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)) { + struct arm_smmu_cmdq_ent + prefetch_cmd = { .opcode = CMDQ_OP_PREFETCH_CFG, + .prefetch = { + .sid = sid, + } }; + + arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd); + } } static void arm_smmu_make_abort_ste(struct arm_smmu_ste *target) -- Gitee From 5b2ec403f72271a6666a450bb4f59a61d2e0e406 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 30 Apr 2024 14:21:34 -0300 Subject: [PATCH 029/143] iommu/arm-smmu-v3: Make CD programming use arm_smmu_write_entry() ANBZ: #13617 commit 78a5fbe8395b365d58142ff9b7a6aeb556481a1f upstream. CD table entries and STE's have the same essential programming sequence, just with different types. Use the new ops indirection to link CD programming to the common writer. In a few more patches all CD writers will call an appropriate make function and then directly call arm_smmu_write_cd_entry(). arm_smmu_write_ctx_desc() will be removed. Until then lightly tweak arm_smmu_write_ctx_desc() to also use the new programmer by using the same logic as right now to build the target CD on the stack, sanitizing it to meet the used rules, and then using the writer. Sanitizing is necessary because the writer expects that the currently programmed CD follows the used rules. Next patches add new make functions and new direct calls to arm_smmu_write_cd_entry() which will require this. Signed-off-by: Michael Shavit Tested-by: Nicolin Chen Tested-by: Shameer Kolothum Reviewed-by: Moritz Fischer Reviewed-by: Nicolin Chen Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/2-v9-5040dc602008+177d7-smmuv3_newapi_p2_jgg@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 89 ++++++++++++++++----- 1 file changed, 67 insertions(+), 22 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index d152197c0250..13b33ad6805a 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -55,6 +55,7 @@ struct arm_smmu_entry_writer_ops { #define NUM_ENTRY_QWORDS 8 static_assert(sizeof(struct arm_smmu_ste) == NUM_ENTRY_QWORDS * sizeof(u64)); +static_assert(sizeof(struct arm_smmu_cd) == NUM_ENTRY_QWORDS * sizeof(u64)); static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = { [EVTQ_MSI_INDEX] = { @@ -1230,6 +1231,59 @@ static struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, return &l1_desc->l2ptr[idx]; } +struct arm_smmu_cd_writer { + struct arm_smmu_entry_writer writer; + unsigned int ssid; +}; + +static void arm_smmu_get_cd_used(const __le64 *ent, __le64 *used_bits) +{ + used_bits[0] = cpu_to_le64(CTXDESC_CD_0_V); + if (!(ent[0] & cpu_to_le64(CTXDESC_CD_0_V))) + return; + memset(used_bits, 0xFF, sizeof(struct arm_smmu_cd)); + + /* + * If EPD0 is set by the make function it means + * T0SZ/TG0/IR0/OR0/SH0/TTB0 are IGNORED + */ + if (ent[0] & cpu_to_le64(CTXDESC_CD_0_TCR_EPD0)) { + used_bits[0] &= ~cpu_to_le64( + CTXDESC_CD_0_TCR_T0SZ | CTXDESC_CD_0_TCR_TG0 | + CTXDESC_CD_0_TCR_IRGN0 | CTXDESC_CD_0_TCR_ORGN0 | + CTXDESC_CD_0_TCR_SH0); + used_bits[1] &= ~cpu_to_le64(CTXDESC_CD_1_TTB0_MASK); + } +} + +static void arm_smmu_cd_writer_sync_entry(struct arm_smmu_entry_writer *writer) +{ + struct arm_smmu_cd_writer *cd_writer = + container_of(writer, struct arm_smmu_cd_writer, writer); + + arm_smmu_sync_cd(writer->master, cd_writer->ssid, true); +} + +static const struct arm_smmu_entry_writer_ops arm_smmu_cd_writer_ops = { + .sync = arm_smmu_cd_writer_sync_entry, + .get_used = arm_smmu_get_cd_used, +}; + +static void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid, + struct arm_smmu_cd *cdptr, + const struct arm_smmu_cd *target) +{ + struct arm_smmu_cd_writer cd_writer = { + .writer = { + .ops = &arm_smmu_cd_writer_ops, + .master = master, + }, + .ssid = ssid, + }; + + arm_smmu_write_entry(&cd_writer.writer, cdptr->data, target->data); +} + int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid, struct arm_smmu_ctx_desc *cd) { @@ -1246,26 +1300,34 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid, */ u64 val; bool cd_live; - struct arm_smmu_cd *cdptr; + struct arm_smmu_cd target; + struct arm_smmu_cd *cdptr = ⌖ + struct arm_smmu_cd *cd_table_entry; struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; struct arm_smmu_device *smmu = master->smmu; if (WARN_ON(ssid >= (1 << cd_table->s1cdmax))) return -E2BIG; - cdptr = arm_smmu_get_cd_ptr(master, ssid); - if (!cdptr) + cd_table_entry = arm_smmu_get_cd_ptr(master, ssid); + if (!cd_table_entry) return -ENOMEM; + target = *cd_table_entry; val = le64_to_cpu(cdptr->data[0]); cd_live = !!(val & CTXDESC_CD_0_V); if (!cd) { /* (5) */ + memset(cdptr, 0, sizeof(*cdptr)); val = 0; } else if (cd == &quiet_cd) { /* (4) */ + val &= ~(CTXDESC_CD_0_TCR_T0SZ | CTXDESC_CD_0_TCR_TG0 | + CTXDESC_CD_0_TCR_IRGN0 | CTXDESC_CD_0_TCR_ORGN0 | + CTXDESC_CD_0_TCR_SH0); if (!(smmu->features & ARM_SMMU_FEAT_STALL_FORCE)) val &= ~(CTXDESC_CD_0_S | CTXDESC_CD_0_R); val |= CTXDESC_CD_0_TCR_EPD0; + cdptr->data[1] &= ~cpu_to_le64(CTXDESC_CD_1_TTB0_MASK); } else if (cd_live) { /* (3) */ val &= ~CTXDESC_CD_0_ASID; val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid); @@ -1278,13 +1340,6 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid, cdptr->data[2] = 0; cdptr->data[3] = cpu_to_le64(cd->mair); - /* - * STE may be live, and the SMMU might read dwords of this CD in any - * order. Ensure that it observes valid values before reading - * V=1. - */ - arm_smmu_sync_cd(master, ssid, true); - val = cd->tcr | #ifdef __BIG_ENDIAN CTXDESC_CD_0_ENDI | @@ -1298,18 +1353,8 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid, if (cd_table->stall_enabled) val |= CTXDESC_CD_0_S; } - - /* - * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3 - * "Configuration structures and configuration invalidation completion" - * - * The size of single-copy atomic reads made by the SMMU is - * IMPLEMENTATION DEFINED but must be at least 64 bits. Any single - * field within an aligned 64-bit span of a structure can be altered - * without first making the structure invalid. - */ - WRITE_ONCE(cdptr->data[0], cpu_to_le64(val)); - arm_smmu_sync_cd(master, ssid, true); + cdptr->data[0] = cpu_to_le64(val); + arm_smmu_write_cd_entry(master, ssid, cd_table_entry, &target); return 0; } -- Gitee From 85cda13ceb54903a4e7f1459add6b4b638600c94 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 30 Apr 2024 14:21:35 -0300 Subject: [PATCH 030/143] iommu/arm-smmu-v3: Move the CD generation for S1 domains into a function ANBZ: #13617 commit e9d1e4ff74b96cf180d04be38541a245c8c574c1 upstream. Introduce arm_smmu_make_s1_cd() to build the CD from the paging S1 domain, and reorganize all the places programming S1 domain CD table entries to call it. Split arm_smmu_update_s1_domain_cd_entry() from arm_smmu_update_ctx_desc_devices() so that the S1 path has its own call chain separate from the unrelated SVA path. arm_smmu_update_s1_domain_cd_entry() only works on S1 domains attached to RIDs and refreshes all their CDs. Remove case (3) from arm_smmu_write_ctx_desc() as it is now handled by directly calling arm_smmu_write_cd_entry(). Remove the forced clear of the CD during S1 domain attach, arm_smmu_write_cd_entry() will do this automatically if necessary. Tested-by: Nicolin Chen Tested-by: Shameer Kolothum Reviewed-by: Michael Shavit Reviewed-by: Nicolin Chen Reviewed-by: Mostafa Saleh Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/3-v9-5040dc602008+177d7-smmuv3_newapi_p2_jgg@nvidia.com [will: Drop unused arm_smmu_clean_cd_entry() function] Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 25 ++++++- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 71 +++++++++++-------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 9 +++ 3 files changed, 76 insertions(+), 29 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 41b44baef15e..d159f6048093 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -53,6 +53,29 @@ static void arm_smmu_update_ctx_desc_devices(struct arm_smmu_domain *smmu_domain spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); } +static void +arm_smmu_update_s1_domain_cd_entry(struct arm_smmu_domain *smmu_domain) +{ + struct arm_smmu_master *master; + struct arm_smmu_cd target_cd; + unsigned long flags; + + spin_lock_irqsave(&smmu_domain->devices_lock, flags); + list_for_each_entry(master, &smmu_domain->devices, domain_head) { + struct arm_smmu_cd *cdptr; + + /* S1 domains only support RID attachment right now */ + cdptr = arm_smmu_get_cd_ptr(master, IOMMU_NO_PASID); + if (WARN_ON(!cdptr)) + continue; + + arm_smmu_make_s1_cd(&target_cd, master, smmu_domain); + arm_smmu_write_cd_entry(master, IOMMU_NO_PASID, cdptr, + &target_cd); + } + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); +} + /* * Check if the CPU ASID is available on the SMMU side. If a private context * descriptor is using it, try to replace it. @@ -96,7 +119,7 @@ arm_smmu_share_asid(struct mm_struct *mm, u16 asid) * be some overlap between use of both ASIDs, until we invalidate the * TLB. */ - arm_smmu_update_ctx_desc_devices(smmu_domain, IOMMU_NO_PASID, cd); + arm_smmu_update_s1_domain_cd_entry(smmu_domain); /* Invalidate TLB entries previously associated with that context */ arm_smmu_tlb_inv_asid(smmu, asid); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 13b33ad6805a..4babf7cb0639 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1203,8 +1203,8 @@ static void arm_smmu_write_cd_l1_desc(__le64 *dst, WRITE_ONCE(*dst, cpu_to_le64(val)); } -static struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, - u32 ssid) +struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, + u32 ssid) { __le64 *l1ptr; unsigned int idx; @@ -1269,9 +1269,9 @@ static const struct arm_smmu_entry_writer_ops arm_smmu_cd_writer_ops = { .get_used = arm_smmu_get_cd_used, }; -static void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid, - struct arm_smmu_cd *cdptr, - const struct arm_smmu_cd *target) +void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid, + struct arm_smmu_cd *cdptr, + const struct arm_smmu_cd *target) { struct arm_smmu_cd_writer cd_writer = { .writer = { @@ -1284,6 +1284,32 @@ static void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid, arm_smmu_write_entry(&cd_writer.writer, cdptr->data, target->data); } +void arm_smmu_make_s1_cd(struct arm_smmu_cd *target, + struct arm_smmu_master *master, + struct arm_smmu_domain *smmu_domain) +{ + struct arm_smmu_ctx_desc *cd = &smmu_domain->cd; + + memset(target, 0, sizeof(*target)); + + target->data[0] = cpu_to_le64( + cd->tcr | +#ifdef __BIG_ENDIAN + CTXDESC_CD_0_ENDI | +#endif + CTXDESC_CD_0_V | + CTXDESC_CD_0_AA64 | + (master->stall_enabled ? CTXDESC_CD_0_S : 0) | + CTXDESC_CD_0_R | + CTXDESC_CD_0_A | + CTXDESC_CD_0_ASET | + FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) + ); + + target->data[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK); + target->data[3] = cpu_to_le64(cd->mair); +} + int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid, struct arm_smmu_ctx_desc *cd) { @@ -1292,14 +1318,11 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid, * * (1) Install primary CD, for normal DMA traffic (SSID = IOMMU_NO_PASID = 0). * (2) Install a secondary CD, for SID+SSID traffic. - * (3) Update ASID of a CD. Atomically write the first 64 bits of the - * CD, then invalidate the old entry and mappings. * (4) Quiesce the context without clearing the valid bit. Disable * translation, and ignore any translation fault. * (5) Remove a secondary CD. */ u64 val; - bool cd_live; struct arm_smmu_cd target; struct arm_smmu_cd *cdptr = ⌖ struct arm_smmu_cd *cd_table_entry; @@ -1315,7 +1338,6 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid, target = *cd_table_entry; val = le64_to_cpu(cdptr->data[0]); - cd_live = !!(val & CTXDESC_CD_0_V); if (!cd) { /* (5) */ memset(cdptr, 0, sizeof(*cdptr)); @@ -1328,13 +1350,6 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid, val &= ~(CTXDESC_CD_0_S | CTXDESC_CD_0_R); val |= CTXDESC_CD_0_TCR_EPD0; cdptr->data[1] &= ~cpu_to_le64(CTXDESC_CD_1_TTB0_MASK); - } else if (cd_live) { /* (3) */ - val &= ~CTXDESC_CD_0_ASID; - val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid); - /* - * Until CD+TLB invalidation, both ASIDs may be used for tagging - * this substream's traffic - */ } else { /* (1) and (2) */ cdptr->data[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK); cdptr->data[2] = 0; @@ -2644,29 +2659,29 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); switch (smmu_domain->stage) { - case ARM_SMMU_DOMAIN_S1: + case ARM_SMMU_DOMAIN_S1: { + struct arm_smmu_cd target_cd; + struct arm_smmu_cd *cdptr; + if (!master->cd_table.cdtab) { ret = arm_smmu_alloc_cd_tables(master); if (ret) goto out_list_del; - } else { - /* - * arm_smmu_write_ctx_desc() relies on the entry being - * invalid to work, clear any existing entry. - */ - ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, - NULL); - if (ret) - goto out_list_del; } - ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, &smmu_domain->cd); - if (ret) + cdptr = arm_smmu_get_cd_ptr(master, IOMMU_NO_PASID); + if (!cdptr) { + ret = -ENOMEM; goto out_list_del; + } + arm_smmu_make_s1_cd(&target_cd, master, smmu_domain); + arm_smmu_write_cd_entry(master, IOMMU_NO_PASID, cdptr, + &target_cd); arm_smmu_make_cdtable_ste(&target, master); arm_smmu_install_ste_for_dev(master, &target); break; + } case ARM_SMMU_DOMAIN_S2: arm_smmu_make_s2_domain_ste(&target, master, smmu_domain); arm_smmu_install_ste_for_dev(master, &target); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 2da767a542ff..91a8365f30df 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -751,6 +751,15 @@ extern struct xarray arm_smmu_asid_xa; extern struct mutex arm_smmu_asid_lock; extern struct arm_smmu_ctx_desc quiet_cd; +struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, + u32 ssid); +void arm_smmu_make_s1_cd(struct arm_smmu_cd *target, + struct arm_smmu_master *master, + struct arm_smmu_domain *smmu_domain); +void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid, + struct arm_smmu_cd *cdptr, + const struct arm_smmu_cd *target); + int arm_smmu_write_ctx_desc(struct arm_smmu_master *smmu_master, int ssid, struct arm_smmu_ctx_desc *cd); void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid); -- Gitee From b0fd7e0e9dd1bc32c4b958c595fb05efb6943bc5 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 30 Apr 2024 14:21:36 -0300 Subject: [PATCH 031/143] iommu/arm-smmu-v3: Consolidate clearing a CD table entry ANBZ: #13617 commit af8f0b83ea2bcc7cd365c32044f31bdadc07c351 upstream. A cleared entry is all 0's. Make arm_smmu_clear_cd() do this sequence. If we are clearing an entry and for some reason it is not already allocated in the CD table then something has gone wrong. Remove case (5) from arm_smmu_write_ctx_desc(). Tested-by: Nicolin Chen Tested-by: Shameer Kolothum Reviewed-by: Michael Shavit Reviewed-by: Nicolin Chen Reviewed-by: Moritz Fischer Reviewed-by: Mostafa Saleh Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/4-v9-5040dc602008+177d7-smmuv3_newapi_p2_jgg@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 2 +- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 26 ++++++++++++------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 1 + 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index d159f6048093..7cf286f7a009 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -569,7 +569,7 @@ void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, mutex_lock(&sva_lock); - arm_smmu_write_ctx_desc(master, id, NULL); + arm_smmu_clear_cd(master, id); list_for_each_entry(t, &master->bonds, list) { if (t->mm == mm) { diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 4babf7cb0639..422da0ff948d 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1310,6 +1310,19 @@ void arm_smmu_make_s1_cd(struct arm_smmu_cd *target, target->data[3] = cpu_to_le64(cd->mair); } +void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid) +{ + struct arm_smmu_cd target = {}; + struct arm_smmu_cd *cdptr; + + if (!master->cd_table.cdtab) + return; + cdptr = arm_smmu_get_cd_ptr(master, ssid); + if (WARN_ON(!cdptr)) + return; + arm_smmu_write_cd_entry(master, ssid, cdptr, &target); +} + int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid, struct arm_smmu_ctx_desc *cd) { @@ -1320,7 +1333,6 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid, * (2) Install a secondary CD, for SID+SSID traffic. * (4) Quiesce the context without clearing the valid bit. Disable * translation, and ignore any translation fault. - * (5) Remove a secondary CD. */ u64 val; struct arm_smmu_cd target; @@ -1339,10 +1351,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid, target = *cd_table_entry; val = le64_to_cpu(cdptr->data[0]); - if (!cd) { /* (5) */ - memset(cdptr, 0, sizeof(*cdptr)); - val = 0; - } else if (cd == &quiet_cd) { /* (4) */ + if (cd == &quiet_cd) { /* (4) */ val &= ~(CTXDESC_CD_0_TCR_T0SZ | CTXDESC_CD_0_TCR_TG0 | CTXDESC_CD_0_TCR_IRGN0 | CTXDESC_CD_0_TCR_ORGN0 | CTXDESC_CD_0_TCR_SH0); @@ -2685,9 +2694,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) case ARM_SMMU_DOMAIN_S2: arm_smmu_make_s2_domain_ste(&target, master, smmu_domain); arm_smmu_install_ste_for_dev(master, &target); - if (master->cd_table.cdtab) - arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, - NULL); + arm_smmu_clear_cd(master, IOMMU_NO_PASID); break; } @@ -2735,8 +2742,7 @@ static int arm_smmu_attach_dev_ste(struct device *dev, * arm_smmu_domain->devices to avoid races updating the same context * descriptor from arm_smmu_share_asid(). */ - if (master->cd_table.cdtab) - arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, NULL); + arm_smmu_clear_cd(master, IOMMU_NO_PASID); return 0; } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 91a8365f30df..4a84867e46a1 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -751,6 +751,7 @@ extern struct xarray arm_smmu_asid_xa; extern struct mutex arm_smmu_asid_lock; extern struct arm_smmu_ctx_desc quiet_cd; +void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid); struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid); void arm_smmu_make_s1_cd(struct arm_smmu_cd *target, -- Gitee From 76c32a2e9c77106d766c5c014fc5252fd1942eb2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 30 Apr 2024 14:21:37 -0300 Subject: [PATCH 032/143] iommu/arm-smmu-v3: Make arm_smmu_alloc_cd_ptr() ANBZ: #13617 commit b2f4c0fcf094dacd2d1fb96a6fd6598919501589 upstream. Only the attach callers can perform an allocation for the CD table entry, the other callers must not do so, they do not have the correct locking and they cannot sleep. Split up the functions so this is clear. arm_smmu_get_cd_ptr() will return pointer to a CD table entry without doing any kind of allocation. arm_smmu_alloc_cd_ptr() will allocate the table and any required leaf. A following patch will add lockdep assertions to arm_smmu_alloc_cd_ptr() once the restructuring is completed and arm_smmu_alloc_cd_ptr() is never called in the wrong context. Tested-by: Nicolin Chen Reviewed-by: Nicolin Chen Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/5-v9-5040dc602008+177d7-smmuv3_newapi_p2_jgg@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 59 +++++++++++++-------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 +- 2 files changed, 39 insertions(+), 23 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 422da0ff948d..93d9c1b624ca 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -97,6 +97,7 @@ static struct arm_smmu_option_prop arm_smmu_options[] = { static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, struct arm_smmu_device *smmu); +static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master); static void parse_driver_options(struct arm_smmu_device *smmu) { @@ -1206,29 +1207,51 @@ static void arm_smmu_write_cd_l1_desc(__le64 *dst, struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid) { - __le64 *l1ptr; - unsigned int idx; struct arm_smmu_l1_ctx_desc *l1_desc; - struct arm_smmu_device *smmu = master->smmu; struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; + if (!cd_table->cdtab) + return NULL; + if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR) return (struct arm_smmu_cd *)(cd_table->cdtab + ssid * CTXDESC_CD_DWORDS); - idx = ssid >> CTXDESC_SPLIT; - l1_desc = &cd_table->l1_desc[idx]; - if (!l1_desc->l2ptr) { - if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc)) + l1_desc = &cd_table->l1_desc[ssid / CTXDESC_L2_ENTRIES]; + if (!l1_desc->l2ptr) + return NULL; + return &l1_desc->l2ptr[ssid % CTXDESC_L2_ENTRIES]; +} + +static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master, + u32 ssid) +{ + struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; + struct arm_smmu_device *smmu = master->smmu; + + if (!cd_table->cdtab) { + if (arm_smmu_alloc_cd_tables(master)) return NULL; + } + + if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_64K_L2) { + unsigned int idx = ssid / CTXDESC_L2_ENTRIES; + struct arm_smmu_l1_ctx_desc *l1_desc; + + l1_desc = &cd_table->l1_desc[idx]; + if (!l1_desc->l2ptr) { + __le64 *l1ptr; - l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS; - arm_smmu_write_cd_l1_desc(l1ptr, l1_desc); - /* An invalid L1CD can be cached */ - arm_smmu_sync_cd(master, ssid, false); + if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc)) + return NULL; + + l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS; + arm_smmu_write_cd_l1_desc(l1ptr, l1_desc); + /* An invalid L1CD can be cached */ + arm_smmu_sync_cd(master, ssid, false); + } } - idx = ssid & (CTXDESC_L2_ENTRIES - 1); - return &l1_desc->l2ptr[idx]; + return arm_smmu_get_cd_ptr(master, ssid); } struct arm_smmu_cd_writer { @@ -1344,7 +1367,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid, if (WARN_ON(ssid >= (1 << cd_table->s1cdmax))) return -E2BIG; - cd_table_entry = arm_smmu_get_cd_ptr(master, ssid); + cd_table_entry = arm_smmu_alloc_cd_ptr(master, ssid); if (!cd_table_entry) return -ENOMEM; @@ -2672,13 +2695,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) struct arm_smmu_cd target_cd; struct arm_smmu_cd *cdptr; - if (!master->cd_table.cdtab) { - ret = arm_smmu_alloc_cd_tables(master); - if (ret) - goto out_list_del; - } - - cdptr = arm_smmu_get_cd_ptr(master, IOMMU_NO_PASID); + cdptr = arm_smmu_alloc_cd_ptr(master, IOMMU_NO_PASID); if (!cdptr) { ret = -ENOMEM; goto out_list_del; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 4a84867e46a1..522ed397b053 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -275,8 +275,7 @@ struct arm_smmu_ste { * 2lvl: at most 1024 L1 entries, * 1024 lazy entries per table. */ -#define CTXDESC_SPLIT 10 -#define CTXDESC_L2_ENTRIES (1 << CTXDESC_SPLIT) +#define CTXDESC_L2_ENTRIES 1024 #define CTXDESC_L1_DESC_DWORDS 1 #define CTXDESC_L1_DESC_V (1UL << 0) -- Gitee From e406375e131d729cd43b82fe77ad0e198c40e548 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 30 Apr 2024 14:21:38 -0300 Subject: [PATCH 033/143] iommu/arm-smmu-v3: Allocate the CD table entry in advance ANBZ: #13617 commit 13abe4faac4348da0cf1c4eeb2b1b39fcfdb4b8f upstream. Avoid arm_smmu_attach_dev() having to undo the changes to the smmu_domain->devices list, acquire the cdptr earlier so we don't need to handle that error. Now there is a clear break in arm_smmu_attach_dev() where all the prep-work has been done non-disruptively and we commit to making the HW change, which cannot fail. This completes transforming arm_smmu_attach_dev() so that it does not disturb the HW if it fails. Tested-by: Nicolin Chen Tested-by: Shameer Kolothum Reviewed-by: Michael Shavit Reviewed-by: Nicolin Chen Reviewed-by: Mostafa Saleh Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/6-v9-5040dc602008+177d7-smmuv3_newapi_p2_jgg@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 24 +++++++-------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 93d9c1b624ca..3fca7268028d 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2646,6 +2646,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) struct arm_smmu_device *smmu; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_master *master; + struct arm_smmu_cd *cdptr; if (!fwspec) return -ENOENT; @@ -2674,6 +2675,12 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) if (ret) return ret; + if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { + cdptr = arm_smmu_alloc_cd_ptr(master, IOMMU_NO_PASID); + if (!cdptr) + return -ENOMEM; + } + /* * Prevent arm_smmu_share_asid() from trying to change the ASID * of either the old or new domain while we are working on it. @@ -2693,13 +2700,6 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) switch (smmu_domain->stage) { case ARM_SMMU_DOMAIN_S1: { struct arm_smmu_cd target_cd; - struct arm_smmu_cd *cdptr; - - cdptr = arm_smmu_alloc_cd_ptr(master, IOMMU_NO_PASID); - if (!cdptr) { - ret = -ENOMEM; - goto out_list_del; - } arm_smmu_make_s1_cd(&target_cd, master, smmu_domain); arm_smmu_write_cd_entry(master, IOMMU_NO_PASID, cdptr, @@ -2716,16 +2716,8 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) } arm_smmu_enable_ats(master, smmu_domain); - goto out_unlock; - -out_list_del: - spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_del_init(&master->domain_head); - spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); - -out_unlock: mutex_unlock(&arm_smmu_asid_lock); - return ret; + return 0; } static int arm_smmu_attach_dev_ste(struct device *dev, -- Gitee From 7f8941c1de0f00e001ce8520d1cdaf08d55aa03f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 30 Apr 2024 14:21:39 -0300 Subject: [PATCH 034/143] iommu/arm-smmu-v3: Move the CD generation for SVA into a function ANBZ: #13617 commit 7b87c93c8b86d9d9b9567d83f0ca3d3046fdfc5a upstream. Pull all the calculations for building the CD table entry for a mmu_struct into arm_smmu_make_sva_cd(). Call it in the two places installing the SVA CD table entry. Open code the last caller of arm_smmu_update_ctx_desc_devices() and remove the function. Remove arm_smmu_write_ctx_desc() since all callers are gone. Add the locking assertions to arm_smmu_alloc_cd_ptr() since arm_smmu_update_ctx_desc_devices() was the last problematic caller. Remove quiet_cd since all users are gone, arm_smmu_make_sva_cd() creates the same value. The behavior of quiet_cd changes slightly, the old implementation edited the CD in place to set CTXDESC_CD_0_TCR_EPD0 assuming it was a SVA CD entry. This version generates a full CD entry with a 0 TTB0 and relies on arm_smmu_write_cd_entry() to install it hitlessly. Tested-by: Nicolin Chen Tested-by: Shameer Kolothum Reviewed-by: Nicolin Chen Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/7-v9-5040dc602008+177d7-smmuv3_newapi_p2_jgg@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 155 +++++++++++------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 77 +-------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 7 +- 3 files changed, 107 insertions(+), 132 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 7cf286f7a009..8730a7043909 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -34,25 +34,6 @@ struct arm_smmu_bond { static DEFINE_MUTEX(sva_lock); -/* - * Write the CD to the CD tables for all masters that this domain is attached - * to. Note that this is only used to update existing CD entries in the target - * CD table, for which it's assumed that arm_smmu_write_ctx_desc can't fail. - */ -static void arm_smmu_update_ctx_desc_devices(struct arm_smmu_domain *smmu_domain, - int ssid, - struct arm_smmu_ctx_desc *cd) -{ - struct arm_smmu_master *master; - unsigned long flags; - - spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_for_each_entry(master, &smmu_domain->devices, domain_head) { - arm_smmu_write_ctx_desc(master, ssid, cd); - } - spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); -} - static void arm_smmu_update_s1_domain_cd_entry(struct arm_smmu_domain *smmu_domain) { @@ -128,11 +109,85 @@ arm_smmu_share_asid(struct mm_struct *mm, u16 asid) return NULL; } +static u64 page_size_to_cd(void) +{ + static_assert(PAGE_SIZE == SZ_4K || PAGE_SIZE == SZ_16K || + PAGE_SIZE == SZ_64K); + if (PAGE_SIZE == SZ_64K) + return ARM_LPAE_TCR_TG0_64K; + if (PAGE_SIZE == SZ_16K) + return ARM_LPAE_TCR_TG0_16K; + return ARM_LPAE_TCR_TG0_4K; +} + +static void arm_smmu_make_sva_cd(struct arm_smmu_cd *target, + struct arm_smmu_master *master, + struct mm_struct *mm, u16 asid) +{ + u64 par; + + memset(target, 0, sizeof(*target)); + + par = cpuid_feature_extract_unsigned_field( + read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1), + ID_AA64MMFR0_EL1_PARANGE_SHIFT); + + target->data[0] = cpu_to_le64( + CTXDESC_CD_0_TCR_EPD1 | +#ifdef __BIG_ENDIAN + CTXDESC_CD_0_ENDI | +#endif + CTXDESC_CD_0_V | + FIELD_PREP(CTXDESC_CD_0_TCR_IPS, par) | + CTXDESC_CD_0_AA64 | + (master->stall_enabled ? CTXDESC_CD_0_S : 0) | + CTXDESC_CD_0_R | + CTXDESC_CD_0_A | + CTXDESC_CD_0_ASET | + FIELD_PREP(CTXDESC_CD_0_ASID, asid)); + + /* + * If no MM is passed then this creates a SVA entry that faults + * everything. arm_smmu_write_cd_entry() can hitlessly go between these + * two entries types since TTB0 is ignored by HW when EPD0 is set. + */ + if (mm) { + target->data[0] |= cpu_to_le64( + FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, + 64ULL - vabits_actual) | + FIELD_PREP(CTXDESC_CD_0_TCR_TG0, page_size_to_cd()) | + FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, + ARM_LPAE_TCR_RGN_WBWA) | + FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, + ARM_LPAE_TCR_RGN_WBWA) | + FIELD_PREP(CTXDESC_CD_0_TCR_SH0, ARM_LPAE_TCR_SH_IS)); + + target->data[1] = cpu_to_le64(virt_to_phys(mm->pgd) & + CTXDESC_CD_1_TTB0_MASK); + } else { + target->data[0] |= cpu_to_le64(CTXDESC_CD_0_TCR_EPD0); + + /* + * Disable stall and immediately generate an abort if stall + * disable is permitted. This speeds up cleanup for an unclean + * exit if the device is still doing a lot of DMA. + */ + if (!(master->smmu->features & ARM_SMMU_FEAT_STALL_FORCE)) + target->data[0] &= + cpu_to_le64(~(CTXDESC_CD_0_S | CTXDESC_CD_0_R)); + } + + /* + * MAIR value is pretty much constant and global, so we can just get it + * from the current CPU register + */ + target->data[3] = cpu_to_le64(read_sysreg(mair_el1)); +} + static struct arm_smmu_ctx_desc *arm_smmu_alloc_shared_cd(struct mm_struct *mm) { u16 asid; int err = 0; - u64 tcr, par, reg; struct arm_smmu_ctx_desc *cd; struct arm_smmu_ctx_desc *ret = NULL; @@ -166,39 +221,6 @@ static struct arm_smmu_ctx_desc *arm_smmu_alloc_shared_cd(struct mm_struct *mm) if (err) goto out_free_asid; - tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, 64ULL - vabits_actual) | - FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, ARM_LPAE_TCR_RGN_WBWA) | - FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, ARM_LPAE_TCR_RGN_WBWA) | - FIELD_PREP(CTXDESC_CD_0_TCR_SH0, ARM_LPAE_TCR_SH_IS) | - CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64; - - switch (PAGE_SIZE) { - case SZ_4K: - tcr |= FIELD_PREP(CTXDESC_CD_0_TCR_TG0, ARM_LPAE_TCR_TG0_4K); - break; - case SZ_16K: - tcr |= FIELD_PREP(CTXDESC_CD_0_TCR_TG0, ARM_LPAE_TCR_TG0_16K); - break; - case SZ_64K: - tcr |= FIELD_PREP(CTXDESC_CD_0_TCR_TG0, ARM_LPAE_TCR_TG0_64K); - break; - default: - WARN_ON(1); - err = -EINVAL; - goto out_free_asid; - } - - reg = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); - par = cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR0_EL1_PARANGE_SHIFT); - tcr |= FIELD_PREP(CTXDESC_CD_0_TCR_IPS, par); - - cd->ttbr = virt_to_phys(mm->pgd); - cd->tcr = tcr; - /* - * MAIR value is pretty much constant and global, so we can just get it - * from the current CPU register - */ - cd->mair = read_sysreg(mair_el1); cd->asid = asid; cd->mm = mm; @@ -276,6 +298,8 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) { struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn); struct arm_smmu_domain *smmu_domain = smmu_mn->domain; + struct arm_smmu_master *master; + unsigned long flags; mutex_lock(&sva_lock); if (smmu_mn->cleared) { @@ -287,8 +311,19 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) * DMA may still be running. Keep the cd valid to avoid C_BAD_CD events, * but disable translation. */ - arm_smmu_update_ctx_desc_devices(smmu_domain, mm_get_enqcmd_pasid(mm), - &quiet_cd); + spin_lock_irqsave(&smmu_domain->devices_lock, flags); + list_for_each_entry(master, &smmu_domain->devices, domain_head) { + struct arm_smmu_cd target; + struct arm_smmu_cd *cdptr; + + cdptr = arm_smmu_get_cd_ptr(master, mm_get_enqcmd_pasid(mm)); + if (WARN_ON(!cdptr)) + continue; + arm_smmu_make_sva_cd(&target, master, NULL, smmu_mn->cd->asid); + arm_smmu_write_cd_entry(master, mm_get_enqcmd_pasid(mm), cdptr, + &target); + } + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_mn->cd->asid); arm_smmu_atc_inv_domain(smmu_domain, mm_get_enqcmd_pasid(mm), 0, 0); @@ -383,6 +418,8 @@ static int __arm_smmu_sva_bind(struct device *dev, ioasid_t pasid, struct mm_struct *mm) { int ret; + struct arm_smmu_cd target; + struct arm_smmu_cd *cdptr; struct arm_smmu_bond *bond; struct arm_smmu_master *master = dev_iommu_priv_get(dev); struct iommu_domain *domain = iommu_get_domain_for_dev(dev); @@ -409,9 +446,13 @@ static int __arm_smmu_sva_bind(struct device *dev, ioasid_t pasid, goto err_free_bond; } - ret = arm_smmu_write_ctx_desc(master, pasid, bond->smmu_mn->cd); - if (ret) + cdptr = arm_smmu_alloc_cd_ptr(master, mm_get_enqcmd_pasid(mm)); + if (!cdptr) { + ret = -ENOMEM; goto err_put_notifier; + } + arm_smmu_make_sva_cd(&target, master, mm, bond->smmu_mn->cd->asid); + arm_smmu_write_cd_entry(master, pasid, cdptr, &target); list_add(&bond->list, &master->bonds); return 0; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 3fca7268028d..8c19f75571a5 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -83,12 +83,6 @@ struct arm_smmu_option_prop { DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa); DEFINE_MUTEX(arm_smmu_asid_lock); -/* - * Special value used by SVA when a process dies, to quiesce a CD without - * disabling it. - */ -struct arm_smmu_ctx_desc quiet_cd = { 0 }; - static struct arm_smmu_option_prop arm_smmu_options[] = { { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" }, { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"}, @@ -1200,7 +1194,7 @@ static void arm_smmu_write_cd_l1_desc(__le64 *dst, u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) | CTXDESC_L1_DESC_V; - /* See comment in arm_smmu_write_ctx_desc() */ + /* The HW has 64 bit atomicity with stores to the L2 CD table */ WRITE_ONCE(*dst, cpu_to_le64(val)); } @@ -1223,12 +1217,15 @@ struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, return &l1_desc->l2ptr[ssid % CTXDESC_L2_ENTRIES]; } -static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master, - u32 ssid) +struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master, + u32 ssid) { struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; struct arm_smmu_device *smmu = master->smmu; + might_sleep(); + iommu_group_mutex_assert(master->dev); + if (!cd_table->cdtab) { if (arm_smmu_alloc_cd_tables(master)) return NULL; @@ -1346,65 +1343,6 @@ void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid) arm_smmu_write_cd_entry(master, ssid, cdptr, &target); } -int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid, - struct arm_smmu_ctx_desc *cd) -{ - /* - * This function handles the following cases: - * - * (1) Install primary CD, for normal DMA traffic (SSID = IOMMU_NO_PASID = 0). - * (2) Install a secondary CD, for SID+SSID traffic. - * (4) Quiesce the context without clearing the valid bit. Disable - * translation, and ignore any translation fault. - */ - u64 val; - struct arm_smmu_cd target; - struct arm_smmu_cd *cdptr = ⌖ - struct arm_smmu_cd *cd_table_entry; - struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; - struct arm_smmu_device *smmu = master->smmu; - - if (WARN_ON(ssid >= (1 << cd_table->s1cdmax))) - return -E2BIG; - - cd_table_entry = arm_smmu_alloc_cd_ptr(master, ssid); - if (!cd_table_entry) - return -ENOMEM; - - target = *cd_table_entry; - val = le64_to_cpu(cdptr->data[0]); - - if (cd == &quiet_cd) { /* (4) */ - val &= ~(CTXDESC_CD_0_TCR_T0SZ | CTXDESC_CD_0_TCR_TG0 | - CTXDESC_CD_0_TCR_IRGN0 | CTXDESC_CD_0_TCR_ORGN0 | - CTXDESC_CD_0_TCR_SH0); - if (!(smmu->features & ARM_SMMU_FEAT_STALL_FORCE)) - val &= ~(CTXDESC_CD_0_S | CTXDESC_CD_0_R); - val |= CTXDESC_CD_0_TCR_EPD0; - cdptr->data[1] &= ~cpu_to_le64(CTXDESC_CD_1_TTB0_MASK); - } else { /* (1) and (2) */ - cdptr->data[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK); - cdptr->data[2] = 0; - cdptr->data[3] = cpu_to_le64(cd->mair); - - val = cd->tcr | -#ifdef __BIG_ENDIAN - CTXDESC_CD_0_ENDI | -#endif - CTXDESC_CD_0_R | CTXDESC_CD_0_A | - (cd->mm ? 0 : CTXDESC_CD_0_ASET) | - CTXDESC_CD_0_AA64 | - FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) | - CTXDESC_CD_0_V; - - if (cd_table->stall_enabled) - val |= CTXDESC_CD_0_S; - } - cdptr->data[0] = cpu_to_le64(val); - arm_smmu_write_cd_entry(master, ssid, cd_table_entry, &target); - return 0; -} - static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master) { int ret; @@ -1413,7 +1351,6 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master) struct arm_smmu_device *smmu = master->smmu; struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; - cd_table->stall_enabled = master->stall_enabled; cd_table->s1cdmax = master->ssid_bits; max_contexts = 1 << cd_table->s1cdmax; @@ -1511,7 +1448,7 @@ arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc) val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span); val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK; - /* See comment in arm_smmu_write_ctx_desc() */ + /* The HW has 64 bit atomicity with stores to the L2 STE table */ WRITE_ONCE(*dst, cpu_to_le64(val)); } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 522ed397b053..144c5c9085cb 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -608,8 +608,6 @@ struct arm_smmu_ctx_desc_cfg { u8 s1fmt; /* log2 of the maximum number of CDs supported by this table */ u8 s1cdmax; - /* Whether CD entries in this table have the stall bit set. */ - u8 stall_enabled:1; }; struct arm_smmu_s2_cfg { @@ -748,11 +746,12 @@ static inline struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) extern struct xarray arm_smmu_asid_xa; extern struct mutex arm_smmu_asid_lock; -extern struct arm_smmu_ctx_desc quiet_cd; void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid); struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid); +struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master, + u32 ssid); void arm_smmu_make_s1_cd(struct arm_smmu_cd *target, struct arm_smmu_master *master, struct arm_smmu_domain *smmu_domain); @@ -760,8 +759,6 @@ void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid, struct arm_smmu_cd *cdptr, const struct arm_smmu_cd *target); -int arm_smmu_write_ctx_desc(struct arm_smmu_master *smmu_master, int ssid, - struct arm_smmu_ctx_desc *cd); void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid); void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, size_t granule, bool leaf, -- Gitee From 3ee06d7c42f9a12915b85970bf012556ffa24d61 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 30 Apr 2024 14:21:40 -0300 Subject: [PATCH 035/143] iommu/arm-smmu-v3: Build the whole CD in arm_smmu_make_s1_cd() ANBZ: #13617 commit 04905c17f64890311e6b5a5065d8c220602712e5 upstream. Half the code was living in arm_smmu_domain_finalise_s1(), just move it here and take the values directly from the pgtbl_ops instead of storing copies. Tested-by: Nicolin Chen Tested-by: Shameer Kolothum Reviewed-by: Michael Shavit Reviewed-by: Mostafa Saleh Reviewed-by: Nicolin Chen Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/8-v9-5040dc602008+177d7-smmuv3_newapi_p2_jgg@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 47 ++++++++------------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 -- 2 files changed, 18 insertions(+), 32 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 8c19f75571a5..2643c9f2c2f8 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1309,15 +1309,25 @@ void arm_smmu_make_s1_cd(struct arm_smmu_cd *target, struct arm_smmu_domain *smmu_domain) { struct arm_smmu_ctx_desc *cd = &smmu_domain->cd; + const struct io_pgtable_cfg *pgtbl_cfg = + &io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops)->cfg; + typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = + &pgtbl_cfg->arm_lpae_s1_cfg.tcr; memset(target, 0, sizeof(*target)); target->data[0] = cpu_to_le64( - cd->tcr | + FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) | + FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) | + FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) | + FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) | + FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) | #ifdef __BIG_ENDIAN CTXDESC_CD_0_ENDI | #endif + CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_V | + FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) | CTXDESC_CD_0_AA64 | (master->stall_enabled ? CTXDESC_CD_0_S : 0) | CTXDESC_CD_0_R | @@ -1325,9 +1335,9 @@ void arm_smmu_make_s1_cd(struct arm_smmu_cd *target, CTXDESC_CD_0_ASET | FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) ); - - target->data[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK); - target->data[3] = cpu_to_le64(cd->mair); + target->data[1] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.ttbr & + CTXDESC_CD_1_TTB0_MASK); + target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.mair); } void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid) @@ -2295,13 +2305,11 @@ static void arm_smmu_domain_free(struct iommu_domain *domain) } static int arm_smmu_domain_finalise_s1(struct arm_smmu_device *smmu, - struct arm_smmu_domain *smmu_domain, - struct io_pgtable_cfg *pgtbl_cfg) + struct arm_smmu_domain *smmu_domain) { int ret; u32 asid; struct arm_smmu_ctx_desc *cd = &smmu_domain->cd; - typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr; refcount_set(&cd->refs, 1); @@ -2309,31 +2317,13 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_device *smmu, mutex_lock(&arm_smmu_asid_lock); ret = xa_alloc(&arm_smmu_asid_xa, &asid, cd, XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL); - if (ret) - goto out_unlock; - cd->asid = (u16)asid; - cd->ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; - cd->tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) | - FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) | - FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) | - FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) | - FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) | - FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) | - CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64; - cd->mair = pgtbl_cfg->arm_lpae_s1_cfg.mair; - - mutex_unlock(&arm_smmu_asid_lock); - return 0; - -out_unlock: mutex_unlock(&arm_smmu_asid_lock); return ret; } static int arm_smmu_domain_finalise_s2(struct arm_smmu_device *smmu, - struct arm_smmu_domain *smmu_domain, - struct io_pgtable_cfg *pgtbl_cfg) + struct arm_smmu_domain *smmu_domain) { int vmid; struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; @@ -2357,8 +2347,7 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, struct io_pgtable_cfg pgtbl_cfg; struct io_pgtable_ops *pgtbl_ops; int (*finalise_stage_fn)(struct arm_smmu_device *smmu, - struct arm_smmu_domain *smmu_domain, - struct io_pgtable_cfg *pgtbl_cfg); + struct arm_smmu_domain *smmu_domain); /* Restrict the stage to what we can actually support */ if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) @@ -2401,7 +2390,7 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, smmu_domain->domain.geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1; smmu_domain->domain.geometry.force_aperture = true; - ret = finalise_stage_fn(smmu, smmu_domain, &pgtbl_cfg); + ret = finalise_stage_fn(smmu, smmu_domain); if (ret < 0) { free_io_pgtable_ops(pgtbl_ops); return ret; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 144c5c9085cb..0dfbff9e9053 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -587,9 +587,6 @@ struct arm_smmu_strtab_l1_desc { struct arm_smmu_ctx_desc { u16 asid; - u64 ttbr; - u64 tcr; - u64 mair; refcount_t refs; struct mm_struct *mm; -- Gitee From 876d9f5077ddcf829ed0cfc0ae4e8bf7c8403bb9 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 30 Apr 2024 14:21:41 -0300 Subject: [PATCH 036/143] iommu/arm-smmu-v3: Add unit tests for arm_smmu_write_entry ANBZ: #13617 commit 56e1a4cc2588a7cb9664457a62fd7a77e005aa01 upstream. Add tests for some of the more common STE update operations that we expect to see, as well as some artificial STE updates to test the edges of arm_smmu_write_entry. These also serve as a record of which common operation is expected to be hitless, and how many syncs they require. arm_smmu_write_entry implements a generic algorithm that updates an STE/CD to any other abritrary STE/CD configuration. The update requires a sequence of write+sync operations with some invariants that must be held true after each sync. arm_smmu_write_entry lends itself well to unit-testing since the function's interaction with the STE/CD is already abstracted by input callbacks that we can hook to introspect into the sequence of operations. We can use these hooks to guarantee that invariants are held throughout the entire update operation. Link: https://lore.kernel.org/r/20240106083617.1173871-3-mshavit@google.com Tested-by: Nicolin Chen Signed-off-by: Michael Shavit Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/9-v9-5040dc602008+177d7-smmuv3_newapi_p2_jgg@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/Kconfig | 13 +- drivers/iommu/arm/arm-smmu-v3/Makefile | 1 + .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 8 +- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c | 465 ++++++++++++++++++ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 43 +- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 30 ++ 6 files changed, 533 insertions(+), 27 deletions(-) create mode 100644 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index b76437d09ac7..20d9bd4bc074 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -402,9 +402,9 @@ config ARM_SMMU_V3 Say Y here if your system includes an IOMMU device implementing the ARM SMMUv3 architecture. +if ARM_SMMU_V3 config ARM_SMMU_V3_SVA bool "Shared Virtual Addressing support for the ARM SMMUv3" - depends on ARM_SMMU_V3 select IOMMU_SVA select IOMMU_IOPF select MMU_NOTIFIER @@ -415,6 +415,17 @@ config ARM_SMMU_V3_SVA Say Y here if your system supports SVA extensions such as PCIe PASID and PRI. +config ARM_SMMU_V3_KUNIT_TEST + bool "KUnit tests for arm-smmu-v3 driver" if !KUNIT_ALL_TESTS + depends on KUNIT + depends on ARM_SMMU_V3_SVA + default KUNIT_ALL_TESTS + help + Enable this option to unit-test arm-smmu-v3 driver functions. + + If unsure, say N. +endif + config S390_IOMMU def_bool y if S390 && PCI depends on S390 && PCI diff --git a/drivers/iommu/arm/arm-smmu-v3/Makefile b/drivers/iommu/arm/arm-smmu-v3/Makefile index 54feb1ecccad..0b97054b3929 100644 --- a/drivers/iommu/arm/arm-smmu-v3/Makefile +++ b/drivers/iommu/arm/arm-smmu-v3/Makefile @@ -2,4 +2,5 @@ obj-$(CONFIG_ARM_SMMU_V3) += arm_smmu_v3.o arm_smmu_v3-objs-y += arm-smmu-v3.o arm_smmu_v3-objs-$(CONFIG_ARM_SMMU_V3_SVA) += arm-smmu-v3-sva.o +arm_smmu_v3-objs-$(CONFIG_ARM_SMMU_V3_KUNIT_TEST) += arm-smmu-v3-test.o arm_smmu_v3-objs := $(arm_smmu_v3-objs-y) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 8730a7043909..34a977a0767d 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "arm-smmu-v3.h" #include "../../io-pgtable-arm.h" @@ -120,9 +121,10 @@ static u64 page_size_to_cd(void) return ARM_LPAE_TCR_TG0_4K; } -static void arm_smmu_make_sva_cd(struct arm_smmu_cd *target, - struct arm_smmu_master *master, - struct mm_struct *mm, u16 asid) +VISIBLE_IF_KUNIT +void arm_smmu_make_sva_cd(struct arm_smmu_cd *target, + struct arm_smmu_master *master, struct mm_struct *mm, + u16 asid) { u64 par; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c new file mode 100644 index 000000000000..417804392ff0 --- /dev/null +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c @@ -0,0 +1,465 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2024 Google LLC. + */ +#include +#include + +#include "arm-smmu-v3.h" + +struct arm_smmu_test_writer { + struct arm_smmu_entry_writer writer; + struct kunit *test; + const __le64 *init_entry; + const __le64 *target_entry; + __le64 *entry; + + bool invalid_entry_written; + unsigned int num_syncs; +}; + +#define NUM_ENTRY_QWORDS 8 +#define NUM_EXPECTED_SYNCS(x) x + +static struct arm_smmu_ste bypass_ste; +static struct arm_smmu_ste abort_ste; +static struct arm_smmu_device smmu = { + .features = ARM_SMMU_FEAT_STALLS | ARM_SMMU_FEAT_ATTR_TYPES_OVR +}; +static struct mm_struct sva_mm = { + .pgd = (void *)0xdaedbeefdeadbeefULL, +}; + +static bool arm_smmu_entry_differs_in_used_bits(const __le64 *entry, + const __le64 *used_bits, + const __le64 *target, + unsigned int length) +{ + bool differs = false; + unsigned int i; + + for (i = 0; i < length; i++) { + if ((entry[i] & used_bits[i]) != target[i]) + differs = true; + } + return differs; +} + +static void +arm_smmu_test_writer_record_syncs(struct arm_smmu_entry_writer *writer) +{ + struct arm_smmu_test_writer *test_writer = + container_of(writer, struct arm_smmu_test_writer, writer); + __le64 *entry_used_bits; + + entry_used_bits = kunit_kzalloc( + test_writer->test, sizeof(*entry_used_bits) * NUM_ENTRY_QWORDS, + GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test_writer->test, entry_used_bits); + + pr_debug("STE value is now set to: "); + print_hex_dump_debug(" ", DUMP_PREFIX_NONE, 16, 8, + test_writer->entry, + NUM_ENTRY_QWORDS * sizeof(*test_writer->entry), + false); + + test_writer->num_syncs += 1; + if (!test_writer->entry[0]) { + test_writer->invalid_entry_written = true; + } else { + /* + * At any stage in a hitless transition, the entry must be + * equivalent to either the initial entry or the target entry + * when only considering the bits used by the current + * configuration. + */ + writer->ops->get_used(test_writer->entry, entry_used_bits); + KUNIT_EXPECT_FALSE( + test_writer->test, + arm_smmu_entry_differs_in_used_bits( + test_writer->entry, entry_used_bits, + test_writer->init_entry, NUM_ENTRY_QWORDS) && + arm_smmu_entry_differs_in_used_bits( + test_writer->entry, entry_used_bits, + test_writer->target_entry, + NUM_ENTRY_QWORDS)); + } +} + +static void +arm_smmu_v3_test_debug_print_used_bits(struct arm_smmu_entry_writer *writer, + const __le64 *ste) +{ + __le64 used_bits[NUM_ENTRY_QWORDS] = {}; + + arm_smmu_get_ste_used(ste, used_bits); + pr_debug("STE used bits: "); + print_hex_dump_debug(" ", DUMP_PREFIX_NONE, 16, 8, used_bits, + sizeof(used_bits), false); +} + +static const struct arm_smmu_entry_writer_ops test_ste_ops = { + .sync = arm_smmu_test_writer_record_syncs, + .get_used = arm_smmu_get_ste_used, +}; + +static const struct arm_smmu_entry_writer_ops test_cd_ops = { + .sync = arm_smmu_test_writer_record_syncs, + .get_used = arm_smmu_get_cd_used, +}; + +static void arm_smmu_v3_test_ste_expect_transition( + struct kunit *test, const struct arm_smmu_ste *cur, + const struct arm_smmu_ste *target, unsigned int num_syncs_expected, + bool hitless) +{ + struct arm_smmu_ste cur_copy = *cur; + struct arm_smmu_test_writer test_writer = { + .writer = { + .ops = &test_ste_ops, + }, + .test = test, + .init_entry = cur->data, + .target_entry = target->data, + .entry = cur_copy.data, + .num_syncs = 0, + .invalid_entry_written = false, + + }; + + pr_debug("STE initial value: "); + print_hex_dump_debug(" ", DUMP_PREFIX_NONE, 16, 8, cur_copy.data, + sizeof(cur_copy), false); + arm_smmu_v3_test_debug_print_used_bits(&test_writer.writer, cur->data); + pr_debug("STE target value: "); + print_hex_dump_debug(" ", DUMP_PREFIX_NONE, 16, 8, target->data, + sizeof(cur_copy), false); + arm_smmu_v3_test_debug_print_used_bits(&test_writer.writer, + target->data); + + arm_smmu_write_entry(&test_writer.writer, cur_copy.data, target->data); + + KUNIT_EXPECT_EQ(test, test_writer.invalid_entry_written, !hitless); + KUNIT_EXPECT_EQ(test, test_writer.num_syncs, num_syncs_expected); + KUNIT_EXPECT_MEMEQ(test, target->data, cur_copy.data, sizeof(cur_copy)); +} + +static void arm_smmu_v3_test_ste_expect_hitless_transition( + struct kunit *test, const struct arm_smmu_ste *cur, + const struct arm_smmu_ste *target, unsigned int num_syncs_expected) +{ + arm_smmu_v3_test_ste_expect_transition(test, cur, target, + num_syncs_expected, true); +} + +static const dma_addr_t fake_cdtab_dma_addr = 0xF0F0F0F0F0F0; + +static void arm_smmu_test_make_cdtable_ste(struct arm_smmu_ste *ste, + const dma_addr_t dma_addr) +{ + struct arm_smmu_master master = { + .cd_table.cdtab_dma = dma_addr, + .cd_table.s1cdmax = 0xFF, + .cd_table.s1fmt = STRTAB_STE_0_S1FMT_64K_L2, + .smmu = &smmu, + }; + + arm_smmu_make_cdtable_ste(ste, &master); +} + +static void arm_smmu_v3_write_ste_test_bypass_to_abort(struct kunit *test) +{ + /* + * Bypass STEs has used bits in the first two Qwords, while abort STEs + * only have used bits in the first QWord. Transitioning from bypass to + * abort requires two syncs: the first to set the first qword and make + * the STE into an abort, the second to clean up the second qword. + */ + arm_smmu_v3_test_ste_expect_hitless_transition( + test, &bypass_ste, &abort_ste, NUM_EXPECTED_SYNCS(2)); +} + +static void arm_smmu_v3_write_ste_test_abort_to_bypass(struct kunit *test) +{ + /* + * Transitioning from abort to bypass also requires two syncs: the first + * to set the second qword data required by the bypass STE, and the + * second to set the first qword and switch to bypass. + */ + arm_smmu_v3_test_ste_expect_hitless_transition( + test, &abort_ste, &bypass_ste, NUM_EXPECTED_SYNCS(2)); +} + +static void arm_smmu_v3_write_ste_test_cdtable_to_abort(struct kunit *test) +{ + struct arm_smmu_ste ste; + + arm_smmu_test_make_cdtable_ste(&ste, fake_cdtab_dma_addr); + arm_smmu_v3_test_ste_expect_hitless_transition(test, &ste, &abort_ste, + NUM_EXPECTED_SYNCS(2)); +} + +static void arm_smmu_v3_write_ste_test_abort_to_cdtable(struct kunit *test) +{ + struct arm_smmu_ste ste; + + arm_smmu_test_make_cdtable_ste(&ste, fake_cdtab_dma_addr); + arm_smmu_v3_test_ste_expect_hitless_transition(test, &abort_ste, &ste, + NUM_EXPECTED_SYNCS(2)); +} + +static void arm_smmu_v3_write_ste_test_cdtable_to_bypass(struct kunit *test) +{ + struct arm_smmu_ste ste; + + arm_smmu_test_make_cdtable_ste(&ste, fake_cdtab_dma_addr); + arm_smmu_v3_test_ste_expect_hitless_transition(test, &ste, &bypass_ste, + NUM_EXPECTED_SYNCS(3)); +} + +static void arm_smmu_v3_write_ste_test_bypass_to_cdtable(struct kunit *test) +{ + struct arm_smmu_ste ste; + + arm_smmu_test_make_cdtable_ste(&ste, fake_cdtab_dma_addr); + arm_smmu_v3_test_ste_expect_hitless_transition(test, &bypass_ste, &ste, + NUM_EXPECTED_SYNCS(3)); +} + +static void arm_smmu_test_make_s2_ste(struct arm_smmu_ste *ste, + bool ats_enabled) +{ + struct arm_smmu_master master = { + .smmu = &smmu, + .ats_enabled = ats_enabled, + }; + struct io_pgtable io_pgtable = {}; + struct arm_smmu_domain smmu_domain = { + .pgtbl_ops = &io_pgtable.ops, + }; + + io_pgtable.cfg.arm_lpae_s2_cfg.vttbr = 0xdaedbeefdeadbeefULL; + io_pgtable.cfg.arm_lpae_s2_cfg.vtcr.ps = 1; + io_pgtable.cfg.arm_lpae_s2_cfg.vtcr.tg = 2; + io_pgtable.cfg.arm_lpae_s2_cfg.vtcr.sh = 3; + io_pgtable.cfg.arm_lpae_s2_cfg.vtcr.orgn = 1; + io_pgtable.cfg.arm_lpae_s2_cfg.vtcr.irgn = 2; + io_pgtable.cfg.arm_lpae_s2_cfg.vtcr.sl = 3; + io_pgtable.cfg.arm_lpae_s2_cfg.vtcr.tsz = 4; + + arm_smmu_make_s2_domain_ste(ste, &master, &smmu_domain); +} + +static void arm_smmu_v3_write_ste_test_s2_to_abort(struct kunit *test) +{ + struct arm_smmu_ste ste; + + arm_smmu_test_make_s2_ste(&ste, true); + arm_smmu_v3_test_ste_expect_hitless_transition(test, &ste, &abort_ste, + NUM_EXPECTED_SYNCS(2)); +} + +static void arm_smmu_v3_write_ste_test_abort_to_s2(struct kunit *test) +{ + struct arm_smmu_ste ste; + + arm_smmu_test_make_s2_ste(&ste, true); + arm_smmu_v3_test_ste_expect_hitless_transition(test, &abort_ste, &ste, + NUM_EXPECTED_SYNCS(2)); +} + +static void arm_smmu_v3_write_ste_test_s2_to_bypass(struct kunit *test) +{ + struct arm_smmu_ste ste; + + arm_smmu_test_make_s2_ste(&ste, true); + arm_smmu_v3_test_ste_expect_hitless_transition(test, &ste, &bypass_ste, + NUM_EXPECTED_SYNCS(2)); +} + +static void arm_smmu_v3_write_ste_test_bypass_to_s2(struct kunit *test) +{ + struct arm_smmu_ste ste; + + arm_smmu_test_make_s2_ste(&ste, true); + arm_smmu_v3_test_ste_expect_hitless_transition(test, &bypass_ste, &ste, + NUM_EXPECTED_SYNCS(2)); +} + +static void arm_smmu_v3_test_cd_expect_transition( + struct kunit *test, const struct arm_smmu_cd *cur, + const struct arm_smmu_cd *target, unsigned int num_syncs_expected, + bool hitless) +{ + struct arm_smmu_cd cur_copy = *cur; + struct arm_smmu_test_writer test_writer = { + .writer = { + .ops = &test_cd_ops, + }, + .test = test, + .init_entry = cur->data, + .target_entry = target->data, + .entry = cur_copy.data, + .num_syncs = 0, + .invalid_entry_written = false, + + }; + + pr_debug("CD initial value: "); + print_hex_dump_debug(" ", DUMP_PREFIX_NONE, 16, 8, cur_copy.data, + sizeof(cur_copy), false); + arm_smmu_v3_test_debug_print_used_bits(&test_writer.writer, cur->data); + pr_debug("CD target value: "); + print_hex_dump_debug(" ", DUMP_PREFIX_NONE, 16, 8, target->data, + sizeof(cur_copy), false); + arm_smmu_v3_test_debug_print_used_bits(&test_writer.writer, + target->data); + + arm_smmu_write_entry(&test_writer.writer, cur_copy.data, target->data); + + KUNIT_EXPECT_EQ(test, test_writer.invalid_entry_written, !hitless); + KUNIT_EXPECT_EQ(test, test_writer.num_syncs, num_syncs_expected); + KUNIT_EXPECT_MEMEQ(test, target->data, cur_copy.data, sizeof(cur_copy)); +} + +static void arm_smmu_v3_test_cd_expect_non_hitless_transition( + struct kunit *test, const struct arm_smmu_cd *cur, + const struct arm_smmu_cd *target, unsigned int num_syncs_expected) +{ + arm_smmu_v3_test_cd_expect_transition(test, cur, target, + num_syncs_expected, false); +} + +static void arm_smmu_v3_test_cd_expect_hitless_transition( + struct kunit *test, const struct arm_smmu_cd *cur, + const struct arm_smmu_cd *target, unsigned int num_syncs_expected) +{ + arm_smmu_v3_test_cd_expect_transition(test, cur, target, + num_syncs_expected, true); +} + +static void arm_smmu_test_make_s1_cd(struct arm_smmu_cd *cd, unsigned int asid) +{ + struct arm_smmu_master master = { + .smmu = &smmu, + }; + struct io_pgtable io_pgtable = {}; + struct arm_smmu_domain smmu_domain = { + .pgtbl_ops = &io_pgtable.ops, + .cd = { + .asid = asid, + }, + }; + + io_pgtable.cfg.arm_lpae_s1_cfg.ttbr = 0xdaedbeefdeadbeefULL; + io_pgtable.cfg.arm_lpae_s1_cfg.tcr.ips = 1; + io_pgtable.cfg.arm_lpae_s1_cfg.tcr.tg = 2; + io_pgtable.cfg.arm_lpae_s1_cfg.tcr.sh = 3; + io_pgtable.cfg.arm_lpae_s1_cfg.tcr.orgn = 1; + io_pgtable.cfg.arm_lpae_s1_cfg.tcr.irgn = 2; + io_pgtable.cfg.arm_lpae_s1_cfg.tcr.tsz = 4; + io_pgtable.cfg.arm_lpae_s1_cfg.mair = 0xabcdef012345678ULL; + + arm_smmu_make_s1_cd(cd, &master, &smmu_domain); +} + +static void arm_smmu_v3_write_cd_test_s1_clear(struct kunit *test) +{ + struct arm_smmu_cd cd = {}; + struct arm_smmu_cd cd_2; + + arm_smmu_test_make_s1_cd(&cd_2, 1997); + arm_smmu_v3_test_cd_expect_non_hitless_transition( + test, &cd, &cd_2, NUM_EXPECTED_SYNCS(2)); + arm_smmu_v3_test_cd_expect_non_hitless_transition( + test, &cd_2, &cd, NUM_EXPECTED_SYNCS(2)); +} + +static void arm_smmu_v3_write_cd_test_s1_change_asid(struct kunit *test) +{ + struct arm_smmu_cd cd = {}; + struct arm_smmu_cd cd_2; + + arm_smmu_test_make_s1_cd(&cd, 778); + arm_smmu_test_make_s1_cd(&cd_2, 1997); + arm_smmu_v3_test_cd_expect_hitless_transition(test, &cd, &cd_2, + NUM_EXPECTED_SYNCS(1)); + arm_smmu_v3_test_cd_expect_hitless_transition(test, &cd_2, &cd, + NUM_EXPECTED_SYNCS(1)); +} + +static void arm_smmu_test_make_sva_cd(struct arm_smmu_cd *cd, unsigned int asid) +{ + struct arm_smmu_master master = { + .smmu = &smmu, + }; + + arm_smmu_make_sva_cd(cd, &master, &sva_mm, asid); +} + +static void arm_smmu_test_make_sva_release_cd(struct arm_smmu_cd *cd, + unsigned int asid) +{ + struct arm_smmu_master master = { + .smmu = &smmu, + }; + + arm_smmu_make_sva_cd(cd, &master, NULL, asid); +} + +static void arm_smmu_v3_write_cd_test_sva_clear(struct kunit *test) +{ + struct arm_smmu_cd cd = {}; + struct arm_smmu_cd cd_2; + + arm_smmu_test_make_sva_cd(&cd_2, 1997); + arm_smmu_v3_test_cd_expect_non_hitless_transition( + test, &cd, &cd_2, NUM_EXPECTED_SYNCS(2)); + arm_smmu_v3_test_cd_expect_non_hitless_transition( + test, &cd_2, &cd, NUM_EXPECTED_SYNCS(2)); +} + +static void arm_smmu_v3_write_cd_test_sva_release(struct kunit *test) +{ + struct arm_smmu_cd cd; + struct arm_smmu_cd cd_2; + + arm_smmu_test_make_sva_cd(&cd, 1997); + arm_smmu_test_make_sva_release_cd(&cd_2, 1997); + arm_smmu_v3_test_cd_expect_hitless_transition(test, &cd, &cd_2, + NUM_EXPECTED_SYNCS(2)); + arm_smmu_v3_test_cd_expect_hitless_transition(test, &cd_2, &cd, + NUM_EXPECTED_SYNCS(2)); +} + +static struct kunit_case arm_smmu_v3_test_cases[] = { + KUNIT_CASE(arm_smmu_v3_write_ste_test_bypass_to_abort), + KUNIT_CASE(arm_smmu_v3_write_ste_test_abort_to_bypass), + KUNIT_CASE(arm_smmu_v3_write_ste_test_cdtable_to_abort), + KUNIT_CASE(arm_smmu_v3_write_ste_test_abort_to_cdtable), + KUNIT_CASE(arm_smmu_v3_write_ste_test_cdtable_to_bypass), + KUNIT_CASE(arm_smmu_v3_write_ste_test_bypass_to_cdtable), + KUNIT_CASE(arm_smmu_v3_write_ste_test_s2_to_abort), + KUNIT_CASE(arm_smmu_v3_write_ste_test_abort_to_s2), + KUNIT_CASE(arm_smmu_v3_write_ste_test_s2_to_bypass), + KUNIT_CASE(arm_smmu_v3_write_ste_test_bypass_to_s2), + KUNIT_CASE(arm_smmu_v3_write_cd_test_s1_clear), + KUNIT_CASE(arm_smmu_v3_write_cd_test_s1_change_asid), + KUNIT_CASE(arm_smmu_v3_write_cd_test_sva_clear), + KUNIT_CASE(arm_smmu_v3_write_cd_test_sva_release), + {}, +}; + +static int arm_smmu_v3_test_suite_init(struct kunit_suite *test) +{ + arm_smmu_make_bypass_ste(&smmu, &bypass_ste); + arm_smmu_make_abort_ste(&abort_ste); + return 0; +} + +static struct kunit_suite arm_smmu_v3_test_module = { + .name = "arm-smmu-v3-kunit-test", + .suite_init = arm_smmu_v3_test_suite_init, + .test_cases = arm_smmu_v3_test_cases, +}; +kunit_test_suites(&arm_smmu_v3_test_module); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 2643c9f2c2f8..3a8ef3e205b1 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "arm-smmu-v3.h" #include "../../dma-iommu.h" @@ -42,17 +43,6 @@ enum arm_smmu_msi_index { ARM_SMMU_MAX_MSIS, }; -struct arm_smmu_entry_writer_ops; -struct arm_smmu_entry_writer { - const struct arm_smmu_entry_writer_ops *ops; - struct arm_smmu_master *master; -}; - -struct arm_smmu_entry_writer_ops { - void (*get_used)(const __le64 *entry, __le64 *used); - void (*sync)(struct arm_smmu_entry_writer *writer); -}; - #define NUM_ENTRY_QWORDS 8 static_assert(sizeof(struct arm_smmu_ste) == NUM_ENTRY_QWORDS * sizeof(u64)); static_assert(sizeof(struct arm_smmu_cd) == NUM_ENTRY_QWORDS * sizeof(u64)); @@ -979,7 +969,8 @@ void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid) * would be nice if this was complete according to the spec, but minimally it * has to capture the bits this driver uses. */ -static void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits) +VISIBLE_IF_KUNIT +void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits) { unsigned int cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent[0])); @@ -1101,8 +1092,9 @@ static bool entry_set(struct arm_smmu_entry_writer *writer, __le64 *entry, * V=0 process. This relies on the IGNORED behavior described in the * specification. */ -static void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer, - __le64 *entry, const __le64 *target) +VISIBLE_IF_KUNIT +void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer, __le64 *entry, + const __le64 *target) { __le64 unused_update[NUM_ENTRY_QWORDS]; u8 used_qword_diff; @@ -1256,7 +1248,8 @@ struct arm_smmu_cd_writer { unsigned int ssid; }; -static void arm_smmu_get_cd_used(const __le64 *ent, __le64 *used_bits) +VISIBLE_IF_KUNIT +void arm_smmu_get_cd_used(const __le64 *ent, __le64 *used_bits) { used_bits[0] = cpu_to_le64(CTXDESC_CD_0_V); if (!(ent[0] & cpu_to_le64(CTXDESC_CD_0_V))) @@ -1514,7 +1507,8 @@ static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid, } } -static void arm_smmu_make_abort_ste(struct arm_smmu_ste *target) +VISIBLE_IF_KUNIT +void arm_smmu_make_abort_ste(struct arm_smmu_ste *target) { memset(target, 0, sizeof(*target)); target->data[0] = cpu_to_le64( @@ -1522,8 +1516,9 @@ static void arm_smmu_make_abort_ste(struct arm_smmu_ste *target) FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT)); } -static void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu, - struct arm_smmu_ste *target) +VISIBLE_IF_KUNIT +void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu, + struct arm_smmu_ste *target) { memset(target, 0, sizeof(*target)); target->data[0] = cpu_to_le64( @@ -1535,8 +1530,9 @@ static void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu, STRTAB_STE_1_SHCFG_INCOMING)); } -static void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target, - struct arm_smmu_master *master) +VISIBLE_IF_KUNIT +void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target, + struct arm_smmu_master *master) { struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; struct arm_smmu_device *smmu = master->smmu; @@ -1585,9 +1581,10 @@ static void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target, } } -static void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, - struct arm_smmu_master *master, - struct arm_smmu_domain *smmu_domain) +VISIBLE_IF_KUNIT +void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, + struct arm_smmu_master *master, + struct arm_smmu_domain *smmu_domain) { struct arm_smmu_s2_cfg *s2_cfg = &smmu_domain->s2_cfg; const struct io_pgtable_cfg *pgtbl_cfg = diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 0dfbff9e9053..3dc2bbaf141a 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -736,6 +736,36 @@ struct arm_smmu_domain { struct list_head mmu_notifiers; }; +/* The following are exposed for testing purposes. */ +struct arm_smmu_entry_writer_ops; +struct arm_smmu_entry_writer { + const struct arm_smmu_entry_writer_ops *ops; + struct arm_smmu_master *master; +}; + +struct arm_smmu_entry_writer_ops { + void (*get_used)(const __le64 *entry, __le64 *used); + void (*sync)(struct arm_smmu_entry_writer *writer); +}; + +#if IS_ENABLED(CONFIG_KUNIT) +void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits); +void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer, __le64 *cur, + const __le64 *target); +void arm_smmu_get_cd_used(const __le64 *ent, __le64 *used_bits); +void arm_smmu_make_abort_ste(struct arm_smmu_ste *target); +void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu, + struct arm_smmu_ste *target); +void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target, + struct arm_smmu_master *master); +void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, + struct arm_smmu_master *master, + struct arm_smmu_domain *smmu_domain); +void arm_smmu_make_sva_cd(struct arm_smmu_cd *target, + struct arm_smmu_master *master, struct mm_struct *mm, + u16 asid); +#endif + static inline struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) { return container_of(dom, struct arm_smmu_domain, domain); -- Gitee From c55c2c45b9d93ce3686e5d329e5fba6cb28b64a9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 29 Apr 2024 14:17:07 +0300 Subject: [PATCH 037/143] iommu/amd: Fix compilation error ANBZ: #13617 commit bbe1e78ae23e7e474401880d0d496acc8cec6863 upstream. With WERROR=y, which is default, clang is not happy: .../amd/pasid.c:168:3: error: call to undeclared function 'mmu_notifier_unregister'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] .../amd/pasid.c:191:8: error: call to undeclared function 'mmu_notifier_register'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] 2 errors generated. Select missed dependency. Fixes: a5a91e54846d ("iommu/amd: Add SVA domain support") Signed-off-by: Andy Shevchenko Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240429111707.2795194-1-andriy.shevchenko@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig index 68d8fc107cb9..994063e5586f 100644 --- a/drivers/iommu/amd/Kconfig +++ b/drivers/iommu/amd/Kconfig @@ -7,6 +7,7 @@ config AMD_IOMMU select PCI_ATS select PCI_PRI select PCI_PASID + select MMU_NOTIFIER select IOMMU_API select IOMMU_IOVA select IOMMU_IO_PGTABLE -- Gitee From 945dfdc1482c7e9f39bba4fd5a7950335dec0ff8 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 3 May 2024 21:36:02 +0800 Subject: [PATCH 038/143] iommu/vt-d: Decouple igfx_off from graphic identity mapping ANBZ: #13617 commit ba00196ca41c4f6d0b0d3c4a6748a133577abe05 upstream. A kernel command called igfx_off was introduced in commit ("Intel IOMMU: Intel IOMMU driver"). This command allows the user to disable the IOMMU dedicated to SOC-integrated graphic devices. Commit <9452618e7462> ("iommu/intel: disable DMAR for g4x integrated gfx") used this mechanism to disable the graphic-dedicated IOMMU for some problematic devices. Later, more problematic graphic devices were added to the list by commit <1f76249cc3beb> ("iommu/vt-d: Declare Broadwell igfx dmar support snafu"). On the other hand, commit <19943b0e30b05> ("intel-iommu: Unify hardware and software passthrough support") uses the identity domain for graphic devices if CONFIG_DMAR_BROKEN_GFX_WA is selected. + if (iommu_pass_through) + iommu_identity_mapping = 1; +#ifdef CONFIG_DMAR_BROKEN_GFX_WA + else + iommu_identity_mapping = 2; +#endif ... static int iommu_should_identity_map(struct pci_dev *pdev, int startup) { + if (iommu_identity_mapping == 2) + return IS_GFX_DEVICE(pdev); ... In the following driver evolution, CONFIG_DMAR_BROKEN_GFX_WA and quirk_iommu_igfx() are mixed together, causing confusion in the driver's device_def_domain_type callback. On one hand, dmar_map_gfx is used to turn off the graphic-dedicated IOMMU as a workaround for some buggy hardware; on the other hand, for those graphic devices, IDENTITY mapping is required for the IOMMU core. Commit <4b8d18c0c986> "iommu/vt-d: Remove INTEL_IOMMU_BROKEN_GFX_WA" has removed the CONFIG_DMAR_BROKEN_GFX_WA option, so the IDENTITY_DOMAIN requirement for graphic devices is no longer needed. Therefore, this requirement can be removed from device_def_domain_type() and igfx_off can be made independent. Fixes: 4b8d18c0c986 ("iommu/vt-d: Remove INTEL_IOMMU_BROKEN_GFX_WA") Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240428032020.214616-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/intel/iommu.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 239a47983517..d5dfa27117c9 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -217,12 +217,11 @@ int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON); int intel_iommu_enabled = 0; EXPORT_SYMBOL_GPL(intel_iommu_enabled); -static int dmar_map_gfx = 1; static int intel_iommu_superpage = 1; static int iommu_identity_mapping; static int iommu_skip_te_disable; +static int disable_igfx_iommu; -#define IDENTMAP_GFX 2 #define IDENTMAP_AZALIA 4 const struct iommu_ops intel_iommu_ops; @@ -261,7 +260,7 @@ static int __init intel_iommu_setup(char *str) no_platform_optin = 1; pr_info("IOMMU disabled\n"); } else if (!strncmp(str, "igfx_off", 8)) { - dmar_map_gfx = 0; + disable_igfx_iommu = 1; pr_info("Disable GFX device mapping\n"); } else if (!strncmp(str, "forcedac", 8)) { pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n"); @@ -2208,9 +2207,6 @@ static int device_def_domain_type(struct device *dev) if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev)) return IOMMU_DOMAIN_IDENTITY; - - if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev)) - return IOMMU_DOMAIN_IDENTITY; } return 0; @@ -2511,9 +2507,6 @@ static int __init init_dmars(void) iommu_set_root_entry(iommu); } - if (!dmar_map_gfx) - iommu_identity_mapping |= IDENTMAP_GFX; - check_tylersburg_isoch(); ret = si_domain_init(hw_pass_through); @@ -2604,7 +2597,7 @@ static void __init init_no_remapping_devices(void) /* This IOMMU has *only* gfx devices. Either bypass it or set the gfx_mapped flag, as appropriate */ drhd->gfx_dedicated = 1; - if (!dmar_map_gfx) + if (disable_igfx_iommu) drhd->ignored = 1; } } @@ -4694,7 +4687,7 @@ static void quirk_iommu_igfx(struct pci_dev *dev) return; pci_info(dev, "Disabling IOMMU for graphics on this chipset\n"); - dmar_map_gfx = 0; + disable_igfx_iommu = 1; } /* G4x/GM45 integrated gfx dmar support is totally busted. */ @@ -4778,8 +4771,8 @@ static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev) if (!(ggc & GGC_MEMORY_VT_ENABLED)) { pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n"); - dmar_map_gfx = 0; - } else if (dmar_map_gfx) { + disable_igfx_iommu = 1; + } else if (!disable_igfx_iommu) { /* we have to ensure the gfx device is idle before we flush */ pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n"); iommu_set_dma_strict(); -- Gitee From 42ba40f651740817acba904e1c17e02570432126 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Mon, 6 May 2024 08:20:39 +0000 Subject: [PATCH 039/143] iommu/amd: Enable Guest Translation after reading IOMMU feature register ANBZ: #13617 commit de111f6b4f6a3010020825d22a068f416bc29c95 upstream. Commit 8e0179733172 ("iommu/amd: Enable Guest Translation before registering devices") moved IOMMU Guest Translation (GT) enablement to early init path. It does feature check based on Global EFR value (got from ACPI IVRS table). Later it adjusts EFR value based on IOMMU feature register (late_iommu_features_init()). It seems in some systems BIOS doesn't set gloabl EFR value properly. This is causing mismatch. Hence move IOMMU GT enablement after late_iommu_features_init() so that it does check based on IOMMU EFR value. Fixes: 8e0179733172 ("iommu/amd: Enable Guest Translation before registering devices") Reported-by: Klara Modin Closes: https://lore.kernel.org/linux-iommu/333e6eb6-361c-4afb-8107-2573324bf689@gmail.com/ Signed-off-by: Vasant Hegde Tested-by: Klara Modin Link: https://lore.kernel.org/r/20240506082039.7575-1-vasant.hegde@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 791f289a275d..0d5f75a7543a 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -2054,6 +2054,8 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) amd_iommu_max_glx_val = glxval; else amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval); + + iommu_enable_gt(iommu); } if (check_feature(FEATURE_PPR) && amd_iommu_alloc_ppr_log(iommu)) @@ -2740,7 +2742,6 @@ static void early_enable_iommu(struct amd_iommu *iommu) iommu_enable_command_buffer(iommu); iommu_enable_event_buffer(iommu); iommu_set_exclusion_range(iommu); - iommu_enable_gt(iommu); iommu_enable_ga(iommu); iommu_enable_xt(iommu); iommu_enable_irtcachedis(iommu); @@ -2797,7 +2798,6 @@ static void early_enable_iommus(void) iommu_disable_irtcachedis(iommu); iommu_enable_command_buffer(iommu); iommu_enable_event_buffer(iommu); - iommu_enable_gt(iommu); iommu_enable_ga(iommu); iommu_enable_xt(iommu); iommu_enable_irtcachedis(iommu); -- Gitee From 70af62b1de68a362e3ca4602176aa3249adfea0a Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 7 Apr 2024 21:11:41 -0700 Subject: [PATCH 040/143] swiotlb: remove alloc_size argument to swiotlb_tbl_map_single() ANBZ: #13617 commit 327e2c97c46a4d971c5450a9d05b4a673f46c4da upstream. Currently swiotlb_tbl_map_single() takes alloc_align_mask and alloc_size arguments to specify an swiotlb allocation that is larger than mapping_size. This larger allocation is used solely by iommu_dma_map_single() to handle untrusted devices that should not have DMA visibility to memory pages that are partially used for unrelated kernel data. Having two arguments to specify the allocation is redundant. While alloc_align_mask naturally specifies the alignment of the starting address of the allocation, it can also implicitly specify the size by rounding up the mapping_size to that alignment. Additionally, the current approach has an edge case bug. iommu_dma_map_page() already does the rounding up to compute the alloc_size argument. But swiotlb_tbl_map_single() then calculates the alignment offset based on the DMA min_align_mask, and adds that offset to alloc_size. If the offset is non-zero, the addition may result in a value that is larger than the max the swiotlb can allocate. If the rounding up is done _after_ the alignment offset is added to the mapping_size (and the original mapping_size conforms to the value returned by swiotlb_max_mapping_size), then the max that the swiotlb can allocate will not be exceeded. In view of these issues, simplify the swiotlb_tbl_map_single() interface by removing the alloc_size argument. Most call sites pass the same value for mapping_size and alloc_size, and they pass alloc_align_mask as zero. Just remove the redundant argument from these callers, as they will see no functional change. For iommu_dma_map_page() also remove the alloc_size argument, and have swiotlb_tbl_map_single() compute the alloc_size by rounding up mapping_size after adding the offset based on min_align_mask. This has the side effect of fixing the edge case bug but with no other functional change. Also add a sanity test on the alloc_align_mask. While IOMMU code currently ensures the granule is not larger than PAGE_SIZE, if that guarantee were to be removed in the future, the downstream effect on the swiotlb might go unnoticed until strange allocation failures occurred. Tested on an ARM64 system with 16K page size and some kernel test-only hackery to allow modifying the DMA min_align_mask and the granule size that becomes the alloc_align_mask. Tested these combinations with a variety of original memory addresses and sizes, including those that reproduce the edge case bug: * 4K granule and 0 min_align_mask * 4K granule and 0xFFF min_align_mask (4K - 1) * 16K granule and 0xFFF min_align_mask * 64K granule and 0xFFF min_align_mask * 64K granule and 0x3FFF min_align_mask (16K - 1) With the changes, all combinations pass. Signed-off-by: Michael Kelley Reviewed-by: Petr Tesarik Signed-off-by: Christoph Hellwig [Shuai Xue: kernel/dma/phytium/pswiotlb-iommu.c also use swiotlb_tbl_map_single, remove alloc_size argument too] Signed-off-by: Shuai Xue --- drivers/iommu/dma-iommu.c | 2 +- drivers/xen/swiotlb-xen.c | 2 +- include/linux/swiotlb.h | 2 +- kernel/dma/phytium/pswiotlb-iommu.c | 2 +- kernel/dma/swiotlb.c | 56 +++++++++++++++++++++-------- 5 files changed, 46 insertions(+), 18 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 4b0cd53f93b1..06826333271c 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1183,7 +1183,7 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, trace_swiotlb_bounced(dev, phys, size); aligned_size = iova_align(iovad, size); - phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size, + phys = swiotlb_tbl_map_single(dev, phys, size, iova_mask(iovad), dir, attrs); if (phys == DMA_MAPPING_ERROR) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 5770f3b374ec..042253460b98 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -227,7 +227,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, */ trace_swiotlb_bounced(dev, dev_addr, size); - map = swiotlb_tbl_map_single(dev, phys, size, size, 0, dir, attrs); + map = swiotlb_tbl_map_single(dev, phys, size, 0, dir, attrs); if (map == (phys_addr_t)DMA_MAPPING_ERROR) return DMA_MAPPING_ERROR; diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index ecde0312dd52..05e6f1b3474e 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -43,7 +43,7 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask, extern void __init swiotlb_update_mem_attributes(void); phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys, - size_t mapping_size, size_t alloc_size, + size_t mapping_size, unsigned int alloc_aligned_mask, enum dma_data_direction dir, unsigned long attrs); diff --git a/kernel/dma/phytium/pswiotlb-iommu.c b/kernel/dma/phytium/pswiotlb-iommu.c index 0dcde77249dd..4c3f6b9ac9f1 100644 --- a/kernel/dma/phytium/pswiotlb-iommu.c +++ b/kernel/dma/phytium/pswiotlb-iommu.c @@ -768,7 +768,7 @@ dma_addr_t pswiotlb_iommu_dma_map_page(struct device *dev, struct page *page, } aligned_size = iova_align(iovad, size); - phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size, + phys = swiotlb_tbl_map_single(dev, phys, size, iova_mask(iovad), dir, attrs); if (phys == DMA_MAPPING_ERROR) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index e7c3fbd0737e..2c6cbeab8021 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -1298,15 +1298,40 @@ static unsigned long mem_used(struct io_tlb_mem *mem) #endif /* CONFIG_DEBUG_FS */ +/** + * swiotlb_tbl_map_single() - bounce buffer map a single contiguous physical area + * @dev: Device which maps the buffer. + * @orig_addr: Original (non-bounced) physical IO buffer address + * @mapping_size: Requested size of the actual bounce buffer, excluding + * any pre- or post-padding for alignment + * @alloc_align_mask: Required start and end alignment of the allocated buffer + * @dir: DMA direction + * @attrs: Optional DMA attributes for the map operation + * + * Find and allocate a suitable sequence of IO TLB slots for the request. + * The allocated space starts at an alignment specified by alloc_align_mask, + * and the size of the allocated space is rounded up so that the total amount + * of allocated space is a multiple of (alloc_align_mask + 1). If + * alloc_align_mask is zero, the allocated space may be at any alignment and + * the size is not rounded up. + * + * The returned address is within the allocated space and matches the bits + * of orig_addr that are specified in the DMA min_align_mask for the device. As + * such, this returned address may be offset from the beginning of the allocated + * space. The bounce buffer space starting at the returned address for + * mapping_size bytes is initialized to the contents of the original IO buffer + * area. Any pre-padding (due to an offset) and any post-padding (due to + * rounding-up the size) is not initialized. + */ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, - size_t mapping_size, size_t alloc_size, - unsigned int alloc_align_mask, enum dma_data_direction dir, - unsigned long attrs) + size_t mapping_size, unsigned int alloc_align_mask, + enum dma_data_direction dir, unsigned long attrs) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; unsigned int offset; struct io_tlb_pool *pool; unsigned int i; + size_t size; int index; phys_addr_t tlb_addr; unsigned short pad_slots; @@ -1320,20 +1345,24 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n"); - if (mapping_size > alloc_size) { - dev_warn_once(dev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)", - mapping_size, alloc_size); - return (phys_addr_t)DMA_MAPPING_ERROR; - } + /* + * The default swiotlb memory pool is allocated with PAGE_SIZE + * alignment. If a mapping is requested with larger alignment, + * the mapping may be unable to use the initial slot(s) in all + * sets of IO_TLB_SEGSIZE slots. In such case, a mapping request + * of or near the maximum mapping size would always fail. + */ + dev_WARN_ONCE(dev, alloc_align_mask > ~PAGE_MASK, + "Alloc alignment may prevent fulfilling requests with max mapping_size\n"); offset = swiotlb_align_offset(dev, alloc_align_mask, orig_addr); - index = swiotlb_find_slots(dev, orig_addr, - alloc_size + offset, alloc_align_mask, &pool); + size = ALIGN(mapping_size + offset, alloc_align_mask + 1); + index = swiotlb_find_slots(dev, orig_addr, size, alloc_align_mask, &pool); if (index == -1) { if (!(attrs & DMA_ATTR_NO_WARN)) dev_warn_ratelimited(dev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n", - alloc_size, mem->nslabs, mem_used(mem)); + size, mem->nslabs, mem_used(mem)); return (phys_addr_t)DMA_MAPPING_ERROR; } @@ -1346,7 +1375,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, offset &= (IO_TLB_SIZE - 1); index += pad_slots; pool->slots[index].pad_slots = pad_slots; - for (i = 0; i < nr_slots(alloc_size + offset); i++) + for (i = 0; i < (nr_slots(size) - pad_slots); i++) pool->slots[index + i].orig_addr = slot_addr(orig_addr, i); tlb_addr = slot_addr(pool->start, index) + offset; /* @@ -1498,8 +1527,7 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size, trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size); - swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, 0, dir, - attrs); + swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, 0, dir, attrs); if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR) return DMA_MAPPING_ERROR; -- Gitee From e4a4f725432f5a02233369f8a3f5b4cf2bdd9aef Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 7 Apr 2024 21:11:42 -0700 Subject: [PATCH 041/143] iommu/dma: fix zeroing of bounce buffer padding used by untrusted devices ANBZ: #13617 commit 2650073f1b5858008c32712f3d9e1e808ce7e967 upstream. iommu_dma_map_page() allocates swiotlb memory as a bounce buffer when an untrusted device wants to map only part of the memory in an granule. The goal is to disallow the untrusted device having DMA access to unrelated kernel data that may be sharing the granule. To meet this goal, the bounce buffer itself is zeroed, and any additional swiotlb memory up to alloc_size after the bounce buffer end (i.e., "post-padding") is also zeroed. However, as of commit 901c7280ca0d ("Reinstate some of "swiotlb: rework "fix info leak with DMA_FROM_DEVICE"""), swiotlb_tbl_map_single() always initializes the contents of the bounce buffer to the original memory. Zeroing the bounce buffer is redundant and probably wrong per the discussion in that commit. Only the post-padding needs to be zeroed. Also, when the DMA min_align_mask is non-zero, the allocated bounce buffer space may not start on a granule boundary. The swiotlb memory from the granule boundary to the start of the allocated bounce buffer might belong to some unrelated bounce buffer. So as described in the "second issue" in [1], it can't be zeroed to protect against untrusted devices. But as of commit af133562d5af ("swiotlb: extend buffer pre-padding to alloc_align_mask if necessary"), swiotlb_tbl_map_single() allocates pre-padding slots when necessary to meet min_align_mask requirements, making it possible to zero the pre-padding area as well. Finally, iommu_dma_map_page() uses the swiotlb for untrusted devices and also for certain kmalloc() memory. Current code does the zeroing for both cases, but it is needed only for the untrusted device case. Fix all of this by updating iommu_dma_map_page() to zero both the pre-padding and post-padding areas, but not the actual bounce buffer. Do this only in the case where the bounce buffer is used because of an untrusted device. [1] https://lore.kernel.org/all/20210929023300.335969-1-stevensd@google.com/ Signed-off-by: Michael Kelley Reviewed-by: Petr Tesarik Signed-off-by: Christoph Hellwig Signed-off-by: Shuai Xue --- drivers/iommu/dma-iommu.c | 29 ++++++++++++++++------------- include/linux/iova.h | 5 +++++ 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 06826333271c..8ac257da905d 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1172,9 +1172,6 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, */ if (dev_use_swiotlb(dev, size, dir) && iova_offset(iovad, phys | size)) { - void *padding_start; - size_t padding_size, aligned_size; - if (!is_swiotlb_active(dev)) { dev_warn_once(dev, "DMA bounce buffers are inactive, unable to map unaligned transaction.\n"); return DMA_MAPPING_ERROR; @@ -1182,24 +1179,30 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, trace_swiotlb_bounced(dev, phys, size); - aligned_size = iova_align(iovad, size); phys = swiotlb_tbl_map_single(dev, phys, size, iova_mask(iovad), dir, attrs); if (phys == DMA_MAPPING_ERROR) return DMA_MAPPING_ERROR; - /* Cleanup the padding area. */ - padding_start = phys_to_virt(phys); - padding_size = aligned_size; + /* + * Untrusted devices should not see padding areas with random + * leftover kernel data, so zero the pre- and post-padding. + * swiotlb_tbl_map_single() has initialized the bounce buffer + * proper to the contents of the original memory buffer. + */ + if (dev_is_untrusted(dev)) { + size_t start, virt = (size_t)phys_to_virt(phys); - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && - (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) { - padding_start += size; - padding_size -= size; - } + /* Pre-padding */ + start = iova_align_down(iovad, virt); + memset((void *)start, 0, virt - start); - memset(padding_start, 0, padding_size); + /* Post-padding */ + start = virt + size; + memset((void *)start, 0, + iova_align(iovad, start) - start); + } } if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) diff --git a/include/linux/iova.h b/include/linux/iova.h index 83c00fac2acb..d2c4fd923efa 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -65,6 +65,11 @@ static inline size_t iova_align(struct iova_domain *iovad, size_t size) return ALIGN(size, iovad->granule); } +static inline size_t iova_align_down(struct iova_domain *iovad, size_t size) +{ + return ALIGN_DOWN(size, iovad->granule); +} + static inline dma_addr_t iova_dma_addr(struct iova_domain *iovad, struct iova *iova) { return (dma_addr_t)iova->pfn_lo << iova_shift(iovad); -- Gitee From fabd6b63bf89bb1c00e8afb9c3f1dc61730d3a29 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Thu, 25 Jan 2024 16:26:37 +0800 Subject: [PATCH 042/143] powerpc/iommu: Code cleanup for cell/iommu.c ANBZ: #13617 commit f3560a2ba5cbbb6c62c14dbdc1e33cb3565199d0 upstream. This part was commented from commit 165785e5c0be ("[POWERPC] Cell iommu support") in about 17 years before. If there are no plans to enable this part code in the future, we can remove this dead code. Signed-off-by: Kunwu Chan Signed-off-by: Michael Ellerman Link: https://msgid.link/20240125082637.532826-1-chentao@kylinos.cn Signed-off-by: Shuai Xue --- arch/powerpc/platforms/cell/iommu.c | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 1202a69b0a20..4cd9c0de22c2 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -424,23 +424,6 @@ static void __init cell_iommu_setup_hardware(struct cbe_iommu *iommu, cell_iommu_enable_hardware(iommu); } -#if 0/* Unused for now */ -static struct iommu_window *find_window(struct cbe_iommu *iommu, - unsigned long offset, unsigned long size) -{ - struct iommu_window *window; - - /* todo: check for overlapping (but not equal) windows) */ - - list_for_each_entry(window, &(iommu->windows), list) { - if (window->offset == offset && window->size == size) - return window; - } - - return NULL; -} -#endif - static inline u32 cell_iommu_get_ioid(struct device_node *np) { const u32 *ioid; -- Gitee From d351c97d10f52865e2173f40ef311221f8bfb999 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 9 May 2024 14:45:51 -0300 Subject: [PATCH 043/143] iommu/arm-smmu: Use the correct type in nvidia_smmu_context_fault() ANBZ: #13617 commit 65ade5653f5ab5a21635e51d0c65e95f490f5b6f upstream. This was missed because of the function pointer indirection. nvidia_smmu_context_fault() is also installed as a irq function, and the 'void *' was changed to a struct arm_smmu_domain. Since the iommu_domain is embedded at a non-zero offset this causes nvidia_smmu_context_fault() to miscompute the offset. Fixup the types. Unable to handle kernel NULL pointer dereference at virtual address 0000000000000120 Mem abort info: ESR = 0x0000000096000004 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x04: level 0 translation fault Data abort info: ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000 CM = 0, WnR = 0, TnD = 0, TagAccess = 0 GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 user pgtable: 4k pages, 48-bit VAs, pgdp=0000000107c9f000 [0000000000000120] pgd=0000000000000000, p4d=0000000000000000 Internal error: Oops: 0000000096000004 [#1] SMP Modules linked in: CPU: 1 PID: 47 Comm: kworker/u25:0 Not tainted 6.9.0-0.rc7.58.eln136.aarch64 #1 Hardware name: Unknown NVIDIA Jetson Orin NX/NVIDIA Jetson Orin NX, BIOS 3.1-32827747 03/19/2023 Workqueue: events_unbound deferred_probe_work_func pstate: 604000c9 (nZCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : nvidia_smmu_context_fault+0x1c/0x158 lr : __free_irq+0x1d4/0x2e8 sp : ffff80008044b6f0 x29: ffff80008044b6f0 x28: ffff000080a60b18 x27: ffffd32b5172e970 x26: 0000000000000000 x25: ffff0000802f5aac x24: ffff0000802f5a30 x23: ffff0000802f5b60 x22: 0000000000000057 x21: 0000000000000000 x20: ffff0000802f5a00 x19: ffff000087d4cd80 x18: ffffffffffffffff x17: 6234362066666666 x16: 6630303078302d30 x15: ffff00008156d888 x14: 0000000000000000 x13: ffff0000801db910 x12: ffff00008156d6d0 x11: 0000000000000003 x10: ffff0000801db918 x9 : ffffd32b50f94d9c x8 : 1fffe0001032fda1 x7 : ffff00008197ed00 x6 : 000000000000000f x5 : 000000000000010e x4 : 000000000000010e x3 : 0000000000000000 x2 : ffffd32b51720cd8 x1 : ffff000087e6f700 x0 : 0000000000000057 Call trace: nvidia_smmu_context_fault+0x1c/0x158 __free_irq+0x1d4/0x2e8 free_irq+0x3c/0x80 devm_free_irq+0x64/0xa8 arm_smmu_domain_free+0xc4/0x158 iommu_domain_free+0x44/0xa0 iommu_deinit_device+0xd0/0xf8 __iommu_group_remove_device+0xcc/0xe0 iommu_bus_notifier+0x64/0xa8 notifier_call_chain+0x78/0x148 blocking_notifier_call_chain+0x4c/0x90 bus_notify+0x44/0x70 device_del+0x264/0x3e8 pci_remove_bus_device+0x84/0x120 pci_remove_root_bus+0x5c/0xc0 dw_pcie_host_deinit+0x38/0xe0 tegra_pcie_config_rp+0xc0/0x1f0 tegra_pcie_dw_probe+0x34c/0x700 platform_probe+0x70/0xe8 really_probe+0xc8/0x3a0 __driver_probe_device+0x84/0x160 driver_probe_device+0x44/0x130 __device_attach_driver+0xc4/0x170 bus_for_each_drv+0x90/0x100 __device_attach+0xa8/0x1c8 device_initial_probe+0x1c/0x30 bus_probe_device+0xb0/0xc0 deferred_probe_work_func+0xbc/0x120 process_one_work+0x194/0x490 worker_thread+0x284/0x3b0 kthread+0xf4/0x108 ret_from_fork+0x10/0x20 Code: a9b97bfd 910003fd a9025bf5 f85a0035 (b94122a1) Cc: stable@vger.kernel.org Fixes: e0976331ad11 ("iommu/arm-smmu: Pass arm_smmu_domain to internal functions") Reported-by: Jerry Snitselaar Closes: https://lore.kernel.org/all/jto5e3ili4auk6sbzpnojdvhppgwuegir7mpd755anfhwcbkfz@2u5gh7bxb4iv Signed-off-by: Jason Gunthorpe Tested-by: Jerry Snitselaar Acked-by: Jerry Snitselaar Link: https://lore.kernel.org/r/0-v1-24ce064de41f+4ac-nvidia_smmu_fault_jgg@nvidia.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c index 87bf522b9d2e..957d988b6d83 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c @@ -221,11 +221,9 @@ static irqreturn_t nvidia_smmu_context_fault(int irq, void *dev) unsigned int inst; irqreturn_t ret = IRQ_NONE; struct arm_smmu_device *smmu; - struct iommu_domain *domain = dev; - struct arm_smmu_domain *smmu_domain; + struct arm_smmu_domain *smmu_domain = dev; struct nvidia_smmu *nvidia; - smmu_domain = container_of(domain, struct arm_smmu_domain, domain); smmu = smmu_domain->smmu; nvidia = to_nvidia_smmu(smmu); -- Gitee From ef633a9a0216553138726ea6e749318d8f76ed85 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 7 May 2024 10:21:10 -0300 Subject: [PATCH 044/143] iommu/arm-smmu-v3: Make the kunit into a module ANBZ: #13617 commit da55da5a42d4247d7a48b843fa5fcd9a4a10f4fe upstream. It turns out kconfig has problems ensuring the SMMU module and the KUNIT module are consistently y/m to allow linking. It will permit KUNIT to be a module while SMMU is built in. Also, Fedora apparently enables kunit on production kernels. So, put the entire kunit in its own module using the VISIBLE_IF_KUNIT/EXPORT_SYMBOL_IF_KUNIT machinery. This keeps it out of vmlinus on Fedora and makes the kconfig work in the normal way. There is no cost if kunit is disabled. Fixes: 56e1a4cc2588 ("iommu/arm-smmu-v3: Add unit tests for arm_smmu_write_entry") Reported-by: Thorsten Leemhuis Link: https://lore.kernel.org/all/aeea8546-5bce-4c51-b506-5d2008e52fef@leemhuis.info Signed-off-by: Jason Gunthorpe Tested-by: Thorsten Leemhuis Acked-by: Will Deacon Link: https://lore.kernel.org/r/0-v1-24cba6c0f404+2ae-smmu_kunit_module_jgg@nvidia.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/Kconfig | 2 +- drivers/iommu/arm/arm-smmu-v3/Makefile | 3 ++- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 1 + drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c | 3 +++ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 8 ++++++++ 5 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 20d9bd4bc074..0197014056ab 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -416,7 +416,7 @@ config ARM_SMMU_V3_SVA and PRI. config ARM_SMMU_V3_KUNIT_TEST - bool "KUnit tests for arm-smmu-v3 driver" if !KUNIT_ALL_TESTS + tristate "KUnit tests for arm-smmu-v3 driver" if !KUNIT_ALL_TESTS depends on KUNIT depends on ARM_SMMU_V3_SVA default KUNIT_ALL_TESTS diff --git a/drivers/iommu/arm/arm-smmu-v3/Makefile b/drivers/iommu/arm/arm-smmu-v3/Makefile index 0b97054b3929..014a997753a8 100644 --- a/drivers/iommu/arm/arm-smmu-v3/Makefile +++ b/drivers/iommu/arm/arm-smmu-v3/Makefile @@ -2,5 +2,6 @@ obj-$(CONFIG_ARM_SMMU_V3) += arm_smmu_v3.o arm_smmu_v3-objs-y += arm-smmu-v3.o arm_smmu_v3-objs-$(CONFIG_ARM_SMMU_V3_SVA) += arm-smmu-v3-sva.o -arm_smmu_v3-objs-$(CONFIG_ARM_SMMU_V3_KUNIT_TEST) += arm-smmu-v3-test.o arm_smmu_v3-objs := $(arm_smmu_v3-objs-y) + +obj-$(CONFIG_ARM_SMMU_V3_KUNIT_TEST) += arm-smmu-v3-test.o diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 34a977a0767d..e490ffb38015 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -185,6 +185,7 @@ void arm_smmu_make_sva_cd(struct arm_smmu_cd *target, */ target->data[3] = cpu_to_le64(read_sysreg(mair_el1)); } +EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_sva_cd); static struct arm_smmu_ctx_desc *arm_smmu_alloc_shared_cd(struct mm_struct *mm) { diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c index 417804392ff0..315e487fd990 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c @@ -463,3 +463,6 @@ static struct kunit_suite arm_smmu_v3_test_module = { .test_cases = arm_smmu_v3_test_cases, }; kunit_test_suites(&arm_smmu_v3_test_module); + +MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 3a8ef3e205b1..c84577e140d6 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1007,6 +1007,7 @@ void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits) if (cfg == STRTAB_STE_0_CFG_BYPASS) used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG); } +EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_ste_used); /* * Figure out if we can do a hitless update of entry to become target. Returns a @@ -1141,6 +1142,7 @@ void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer, __le64 *entry, entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS)); } } +EXPORT_SYMBOL_IF_KUNIT(arm_smmu_write_entry); static void arm_smmu_sync_cd(struct arm_smmu_master *master, int ssid, bool leaf) @@ -1268,6 +1270,7 @@ void arm_smmu_get_cd_used(const __le64 *ent, __le64 *used_bits) used_bits[1] &= ~cpu_to_le64(CTXDESC_CD_1_TTB0_MASK); } } +EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_cd_used); static void arm_smmu_cd_writer_sync_entry(struct arm_smmu_entry_writer *writer) { @@ -1332,6 +1335,7 @@ void arm_smmu_make_s1_cd(struct arm_smmu_cd *target, CTXDESC_CD_1_TTB0_MASK); target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.mair); } +EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_s1_cd); void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid) { @@ -1515,6 +1519,7 @@ void arm_smmu_make_abort_ste(struct arm_smmu_ste *target) STRTAB_STE_0_V | FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT)); } +EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_abort_ste); VISIBLE_IF_KUNIT void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu, @@ -1529,6 +1534,7 @@ void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu, target->data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING)); } +EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_bypass_ste); VISIBLE_IF_KUNIT void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target, @@ -1580,6 +1586,7 @@ void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target, cpu_to_le64(FIELD_PREP(STRTAB_STE_2_S2VMID, 0)); } } +EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_cdtable_ste); VISIBLE_IF_KUNIT void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, @@ -1627,6 +1634,7 @@ void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s2_cfg.vttbr & STRTAB_STE_3_S2TTB_MASK); } +EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_s2_domain_ste); /* * This can safely directly manipulate the STE memory without a sync sequence -- Gitee From fbff0f85b0ca37b5cde2e07b3c14a4e3aafed444 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 31 Mar 2024 10:44:01 +0200 Subject: [PATCH 045/143] iommu: virtio: drop owner assignment ANBZ: #13617 commit ac5990d3ecc7b4517bddf68a5a99efca78211444 upstream. virtio core already sets the .owner, so driver does not need to. Signed-off-by: Krzysztof Kozlowski Message-Id: <20240331-module-owner-virtio-v2-14-98f04bfaf46a@linaro.org> Signed-off-by: Michael S. Tsirkin Signed-off-by: Shuai Xue --- drivers/iommu/virtio-iommu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 8e776f6c6e35..36d680826b57 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -1251,7 +1251,6 @@ MODULE_DEVICE_TABLE(virtio, id_table); static struct virtio_driver virtio_iommu_drv = { .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, .id_table = id_table, .feature_table = features, .feature_table_size = ARRAY_SIZE(features), -- Gitee From 08da90198a11512b1d2cd7be1403d5d19cfa389c Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 20 May 2024 20:14:44 +0100 Subject: [PATCH 046/143] iommu/dma: Fix domain init ANBZ: #13617 commit cc8d89d0637990c66440a226f443d95340979a04 upstream. Despite carefully rewording the kerneldoc to describe the new direct interaction with dma_range_map, it seems I managed to confuse myself in removing the redundant force_aperture check and ended up making the code not do that at all. This led to dma_range_maps inadvertently being able to set iovad->start_pfn = 0, and all the nonsensical chaos which ensues from there. Restore the correct behaviour of constraining base_pfn to the domain aperture regardless of dma_range_map, and not trying to apply dma_range_map constraints to the basic IOVA domain since they will be properly handled with reserved regions later. Reported-by: Jon Hunter Reported-by: Jerry Snitselaar Fixes: ad4750b07d34 ("iommu/dma: Make limit checks self-contained") Signed-off-by: Robin Murphy Tested-by: Jerry Snitselaar Reviewed-by: Jerry Snitselaar Link: https://lore.kernel.org/r/721fa6baebb0924aa40db0b8fb86bcb4538434af.1716232484.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/dma-iommu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 8ac257da905d..566ad634cf9e 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -705,15 +705,15 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, struct device *dev /* Check the domain allows at least some access to the device... */ if (map) { - dma_addr_t base = dma_range_map_min(map); - if (base > domain->geometry.aperture_end || + if (dma_range_map_min(map) > domain->geometry.aperture_end || dma_range_map_max(map) < domain->geometry.aperture_start) { pr_warn("specified DMA range outside IOMMU capability\n"); return -EFAULT; } - /* ...then finally give it a kicking to make sure it fits */ - base_pfn = max(base, domain->geometry.aperture_start) >> order; } + /* ...then finally give it a kicking to make sure it fits */ + base_pfn = max_t(unsigned long, base_pfn, + domain->geometry.aperture_start >> order); /* start_pfn is always nonzero for an already-initialised domain */ mutex_lock(&cookie->mutex); -- Gitee From b745f927ad6d37a94953f277635d73aa7b178a5d Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Wed, 29 May 2024 11:39:00 +0000 Subject: [PATCH 047/143] iommu/amd: Fix workqueue name ANBZ: #13617 commit 998a0a362b0b4cf501161c3319e6994d37c45a8c upstream. Workqueue name length is crossing WQ_NAME_LEN limit. Fix it by changing name format. New format : "iopf_queue/amdvi-" kernel warning: [ 11.146912] workqueue: name exceeds WQ_NAME_LEN. Truncating to: iopf_queue/amdiommu-0xc002-iopf Reported-by: Borislav Petkov Fixes: 61928bab9d26 ("iommu/amd: Define per-IOMMU iopf_queue") Signed-off-by: Vasant Hegde Acked-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240529113900.5798-1-vasant.hegde@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/ppr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iommu/amd/ppr.c b/drivers/iommu/amd/ppr.c index 0463daa2d46b..0f1d78164618 100644 --- a/drivers/iommu/amd/ppr.c +++ b/drivers/iommu/amd/ppr.c @@ -220,8 +220,7 @@ int amd_iommu_iopf_init(struct amd_iommu *iommu) if (iommu->iopf_queue) return ret; - snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name), - "amdiommu-%#x-iopfq", + snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name), "amdvi-%#x", PCI_SEG_DEVID_TO_SBDF(iommu->pci_seg->id, iommu->devid)); iommu->iopf_queue = iopf_queue_alloc(iommu->iopfq_name); -- Gitee From b7541df610b31a4f9129116aecfde57f3d97acb0 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Thu, 30 May 2024 07:11:18 +0000 Subject: [PATCH 048/143] iommu/amd: Check EFR[EPHSup] bit before enabling PPR ANBZ: #13617 commit 48dc345a23b984c457d1c5878168d026c500618f upstream. Check for EFR[EPHSup] bit before enabling PPR. This bit must be set to enable PPR. Reported-by: Borislav Petkov Fixes: c4cb23111103 ("iommu/amd: Add support for enable/disable IOPF") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218900 Tested-by: Borislav Petkov Tested-by: Jean-Christophe Guillain Signed-off-by: Vasant Hegde Reviewed-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20240530071118.10297-1-vasant.hegde@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/amd_iommu.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 5f761bb34b0c..4abfab24585d 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -129,7 +129,8 @@ static inline int check_feature_gpt_level(void) static inline bool amd_iommu_gt_ppr_supported(void) { return (check_feature(FEATURE_GT) && - check_feature(FEATURE_PPR)); + check_feature(FEATURE_PPR) && + check_feature(FEATURE_EPHSUP)); } static inline u64 iommu_virt_to_phys(void *vaddr) -- Gitee From 7d3fdcbd64d6f5a22d7194afa65dcc2f503c3a13 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Thu, 30 May 2024 08:48:01 +0000 Subject: [PATCH 049/143] iommu/amd: Fix Invalid wait context issue ANBZ: #13617 commit 526606b0a1998b0791b42c199d53550c3ba724b5 upstream. With commit c4cb23111103 ("iommu/amd: Add support for enable/disable IOPF") we are hitting below issue. This happens because in IOPF enablement path it holds spin lock with irq disable and then tries to take mutex lock. dmesg: ----- [ 0.938739] ============================= [ 0.938740] [ BUG: Invalid wait context ] [ 0.938742] 6.10.0-rc1+ #1 Not tainted [ 0.938745] ----------------------------- [ 0.938746] swapper/0/1 is trying to lock: [ 0.938748] ffffffff8c9f01d8 (&port_lock_key){....}-{3:3}, at: serial8250_console_write+0x78/0x4a0 [ 0.938767] other info that might help us debug this: [ 0.938768] context-{5:5} [ 0.938769] 7 locks held by swapper/0/1: [ 0.938772] #0: ffff888101a91310 (&group->mutex){+.+.}-{4:4}, at: bus_iommu_probe+0x70/0x160 [ 0.938790] #1: ffff888101d1f1b8 (&domain->lock){....}-{3:3}, at: amd_iommu_attach_device+0xa5/0x700 [ 0.938799] #2: ffff888101cc3d18 (&dev_data->lock){....}-{3:3}, at: amd_iommu_attach_device+0xc5/0x700 [ 0.938806] #3: ffff888100052830 (&iommu->lock){....}-{2:2}, at: amd_iommu_iopf_add_device+0x3f/0xa0 [ 0.938813] #4: ffffffff8945a340 (console_lock){+.+.}-{0:0}, at: _printk+0x48/0x50 [ 0.938822] #5: ffffffff8945a390 (console_srcu){....}-{0:0}, at: console_flush_all+0x58/0x4e0 [ 0.938867] #6: ffffffff82459f80 (console_owner){....}-{0:0}, at: console_flush_all+0x1f0/0x4e0 [ 0.938872] stack backtrace: [ 0.938874] CPU: 2 PID: 1 Comm: swapper/0 Not tainted 6.10.0-rc1+ #1 [ 0.938877] Hardware name: HP HP EliteBook 745 G3/807E, BIOS N73 Ver. 01.39 04/16/2019 Fix above issue by re-arranging code in attach device path: - move device PASID/IOPF enablement outside lock in AMD IOMMU driver. This is safe as core layer holds group->mutex lock before calling iommu_ops->attach_dev. Reported-by: Borislav Petkov Reported-by: Mikhail Gavrilov Reported-by: Chris Bainbridge Fixes: c4cb23111103 ("iommu/amd: Add support for enable/disable IOPF") Tested-by: Borislav Petkov Tested-by: Chris Bainbridge Tested-by: Mikhail Gavrilov Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20240530084801.10758-1-vasant.hegde@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/iommu.c | 48 +++++++++++++++++++-------------------- drivers/iommu/amd/ppr.c | 22 ++++-------------- 2 files changed, 28 insertions(+), 42 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 71fcd22a0256..67782edecabd 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2041,7 +2041,6 @@ static int do_attach(struct iommu_dev_data *dev_data, struct protection_domain *domain) { struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); - struct pci_dev *pdev; int ret = 0; /* Update data structures */ @@ -2056,30 +2055,13 @@ static int do_attach(struct iommu_dev_data *dev_data, domain->dev_iommu[iommu->index] += 1; domain->dev_cnt += 1; - pdev = dev_is_pci(dev_data->dev) ? to_pci_dev(dev_data->dev) : NULL; + /* Setup GCR3 table */ if (pdom_is_sva_capable(domain)) { ret = init_gcr3_table(dev_data, domain); if (ret) return ret; - - if (pdev) { - pdev_enable_caps(pdev); - - /* - * Device can continue to function even if IOPF - * enablement failed. Hence in error path just - * disable device PRI support. - */ - if (amd_iommu_iopf_add_device(iommu, dev_data)) - pdev_disable_cap_pri(pdev); - } - } else if (pdev) { - pdev_enable_cap_ats(pdev); } - /* Update device table */ - amd_iommu_dev_update_dte(dev_data, true); - return ret; } @@ -2172,6 +2154,11 @@ static void detach_device(struct device *dev) do_detach(dev_data); +out: + spin_unlock(&dev_data->lock); + + spin_unlock_irqrestore(&domain->lock, flags); + /* Remove IOPF handler */ if (ppr) amd_iommu_iopf_remove_device(iommu, dev_data); @@ -2179,10 +2166,6 @@ static void detach_device(struct device *dev) if (dev_is_pci(dev)) pdev_disable_caps(to_pci_dev(dev)); -out: - spin_unlock(&dev_data->lock); - - spin_unlock_irqrestore(&domain->lock, flags); } static struct iommu_device *amd_iommu_probe_device(struct device *dev) @@ -2507,6 +2490,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev); struct protection_domain *domain = to_pdomain(dom); struct amd_iommu *iommu = get_amd_iommu_from_dev(dev); + struct pci_dev *pdev; int ret; /* @@ -2539,7 +2523,23 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, } #endif - iommu_completion_wait(iommu); + pdev = dev_is_pci(dev_data->dev) ? to_pci_dev(dev_data->dev) : NULL; + if (pdev && pdom_is_sva_capable(domain)) { + pdev_enable_caps(pdev); + + /* + * Device can continue to function even if IOPF + * enablement failed. Hence in error path just + * disable device PRI support. + */ + if (amd_iommu_iopf_add_device(iommu, dev_data)) + pdev_disable_cap_pri(pdev); + } else if (pdev) { + pdev_enable_cap_ats(pdev); + } + + /* Update device table */ + amd_iommu_dev_update_dte(dev_data, true); return ret; } diff --git a/drivers/iommu/amd/ppr.c b/drivers/iommu/amd/ppr.c index 0f1d78164618..a504f02dd904 100644 --- a/drivers/iommu/amd/ppr.c +++ b/drivers/iommu/amd/ppr.c @@ -246,40 +246,26 @@ void amd_iommu_page_response(struct device *dev, struct iopf_fault *evt, int amd_iommu_iopf_add_device(struct amd_iommu *iommu, struct iommu_dev_data *dev_data) { - unsigned long flags; int ret = 0; if (!dev_data->pri_enabled) return ret; - raw_spin_lock_irqsave(&iommu->lock, flags); - - if (!iommu->iopf_queue) { - ret = -EINVAL; - goto out_unlock; - } + if (!iommu->iopf_queue) + return -EINVAL; ret = iopf_queue_add_device(iommu->iopf_queue, dev_data->dev); if (ret) - goto out_unlock; + return ret; dev_data->ppr = true; - -out_unlock: - raw_spin_unlock_irqrestore(&iommu->lock, flags); - return ret; + return 0; } /* Its assumed that caller has verified that device was added to iopf queue */ void amd_iommu_iopf_remove_device(struct amd_iommu *iommu, struct iommu_dev_data *dev_data) { - unsigned long flags; - - raw_spin_lock_irqsave(&iommu->lock, flags); - iopf_queue_remove_device(iommu->iopf_queue, dev_data->dev); dev_data->ppr = false; - - raw_spin_unlock_irqrestore(&iommu->lock, flags); } -- Gitee From 685761fdd0bd27058af4f9dd9c25b23e6c687ed8 Mon Sep 17 00:00:00 2001 From: Mostafa Saleh Date: Tue, 4 Jun 2024 18:52:18 +0000 Subject: [PATCH 050/143] iommu/arm-smmu-v3: Avoid uninitialized asid in case of error ANBZ: #13617 commit d3867e7148318e12b5d69b64950622f5ed06fe86 upstream. Static checker is complaining about the ASID possibly set uninitialized. This only happens in case of error and this value would be ignored anyway. A simple fix would be just to initialize the local variable to zero, this path will only be reached on the first attach to a domain where the CD is already initialized to zero. This avoids having to bloat the function with an error path. Closes: https://lore.kernel.org/linux-iommu/849e3d77-0a3c-43c4-878d-a0e061c8cd61@moroto.mountain/T/#u Reported-by: Dan Carpenter Signed-off-by: Mostafa Saleh Fixes: 04905c17f648 ("iommu/arm-smmu-v3: Build the whole CD in arm_smmu_make_s1_cd()") Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240604185218.2602058-1-smostafa@google.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index c84577e140d6..213dfcbdbf25 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2313,7 +2313,7 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_device *smmu, struct arm_smmu_domain *smmu_domain) { int ret; - u32 asid; + u32 asid = 0; struct arm_smmu_ctx_desc *cd = &smmu_domain->cd; refcount_set(&cd->refs, 1); -- Gitee From d32be6acb6dc9a41222c96a27f7b6b27d3fb326a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 8 May 2024 18:15:55 +0300 Subject: [PATCH 051/143] iommu/arm-smmu-v3: Use *-y instead of *-objs in Makefile ANBZ: #13617 commit 16c0bad7ae04e4a1e7361fbb91573248de06a008 upstream. *-objs suffix is reserved rather for (user-space) host programs while usually *-y suffix is used for kernel drivers (although *-objs works for that purpose for now). Let's correct the old usages of *-objs in Makefiles. Signed-off-by: Andy Shevchenko Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240508151611.1444352-1-andriy.shevchenko@linux.intel.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/Makefile b/drivers/iommu/arm/arm-smmu-v3/Makefile index 014a997753a8..355173d1441d 100644 --- a/drivers/iommu/arm/arm-smmu-v3/Makefile +++ b/drivers/iommu/arm/arm-smmu-v3/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_ARM_SMMU_V3) += arm_smmu_v3.o -arm_smmu_v3-objs-y += arm-smmu-v3.o -arm_smmu_v3-objs-$(CONFIG_ARM_SMMU_V3_SVA) += arm-smmu-v3-sva.o -arm_smmu_v3-objs := $(arm_smmu_v3-objs-y) +arm_smmu_v3-y := arm-smmu-v3.o +arm_smmu_v3-$(CONFIG_ARM_SMMU_V3_SVA) += arm-smmu-v3-sva.o obj-$(CONFIG_ARM_SMMU_V3_KUNIT_TEST) += arm-smmu-v3-test.o -- Gitee From bd49fd3a91e959548c46feeb979e024d7903c6b4 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 28 May 2024 12:54:58 +0800 Subject: [PATCH 052/143] iommu: Make iommu_sva_domain_alloc() static ANBZ: #13617 commit b5c29fba72a6c950655d1cb0f6aa16b60dc83be7 upstream. iommu_sva_domain_alloc() is only called in iommu-sva.c, hence make it static. On the other hand, iommu_sva_domain_alloc() should not return NULL anymore after commit <80af5a452024> ("iommu: Add ops->domain_alloc_sva()"), the removal of inline code avoids potential confusion. Fixes: 80af5a452024 ("iommu: Add ops->domain_alloc_sva()") Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240528045458.81458-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/iommu-sva.c | 6 ++++-- include/linux/iommu.h | 8 -------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index 18a35e798b72..25e581299226 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -10,6 +10,8 @@ #include "iommu-priv.h" static DEFINE_MUTEX(iommu_sva_lock); +static struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, + struct mm_struct *mm); /* Allocate a PASID for the mm within range (inclusive) */ static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct device *dev) @@ -277,8 +279,8 @@ static int iommu_sva_iopf_handler(struct iopf_group *group) return 0; } -struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, - struct mm_struct *mm) +static struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, + struct mm_struct *mm) { const struct iommu_ops *ops = dev_iommu_ops(dev); struct iommu_domain *domain; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index b5a431eb8c4d..9afdafdc0e8b 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1559,8 +1559,6 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm); void iommu_sva_unbind_device(struct iommu_sva *handle); u32 iommu_sva_get_pasid(struct iommu_sva *handle); -struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, - struct mm_struct *mm); #else static inline struct iommu_sva * iommu_sva_bind_device(struct device *dev, struct mm_struct *mm) @@ -1585,12 +1583,6 @@ static inline u32 mm_get_enqcmd_pasid(struct mm_struct *mm) } static inline void mm_pasid_drop(struct mm_struct *mm) {} - -static inline struct iommu_domain * -iommu_sva_domain_alloc(struct device *dev, struct mm_struct *mm) -{ - return NULL; -} #endif /* CONFIG_IOMMU_SVA */ #ifdef CONFIG_IOMMU_IOPF -- Gitee From 504889e3d97cf576969420b72821b13d080162a3 Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Thu, 30 May 2024 13:36:03 -0500 Subject: [PATCH 053/143] iommu/amd: Fix panic accessing amd_iommu_enable_faulting ANBZ: #13617 commit 12243a8115f8583a6bcada7717c01fb164e23c89 upstream. This fixes a bug introduced by commit d74169ceb0d2 ("iommu/vt-d: Allocate DMAR fault interrupts locally"). The panic happens when amd_iommu_enable_faulting is called from CPUHP_AP_ONLINE_DYN context. Fixes: d74169ceb0d2 ("iommu/vt-d: Allocate DMAR fault interrupts locally") Signed-off-by: Dimitri Sivanich Tested-by: Yi Zhang Reviewed-by: Jerry Snitselaar Reviewed-by: Vasant Hegde Link: https://lore.kernel.org/r/ZljHE/R4KLzGU6vx@hpe.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 0d5f75a7543a..1a2519de068c 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3335,7 +3335,7 @@ int amd_iommu_reenable(int mode) return 0; } -int __init amd_iommu_enable_faulting(unsigned int cpu) +int amd_iommu_enable_faulting(unsigned int cpu) { /* We enable MSI later when PCI is initialized */ return 0; -- Gitee From c33367cdbd3a1709e9b07da154599204ab5ac5de Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 4 Jun 2024 13:39:09 +0100 Subject: [PATCH 054/143] iommu/dma: Prune redundant pgprot arguments ANBZ: #13617 commit 8d485a69603f667032d61daf4f1cb9464f315e1c upstream. Somewhere amongst previous refactorings, the pgprot value in __iommu_dma_alloc_noncontiguous() became entirely unused, and the one used in iommu_dma_alloc_remap() can be computed locally rather than by its one remaining caller. Clean 'em up. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/c2a81b72df59a71a13f8bad94f834e627c4c93dd.1717504749.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/dma-iommu.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 566ad634cf9e..95918eeb5576 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -959,8 +959,7 @@ static struct page **__iommu_dma_alloc_pages(struct device *dev, * but an IOMMU which supports smaller pages might not map the whole thing. */ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev, - size_t size, struct sg_table *sgt, gfp_t gfp, pgprot_t prot, - unsigned long attrs) + size_t size, struct sg_table *sgt, gfp_t gfp, unsigned long attrs) { struct iommu_domain *domain = iommu_get_dma_domain(dev); struct iommu_dma_cookie *cookie = domain->iova_cookie; @@ -1034,15 +1033,14 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev, } static void *iommu_dma_alloc_remap(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, pgprot_t prot, - unsigned long attrs) + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { struct page **pages; struct sg_table sgt; void *vaddr; + pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs); - pages = __iommu_dma_alloc_noncontiguous(dev, size, &sgt, gfp, prot, - attrs); + pages = __iommu_dma_alloc_noncontiguous(dev, size, &sgt, gfp, attrs); if (!pages) return NULL; *dma_handle = sgt.sgl->dma_address; @@ -1069,8 +1067,7 @@ static struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev, if (!sh) return NULL; - sh->pages = __iommu_dma_alloc_noncontiguous(dev, size, &sh->sgt, gfp, - PAGE_KERNEL, attrs); + sh->pages = __iommu_dma_alloc_noncontiguous(dev, size, &sh->sgt, gfp, attrs); if (!sh->pages) { kfree(sh); return NULL; @@ -1639,8 +1636,7 @@ static void *iommu_dma_alloc(struct device *dev, size_t size, if (gfpflags_allow_blocking(gfp) && !(attrs & DMA_ATTR_FORCE_CONTIGUOUS)) { - return iommu_dma_alloc_remap(dev, size, handle, gfp, - dma_pgprot(dev, PAGE_KERNEL, attrs), attrs); + return iommu_dma_alloc_remap(dev, size, handle, gfp, attrs); } if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && -- Gitee From 3e264ed776d50cc9bdb5a888efbc9ef04622e98d Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Thu, 13 Jun 2024 18:14:36 -0700 Subject: [PATCH 055/143] iommu/iova: Add missing MODULE_DESCRIPTION() macro ANBZ: #13617 commit c94ad1d5e3885bd4fa6abb695baf5a8f5c3c309c upstream. With ARCH=arm, make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/iommu/iova.o Add the missing invocation of the MODULE_DESCRIPTION() macro. Signed-off-by: Jeff Johnson Acked-by: Robin Murphy Link: https://lore.kernel.org/r/20240613-md-arm-drivers-iommu-v1-1-1fe0bd953119@quicinc.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/iova.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index d59d0ea2fd21..16c6adff3eb7 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -1000,4 +1000,5 @@ void iova_cache_put(void) EXPORT_SYMBOL_GPL(iova_cache_put); MODULE_AUTHOR("Anil S Keshavamurthy "); +MODULE_DESCRIPTION("IOMMU I/O Virtual Address management"); MODULE_LICENSE("GPL"); -- Gitee From 6f25e62420dde79a43f280d00ee637e90af671e3 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Sun, 16 Jun 2024 23:40:53 +0100 Subject: [PATCH 056/143] iommu: sun50i: allocate page tables from below 4 GiB ANBZ: #13617 commit 7b9331a3ae93adfae54c6a56d23513e1f7db5dcb upstream. The Allwinner IOMMU is a strict 32-bit device, with its input addresses, the page table root pointer as well as both level's page tables and also the target addresses all required to be below 4GB. The Allwinner H6 SoC only supports 32-bit worth of physical addresses anyway, so this isn't a problem so far, but the H616 and later SoCs extend the PA space beyond 32 bit to accommodate more DRAM. To make sure we stay within the 32-bit PA range required by the IOMMU, force the memory for the page tables to come from below 4GB. by using allocations with the DMA32 flag. Also reject any attempt to map target addresses beyond 4GB, and print a warning to give users a hint while this fails. Signed-off-by: Andre Przywara Reviewed-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20240616224056.29159-3-andre.przywara@arm.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/sun50i-iommu.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index dd3f07384624..20a07f829085 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -602,6 +602,14 @@ static int sun50i_iommu_map(struct iommu_domain *domain, unsigned long iova, u32 *page_table, *pte_addr; int ret = 0; + /* the IOMMU can only handle 32-bit addresses, both input and output */ + if ((uint64_t)paddr >> 32) { + ret = -EINVAL; + dev_warn_once(iommu->dev, + "attempt to map address beyond 4GB\n"); + goto out; + } + page_table = sun50i_dte_get_page_table(sun50i_domain, iova, gfp); if (IS_ERR(page_table)) { ret = PTR_ERR(page_table); @@ -682,7 +690,8 @@ sun50i_iommu_domain_alloc_paging(struct device *dev) if (!sun50i_domain) return NULL; - sun50i_domain->dt = iommu_alloc_pages(GFP_KERNEL, get_order(DT_SIZE)); + sun50i_domain->dt = iommu_alloc_pages(GFP_KERNEL | GFP_DMA32, + get_order(DT_SIZE)); if (!sun50i_domain->dt) goto err_free_domain; @@ -997,7 +1006,7 @@ static int sun50i_iommu_probe(struct platform_device *pdev) iommu->pt_pool = kmem_cache_create(dev_name(&pdev->dev), PT_SIZE, PT_SIZE, - SLAB_HWCACHE_ALIGN, + SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA32, NULL); if (!iommu->pt_pool) return -ENOMEM; -- Gitee From 495954b87e676e76c65d1ead9c6ed4c68f1f4913 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Sun, 16 Jun 2024 23:40:55 +0100 Subject: [PATCH 057/143] iommu: sun50i: Add H616 compatible string ANBZ: #13617 commit 8db07ce532c0b51fea974613002cdc6a27732929 upstream. The IOMMU IP in the Allwinner H616 SoC is *almost* compatible to the H6, but uses a different reset value for the bypass register, and adds some more registers. While a driver *can* be written to support both variants (which we in fact do), the hardware itself is not fully compatible, so we require a separate compatible string. Add the new compatible string to the list, but without changing the behaviour, since the driver already supports both variants. Signed-off-by: Andre Przywara Reviewed-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20240616224056.29159-5-andre.przywara@arm.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/sun50i-iommu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index 20a07f829085..8d8f11854676 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -1067,6 +1067,7 @@ static int sun50i_iommu_probe(struct platform_device *pdev) static const struct of_device_id sun50i_iommu_dt[] = { { .compatible = "allwinner,sun50i-h6-iommu", }, + { .compatible = "allwinner,sun50i-h616-iommu", }, { /* sentinel */ }, }; MODULE_DEVICE_TABLE(of, sun50i_iommu_dt); -- Gitee From 3148ba7b5cc7353f3b61f63259b6389da6510010 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Wed, 22 May 2024 10:26:47 +0200 Subject: [PATCH 058/143] iommu/amd: Use try_cmpxchg64() in v2_alloc_pte() ANBZ: #13617 commit 9a448e453151ec4e4b98a914b463539e790dd198 upstream. Use try_cmpxchg64() instead of cmpxchg64 (*ptr, old, new) != old in v2_alloc_pte(). cmpxchg returns success in ZF flag, so this change saves a compare after cmpxchg (and related move instruction in front of cmpxchg). This is the same improvement as implemented for alloc_pte() in: commit 0d10fe759117 ("iommu/amd: Use try_cmpxchg64 in alloc_pte and free_clear_pte") Signed-off-by: Uros Bizjak Cc: Joerg Roedel Cc: Suravee Suthikulpanit Cc: Will Deacon Cc: Robin Murphy Reviewed-by: Vasant Hegde Link: https://lore.kernel.org/r/20240522082729.971123-1-ubizjak@gmail.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/io_pgtable_v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c index e1d6ad01c641..3965da0376ce 100644 --- a/drivers/iommu/amd/io_pgtable_v2.c +++ b/drivers/iommu/amd/io_pgtable_v2.c @@ -158,7 +158,7 @@ static u64 *v2_alloc_pte(int nid, u64 *pgd, unsigned long iova, __npte = set_pgtable_attr(page); /* pte could have been changed somewhere. */ - if (cmpxchg64(pte, __pte, __npte) != __pte) + if (!try_cmpxchg64(pte, &__pte, __npte)) iommu_free_page(page); else if (IOMMU_PTE_PRESENT(__pte)) *updated = true; -- Gitee From cc60e3f6fc2d75165a38ebbcc500039f115ae25c Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Wed, 22 May 2024 10:26:48 +0200 Subject: [PATCH 059/143] iommu/vt-d: Use try_cmpxchg64() in intel_pasid_get_entry() ANBZ: #13617 commit 5c555f1f1c31f7dd60a7697be9bb0e98706bb10a upstream. Use try_cmpxchg64() instead of cmpxchg64 (*ptr, old, new) != old in intel_pasid_get_entry(). cmpxchg returns success in ZF flag, so this change saves a compare after cmpxchg (and related move instruction in front of cmpxchg). Signed-off-by: Uros Bizjak Cc: David Woodhouse Cc: Lu Baolu Cc: Joerg Roedel Cc: Will Deacon Cc: Robin Murphy Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20240522082729.971123-2-ubizjak@gmail.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/intel/pasid.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index abce19e2ad6f..9bf45bc4b967 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -146,6 +146,8 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid) retry: entries = get_pasid_table_from_pde(&dir[dir_index]); if (!entries) { + u64 tmp; + entries = iommu_alloc_page_node(info->iommu->node, GFP_ATOMIC); if (!entries) return NULL; @@ -156,8 +158,9 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid) * clear. However, this entry might be populated by others * while we are preparing it. Use theirs with a retry. */ - if (cmpxchg64(&dir[dir_index].val, 0ULL, - (u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) { + tmp = 0ULL; + if (!try_cmpxchg64(&dir[dir_index].val, &tmp, + (u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) { iommu_free_page(entries); goto retry; } -- Gitee From db73e5a5357e526e402b66abc2e8f472a7d69591 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Wed, 22 May 2024 10:26:49 +0200 Subject: [PATCH 060/143] iommufd: Use atomic_long_try_cmpxchg() in incr_user_locked_vm() ANBZ: #13617 commit b95a40122a8183873736e0506df8e3a881178099 upstream. Use atomic_long_try_cmpxchg() instead of atomic_long_cmpxchg (*ptr, old, new) != old in incr_user_locked_vm(). cmpxchg returns success in ZF flag, so this change saves a compare after cmpxchg (and related move instruction in front of cmpxchg). Also, atomic_long_try_cmpxchg() implicitly assigns old *ptr value to "old" when cmpxchg fails. There is no need to re-read the value in the loop. Signed-off-by: Uros Bizjak Cc: Jason Gunthorpe Cc: Kevin Tian Cc: Joerg Roedel Cc: Will Deacon Cc: Robin Murphy Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240522082729.971123-3-ubizjak@gmail.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/iommufd/pages.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index 528f356238b3..117f644a0c5b 100644 --- a/drivers/iommu/iommufd/pages.c +++ b/drivers/iommu/iommufd/pages.c @@ -809,13 +809,14 @@ static int incr_user_locked_vm(struct iopt_pages *pages, unsigned long npages) lock_limit = task_rlimit(pages->source_task, RLIMIT_MEMLOCK) >> PAGE_SHIFT; + + cur_pages = atomic_long_read(&pages->source_user->locked_vm); do { - cur_pages = atomic_long_read(&pages->source_user->locked_vm); new_pages = cur_pages + npages; if (new_pages > lock_limit) return -ENOMEM; - } while (atomic_long_cmpxchg(&pages->source_user->locked_vm, cur_pages, - new_pages) != cur_pages); + } while (!atomic_long_try_cmpxchg(&pages->source_user->locked_vm, + &cur_pages, new_pages)); return 0; } -- Gitee From 474e6fb1f9f6210db77ec21c035302c484bd7862 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Thu, 20 Jun 2024 06:05:52 +0000 Subject: [PATCH 061/143] iommu/amd: Invalidate cache before removing device from domain list ANBZ: #13617 commit c362f32a59a84fe4453abecc6b53f5f70894a6d5 upstream. Commit 87a6f1f22c97 ("iommu/amd: Introduce per-device domain ID to fix potential TLB aliasing issue") introduced per device domain ID when domain is configured with v2 page table. And in invalidation path, it uses per device structure (dev_data->gcr3_info.domid) to get the domain ID. In detach_device() path, current code tries to invalidate IOMMU cache after removing dev_data from domain device list. This means when domain is configured with v2 page table, amd_iommu_domain_flush_all() will not be able to invalidate cache as device is already removed from domain device list. This is causing change domain tests (changing domain type from identity to DMA) to fail with IO_PAGE_FAULT issue. Hence invalidate cache and update DTE before updating data structures. Reported-by: FahHean Lee Reported-by: Dheeraj Kumar Srivastava Fixes: 87a6f1f22c97 ("iommu/amd: Introduce per-device domain ID to fix potential TLB aliasing issue") Tested-by: Dheeraj Kumar Srivastava Tested-by: Sairaj Arun Kodilkar Tested-by: FahHean Lee Signed-off-by: Vasant Hegde Reviewed-by: Jerry Snitselaar Link: https://lore.kernel.org/r/20240620060552.13984-1-vasant.hegde@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/iommu.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 67782edecabd..79c18b397866 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2070,6 +2070,12 @@ static void do_detach(struct iommu_dev_data *dev_data) struct protection_domain *domain = dev_data->domain; struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); + /* Clear DTE and flush the entry */ + amd_iommu_dev_update_dte(dev_data, false); + + /* Flush IOTLB and wait for the flushes to finish */ + amd_iommu_domain_flush_all(domain); + /* Clear GCR3 table */ if (pdom_is_sva_capable(domain)) destroy_gcr3_table(dev_data, domain); @@ -2078,12 +2084,6 @@ static void do_detach(struct iommu_dev_data *dev_data) dev_data->domain = NULL; list_del(&dev_data->list); - /* Clear DTE and flush the entry */ - amd_iommu_dev_update_dte(dev_data, false); - - /* Flush IOTLB and wait for the flushes to finish */ - amd_iommu_domain_flush_all(domain); - /* decrease reference counters - needs to happen after the flushes */ domain->dev_iommu[iommu->index] -= 1; domain->dev_cnt -= 1; -- Gitee From 16666400ff20a0add27270da5df0c74c217c3b14 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 20 Jun 2024 14:29:40 +0800 Subject: [PATCH 062/143] iommu/vt-d: Fix missed device TLB cache tag ANBZ: #13617 commit 041be2717b198dd65032f726648401ba293c1bba upstream. When a domain is attached to a device, the required cache tags are assigned to the domain so that the related caches can be flushed whenever it is needed. The device TLB cache tag is created based on whether the ats_enabled field of the device's iommu data is set. This creates an ordered dependency between cache tag assignment and ATS enabling. The device TLB cache tag would not be created if device's ATS is enabled after the cache tag assignment. This causes devices with PCI ATS support to malfunction. The ATS control is exclusively owned by the iommu driver. Hence, move cache_tag_assign_domain() after PCI ATS enabling to make sure that the device TLB cache tag is created for the domain. Fixes: 3b1d9e2b2d68 ("iommu/vt-d: Add cache tag assignment interface") Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240620062940.201786-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/intel/iommu.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index d5dfa27117c9..b217d4f3646b 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2126,12 +2126,6 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, if (ret) return ret; - ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID); - if (ret) { - domain_detach_iommu(domain, iommu); - return ret; - } - info->domain = domain; spin_lock_irqsave(&domain->lock, flags); list_add(&info->link, &domain->devices); @@ -2149,15 +2143,21 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, else ret = intel_pasid_setup_second_level(iommu, domain, dev, IOMMU_NO_PASID); - if (ret) { - device_block_translation(dev); - return ret; - } + if (ret) + goto out_block_translation; if (sm_supported(info->iommu) || !domain_type_is_si(info->domain)) iommu_enable_pci_caps(info); + ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID); + if (ret) + goto out_block_translation; + return 0; + +out_block_translation: + device_block_translation(dev); + return ret; } /** -- Gitee From f1fc2d34bbf5ecd61ea1fd630036964affdee8ef Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Fri, 21 Jun 2024 10:15:33 +0000 Subject: [PATCH 063/143] iommu/amd: Fix GT feature enablement again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #13617 commit 150bdf5f8d8f805d70bebbbfd07697bd2416771a upstream. Current code configures GCR3 even when device is attached to identity domain. So that we can support SVA with identity domain. This means in attach device path it updates Guest Translation related bits in DTE. Commit de111f6b4f6a ("iommu/amd: Enable Guest Translation after reading IOMMU feature register") missed to enable Control[GT] bit in resume path. Its causing certain laptop to fail to resume after suspend. This is because we have inconsistency between between control register (GT is disabled) and DTE (where we have enabled guest translation related bits) in resume path. And IOMMU hardware throws ILLEGAL_DEV_TABLE_ENTRY. Fix it by enabling GT bit in resume path. Reported-by: BÅ‚ażej SzczygieÅ‚ Link: https://bugzilla.kernel.org/show_bug.cgi?id=218975 Fixes: de111f6b4f6a ("iommu/amd: Enable Guest Translation after reading IOMMU feature register") Tested-by: BÅ‚ażej SzczygieÅ‚ Signed-off-by: Vasant Hegde Reviewed-by: Jerry Snitselaar Link: https://lore.kernel.org/r/20240621101533.20216-1-vasant.hegde@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 1a2519de068c..d7347e19aabb 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -2742,6 +2742,7 @@ static void early_enable_iommu(struct amd_iommu *iommu) iommu_enable_command_buffer(iommu); iommu_enable_event_buffer(iommu); iommu_set_exclusion_range(iommu); + iommu_enable_gt(iommu); iommu_enable_ga(iommu); iommu_enable_xt(iommu); iommu_enable_irtcachedis(iommu); -- Gitee From 5c118e9815ba205d4449564db0ff2b562620aa82 Mon Sep 17 00:00:00 2001 From: Shivaprasad G Bhat Date: Mon, 24 Jun 2024 12:38:21 +0000 Subject: [PATCH 064/143] powerpc/iommu: Move pSeries specific functions to pseries/iommu.c ANBZ: #13617 commit b09c031d9433dda3186190e5845ba0d720212567 upstream. The PowerNV specific table_group_ops are defined in powernv/pci-ioda.c. The pSeries specific table_group_ops are sitting in the generic powerpc file. Move it to where it actually belong(pseries/iommu.c). The functions are currently defined even for CONFIG_PPC_POWERNV which are unused on PowerNV. Only code movement, no functional changes intended. Signed-off-by: Shivaprasad G Bhat Signed-off-by: Michael Ellerman Link: https://msgid.link/171923269701.1397.15758640002786937132.stgit@linux.ibm.com Signed-off-by: Shuai Xue --- arch/powerpc/include/asm/iommu.h | 4 +- arch/powerpc/kernel/iommu.c | 149 +------------------------ arch/powerpc/platforms/pseries/iommu.c | 144 ++++++++++++++++++++++++ 3 files changed, 149 insertions(+), 148 deletions(-) diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 026695943550..744cc5fc22d3 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -156,6 +156,9 @@ extern int iommu_tce_table_put(struct iommu_table *tbl); extern struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid, unsigned long res_start, unsigned long res_end); bool iommu_table_in_use(struct iommu_table *tbl); +extern void iommu_table_reserve_pages(struct iommu_table *tbl, + unsigned long res_start, unsigned long res_end); +extern void iommu_table_clear(struct iommu_table *tbl); #define IOMMU_TABLE_GROUP_MAX_TABLES 2 @@ -218,7 +221,6 @@ extern long iommu_tce_xchg_no_kill(struct mm_struct *mm, extern void iommu_tce_kill(struct iommu_table *tbl, unsigned long entry, unsigned long pages); -extern struct iommu_table_group_ops spapr_tce_table_group_ops; #else static inline void iommu_register_group(struct iommu_table_group *table_group, int pci_domain_number, diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 1185efebf032..aa11b2acf24f 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -642,7 +642,7 @@ void ppc_iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, tbl->it_ops->flush(tbl); } -static void iommu_table_clear(struct iommu_table *tbl) +void iommu_table_clear(struct iommu_table *tbl) { /* * In case of firmware assisted dump system goes through clean @@ -683,7 +683,7 @@ static void iommu_table_clear(struct iommu_table *tbl) #endif } -static void iommu_table_reserve_pages(struct iommu_table *tbl, +void iommu_table_reserve_pages(struct iommu_table *tbl, unsigned long res_start, unsigned long res_end) { int i; @@ -1101,59 +1101,6 @@ void iommu_tce_kill(struct iommu_table *tbl, } EXPORT_SYMBOL_GPL(iommu_tce_kill); -#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) -static int iommu_take_ownership(struct iommu_table *tbl) -{ - unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; - int ret = 0; - - /* - * VFIO does not control TCE entries allocation and the guest - * can write new TCEs on top of existing ones so iommu_tce_build() - * must be able to release old pages. This functionality - * requires exchange() callback defined so if it is not - * implemented, we disallow taking ownership over the table. - */ - if (!tbl->it_ops->xchg_no_kill) - return -EINVAL; - - spin_lock_irqsave(&tbl->large_pool.lock, flags); - for (i = 0; i < tbl->nr_pools; i++) - spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock); - - if (iommu_table_in_use(tbl)) { - pr_err("iommu_tce: it_map is not empty"); - ret = -EBUSY; - } else { - memset(tbl->it_map, 0xff, sz); - } - - for (i = 0; i < tbl->nr_pools; i++) - spin_unlock(&tbl->pools[i].lock); - spin_unlock_irqrestore(&tbl->large_pool.lock, flags); - - return ret; -} - -static void iommu_release_ownership(struct iommu_table *tbl) -{ - unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; - - spin_lock_irqsave(&tbl->large_pool.lock, flags); - for (i = 0; i < tbl->nr_pools; i++) - spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock); - - memset(tbl->it_map, 0, sz); - - iommu_table_reserve_pages(tbl, tbl->it_reserved_start, - tbl->it_reserved_end); - - for (i = 0; i < tbl->nr_pools; i++) - spin_unlock(&tbl->pools[i].lock); - spin_unlock_irqrestore(&tbl->large_pool.lock, flags); -} -#endif - int iommu_add_device(struct iommu_table_group *table_group, struct device *dev) { /* @@ -1185,98 +1132,6 @@ int iommu_add_device(struct iommu_table_group *table_group, struct device *dev) EXPORT_SYMBOL_GPL(iommu_add_device); #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) -/* - * A simple iommu_table_group_ops which only allows reusing the existing - * iommu_table. This handles VFIO for POWER7 or the nested KVM. - * The ops does not allow creating windows and only allows reusing the existing - * one if it matches table_group->tce32_start/tce32_size/page_shift. - */ -static unsigned long spapr_tce_get_table_size(__u32 page_shift, - __u64 window_size, __u32 levels) -{ - unsigned long size; - - if (levels > 1) - return ~0U; - size = window_size >> (page_shift - 3); - return size; -} - -static long spapr_tce_create_table(struct iommu_table_group *table_group, int num, - __u32 page_shift, __u64 window_size, __u32 levels, - struct iommu_table **ptbl) -{ - struct iommu_table *tbl = table_group->tables[0]; - - if (num > 0) - return -EPERM; - - if (tbl->it_page_shift != page_shift || - tbl->it_size != (window_size >> page_shift) || - tbl->it_indirect_levels != levels - 1) - return -EINVAL; - - *ptbl = iommu_tce_table_get(tbl); - return 0; -} - -static long spapr_tce_set_window(struct iommu_table_group *table_group, - int num, struct iommu_table *tbl) -{ - return tbl == table_group->tables[num] ? 0 : -EPERM; -} - -static long spapr_tce_unset_window(struct iommu_table_group *table_group, int num) -{ - return 0; -} - -static long spapr_tce_take_ownership(struct iommu_table_group *table_group) -{ - int i, j, rc = 0; - - for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { - struct iommu_table *tbl = table_group->tables[i]; - - if (!tbl || !tbl->it_map) - continue; - - rc = iommu_take_ownership(tbl); - if (!rc) - continue; - - for (j = 0; j < i; ++j) - iommu_release_ownership(table_group->tables[j]); - return rc; - } - return 0; -} - -static void spapr_tce_release_ownership(struct iommu_table_group *table_group) -{ - int i; - - for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { - struct iommu_table *tbl = table_group->tables[i]; - - if (!tbl) - continue; - - iommu_table_clear(tbl); - if (tbl->it_map) - iommu_release_ownership(tbl); - } -} - -struct iommu_table_group_ops spapr_tce_table_group_ops = { - .get_table_size = spapr_tce_get_table_size, - .create_table = spapr_tce_create_table, - .set_window = spapr_tce_set_window, - .unset_window = spapr_tce_unset_window, - .take_ownership = spapr_tce_take_ownership, - .release_ownership = spapr_tce_release_ownership, -}; - /* * A simple iommu_ops to allow less cruft in generic VFIO code. */ diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index bf02f94a973d..ed49bf1df0f1 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -54,6 +54,57 @@ enum { DDW_EXT_QUERY_OUT_SIZE = 2 }; +static int iommu_take_ownership(struct iommu_table *tbl) +{ + unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; + int ret = 0; + + /* + * VFIO does not control TCE entries allocation and the guest + * can write new TCEs on top of existing ones so iommu_tce_build() + * must be able to release old pages. This functionality + * requires exchange() callback defined so if it is not + * implemented, we disallow taking ownership over the table. + */ + if (!tbl->it_ops->xchg_no_kill) + return -EINVAL; + + spin_lock_irqsave(&tbl->large_pool.lock, flags); + for (i = 0; i < tbl->nr_pools; i++) + spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock); + + if (iommu_table_in_use(tbl)) { + pr_err("iommu_tce: it_map is not empty"); + ret = -EBUSY; + } else { + memset(tbl->it_map, 0xff, sz); + } + + for (i = 0; i < tbl->nr_pools; i++) + spin_unlock(&tbl->pools[i].lock); + spin_unlock_irqrestore(&tbl->large_pool.lock, flags); + + return ret; +} + +static void iommu_release_ownership(struct iommu_table *tbl) +{ + unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; + + spin_lock_irqsave(&tbl->large_pool.lock, flags); + for (i = 0; i < tbl->nr_pools; i++) + spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock); + + memset(tbl->it_map, 0, sz); + + iommu_table_reserve_pages(tbl, tbl->it_reserved_start, + tbl->it_reserved_end); + + for (i = 0; i < tbl->nr_pools; i++) + spin_unlock(&tbl->pools[i].lock); + spin_unlock_irqrestore(&tbl->large_pool.lock, flags); +} + static struct iommu_table *iommu_pseries_alloc_table(int node) { struct iommu_table *tbl; @@ -67,6 +118,8 @@ static struct iommu_table *iommu_pseries_alloc_table(int node) return tbl; } +static struct iommu_table_group_ops spapr_tce_table_group_ops; + static struct iommu_table_group *iommu_pseries_alloc_group(int node) { struct iommu_table_group *table_group; @@ -1647,6 +1700,97 @@ static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask) return false; } +/* + * A simple iommu_table_group_ops which only allows reusing the existing + * iommu_table. This handles VFIO for POWER7 or the nested KVM. + * The ops does not allow creating windows and only allows reusing the existing + * one if it matches table_group->tce32_start/tce32_size/page_shift. + */ +static unsigned long spapr_tce_get_table_size(__u32 page_shift, + __u64 window_size, __u32 levels) +{ + unsigned long size; + + if (levels > 1) + return ~0U; + size = window_size >> (page_shift - 3); + return size; +} + +static long spapr_tce_create_table(struct iommu_table_group *table_group, int num, + __u32 page_shift, __u64 window_size, __u32 levels, + struct iommu_table **ptbl) +{ + struct iommu_table *tbl = table_group->tables[0]; + + if (num > 0) + return -EPERM; + + if (tbl->it_page_shift != page_shift || + tbl->it_size != (window_size >> page_shift) || + tbl->it_indirect_levels != levels - 1) + return -EINVAL; + + *ptbl = iommu_tce_table_get(tbl); + return 0; +} + +static long spapr_tce_set_window(struct iommu_table_group *table_group, + int num, struct iommu_table *tbl) +{ + return tbl == table_group->tables[num] ? 0 : -EPERM; +} + +static long spapr_tce_unset_window(struct iommu_table_group *table_group, int num) +{ + return 0; +} + +static long spapr_tce_take_ownership(struct iommu_table_group *table_group) +{ + int i, j, rc = 0; + + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + struct iommu_table *tbl = table_group->tables[i]; + + if (!tbl || !tbl->it_map) + continue; + + rc = iommu_take_ownership(tbl); + if (!rc) + continue; + + for (j = 0; j < i; ++j) + iommu_release_ownership(table_group->tables[j]); + return rc; + } + return 0; +} + +static void spapr_tce_release_ownership(struct iommu_table_group *table_group) +{ + int i; + + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + struct iommu_table *tbl = table_group->tables[i]; + + if (!tbl) + continue; + + if (tbl->it_map) + iommu_release_ownership(tbl); + } +} + +static struct iommu_table_group_ops spapr_tce_table_group_ops = { + .get_table_size = spapr_tce_get_table_size, + .create_table = spapr_tce_create_table, + .set_window = spapr_tce_set_window, + .unset_window = spapr_tce_unset_window, + .take_ownership = spapr_tce_take_ownership, + .release_ownership = spapr_tce_release_ownership, +}; + static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action, void *data) { -- Gitee From 05fa02535c6f706df2be9ad68ead01906fb54a36 Mon Sep 17 00:00:00 2001 From: Shivaprasad G Bhat Date: Mon, 24 Jun 2024 12:38:34 +0000 Subject: [PATCH 065/143] powerpc/pseries/iommu: Fix the VFIO_IOMMU_SPAPR_TCE_GET_INFO ioctl output ANBZ: #13617 commit 6af67f2ddfcbbca551d786415beda14c48fb6ddf upstream. The ioctl VFIO_IOMMU_SPAPR_TCE_GET_INFO is not reporting the actuals on the platform as not all the details are correctly collected during the platform probe/scan into the iommu_table_group. Collect the information during the device setup time as the DMA window property is already looked up on parent nodes anyway. Signed-off-by: Shivaprasad G Bhat Signed-off-by: Michael Ellerman Link: https://msgid.link/171923271138.1397.7908302630061814623.stgit@linux.ibm.com Signed-off-by: Shuai Xue --- arch/powerpc/platforms/pseries/iommu.c | 81 +++++++++++++++++++++----- 1 file changed, 67 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index ed49bf1df0f1..b2f5da180ff9 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -865,13 +865,6 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) be32_to_cpu(prop.tce_shift), NULL, &iommu_table_lpar_multi_ops); - /* Only for normal boot with default window. Doesn't matter even - * if we set these with DDW which is 64bit during kdump, since - * these will not be used during kdump. - */ - ppci->table_group->tce32_start = be64_to_cpu(prop.dma_base); - ppci->table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift); - if (!iommu_init_table(tbl, ppci->phb->node, 0, 0)) panic("Failed to initialize iommu table"); @@ -1619,6 +1612,71 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) return direct_mapping; } +static __u64 query_page_size_to_mask(u32 query_page_size) +{ + const long shift[] = { + (SZ_4K), (SZ_64K), (SZ_16M), + (SZ_32M), (SZ_64M), (SZ_128M), + (SZ_256M), (SZ_16G), (SZ_2M) + }; + int i, ret = 0; + + for (i = 0; i < ARRAY_SIZE(shift); i++) { + if (query_page_size & (1 << i)) + ret |= shift[i]; + } + + return ret; +} + +static void spapr_tce_init_table_group(struct pci_dev *pdev, + struct device_node *pdn, + struct dynamic_dma_window_prop prop) +{ + struct iommu_table_group *table_group = PCI_DN(pdn)->table_group; + u32 ddw_avail[DDW_APPLICABLE_SIZE]; + + struct ddw_query_response query; + int ret; + + /* Only for normal boot with default window. Doesn't matter during + * kdump, since these will not be used during kdump. + */ + if (is_kdump_kernel()) + return; + + if (table_group->max_dynamic_windows_supported != 0) + return; /* already initialized */ + + table_group->tce32_start = be64_to_cpu(prop.dma_base); + table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift); + + if (!of_find_property(pdn, "ibm,dma-window", NULL)) + dev_err(&pdev->dev, "default dma window missing!\n"); + + ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable", + &ddw_avail[0], DDW_APPLICABLE_SIZE); + if (ret) { + table_group->max_dynamic_windows_supported = -1; + return; + } + + ret = query_ddw(pdev, ddw_avail, &query, pdn); + if (ret) { + dev_err(&pdev->dev, "%s: query_ddw failed\n", __func__); + table_group->max_dynamic_windows_supported = -1; + return; + } + + if (query.windows_available == 0) + table_group->max_dynamic_windows_supported = 1; + else + table_group->max_dynamic_windows_supported = IOMMU_TABLE_GROUP_MAX_TABLES; + + table_group->max_levels = 1; + table_group->pgsizes |= query_page_size_to_mask(query.page_size); +} + static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) { struct device_node *pdn, *dn; @@ -1658,13 +1716,6 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) be32_to_cpu(prop.tce_shift), NULL, &iommu_table_lpar_multi_ops); - /* Only for normal boot with default window. Doesn't matter even - * if we set these with DDW which is 64bit during kdump, since - * these will not be used during kdump. - */ - pci->table_group->tce32_start = be64_to_cpu(prop.dma_base); - pci->table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift); - iommu_init_table(tbl, pci->phb->node, 0, 0); iommu_register_group(pci->table_group, pci_domain_nr(pci->phb->bus), 0); @@ -1673,6 +1724,8 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) pr_debug(" found DMA window, table: %p\n", pci->table_group); } + spapr_tce_init_table_group(dev, pdn, prop); + set_iommu_table_base(&dev->dev, pci->table_group->tables[0]); iommu_add_device(pci->table_group, &dev->dev); } -- Gitee From 1082ee8aae867025c35fde936fac770f6f02c07a Mon Sep 17 00:00:00 2001 From: Shivaprasad G Bhat Date: Mon, 24 Jun 2024 12:38:46 +0000 Subject: [PATCH 066/143] powerpc/pseries/iommu: Use the iommu table[0] for IOV VF's DDW ANBZ: #13617 commit aed6e4946ed9654fc965482b045b84f9b9572bb8 upstream. This patch basically brings consistency with PowerNV approach to use the first freely available iommu table when the default window is removed. The pSeries iommu code convention has been that the table[0] is for the default 32 bit DMA window and the table[1] is for the 64 bit DDW. With VFs having only 1 DMA window, the default has to be removed for creating the larger DMA window. The existing code uses the table[1] for that, while marking the table[0] as NULL. This is fine as long as the host driver itself uses the device. For the VFIO user, on pSeries there is no way to skip table[0] as the VFIO subdriver uses the first freely available table. The window 0, when created as 64-bit DDW in that context would still be on table[0], as the maximum number of windows is 1. This won't have any impact for the host driver as the table is fetched from the device's iommu_table_base. Signed-off-by: Shivaprasad G Bhat [mpe: Rebase and resolve conflicts] Signed-off-by: Michael Ellerman Link: https://msgid.link/171923272328.1397.1817843961216868850.stgit@linux.ibm.com Signed-off-by: Shuai Xue [ Jay Chen: rebase conflicts in ../arch/powerpc/platforms/pseries/iommu.c ] Signed-off-by: Jay Chen --- arch/powerpc/platforms/pseries/iommu.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index b2f5da180ff9..6c42700e7a2f 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -155,7 +155,7 @@ static void iommu_pseries_free_group(struct iommu_table_group *table_group, #endif /* Default DMA window table is at index 0, while DDW at 1. SR-IOV - * adapters only have table on index 1. + * adapters only have table on index 0(if not direct mapped). */ if (table_group->tables[0]) iommu_tce_table_put(table_group->tables[0]); @@ -1523,6 +1523,11 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) clean_dma_window(pdn, win64->value); goto out_del_list; } + if (default_win_removed) { + iommu_tce_table_put(pci->table_group->tables[0]); + pci->table_group->tables[0] = NULL; + set_iommu_table_base(&dev->dev, NULL); + } } else { struct iommu_table *newtbl; int i; @@ -1552,7 +1557,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) 1UL << len, page_shift, NULL, &iommu_table_lpar_multi_ops); iommu_init_table(newtbl, pci->phb->node, start, end); - pci->table_group->tables[1] = newtbl; + pci->table_group->tables[default_win_removed ? 0 : 1] = newtbl; set_iommu_table_base(&dev->dev, newtbl); } -- Gitee From 282a9ed096c9825ee9a602819991992dbba5eb34 Mon Sep 17 00:00:00 2001 From: Shivaprasad G Bhat Date: Mon, 24 Jun 2024 12:39:10 +0000 Subject: [PATCH 067/143] powerpc/iommu: Move dev_has_iommu_table() to iommu.c ANBZ: #13617 commit 35146eadcb81d72153a1621f3cc0d5588cae19d3 upstream. Move function dev_has_iommu_table() to powerpc/kernel/iommu.c as it is going to be used by machine specific iommu code as well in subsequent patches. Signed-off-by: Shivaprasad G Bhat Signed-off-by: Michael Ellerman Link: https://msgid.link/171923274748.1397.6274953248403106679.stgit@linux.ibm.com Signed-off-by: Shuai Xue --- arch/powerpc/include/asm/iommu.h | 6 ++++++ arch/powerpc/kernel/eeh.c | 16 ---------------- arch/powerpc/kernel/iommu.c | 17 +++++++++++++++++ 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 744cc5fc22d3..b2fe6e7f81d6 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -220,6 +220,7 @@ extern long iommu_tce_xchg_no_kill(struct mm_struct *mm, enum dma_data_direction *direction); extern void iommu_tce_kill(struct iommu_table *tbl, unsigned long entry, unsigned long pages); +int dev_has_iommu_table(struct device *dev, void *data); #else static inline void iommu_register_group(struct iommu_table_group *table_group, @@ -233,6 +234,11 @@ static inline int iommu_add_device(struct iommu_table_group *table_group, { return 0; } + +static inline int dev_has_iommu_table(struct device *dev, void *data) +{ + return 0; +} #endif /* !CONFIG_IOMMU_API */ u64 dma_iommu_get_required_mask(struct device *dev); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 82626363a309..c0cef4547560 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1265,22 +1265,6 @@ EXPORT_SYMBOL(eeh_dev_release); #ifdef CONFIG_IOMMU_API -static int dev_has_iommu_table(struct device *dev, void *data) -{ - struct pci_dev *pdev = to_pci_dev(dev); - struct pci_dev **ppdev = data; - - if (!dev) - return 0; - - if (device_iommu_mapped(dev)) { - *ppdev = pdev; - return 1; - } - - return 0; -} - /** * eeh_iommu_group_to_pe - Convert IOMMU group to EEH PE * @group: IOMMU group diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index aa11b2acf24f..32059b392cfa 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -987,6 +987,23 @@ unsigned long iommu_direction_to_tce_perm(enum dma_data_direction dir) EXPORT_SYMBOL_GPL(iommu_direction_to_tce_perm); #ifdef CONFIG_IOMMU_API + +int dev_has_iommu_table(struct device *dev, void *data) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct pci_dev **ppdev = data; + + if (!dev) + return 0; + + if (device_iommu_mapped(dev)) { + *ppdev = pdev; + return 1; + } + + return 0; +} + /* * SPAPR TCE API */ -- Gitee From 34cf1a4abff09b9775e3b7215c98047d0bfee267 Mon Sep 17 00:00:00 2001 From: Shivaprasad G Bhat Date: Mon, 24 Jun 2024 12:39:23 +0000 Subject: [PATCH 068/143] powerpc/iommu: Reimplement the iommu_table_group_ops for pSeries ANBZ: #13617 commit f431a8cde7f102fce412546db6e62fdbde1131a7 upstream. PPC64 IOMMU API defines iommu_table_group_ops which handles DMA windows for PEs, their ownership transfer, create/set/unset the TCE tables for the Dynamic DMA wundows(DDW). VFIOS uses these APIs for support on POWER. The commit 9d67c9433509 ("powerpc/iommu: Add "borrowing" iommu_table_group_ops") implemented partial support for this API with "borrow" mechanism wherein the DMA windows if created already by the host driver, they would be available for VFIO to use. Also, it didn't have the support to control/modify the window size or the IO page size. The current patch implements all the necessary iommu_table_group_ops APIs there by avoiding the "borrrowing". So, just the way it is on the PowerNV platform, with this patch the iommu table group ownership is transferred to the VFIO PPC subdriver, the iommu table, DMA windows creation/deletion all driven through the APIs. The pSeries uses the query-pe-dma-window, create-pe-dma-window and reset-pe-dma-window RTAS calls for DMA window creation, deletion and reset to defaul. The RTAs calls do show some minor differences to the way things are to be handled on the pSeries which are listed below. * On pSeries, the default DMA window size is "fixed" cannot be custom sized as requested by the user. For non-SRIOV VFs, It is fixed at 2GB and for SRIOV VFs, its variable sized based on the capacity assigned to it during the VF assignment to the LPAR. So, for the default DMA window alone the size if requested less than tce32_size, the smaller size is enforced using the iommu table->it_size. * The DMA start address for 32-bit window is 0, and for the 64-bit window in case of PowerNV is hardcoded to TVE select (bit 59) at 512PiB offset. This address is returned at the time of create_table() API call (even before the window is created), the subsequent set_window() call actually opens the DMA window. On pSeries, the DMA start address for 32-bit window is known from the 'ibm,dma-window' DT property. However, the 64-bit window start address is not known until the create-pe-dma RTAS call is made. So, the create_table() which returns the DMA window start address actually opens the DMA window and returns the DMA start address as returned by the Hypervisor for the create-pe-dma RTAS call. * The reset-pe-dma RTAS call resets the DMA windows and restores the default DMA window, however it does not clear the TCE table entries if there are any. In case of ownership transfer from platform domain which used direct mapping, the patch chooses remove-pe-dma instead of reset-pe for the 64-bit window intentionally so that the clear_dma_window() is called. Other than the DMA window management changes mentioned above, the patch also brings back the userspace view for the single level TCE as it existed before commit 090bad39b237a ("powerpc/powernv: Add indirect levels to it_userspace") along with the relavent refactoring. Signed-off-by: Shivaprasad G Bhat Signed-off-by: Michael Ellerman Link: https://msgid.link/171923275958.1397.907964437142542242.stgit@linux.ibm.com Signed-off-by: Shuai Xue [ Jay Chen: rebase conflicts in ../arch/powerpc/platforms/pseries/iommu.c ] Signed-off-by: Jay Chen --- arch/powerpc/include/asm/iommu.h | 4 +- arch/powerpc/kernel/iommu.c | 4 +- arch/powerpc/platforms/powernv/pci-ioda.c | 6 +- arch/powerpc/platforms/pseries/iommu.c | 628 ++++++++++++++++++---- 4 files changed, 543 insertions(+), 99 deletions(-) diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index b2fe6e7f81d6..6ce13ef3e37d 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -181,9 +181,9 @@ struct iommu_table_group_ops { long (*unset_window)(struct iommu_table_group *table_group, int num); /* Switch ownership from platform code to external user (e.g. VFIO) */ - long (*take_ownership)(struct iommu_table_group *table_group); + long (*take_ownership)(struct iommu_table_group *table_group, struct device *dev); /* Switch ownership from external user (e.g. VFIO) back to core */ - void (*release_ownership)(struct iommu_table_group *table_group); + void (*release_ownership)(struct iommu_table_group *table_group, struct device *dev); }; struct iommu_table_group_link { diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 32059b392cfa..5c953c0211b3 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1171,7 +1171,7 @@ spapr_tce_platform_iommu_attach_dev(struct iommu_domain *platform_domain, * The domain being set to PLATFORM from earlier * BLOCKED. The table_group ownership has to be released. */ - table_group->ops->release_ownership(table_group); + table_group->ops->release_ownership(table_group, dev); iommu_group_put(grp); return 0; @@ -1199,7 +1199,7 @@ spapr_tce_blocked_iommu_attach_dev(struct iommu_domain *platform_domain, * also sets the dma_api ops */ table_group = iommu_group_get_iommudata(grp); - ret = table_group->ops->take_ownership(table_group); + ret = table_group->ops->take_ownership(table_group, dev); iommu_group_put(grp); return ret; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 28fac4770073..d557bce987ca 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1537,7 +1537,8 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus) } } -static long pnv_ioda2_take_ownership(struct iommu_table_group *table_group) +static long pnv_ioda2_take_ownership(struct iommu_table_group *table_group, + struct device *dev __maybe_unused) { struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, table_group); @@ -1562,7 +1563,8 @@ static long pnv_ioda2_take_ownership(struct iommu_table_group *table_group) return 0; } -static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group) +static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group, + struct device *dev __maybe_unused) { struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, table_group); diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 6c42700e7a2f..183c1fada5dc 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -54,57 +55,6 @@ enum { DDW_EXT_QUERY_OUT_SIZE = 2 }; -static int iommu_take_ownership(struct iommu_table *tbl) -{ - unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; - int ret = 0; - - /* - * VFIO does not control TCE entries allocation and the guest - * can write new TCEs on top of existing ones so iommu_tce_build() - * must be able to release old pages. This functionality - * requires exchange() callback defined so if it is not - * implemented, we disallow taking ownership over the table. - */ - if (!tbl->it_ops->xchg_no_kill) - return -EINVAL; - - spin_lock_irqsave(&tbl->large_pool.lock, flags); - for (i = 0; i < tbl->nr_pools; i++) - spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock); - - if (iommu_table_in_use(tbl)) { - pr_err("iommu_tce: it_map is not empty"); - ret = -EBUSY; - } else { - memset(tbl->it_map, 0xff, sz); - } - - for (i = 0; i < tbl->nr_pools; i++) - spin_unlock(&tbl->pools[i].lock); - spin_unlock_irqrestore(&tbl->large_pool.lock, flags); - - return ret; -} - -static void iommu_release_ownership(struct iommu_table *tbl) -{ - unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; - - spin_lock_irqsave(&tbl->large_pool.lock, flags); - for (i = 0; i < tbl->nr_pools; i++) - spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock); - - memset(tbl->it_map, 0, sz); - - iommu_table_reserve_pages(tbl, tbl->it_reserved_start, - tbl->it_reserved_end); - - for (i = 0; i < tbl->nr_pools; i++) - spin_unlock(&tbl->pools[i].lock); - spin_unlock_irqrestore(&tbl->large_pool.lock, flags); -} - static struct iommu_table *iommu_pseries_alloc_table(int node) { struct iommu_table *tbl; @@ -196,7 +146,7 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index, } -static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages) +static void tce_clear_pSeries(struct iommu_table *tbl, long index, long npages) { __be64 *tcep; @@ -215,6 +165,37 @@ static unsigned long tce_get_pseries(struct iommu_table *tbl, long index) return be64_to_cpu(*tcep); } +static long pseries_tce_iommu_userspace_view_alloc(struct iommu_table *tbl) +{ + unsigned long cb = ALIGN(sizeof(tbl->it_userspace[0]) * tbl->it_size, PAGE_SIZE); + unsigned long *uas; + + if (tbl->it_indirect_levels) /* Impossible */ + return -EPERM; + + WARN_ON(tbl->it_userspace); + + uas = vzalloc(cb); + if (!uas) + return -ENOMEM; + + tbl->it_userspace = (__be64 *) uas; + + return 0; +} + +static void tce_iommu_userspace_view_free(struct iommu_table *tbl) +{ + vfree(tbl->it_userspace); + tbl->it_userspace = NULL; +} + +static void tce_free_pSeries(struct iommu_table *tbl) +{ + if (!tbl->it_userspace) + tce_iommu_userspace_view_free(tbl); +} + static void tce_free_pSeriesLP(unsigned long liobn, long, long, long); static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long); @@ -629,7 +610,7 @@ struct iommu_table_ops iommu_table_lpar_multi_ops; struct iommu_table_ops iommu_table_pseries_ops = { .set = tce_build_pSeries, - .clear = tce_free_pSeries, + .clear = tce_clear_pSeries, .get = tce_get_pseries }; @@ -738,17 +719,45 @@ static int tce_exchange_pseries(struct iommu_table *tbl, long index, unsigned return rc; } + +static __be64 *tce_useraddr_pSeriesLP(struct iommu_table *tbl, long index, + bool __always_unused alloc) +{ + return tbl->it_userspace ? &tbl->it_userspace[index - tbl->it_offset] : NULL; +} #endif struct iommu_table_ops iommu_table_lpar_multi_ops = { .set = tce_buildmulti_pSeriesLP, #ifdef CONFIG_IOMMU_API .xchg_no_kill = tce_exchange_pseries, + .useraddrptr = tce_useraddr_pSeriesLP, #endif .clear = tce_freemulti_pSeriesLP, - .get = tce_get_pSeriesLP + .get = tce_get_pSeriesLP, + .free = tce_free_pSeries }; +/* + * When the DMA window properties might have been removed, + * the parent node has the table_group setup on it. + */ +static struct device_node *pci_dma_find_parent_node(struct pci_dev *dev, + struct iommu_table_group *table_group) +{ + struct device_node *dn = pci_device_to_OF_node(dev); + struct pci_dn *rpdn; + + for (; dn && PCI_DN(dn); dn = dn->parent) { + rpdn = PCI_DN(dn); + + if (table_group == rpdn->table_group) + return dn; + } + + return NULL; +} + /* * Find nearest ibm,dma-window (default DMA window) or direct DMA window or * dynamic 64bit DMA window, walking up the device tree. @@ -963,7 +972,7 @@ static void __remove_dma_window(struct device_node *np, u32 *ddw_avail, u64 liob } static void remove_dma_window(struct device_node *np, u32 *ddw_avail, - struct property *win) + struct property *win, bool cleanup) { struct dynamic_dma_window_prop *dwp; u64 liobn; @@ -971,11 +980,44 @@ static void remove_dma_window(struct device_node *np, u32 *ddw_avail, dwp = win->value; liobn = (u64)be32_to_cpu(dwp->liobn); - clean_dma_window(np, dwp); + if (cleanup) + clean_dma_window(np, dwp); __remove_dma_window(np, ddw_avail, liobn); } -static int remove_ddw(struct device_node *np, bool remove_prop, const char *win_name) +static void copy_property(struct device_node *pdn, const char *from, const char *to) +{ + struct property *src, *dst; + + src = of_find_property(pdn, from, NULL); + if (!src) + return; + + dst = kzalloc(sizeof(*dst), GFP_KERNEL); + if (!dst) + return; + + dst->name = kstrdup(to, GFP_KERNEL); + dst->value = kmemdup(src->value, src->length, GFP_KERNEL); + dst->length = src->length; + if (!dst->name || !dst->value) + return; + + if (of_add_property(pdn, dst)) { + pr_err("Unable to add DMA window property for %pOF", pdn); + goto free_prop; + } + + return; + +free_prop: + kfree(dst->name); + kfree(dst->value); + kfree(dst); +} + +static int remove_dma_window_named(struct device_node *np, bool remove_prop, const char *win_name, + bool cleanup) { struct property *win; u32 ddw_avail[DDW_APPLICABLE_SIZE]; @@ -990,13 +1032,20 @@ static int remove_ddw(struct device_node *np, bool remove_prop, const char *win_ if (ret) return 0; - if (win->length >= sizeof(struct dynamic_dma_window_prop)) - remove_dma_window(np, ddw_avail, win); + remove_dma_window(np, ddw_avail, win, cleanup); if (!remove_prop) return 0; + /* Default window property if removed is lost as reset-pe doesn't restore it. + * Though FDT has a copy of it, the DLPAR hotplugged devices will not have a + * node on FDT until next reboot. So, back it up. + */ + if ((strcmp(win_name, "ibm,dma-window") == 0) && + !of_find_property(np, "ibm,dma-window-saved", NULL)) + copy_property(np, win_name, "ibm,dma-window-saved"); + ret = of_remove_property(np, win); if (ret) pr_warn("%pOF: failed to remove DMA window property: %d\n", @@ -1054,7 +1103,7 @@ static void find_existing_ddw_windows_named(const char *name) for_each_node_with_property(pdn, name) { dma64 = of_get_property(pdn, name, &len); if (!dma64 || len < sizeof(*dma64)) { - remove_ddw(pdn, true, name); + remove_dma_window_named(pdn, true, name, true); continue; } @@ -1427,7 +1476,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) if (reset_win_ext) goto out_failed; - remove_dma_window(pdn, ddw_avail, default_win); + remove_dma_window(pdn, ddw_avail, default_win, true); default_win_removed = true; /* Query again, to check if the window is available */ @@ -1565,7 +1614,8 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) if (default_win_removed) { iommu_tce_table_put(pci->table_group->tables[0]); pci->table_group->tables[0] = NULL; - + if (!of_find_property(pdn, "ibm,dma-window-saved", NULL)) + copy_property(pdn, "ibm,dma-window", "ibm,dma-window-saved"); /* default_win is valid here because default_win_removed == true */ of_remove_property(pdn, default_win); dev_info(&dev->dev, "Removed default DMA window for %pOF\n", pdn); @@ -1775,24 +1825,326 @@ static unsigned long spapr_tce_get_table_size(__u32 page_shift, return size; } +static struct pci_dev *iommu_group_get_first_pci_dev(struct iommu_group *group) +{ + struct pci_dev *pdev = NULL; + int ret; + + /* No IOMMU group ? */ + if (!group) + return NULL; + + ret = iommu_group_for_each_dev(group, &pdev, dev_has_iommu_table); + if (!ret || !pdev) + return NULL; + return pdev; +} + +static void restore_default_dma_window(struct pci_dev *pdev, struct device_node *pdn) +{ + reset_dma_window(pdev, pdn); + copy_property(pdn, "ibm,dma-window-saved", "ibm,dma-window"); +} + +static long remove_dynamic_dma_windows(struct pci_dev *pdev, struct device_node *pdn) +{ + struct pci_dn *pci = PCI_DN(pdn); + struct dma_win *window; + bool direct_mapping; + int len; + + if (find_existing_ddw(pdn, &pdev->dev.archdata.dma_offset, &len, &direct_mapping)) { + remove_dma_window_named(pdn, true, direct_mapping ? + DIRECT64_PROPNAME : DMA64_PROPNAME, true); + if (!direct_mapping) { + WARN_ON(!pci->table_group->tables[0] && !pci->table_group->tables[1]); + + if (pci->table_group->tables[1]) { + iommu_tce_table_put(pci->table_group->tables[1]); + pci->table_group->tables[1] = NULL; + } else if (pci->table_group->tables[0]) { + /* Default window was removed and only the DDW exists */ + iommu_tce_table_put(pci->table_group->tables[0]); + pci->table_group->tables[0] = NULL; + } + } + spin_lock(&dma_win_list_lock); + list_for_each_entry(window, &dma_win_list, list) { + if (window->device == pdn) { + list_del(&window->list); + kfree(window); + break; + } + } + spin_unlock(&dma_win_list_lock); + } + + return 0; +} + +static long pseries_setup_default_iommu_config(struct iommu_table_group *table_group, + struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + const __be32 *default_prop; + long liobn, offset, size; + struct device_node *pdn; + struct iommu_table *tbl; + struct pci_dn *pci; + + pdn = pci_dma_find_parent_node(pdev, table_group); + if (!pdn || !PCI_DN(pdn)) { + dev_warn(&pdev->dev, "No table_group configured for the node %pOF\n", pdn); + return -1; + } + pci = PCI_DN(pdn); + + /* The default window is restored if not present already on removal of DDW. + * However, if used by VFIO SPAPR sub driver, the user's order of removal of + * windows might have been different to not leading to auto restoration, + * suppose the DDW was removed first followed by the default one. + * So, restore the default window with reset-pe-dma call explicitly. + */ + restore_default_dma_window(pdev, pdn); + + default_prop = of_get_property(pdn, "ibm,dma-window", NULL); + of_parse_dma_window(pdn, default_prop, &liobn, &offset, &size); + tbl = iommu_pseries_alloc_table(pci->phb->node); + if (!tbl) { + dev_err(&pdev->dev, "couldn't create new IOMMU table\n"); + return -1; + } + + iommu_table_setparms_common(tbl, pci->phb->bus->number, liobn, offset, + size, IOMMU_PAGE_SHIFT_4K, NULL, + &iommu_table_lpar_multi_ops); + iommu_init_table(tbl, pci->phb->node, 0, 0); + + pci->table_group->tables[0] = tbl; + set_iommu_table_base(&pdev->dev, tbl); + + return 0; +} + +static bool is_default_window_request(struct iommu_table_group *table_group, __u32 page_shift, + __u64 window_size) +{ + if ((window_size <= table_group->tce32_size) && + (page_shift == IOMMU_PAGE_SHIFT_4K)) + return true; + + return false; +} + static long spapr_tce_create_table(struct iommu_table_group *table_group, int num, __u32 page_shift, __u64 window_size, __u32 levels, struct iommu_table **ptbl) { - struct iommu_table *tbl = table_group->tables[0]; - - if (num > 0) - return -EPERM; + struct pci_dev *pdev = iommu_group_get_first_pci_dev(table_group->group); + u32 ddw_avail[DDW_APPLICABLE_SIZE]; + struct ddw_create_response create; + unsigned long liobn, offset, size; + unsigned long start = 0, end = 0; + struct ddw_query_response query; + const __be32 *default_prop; + struct failed_ddw_pdn *fpdn; + unsigned int window_shift; + struct device_node *pdn; + struct iommu_table *tbl; + struct dma_win *window; + struct property *win64; + struct pci_dn *pci; + u64 win_addr; + int len, i; + long ret; - if (tbl->it_page_shift != page_shift || - tbl->it_size != (window_size >> page_shift) || - tbl->it_indirect_levels != levels - 1) + if (!is_power_of_2(window_size) || levels > 1) return -EINVAL; + window_shift = order_base_2(window_size); + + mutex_lock(&dma_win_init_mutex); + + ret = -ENODEV; + + pdn = pci_dma_find_parent_node(pdev, table_group); + if (!pdn || !PCI_DN(pdn)) { /* Niether of 32s|64-bit exist! */ + dev_warn(&pdev->dev, "No dma-windows exist for the node %pOF\n", pdn); + goto out_failed; + } + pci = PCI_DN(pdn); + + /* If the enable DDW failed for the pdn, dont retry! */ + list_for_each_entry(fpdn, &failed_ddw_pdn_list, list) { + if (fpdn->pdn == pdn) { + dev_info(&pdev->dev, "%pOF in failed DDW device list\n", pdn); + goto out_unlock; + } + } + + tbl = iommu_pseries_alloc_table(pci->phb->node); + if (!tbl) { + dev_dbg(&pdev->dev, "couldn't create new IOMMU table\n"); + goto out_unlock; + } + + if (num == 0) { + bool direct_mapping; + /* The request is not for default window? Ensure there is no DDW window already */ + if (!is_default_window_request(table_group, page_shift, window_size)) { + if (find_existing_ddw(pdn, &pdev->dev.archdata.dma_offset, &len, + &direct_mapping)) { + dev_warn(&pdev->dev, "%pOF: 64-bit window already present.", pdn); + ret = -EPERM; + goto out_unlock; + } + } else { + /* Request is for Default window, ensure there is no DDW if there is a + * need to reset. reset-pe otherwise removes the DDW also + */ + default_prop = of_get_property(pdn, "ibm,dma-window", NULL); + if (!default_prop) { + if (find_existing_ddw(pdn, &pdev->dev.archdata.dma_offset, &len, + &direct_mapping)) { + dev_warn(&pdev->dev, "%pOF: Attempt to create window#0 when 64-bit window is present. Preventing the attempt as that would destroy the 64-bit window", + pdn); + ret = -EPERM; + goto out_unlock; + } + + restore_default_dma_window(pdev, pdn); + + default_prop = of_get_property(pdn, "ibm,dma-window", NULL); + of_parse_dma_window(pdn, default_prop, &liobn, &offset, &size); + /* Limit the default window size to window_size */ + iommu_table_setparms_common(tbl, pci->phb->bus->number, liobn, + offset, 1UL << window_shift, + IOMMU_PAGE_SHIFT_4K, NULL, + &iommu_table_lpar_multi_ops); + iommu_init_table(tbl, pci->phb->node, start, end); + + table_group->tables[0] = tbl; + + mutex_unlock(&dma_win_init_mutex); + + goto exit; + } + } + } + + ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable", + &ddw_avail[0], DDW_APPLICABLE_SIZE); + if (ret) { + dev_info(&pdev->dev, "ibm,ddw-applicable not found\n"); + goto out_failed; + } + ret = -ENODEV; + + pr_err("%s: Calling query %pOF\n", __func__, pdn); + ret = query_ddw(pdev, ddw_avail, &query, pdn); + if (ret) + goto out_failed; + ret = -ENODEV; + + len = window_shift; + if (query.largest_available_block < (1ULL << (len - page_shift))) { + dev_dbg(&pdev->dev, "can't map window 0x%llx with %llu %llu-sized pages\n", + 1ULL << len, query.largest_available_block, + 1ULL << page_shift); + ret = -EINVAL; /* Retry with smaller window size */ + goto out_unlock; + } + + if (create_ddw(pdev, ddw_avail, &create, page_shift, len)) { + pr_err("%s: Create ddw failed %pOF\n", __func__, pdn); + goto out_failed; + } + + win_addr = ((u64)create.addr_hi << 32) | create.addr_lo; + win64 = ddw_property_create(DMA64_PROPNAME, create.liobn, win_addr, page_shift, len); + if (!win64) + goto remove_window; + + ret = of_add_property(pdn, win64); + if (ret) { + dev_err(&pdev->dev, "unable to add DMA window property for %pOF: %ld", pdn, ret); + goto free_property; + } + ret = -ENODEV; + + window = ddw_list_new_entry(pdn, win64->value); + if (!window) + goto remove_property; + + window->direct = false; + + for (i = 0; i < ARRAY_SIZE(pci->phb->mem_resources); i++) { + const unsigned long mask = IORESOURCE_MEM_64 | IORESOURCE_MEM; + + /* Look for MMIO32 */ + if ((pci->phb->mem_resources[i].flags & mask) == IORESOURCE_MEM) { + start = pci->phb->mem_resources[i].start; + end = pci->phb->mem_resources[i].end; + break; + } + } + + /* New table for using DDW instead of the default DMA window */ + iommu_table_setparms_common(tbl, pci->phb->bus->number, create.liobn, win_addr, + 1UL << len, page_shift, NULL, &iommu_table_lpar_multi_ops); + iommu_init_table(tbl, pci->phb->node, start, end); + + pci->table_group->tables[num] = tbl; + set_iommu_table_base(&pdev->dev, tbl); + pdev->dev.archdata.dma_offset = win_addr; + + spin_lock(&dma_win_list_lock); + list_add(&window->list, &dma_win_list); + spin_unlock(&dma_win_list_lock); + + mutex_unlock(&dma_win_init_mutex); + + goto exit; + +remove_property: + of_remove_property(pdn, win64); +free_property: + kfree(win64->name); + kfree(win64->value); + kfree(win64); +remove_window: + __remove_dma_window(pdn, ddw_avail, create.liobn); + +out_failed: + fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL); + if (!fpdn) + goto out_unlock; + fpdn->pdn = pdn; + list_add(&fpdn->list, &failed_ddw_pdn_list); + +out_unlock: + mutex_unlock(&dma_win_init_mutex); + + return ret; +exit: + /* Allocate the userspace view */ + pseries_tce_iommu_userspace_view_alloc(tbl); + tbl->it_allocated_size = spapr_tce_get_table_size(page_shift, window_size, levels); + *ptbl = iommu_tce_table_get(tbl); + return 0; } +static bool is_default_window_table(struct iommu_table_group *table_group, struct iommu_table *tbl) +{ + if (((tbl->it_size << tbl->it_page_shift) <= table_group->tce32_size) && + (tbl->it_page_shift == IOMMU_PAGE_SHIFT_4K)) + return true; + + return false; +} + static long spapr_tce_set_window(struct iommu_table_group *table_group, int num, struct iommu_table *tbl) { @@ -1801,43 +2153,133 @@ static long spapr_tce_set_window(struct iommu_table_group *table_group, static long spapr_tce_unset_window(struct iommu_table_group *table_group, int num) { - return 0; + struct pci_dev *pdev = iommu_group_get_first_pci_dev(table_group->group); + struct device_node *dn = pci_device_to_OF_node(pdev), *pdn; + struct iommu_table *tbl = table_group->tables[num]; + struct failed_ddw_pdn *fpdn; + struct dma_win *window; + const char *win_name; + int ret = -ENODEV; + + mutex_lock(&dma_win_init_mutex); + + if ((num == 0) && is_default_window_table(table_group, tbl)) + win_name = "ibm,dma-window"; + else + win_name = DMA64_PROPNAME; + + pdn = pci_dma_find(dn, NULL); + if (!pdn || !PCI_DN(pdn)) { /* Niether of 32s|64-bit exist! */ + dev_warn(&pdev->dev, "No dma-windows exist for the node %pOF\n", pdn); + goto out_failed; + } + + /* Dont clear the TCEs, User should have done it */ + if (remove_dma_window_named(pdn, true, win_name, false)) { + pr_err("%s: The existing DDW removal failed for node %pOF\n", __func__, pdn); + goto out_failed; /* Could not remove it either! */ + } + + if (strcmp(win_name, DMA64_PROPNAME) == 0) { + spin_lock(&dma_win_list_lock); + list_for_each_entry(window, &dma_win_list, list) { + if (window->device == pdn) { + list_del(&window->list); + kfree(window); + break; + } + } + spin_unlock(&dma_win_list_lock); + } + + iommu_tce_table_put(table_group->tables[num]); + table_group->tables[num] = NULL; + + ret = 0; + + goto out_unlock; + +out_failed: + fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL); + if (!fpdn) + goto out_unlock; + fpdn->pdn = pdn; + list_add(&fpdn->list, &failed_ddw_pdn_list); + +out_unlock: + mutex_unlock(&dma_win_init_mutex); + + return ret; } -static long spapr_tce_take_ownership(struct iommu_table_group *table_group) +static long spapr_tce_take_ownership(struct iommu_table_group *table_group, struct device *dev) { - int i, j, rc = 0; + struct iommu_table *tbl = table_group->tables[0]; + struct pci_dev *pdev = to_pci_dev(dev); + struct device_node *dn = pci_device_to_OF_node(pdev); + struct device_node *pdn; - for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { - struct iommu_table *tbl = table_group->tables[i]; + /* SRIOV VFs using direct map by the host driver OR multifunction devices + * where the ownership was taken on the attempt by the first function + */ + if (!tbl && (table_group->max_dynamic_windows_supported != 1)) + return 0; - if (!tbl || !tbl->it_map) - continue; + mutex_lock(&dma_win_init_mutex); - rc = iommu_take_ownership(tbl); - if (!rc) - continue; + pdn = pci_dma_find(dn, NULL); + if (!pdn || !PCI_DN(pdn)) { /* Niether of 32s|64-bit exist! */ + dev_warn(&pdev->dev, "No dma-windows exist for the node %pOF\n", pdn); + mutex_unlock(&dma_win_init_mutex); + return -1; + } - for (j = 0; j < i; ++j) - iommu_release_ownership(table_group->tables[j]); - return rc; + /* + * Though rtas call reset-pe removes the DDW, it doesn't clear the entries on the table + * if there are any. In case of direct map, the entries will be left over, which + * is fine for PEs with 2 DMA windows where the second window is created with create-pe + * at which point the table is cleared. However, on VFs having only one DMA window, the + * default window would end up seeing the entries left over from the direct map done + * on the second window. So, remove the ddw explicitly so that clean_dma_window() + * cleans up the entries if any. + */ + if (remove_dynamic_dma_windows(pdev, pdn)) { + dev_warn(&pdev->dev, "The existing DDW removal failed for node %pOF\n", pdn); + mutex_unlock(&dma_win_init_mutex); + return -1; } + + /* The table_group->tables[0] is not null now, it must be the default window + * Remove it, let the userspace create it as it needs. + */ + if (table_group->tables[0]) { + remove_dma_window_named(pdn, true, "ibm,dma-window", true); + iommu_tce_table_put(tbl); + table_group->tables[0] = NULL; + } + set_iommu_table_base(dev, NULL); + + mutex_unlock(&dma_win_init_mutex); + return 0; } -static void spapr_tce_release_ownership(struct iommu_table_group *table_group) +static void spapr_tce_release_ownership(struct iommu_table_group *table_group, struct device *dev) { - int i; + struct iommu_table *tbl = table_group->tables[0]; - for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { - struct iommu_table *tbl = table_group->tables[i]; + if (tbl) { /* Default window already restored */ + return; + } - if (!tbl) - continue; + mutex_lock(&dma_win_init_mutex); - if (tbl->it_map) - iommu_release_ownership(tbl); - } + /* Restore the default window */ + pseries_setup_default_iommu_config(table_group, dev); + + mutex_unlock(&dma_win_init_mutex); + + return; } static struct iommu_table_group_ops spapr_tce_table_group_ops = { @@ -1917,8 +2359,8 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti * we have to remove the property when releasing * the device node. */ - if (remove_ddw(np, false, DIRECT64_PROPNAME)) - remove_ddw(np, false, DMA64_PROPNAME); + if (remove_dma_window_named(np, false, DIRECT64_PROPNAME, true)) + remove_dma_window_named(np, false, DMA64_PROPNAME, true); if (pci && pci->table_group) iommu_pseries_free_group(pci->table_group, -- Gitee From ce3fd786030e4f5e8ace0dad4919610b185aa6f8 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Thu, 27 Jun 2024 12:00:55 +0100 Subject: [PATCH 069/143] iommufd/selftest: Fix dirty bitmap tests with u8 bitmaps ANBZ: #13617 commit ec61f820a2ff07d1717583bd57d6ee45d2763a6e upstream. With 64k base pages, the first 128k iova length test requires less than a byte for a bitmap, exposing a bug in the tests that assume that bitmaps are at least a byte. Rather than dealing with bytes, have _test_mock_dirty_bitmaps() pass the number of bits. The caller functions are adjusted to also use bits as well, and converting to bytes when clearing, allocating and freeing the bitmap. Link: https://lore.kernel.org/r/20240627110105.62325-2-joao.m.martins@oracle.com Reported-by: Matt Ochs Fixes: a9af47e382a4 ("iommufd/selftest: Test IOMMU_HWPT_GET_DIRTY_BITMAP") Signed-off-by: Joao Martins Reviewed-by: Kevin Tian Tested-by: Matt Ochs Signed-off-by: Jason Gunthorpe Signed-off-by: Shuai Xue --- tools/testing/selftests/iommu/iommufd.c | 10 +++++----- tools/testing/selftests/iommu/iommufd_utils.h | 6 ++++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index c8d0f13c91dd..fcc2342ffe8d 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -1722,6 +1722,7 @@ FIXTURE_VARIANT(iommufd_dirty_tracking) FIXTURE_SETUP(iommufd_dirty_tracking) { + unsigned long size; int mmap_flags; void *vrc; int rc; @@ -1749,12 +1750,11 @@ FIXTURE_SETUP(iommufd_dirty_tracking) assert(vrc == self->buffer); self->page_size = MOCK_PAGE_SIZE; - self->bitmap_size = - variant->buffer_size / self->page_size / BITS_PER_BYTE; + self->bitmap_size = variant->buffer_size / self->page_size; /* Provision with an extra (PAGE_SIZE) for the unaligned case */ - rc = posix_memalign(&self->bitmap, PAGE_SIZE, - self->bitmap_size + PAGE_SIZE); + size = DIV_ROUND_UP(self->bitmap_size, BITS_PER_BYTE); + rc = posix_memalign(&self->bitmap, PAGE_SIZE, size + PAGE_SIZE); assert(!rc); assert(self->bitmap); assert((uintptr_t)self->bitmap % PAGE_SIZE == 0); @@ -1775,7 +1775,7 @@ FIXTURE_SETUP(iommufd_dirty_tracking) FIXTURE_TEARDOWN(iommufd_dirty_tracking) { munmap(self->buffer, variant->buffer_size); - munmap(self->bitmap, self->bitmap_size); + munmap(self->bitmap, DIV_ROUND_UP(self->bitmap_size, BITS_PER_BYTE)); teardown_iommufd(self->fd, _metadata); } diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index 8d2b46b2114d..c612fbf0195b 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -22,6 +22,8 @@ #define BIT_MASK(nr) (1UL << ((nr) % __BITS_PER_LONG)) #define BIT_WORD(nr) ((nr) / __BITS_PER_LONG) +#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) + static inline void set_bit(unsigned int nr, unsigned long *addr) { unsigned long mask = BIT_MASK(nr); @@ -346,12 +348,12 @@ static int _test_cmd_mock_domain_set_dirty(int fd, __u32 hwpt_id, size_t length, static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length, __u64 iova, size_t page_size, size_t pte_page_size, __u64 *bitmap, - __u64 bitmap_size, __u32 flags, + __u64 nbits, __u32 flags, struct __test_metadata *_metadata) { unsigned long npte = pte_page_size / page_size, pteset = 2 * npte; - unsigned long nbits = bitmap_size * BITS_PER_BYTE; unsigned long j, i, nr = nbits / pteset ?: 1; + unsigned long bitmap_size = DIV_ROUND_UP(nbits, BITS_PER_BYTE); __u64 out_dirty = 0; /* Mark all even bits as dirty in the mock domain */ -- Gitee From c0ee0d969bfd7b2b2e8f07c6e113902b7a086fc6 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Thu, 27 Jun 2024 12:00:56 +0100 Subject: [PATCH 070/143] iommufd/selftest: Fix iommufd_test_dirty() to handle Fixes: a9af47e382a4 ("iommufd/selftest: Test IOMMU_HWPT_GET_DIRTY_BITMAP") Signed-off-by: Joao Martins Reviewed-by: Kevin Tian Tested-by: Matt Ochs Signed-off-by: Jason Gunthorpe Signed-off-by: Shuai Xue --- drivers/iommu/iommufd/selftest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index 7a2199470f31..654ed3339095 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -1334,7 +1334,7 @@ static int iommufd_test_dirty(struct iommufd_ucmd *ucmd, unsigned int mockpt_id, } max = length / page_size; - bitmap_size = max / BITS_PER_BYTE; + bitmap_size = DIV_ROUND_UP(max, BITS_PER_BYTE); tmp = kvzalloc(bitmap_size, GFP_KERNEL_ACCOUNT); if (!tmp) { -- Gitee From b39d26cf98333990ee529c664f722a4b5b47d852 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Thu, 27 Jun 2024 12:00:57 +0100 Subject: [PATCH 071/143] iommufd/selftest: Add tests for <= u8 bitmap sizes ANBZ: #13617 commit 33335584eb78c0bda21ff8d759c39e035abb48ac upstream. Add more tests for bitmaps smaller than or equal to an u8, though skip the tests if the IOVA buffer size is smaller than the mock page size. Link: https://lore.kernel.org/r/20240627110105.62325-4-joao.m.martins@oracle.com Signed-off-by: Joao Martins Reviewed-by: Kevin Tian Tested-by: Matt Ochs Signed-off-by: Jason Gunthorpe Signed-off-by: Shuai Xue --- tools/testing/selftests/iommu/iommufd.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index fcc2342ffe8d..fafb1814bdc7 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -1727,6 +1727,12 @@ FIXTURE_SETUP(iommufd_dirty_tracking) void *vrc; int rc; + if (variant->buffer_size < MOCK_PAGE_SIZE) { + SKIP(return, + "Skipping buffer_size=%lu, less than MOCK_PAGE_SIZE=%lu", + variant->buffer_size, MOCK_PAGE_SIZE); + } + self->fd = open("/dev/iommu", O_RDWR); ASSERT_NE(-1, self->fd); @@ -1779,6 +1785,18 @@ FIXTURE_TEARDOWN(iommufd_dirty_tracking) teardown_iommufd(self->fd, _metadata); } +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty8k) +{ + /* half of an u8 index bitmap */ + .buffer_size = 8UL * 1024UL, +}; + +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty16k) +{ + /* one u8 index bitmap */ + .buffer_size = 16UL * 1024UL, +}; + FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128k) { /* one u32 index bitmap */ -- Gitee From e929c719d163919a7c6f9905ee114f0d5ec683c9 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Thu, 27 Jun 2024 12:00:58 +0100 Subject: [PATCH 072/143] iommufd/selftest: Fix tests to use MOCK_PAGE_SIZE based buffer sizes ANBZ: #13617 commit ffa3c799ce157493615f9f3c2b3c9ba602d320b9 upstream. commit a9af47e382a4 ("iommufd/selftest: Test IOMMU_HWPT_GET_DIRTY_BITMAP") added tests covering edge cases in the boundaries of iova bitmap. Although it used buffer sizes thinking in PAGE_SIZE (4K) as opposed to the MOCK_PAGE_SIZE (2K) that is used in iommufd mock selftests. This meant that isn't correctly exercising everything specifically the u32 and 4K bitmap test cases. Fix selftests buffer sizes to be based on mock page size. Link: https://lore.kernel.org/r/20240627110105.62325-5-joao.m.martins@oracle.com Reported-by: Kevin Tian Closes: https://lore.kernel.org/linux-iommu/96efb6cf-a41c-420f-9673-2f0b682cac8c@oracle.com/ Fixes: a9af47e382a4 ("iommufd/selftest: Test IOMMU_HWPT_GET_DIRTY_BITMAP") Signed-off-by: Joao Martins Reviewed-by: Kevin Tian Tested-by: Matt Ochs Signed-off-by: Jason Gunthorpe Signed-off-by: Shuai Xue --- tools/testing/selftests/iommu/iommufd.c | 36 ++++++++++++------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index fafb1814bdc7..d79bd5fc08db 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -1797,47 +1797,47 @@ FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty16k) .buffer_size = 16UL * 1024UL, }; -FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128k) +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty64k) { /* one u32 index bitmap */ - .buffer_size = 128UL * 1024UL, + .buffer_size = 64UL * 1024UL, }; -FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256k) +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128k) { /* one u64 index bitmap */ - .buffer_size = 256UL * 1024UL, + .buffer_size = 128UL * 1024UL, }; -FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty640k) +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty320k) { /* two u64 index and trailing end bitmap */ - .buffer_size = 640UL * 1024UL, + .buffer_size = 320UL * 1024UL, }; -FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M) +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty64M) { - /* 4K bitmap (128M IOVA range) */ - .buffer_size = 128UL * 1024UL * 1024UL, + /* 4K bitmap (64M IOVA range) */ + .buffer_size = 64UL * 1024UL * 1024UL, }; -FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M_huge) +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty64M_huge) { - /* 4K bitmap (128M IOVA range) */ - .buffer_size = 128UL * 1024UL * 1024UL, + /* 4K bitmap (64M IOVA range) */ + .buffer_size = 64UL * 1024UL * 1024UL, .hugepages = true, }; -FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256M) +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M) { - /* 8K bitmap (256M IOVA range) */ - .buffer_size = 256UL * 1024UL * 1024UL, + /* 8K bitmap (128M IOVA range) */ + .buffer_size = 128UL * 1024UL * 1024UL, }; -FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256M_huge) +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M_huge) { - /* 8K bitmap (256M IOVA range) */ - .buffer_size = 256UL * 1024UL * 1024UL, + /* 8K bitmap (128M IOVA range) */ + .buffer_size = 128UL * 1024UL * 1024UL, .hugepages = true, }; -- Gitee From 87b09b3990b28a058d9c3bb3281f71f744bc1559 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Thu, 27 Jun 2024 12:00:59 +0100 Subject: [PATCH 073/143] iommufd/selftest: Do not record head iova to better match iommu drivers ANBZ: #13617 commit dceb5304d7263f72333d25e5940254f98b663010 upstream. Do not set a hugepage-aligned IOVA for incrementing an IOVA, to better match current IOMMU driver implementations. Keep the logic of clearing all IOPTE dirty bits for a whole hugepage, even if the range being dirtied starts from part of the hugepage. This is also similar to AMD driver (iommu v1 format) where IOMMU uses various subpage PTE data for dirty tracking (for non-standard page sizes). Link: https://lore.kernel.org/r/20240627110105.62325-6-joao.m.martins@oracle.com Signed-off-by: Joao Martins Reviewed-by: Kevin Tian Tested-by: Matt Ochs Signed-off-by: Jason Gunthorpe Signed-off-by: Shuai Xue --- drivers/iommu/iommufd/selftest.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index 654ed3339095..7a70a3e0fee6 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -266,8 +266,8 @@ static int mock_domain_read_and_clear_dirty(struct iommu_domain *domain, /* Clear dirty */ if (mock_test_and_clear_dirty(mock, head, pgsize, flags)) - iommu_dirty_bitmap_record(dirty, head, pgsize); - iova = head + pgsize; + iommu_dirty_bitmap_record(dirty, iova, pgsize); + iova += pgsize; } while (iova < end); return 0; -- Gitee From 02e96d0adffd618aaf438f38686a53503cde2c63 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Thu, 27 Jun 2024 12:01:00 +0100 Subject: [PATCH 074/143] iommufd/iova_bitmap: Check iova_bitmap_done() after set ahead ANBZ: #13617 commit 792583656f554e35383d6b2325371c8fe056a56b upstream. After iova_bitmap_set_ahead() returns it may be at the end of the range. Move iova_bitmap_set_ahead() earlier to avoid unnecessary attempt in trying to pin the next pages by reusing iova_bitmap_done() check. Fixes: 2780025e01e2 ("iommufd/iova_bitmap: Handle recording beyond the mapped pages") Link: https://lore.kernel.org/r/20240627110105.62325-7-joao.m.martins@oracle.com Signed-off-by: Joao Martins Reviewed-by: Kevin Tian Tested-by: Matt Ochs Signed-off-by: Jason Gunthorpe Signed-off-by: Shuai Xue --- drivers/iommu/iommufd/iova_bitmap.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iommufd/iova_bitmap.c b/drivers/iommu/iommufd/iova_bitmap.c index 89e6e11157f0..1c2faf61db8c 100644 --- a/drivers/iommu/iommufd/iova_bitmap.c +++ b/drivers/iommu/iommufd/iova_bitmap.c @@ -384,8 +384,6 @@ static int iova_bitmap_advance(struct iova_bitmap *bitmap) bitmap->mapped_base_index += count; iova_bitmap_put(bitmap); - if (iova_bitmap_done(bitmap)) - return 0; /* Iterate, set and skip any bits requested for next iteration */ if (bitmap->set_ahead_length) { @@ -396,6 +394,9 @@ static int iova_bitmap_advance(struct iova_bitmap *bitmap) return ret; } + if (iova_bitmap_done(bitmap)) + return 0; + /* When advancing the index we pin the next set of bitmap pages */ return iova_bitmap_get(bitmap); } -- Gitee From a0dc03892ac0dc4b4c9cff4cb375725b229bf99b Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Thu, 27 Jun 2024 12:01:01 +0100 Subject: [PATCH 075/143] iommufd/iova_bitmap: Cache mapped length in iova_bitmap_map struct ANBZ: #13617 commit a84c690e10ae03f1cddec908ac7f5068ceed67a8 upstream. The amount of IOVA mapped will be used more often in iova_bitmap_set() in preparation to dynamically iterate the bitmap. Cache said length to avoid having to calculate it all the time. Link: https://lore.kernel.org/r/20240627110105.62325-8-joao.m.martins@oracle.com Signed-off-by: Joao Martins Reviewed-by: Kevin Tian Tested-by: Matt Ochs Signed-off-by: Jason Gunthorpe Signed-off-by: Shuai Xue --- drivers/iommu/iommufd/iova_bitmap.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/iommu/iommufd/iova_bitmap.c b/drivers/iommu/iommufd/iova_bitmap.c index 1c2faf61db8c..de77cff26d0a 100644 --- a/drivers/iommu/iommufd/iova_bitmap.c +++ b/drivers/iommu/iommufd/iova_bitmap.c @@ -35,6 +35,9 @@ struct iova_bitmap_map { /* base IOVA representing bit 0 of the first page */ unsigned long iova; + /* mapped length */ + unsigned long length; + /* page size order that each bit granules to */ unsigned long pgshift; @@ -156,6 +159,8 @@ static unsigned long iova_bitmap_mapped_iova(struct iova_bitmap *bitmap) return bitmap->iova + iova_bitmap_index_to_offset(bitmap, skip); } +static unsigned long iova_bitmap_mapped_length(struct iova_bitmap *bitmap); + /* * Pins the bitmap user pages for the current range window. * This is internal to IOVA bitmap and called when advancing the @@ -206,6 +211,7 @@ static int iova_bitmap_get(struct iova_bitmap *bitmap) * aligned. */ mapped->pgoff = offset_in_page(addr); + mapped->length = iova_bitmap_mapped_length(bitmap); return 0; } -- Gitee From 0c9cb1c3bee3faa9050fbfe95a7df5128ba3851b Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Thu, 27 Jun 2024 12:01:02 +0100 Subject: [PATCH 076/143] iommufd/iova_bitmap: Move initial pinning to iova_bitmap_for_each() ANBZ: #13617 commit 781bc08797a2146400332acf2d7706793b51e20f upstream. The pinned pages are only relevant when it starts iterating the bitmap so defer that into iova_bitmap_for_each(). Link: https://lore.kernel.org/r/20240627110105.62325-9-joao.m.martins@oracle.com Signed-off-by: Joao Martins Reviewed-by: Kevin Tian Tested-by: Matt Ochs Signed-off-by: Jason Gunthorpe Signed-off-by: Shuai Xue --- drivers/iommu/iommufd/iova_bitmap.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/iommufd/iova_bitmap.c b/drivers/iommu/iommufd/iova_bitmap.c index de77cff26d0a..ba965e59e11b 100644 --- a/drivers/iommu/iommufd/iova_bitmap.c +++ b/drivers/iommu/iommufd/iova_bitmap.c @@ -269,9 +269,6 @@ struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length, goto err; } - rc = iova_bitmap_get(bitmap); - if (rc) - goto err; return bitmap; err: @@ -425,6 +422,10 @@ int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque, { int ret = 0; + ret = iova_bitmap_get(bitmap); + if (ret) + return ret; + for (; !iova_bitmap_done(bitmap) && !ret; ret = iova_bitmap_advance(bitmap)) { ret = fn(bitmap, iova_bitmap_mapped_iova(bitmap), -- Gitee From 39c079b89cd01713ce447c0917fa0867c80bdc23 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Thu, 27 Jun 2024 12:01:03 +0100 Subject: [PATCH 077/143] iommufd/iova_bitmap: Consolidate iova_bitmap_set exit conditionals ANBZ: #13617 commit 00fa1a89917fbc319909231e648439b26e8857af upstream. There's no need to have two conditionals when they are closely tied together. Move the setting of bitmap::set_ahead_length after it checks for ::pages array out of bounds access. Link: https://lore.kernel.org/r/20240627110105.62325-10-joao.m.martins@oracle.com Signed-off-by: Joao Martins Reviewed-by: Kevin Tian Tested-by: Matt Ochs Signed-off-by: Jason Gunthorpe Signed-off-by: Shuai Xue --- drivers/iommu/iommufd/iova_bitmap.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/iommufd/iova_bitmap.c b/drivers/iommu/iommufd/iova_bitmap.c index ba965e59e11b..8c9dd1eefc0e 100644 --- a/drivers/iommu/iommufd/iova_bitmap.c +++ b/drivers/iommu/iommufd/iova_bitmap.c @@ -465,18 +465,18 @@ void iova_bitmap_set(struct iova_bitmap *bitmap, last_bit - cur_bit + 1); void *kaddr; - if (unlikely(page_idx > last_page_idx)) + if (unlikely(page_idx > last_page_idx)) { + unsigned long left = + ((last_bit - cur_bit + 1) << mapped->pgshift); + + bitmap->set_ahead_length = left; break; + } kaddr = kmap_local_page(mapped->pages[page_idx]); bitmap_set(kaddr, offset, nbits); kunmap_local(kaddr); cur_bit += nbits; } while (cur_bit <= last_bit); - - if (unlikely(cur_bit <= last_bit)) { - bitmap->set_ahead_length = - ((last_bit - cur_bit + 1) << bitmap->mapped.pgshift); - } } EXPORT_SYMBOL_NS_GPL(iova_bitmap_set, IOMMUFD); -- Gitee From eef07914273df78ffb35b8ece8d4b77f08047229 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Thu, 27 Jun 2024 12:01:04 +0100 Subject: [PATCH 078/143] iommufd/iova_bitmap: Dynamic pinning on iova_bitmap_set() ANBZ: #13617 commit 7a7bba16244a5c55861d8fefea72cdbb8b05323e upstream. Today zerocopy iova bitmaps use a static iteration scheme where it walks the bitmap data in a max iteration size of 2M of bitmap of data at a time. That translates to a fixed window of IOVA space that can span up to 64G (e.g. base pages, x86). Here 'window' refers to the IOVA space represented by the bitmap data it is iterating. This static scheme is the ideal one where the reported page-size is the same as the one behind the dirty tracker. However, problems start to appear when the dirty tracker may dirty in many PTE sizes beyond or unaligned at the boundaries of the iteration window. Such is the case for the IOMMU and commit 2780025e01e2 ("iommufd/iova_bitmap: Handle recording beyond the mapped pages") tried to fix the problem by handling the PTEs that get dirty which surprass the end of the iteration. But the fix was incomplete and it didn't handle all the data structure issues namely: 1) when there's nothing to dirty but the end of the iteration IOVA range is a IOMMU hugepage PTE that crosses iterations, when it goes to the next iteration it finds the other end of the said hugepage but don't account that it had checked for that IOPTE already. iommu driver then walk the IOVA space as if it is a new page without accounting that it is past the start of a bigger page which ends up setting (future) dirty bits slightly offset-ed. Note that the partial ranges here are self induced due as a result of the fixed 'window' scheme being unaligned to this hugepage IOPTE. 2) on the same line of thinking between pinning pages of different iterations it could allow DMA to mark PTEs as dirty on the second part of this previously mentioned partial hugepage. This leads to marking part of the hugepage as dirty but still clearing IOPTE leading to missed dirty data. So to fix these problems more fundamentally and avoid future ones: instead of iterating the whole bitmap in fixed chunks, instead only pin the bitmap pages when it has dirty bits to set. The logic is simple in iova_bitmap_set(): check where the current iova range to be marked as dirty is pinned and pin the bitmap pages where to-be-recorded @iova starts if it's not. If it's partially mapped out of the whole set, continue pinning it and set bits until the whole dirty-size is covered. The latter is more relevant with AMD iommu pgtable v1 format where you can have up 64G/128G/256G page sizes and thus you can set 64G at a time. Code also gets simpler and easier to follow. Fixing this without changing this iteration scheme means changing iommu drivers to ignore any partial pages and not clear dirty bits, which is a bit hacky. Though getting to walk only part of a IOMMU hugepage is a self-induced due to this iteration scheme as it doesn't (and can't) align the iteration boundary to the huge IOPTE at the end. Thus it can't know what the hugepage size the iteration should align to until it walks the begin/end. Dynamically pinning adds some comparisons inside iova_bitmap_set() to check if something needs to be pinned if the IOVA range is out of range. Though it has the benefit that non-dirty IOVA ranges only walk page tables without needing to pin any bitmap pages. This dynamic scheme should be better for IOMMUs where upper layers don't need or know what PTE sizes IOVAs map into (and there could be more than one PTE size[*]) until they walk the IOMMU page tables. A follow-up change will remove the iteration logic. [*] Specially on AMD v1 iommu pgtable format where most powers of two are supported as page-size. Link: https://lore.kernel.org/linux-iommu/6b90f949-48da-4cb3-ad9a-ed54f1351a9a@oracle.com/ Fixes: 2780025e01e2 ("iommufd/iova_bitmap: Handle recording beyond the mapped pages") Link: https://lore.kernel.org/r/20240627110105.62325-11-joao.m.martins@oracle.com Signed-off-by: Joao Martins Reviewed-by: Kevin Tian Tested-by: Matt Ochs Signed-off-by: Jason Gunthorpe Signed-off-by: Shuai Xue --- drivers/iommu/iommufd/iova_bitmap.c | 73 ++++++++++++++++++++++++++--- 1 file changed, 66 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/iommufd/iova_bitmap.c b/drivers/iommu/iommufd/iova_bitmap.c index 8c9dd1eefc0e..d4d374395fad 100644 --- a/drivers/iommu/iommufd/iova_bitmap.c +++ b/drivers/iommu/iommufd/iova_bitmap.c @@ -119,6 +119,9 @@ struct iova_bitmap { /* length of the IOVA range set ahead the pinned pages */ unsigned long set_ahead_length; + + /* true if it using the iterator otherwise it pins dynamically */ + bool iterator; }; /* @@ -340,6 +343,17 @@ static unsigned long iova_bitmap_mapped_length(struct iova_bitmap *bitmap) return remaining; } +/* + * Returns true if [@iova..@iova+@length-1] is part of the mapped IOVA range. + */ +static bool iova_bitmap_mapped_range(struct iova_bitmap_map *mapped, + unsigned long iova, size_t length) +{ + return mapped->npages && + (iova >= mapped->iova && + (iova + length - 1) <= (mapped->iova + mapped->length - 1)); +} + /* * Returns true if there's not more data to iterate. */ @@ -374,6 +388,27 @@ static int iova_bitmap_set_ahead(struct iova_bitmap *bitmap, return ret; } +/* + * Advances to a selected range, releases the current pinned + * pages and pins the next set of bitmap pages. + * Returns 0 on success or otherwise errno. + */ +static int iova_bitmap_advance_to(struct iova_bitmap *bitmap, + unsigned long iova) +{ + unsigned long index; + + index = iova_bitmap_offset_to_index(bitmap, iova - bitmap->iova); + if (index >= bitmap->mapped_total_index) + return -EINVAL; + bitmap->mapped_base_index = index; + + iova_bitmap_put(bitmap); + + /* Pin the next set of bitmap pages */ + return iova_bitmap_get(bitmap); +} + /* * Advances to the next range, releases the current pinned * pages and pins the next set of bitmap pages. @@ -426,6 +461,7 @@ int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque, if (ret) return ret; + bitmap->iterator = true; for (; !iova_bitmap_done(bitmap) && !ret; ret = iova_bitmap_advance(bitmap)) { ret = fn(bitmap, iova_bitmap_mapped_iova(bitmap), @@ -433,6 +469,7 @@ int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque, if (ret) break; } + bitmap->iterator = false; return ret; } @@ -452,11 +489,28 @@ void iova_bitmap_set(struct iova_bitmap *bitmap, unsigned long iova, size_t length) { struct iova_bitmap_map *mapped = &bitmap->mapped; - unsigned long cur_bit = ((iova - mapped->iova) >> - mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE; - unsigned long last_bit = (((iova + length - 1) - mapped->iova) >> - mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE; - unsigned long last_page_idx = mapped->npages - 1; + unsigned long cur_bit, last_bit, last_page_idx; + +update_indexes: + if (unlikely(!bitmap->iterator && + !iova_bitmap_mapped_range(mapped, iova, length))) { + + /* + * The attempt to advance the base index to @iova + * may fail if it's out of bounds, or pinning the pages + * returns an error. + * + * It is a no-op if within a iova_bitmap_for_each() closure. + */ + if (iova_bitmap_advance_to(bitmap, iova)) + return; + } + + last_page_idx = mapped->npages - 1; + cur_bit = ((iova - mapped->iova) >> + mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE; + last_bit = (((iova + length - 1) - mapped->iova) >> + mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE; do { unsigned int page_idx = cur_bit / BITS_PER_PAGE; @@ -469,8 +523,13 @@ void iova_bitmap_set(struct iova_bitmap *bitmap, unsigned long left = ((last_bit - cur_bit + 1) << mapped->pgshift); - bitmap->set_ahead_length = left; - break; + if (bitmap->iterator) { + bitmap->set_ahead_length = left; + return; + } + iova += (length - left); + length = left; + goto update_indexes; } kaddr = kmap_local_page(mapped->pages[page_idx]); -- Gitee From 2a7c132b9a107683c0079f7b293e68668e8878bf Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Thu, 27 Jun 2024 12:01:05 +0100 Subject: [PATCH 079/143] iommufd/iova_bitmap: Remove iterator logic ANBZ: #13617 commit 53e6b65693b68519dcfd384280bfc3d34c7398e2 upstream. The newly introduced dynamic pinning/windowing greatly simplifies the code and there's no obvious performance advantage that has been identified that justifies maintinaing both schemes. Remove the iterator logic and have iova_bitmap_for_each() just invoke the callback with the total iova/length. Fixes: 2780025e01e2 ("iommufd/iova_bitmap: Handle recording beyond the mapped pages") Link: https://lore.kernel.org/r/20240627110105.62325-12-joao.m.martins@oracle.com Signed-off-by: Joao Martins Reviewed-by: Kevin Tian Tested-by: Matt Ochs Signed-off-by: Jason Gunthorpe Signed-off-by: Shuai Xue --- drivers/iommu/iommufd/iova_bitmap.c | 97 +---------------------------- 1 file changed, 2 insertions(+), 95 deletions(-) diff --git a/drivers/iommu/iommufd/iova_bitmap.c b/drivers/iommu/iommufd/iova_bitmap.c index d4d374395fad..6b09b7856789 100644 --- a/drivers/iommu/iommufd/iova_bitmap.c +++ b/drivers/iommu/iommufd/iova_bitmap.c @@ -116,12 +116,6 @@ struct iova_bitmap { /* length of the IOVA range for the whole bitmap */ size_t length; - - /* length of the IOVA range set ahead the pinned pages */ - unsigned long set_ahead_length; - - /* true if it using the iterator otherwise it pins dynamically */ - bool iterator; }; /* @@ -354,40 +348,6 @@ static bool iova_bitmap_mapped_range(struct iova_bitmap_map *mapped, (iova + length - 1) <= (mapped->iova + mapped->length - 1)); } -/* - * Returns true if there's not more data to iterate. - */ -static bool iova_bitmap_done(struct iova_bitmap *bitmap) -{ - return bitmap->mapped_base_index >= bitmap->mapped_total_index; -} - -static int iova_bitmap_set_ahead(struct iova_bitmap *bitmap, - size_t set_ahead_length) -{ - int ret = 0; - - while (set_ahead_length > 0 && !iova_bitmap_done(bitmap)) { - unsigned long length = iova_bitmap_mapped_length(bitmap); - unsigned long iova = iova_bitmap_mapped_iova(bitmap); - - ret = iova_bitmap_get(bitmap); - if (ret) - break; - - length = min(length, set_ahead_length); - iova_bitmap_set(bitmap, iova, length); - - set_ahead_length -= length; - bitmap->mapped_base_index += - iova_bitmap_offset_to_index(bitmap, length - 1) + 1; - iova_bitmap_put(bitmap); - } - - bitmap->set_ahead_length = 0; - return ret; -} - /* * Advances to a selected range, releases the current pinned * pages and pins the next set of bitmap pages. @@ -409,36 +369,6 @@ static int iova_bitmap_advance_to(struct iova_bitmap *bitmap, return iova_bitmap_get(bitmap); } -/* - * Advances to the next range, releases the current pinned - * pages and pins the next set of bitmap pages. - * Returns 0 on success or otherwise errno. - */ -static int iova_bitmap_advance(struct iova_bitmap *bitmap) -{ - unsigned long iova = iova_bitmap_mapped_length(bitmap) - 1; - unsigned long count = iova_bitmap_offset_to_index(bitmap, iova) + 1; - - bitmap->mapped_base_index += count; - - iova_bitmap_put(bitmap); - - /* Iterate, set and skip any bits requested for next iteration */ - if (bitmap->set_ahead_length) { - int ret; - - ret = iova_bitmap_set_ahead(bitmap, bitmap->set_ahead_length); - if (ret) - return ret; - } - - if (iova_bitmap_done(bitmap)) - return 0; - - /* When advancing the index we pin the next set of bitmap pages */ - return iova_bitmap_get(bitmap); -} - /** * iova_bitmap_for_each() - Iterates over the bitmap * @bitmap: IOVA bitmap to iterate @@ -455,23 +385,7 @@ static int iova_bitmap_advance(struct iova_bitmap *bitmap) int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque, iova_bitmap_fn_t fn) { - int ret = 0; - - ret = iova_bitmap_get(bitmap); - if (ret) - return ret; - - bitmap->iterator = true; - for (; !iova_bitmap_done(bitmap) && !ret; - ret = iova_bitmap_advance(bitmap)) { - ret = fn(bitmap, iova_bitmap_mapped_iova(bitmap), - iova_bitmap_mapped_length(bitmap), opaque); - if (ret) - break; - } - bitmap->iterator = false; - - return ret; + return fn(bitmap, bitmap->iova, bitmap->length, opaque); } EXPORT_SYMBOL_NS_GPL(iova_bitmap_for_each, IOMMUFD); @@ -492,15 +406,12 @@ void iova_bitmap_set(struct iova_bitmap *bitmap, unsigned long cur_bit, last_bit, last_page_idx; update_indexes: - if (unlikely(!bitmap->iterator && - !iova_bitmap_mapped_range(mapped, iova, length))) { + if (unlikely(!iova_bitmap_mapped_range(mapped, iova, length))) { /* * The attempt to advance the base index to @iova * may fail if it's out of bounds, or pinning the pages * returns an error. - * - * It is a no-op if within a iova_bitmap_for_each() closure. */ if (iova_bitmap_advance_to(bitmap, iova)) return; @@ -523,10 +434,6 @@ void iova_bitmap_set(struct iova_bitmap *bitmap, unsigned long left = ((last_bit - cur_bit + 1) << mapped->pgshift); - if (bitmap->iterator) { - bitmap->set_ahead_length = left; - return; - } iova += (length - left); length = left; goto update_indexes; -- Gitee From 2c36e67a33d81af9c5e9efdf5843998ccb8d18c8 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 25 Jun 2024 09:37:32 -0300 Subject: [PATCH 080/143] iommu/arm-smmu-v3: Convert to domain_alloc_sva() ANBZ: #13617 commit 678d79b98028ce2365b30e35479bea0e555c23d3 upstream. This allows the driver the receive the mm and always a device during allocation. Later patches need this to properly setup the notifier when the domain is first allocated. Remove ops->domain_alloc() as SVA was the only remaining purpose. Tested-by: Nicolin Chen Tested-by: Shameer Kolothum Reviewed-by: Michael Shavit Reviewed-by: Nicolin Chen Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/1-v9-5cd718286059+79186-smmuv3_newapi_p2b_jgg@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 6 ++++-- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 10 +--------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 8 +++----- 3 files changed, 8 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index e490ffb38015..28f8bf4327f6 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -656,13 +656,15 @@ static const struct iommu_domain_ops arm_smmu_sva_domain_ops = { .free = arm_smmu_sva_domain_free }; -struct iommu_domain *arm_smmu_sva_domain_alloc(void) +struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev, + struct mm_struct *mm) { struct iommu_domain *domain; domain = kzalloc(sizeof(*domain), GFP_KERNEL); if (!domain) - return NULL; + return ERR_PTR(-ENOMEM); + domain->type = IOMMU_DOMAIN_SVA; domain->ops = &arm_smmu_sva_domain_ops; return domain; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 213dfcbdbf25..86ec5dda663e 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2248,14 +2248,6 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) } } -static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) -{ - - if (type == IOMMU_DOMAIN_SVA) - return arm_smmu_sva_domain_alloc(); - return ERR_PTR(-EOPNOTSUPP); -} - static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) { struct arm_smmu_domain *smmu_domain; @@ -3102,8 +3094,8 @@ static struct iommu_ops arm_smmu_ops = { .identity_domain = &arm_smmu_identity_domain, .blocked_domain = &arm_smmu_blocked_domain, .capable = arm_smmu_capable, - .domain_alloc = arm_smmu_domain_alloc, .domain_alloc_paging = arm_smmu_domain_alloc_paging, + .domain_alloc_sva = arm_smmu_sva_domain_alloc, .probe_device = arm_smmu_probe_device, .release_device = arm_smmu_release_device, .device_group = arm_smmu_device_group, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 3dc2bbaf141a..1fb7c68e1aec 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -802,7 +802,8 @@ int arm_smmu_master_enable_sva(struct arm_smmu_master *master); int arm_smmu_master_disable_sva(struct arm_smmu_master *master); bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master); void arm_smmu_sva_notifier_synchronize(void); -struct iommu_domain *arm_smmu_sva_domain_alloc(void); +struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev, + struct mm_struct *mm); void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t id); #else /* CONFIG_ARM_SMMU_V3_SVA */ @@ -838,10 +839,7 @@ static inline bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master static inline void arm_smmu_sva_notifier_synchronize(void) {} -static inline struct iommu_domain *arm_smmu_sva_domain_alloc(void) -{ - return NULL; -} +#define arm_smmu_sva_domain_alloc NULL static inline void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, struct device *dev, -- Gitee From 82ee8b3631c00d1b45191ec859c9920a6e030340 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 25 Jun 2024 09:37:33 -0300 Subject: [PATCH 081/143] iommu/arm-smmu-v3: Start building a generic PASID layer ANBZ: #13617 commit 85f2fb6ef4137c631c9d2663716d998d7e4f164f upstream. Add arm_smmu_set_pasid()/arm_smmu_remove_pasid() which are to be used by callers that already constructed the arm_smmu_cd they wish to program. These functions will encapsulate the shared logic to setup a CD entry that will be shared by SVA and S1 domain cases. Prior fixes had already moved most of this logic up into __arm_smmu_sva_bind(), move it to it's final home. Following patches will relieve some of the remaining SVA restrictions: - The RID domain is a S1 domain and has already setup the STE to point to the CD table - The programmed PASID is the mm_get_enqcmd_pasid() - Nothing changes while SVA is running (sva_enable) SVA invalidation will still iterate over the S1 domain's master list, later patches will resolve that. Tested-by: Nicolin Chen Tested-by: Shameer Kolothum Reviewed-by: Nicolin Chen Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/2-v9-5cd718286059+79186-smmuv3_newapi_p2b_jgg@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 57 ++++++++++--------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 32 ++++++++++- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 9 ++- 3 files changed, 67 insertions(+), 31 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 28f8bf4327f6..71ca87c2c5c3 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -417,29 +417,27 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn) arm_smmu_free_shared_cd(cd); } -static int __arm_smmu_sva_bind(struct device *dev, ioasid_t pasid, - struct mm_struct *mm) +static struct arm_smmu_bond *__arm_smmu_sva_bind(struct device *dev, + struct mm_struct *mm) { int ret; - struct arm_smmu_cd target; - struct arm_smmu_cd *cdptr; struct arm_smmu_bond *bond; struct arm_smmu_master *master = dev_iommu_priv_get(dev); struct iommu_domain *domain = iommu_get_domain_for_dev(dev); struct arm_smmu_domain *smmu_domain; if (!(domain->type & __IOMMU_DOMAIN_PAGING)) - return -ENODEV; + return ERR_PTR(-ENODEV); smmu_domain = to_smmu_domain(domain); if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1) - return -ENODEV; + return ERR_PTR(-ENODEV); if (!master || !master->sva_enabled) - return -ENODEV; + return ERR_PTR(-ENODEV); bond = kzalloc(sizeof(*bond), GFP_KERNEL); if (!bond) - return -ENOMEM; + return ERR_PTR(-ENOMEM); bond->mm = mm; @@ -449,22 +447,12 @@ static int __arm_smmu_sva_bind(struct device *dev, ioasid_t pasid, goto err_free_bond; } - cdptr = arm_smmu_alloc_cd_ptr(master, mm_get_enqcmd_pasid(mm)); - if (!cdptr) { - ret = -ENOMEM; - goto err_put_notifier; - } - arm_smmu_make_sva_cd(&target, master, mm, bond->smmu_mn->cd->asid); - arm_smmu_write_cd_entry(master, pasid, cdptr, &target); - list_add(&bond->list, &master->bonds); - return 0; + return bond; -err_put_notifier: - arm_smmu_mmu_notifier_put(bond->smmu_mn); err_free_bond: kfree(bond); - return ret; + return ERR_PTR(ret); } bool arm_smmu_sva_supported(struct arm_smmu_device *smmu) @@ -611,10 +599,9 @@ void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, struct arm_smmu_bond *bond = NULL, *t; struct arm_smmu_master *master = dev_iommu_priv_get(dev); - mutex_lock(&sva_lock); - - arm_smmu_clear_cd(master, id); + arm_smmu_remove_pasid(master, to_smmu_domain(domain), id); + mutex_lock(&sva_lock); list_for_each_entry(t, &master->bonds, list) { if (t->mm == mm) { bond = t; @@ -633,17 +620,33 @@ void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t id) { - int ret = 0; + struct arm_smmu_master *master = dev_iommu_priv_get(dev); struct mm_struct *mm = domain->mm; + struct arm_smmu_bond *bond; + struct arm_smmu_cd target; + int ret; if (mm_get_enqcmd_pasid(mm) != id) return -EINVAL; mutex_lock(&sva_lock); - ret = __arm_smmu_sva_bind(dev, id, mm); - mutex_unlock(&sva_lock); + bond = __arm_smmu_sva_bind(dev, mm); + if (IS_ERR(bond)) { + mutex_unlock(&sva_lock); + return PTR_ERR(bond); + } - return ret; + arm_smmu_make_sva_cd(&target, master, mm, bond->smmu_mn->cd->asid); + ret = arm_smmu_set_pasid(master, NULL, id, &target); + if (ret) { + list_del(&bond->list); + arm_smmu_mmu_notifier_put(bond->smmu_mn); + kfree(bond); + mutex_unlock(&sva_lock); + return ret; + } + mutex_unlock(&sva_lock); + return 0; } static void arm_smmu_sva_domain_free(struct iommu_domain *domain) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 86ec5dda663e..5dd94fdea1d9 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1211,8 +1211,8 @@ struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, return &l1_desc->l2ptr[ssid % CTXDESC_L2_ENTRIES]; } -struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master, - u32 ssid) +static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master, + u32 ssid) { struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; struct arm_smmu_device *smmu = master->smmu; @@ -2423,6 +2423,10 @@ static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master, int i, j; struct arm_smmu_device *smmu = master->smmu; + master->cd_table.in_ste = + FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(target->data[0])) == + STRTAB_STE_0_CFG_S1_TRANS; + for (i = 0; i < master->num_streams; ++i) { u32 sid = master->streams[i].id; struct arm_smmu_ste *step = @@ -2643,6 +2647,30 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) return 0; } +int arm_smmu_set_pasid(struct arm_smmu_master *master, + struct arm_smmu_domain *smmu_domain, ioasid_t pasid, + const struct arm_smmu_cd *cd) +{ + struct arm_smmu_cd *cdptr; + + /* The core code validates pasid */ + + if (!master->cd_table.in_ste) + return -ENODEV; + + cdptr = arm_smmu_alloc_cd_ptr(master, pasid); + if (!cdptr) + return -ENOMEM; + arm_smmu_write_cd_entry(master, pasid, cdptr, cd); + return 0; +} + +void arm_smmu_remove_pasid(struct arm_smmu_master *master, + struct arm_smmu_domain *smmu_domain, ioasid_t pasid) +{ + arm_smmu_clear_cd(master, pasid); +} + static int arm_smmu_attach_dev_ste(struct device *dev, struct arm_smmu_ste *ste) { diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 1fb7c68e1aec..30253abf30f3 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -602,6 +602,7 @@ struct arm_smmu_ctx_desc_cfg { dma_addr_t cdtab_dma; struct arm_smmu_l1_ctx_desc *l1_desc; unsigned int num_l1_ents; + u8 in_ste; u8 s1fmt; /* log2 of the maximum number of CDs supported by this table */ u8 s1cdmax; @@ -777,8 +778,6 @@ extern struct mutex arm_smmu_asid_lock; void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid); struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid); -struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master, - u32 ssid); void arm_smmu_make_s1_cd(struct arm_smmu_cd *target, struct arm_smmu_master *master, struct arm_smmu_domain *smmu_domain); @@ -786,6 +785,12 @@ void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid, struct arm_smmu_cd *cdptr, const struct arm_smmu_cd *target); +int arm_smmu_set_pasid(struct arm_smmu_master *master, + struct arm_smmu_domain *smmu_domain, ioasid_t pasid, + const struct arm_smmu_cd *cd); +void arm_smmu_remove_pasid(struct arm_smmu_master *master, + struct arm_smmu_domain *smmu_domain, ioasid_t pasid); + void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid); void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, size_t granule, bool leaf, -- Gitee From d47e3eae7581ba6da717c3db4ec3040d079fa2bd Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 25 Jun 2024 09:37:34 -0300 Subject: [PATCH 082/143] iommu/arm-smmu-v3: Make smmu_domain->devices into an allocated list ANBZ: #13617 commit ad10dce61303d82f7bdd2dbb116e02146778f728 upstream. The next patch will need to store the same master twice (with different SSIDs), so allocate memory for each list element. Tested-by: Nicolin Chen Tested-by: Shameer Kolothum Reviewed-by: Michael Shavit Reviewed-by: Nicolin Chen Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/3-v9-5cd718286059+79186-smmuv3_newapi_p2b_jgg@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 11 ++++-- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 39 ++++++++++++++++--- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 7 +++- 3 files changed, 47 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 71ca87c2c5c3..cb3a0e4143c8 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -38,12 +38,13 @@ static DEFINE_MUTEX(sva_lock); static void arm_smmu_update_s1_domain_cd_entry(struct arm_smmu_domain *smmu_domain) { - struct arm_smmu_master *master; + struct arm_smmu_master_domain *master_domain; struct arm_smmu_cd target_cd; unsigned long flags; spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_for_each_entry(master, &smmu_domain->devices, domain_head) { + list_for_each_entry(master_domain, &smmu_domain->devices, devices_elm) { + struct arm_smmu_master *master = master_domain->master; struct arm_smmu_cd *cdptr; /* S1 domains only support RID attachment right now */ @@ -301,7 +302,7 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) { struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn); struct arm_smmu_domain *smmu_domain = smmu_mn->domain; - struct arm_smmu_master *master; + struct arm_smmu_master_domain *master_domain; unsigned long flags; mutex_lock(&sva_lock); @@ -315,7 +316,9 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) * but disable translation. */ spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_for_each_entry(master, &smmu_domain->devices, domain_head) { + list_for_each_entry(master_domain, &smmu_domain->devices, + devices_elm) { + struct arm_smmu_master *master = master_domain->master; struct arm_smmu_cd target; struct arm_smmu_cd *cdptr; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 5dd94fdea1d9..262c32132dba 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2026,10 +2026,10 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master) int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid, unsigned long iova, size_t size) { + struct arm_smmu_master_domain *master_domain; int i; unsigned long flags; struct arm_smmu_cmdq_ent cmd; - struct arm_smmu_master *master; struct arm_smmu_cmdq_batch cmds; if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS)) @@ -2057,7 +2057,10 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid, cmds.num = 0; spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_for_each_entry(master, &smmu_domain->devices, domain_head) { + list_for_each_entry(master_domain, &smmu_domain->devices, + devices_elm) { + struct arm_smmu_master *master = master_domain->master; + if (!master->ats_enabled) continue; @@ -2545,9 +2548,26 @@ static void arm_smmu_disable_pasid(struct arm_smmu_master *master) pci_disable_pasid(pdev); } +static struct arm_smmu_master_domain * +arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain, + struct arm_smmu_master *master) +{ + struct arm_smmu_master_domain *master_domain; + + lockdep_assert_held(&smmu_domain->devices_lock); + + list_for_each_entry(master_domain, &smmu_domain->devices, + devices_elm) { + if (master_domain->master == master) + return master_domain; + } + return NULL; +} + static void arm_smmu_detach_dev(struct arm_smmu_master *master) { struct iommu_domain *domain = iommu_get_domain_for_dev(master->dev); + struct arm_smmu_master_domain *master_domain; struct arm_smmu_domain *smmu_domain; unsigned long flags; @@ -2558,7 +2578,11 @@ static void arm_smmu_detach_dev(struct arm_smmu_master *master) arm_smmu_disable_ats(master, smmu_domain); spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_del_init(&master->domain_head); + master_domain = arm_smmu_find_master_domain(smmu_domain, master); + if (master_domain) { + list_del(&master_domain->devices_elm); + kfree(master_domain); + } spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); master->ats_enabled = false; @@ -2572,6 +2596,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct arm_smmu_device *smmu; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_master_domain *master_domain; struct arm_smmu_master *master; struct arm_smmu_cd *cdptr; @@ -2608,6 +2633,11 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) return -ENOMEM; } + master_domain = kzalloc(sizeof(*master_domain), GFP_KERNEL); + if (!master_domain) + return -ENOMEM; + master_domain->master = master; + /* * Prevent arm_smmu_share_asid() from trying to change the ASID * of either the old or new domain while we are working on it. @@ -2621,7 +2651,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) master->ats_enabled = arm_smmu_ats_supported(master); spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_add(&master->domain_head, &smmu_domain->devices); + list_add(&master_domain->devices_elm, &smmu_domain->devices); spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); switch (smmu_domain->stage) { @@ -2930,7 +2960,6 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev) master->dev = dev; master->smmu = smmu; INIT_LIST_HEAD(&master->bonds); - INIT_LIST_HEAD(&master->domain_head); dev_iommu_priv_set(dev, master); ret = arm_smmu_insert_master(smmu, master); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 30253abf30f3..aa40cba12da1 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -697,7 +697,6 @@ struct arm_smmu_stream { struct arm_smmu_master { struct arm_smmu_device *smmu; struct device *dev; - struct list_head domain_head; struct arm_smmu_stream *streams; /* Locked by the iommu core using the group mutex */ struct arm_smmu_ctx_desc_cfg cd_table; @@ -731,6 +730,7 @@ struct arm_smmu_domain { struct iommu_domain domain; + /* List of struct arm_smmu_master_domain */ struct list_head devices; spinlock_t devices_lock; @@ -767,6 +767,11 @@ void arm_smmu_make_sva_cd(struct arm_smmu_cd *target, u16 asid); #endif +struct arm_smmu_master_domain { + struct list_head devices_elm; + struct arm_smmu_master *master; +}; + static inline struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) { return container_of(dom, struct arm_smmu_domain, domain); -- Gitee From f1aff72bbbd98717120dba251b3ef8ae51838745 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 25 Jun 2024 09:37:35 -0300 Subject: [PATCH 083/143] iommu/arm-smmu-v3: Make changing domains be hitless for ATS ANBZ: #13617 commit 7497f4211f4fbdcec5fc5bb4df7f6ccd345966e8 upstream. The core code allows the domain to be changed on the fly without a forced stop in BLOCKED/IDENTITY. In this flow the driver should just continually maintain the ATS with no change while the STE is updated. ATS relies on a linked list smmu_domain->devices to keep track of which masters have the domain programmed, but this list is also used by arm_smmu_share_asid(), unrelated to ats. Create two new functions to encapsulate this combined logic: arm_smmu_attach_prepare() arm_smmu_attach_commit() The two functions can sequence both enabling ATS and disabling across the STE store. Have every update of the STE use this sequence. Installing a S1/S2 domain always enables the ATS if the PCIe device supports it. The enable flow is now ordered differently to allow it to be hitless: 1) Add the master to the new smmu_domain->devices list 2) Program the STE 3) Enable ATS at PCIe 4) Remove the master from the old smmu_domain This flow ensures that invalidations to either domain will generate an ATC invalidation to the device while the STE is being switched. Thus we don't need to turn off the ATS anymore for correctness. The disable flow is the reverse: 1) Disable ATS at PCIe 2) Program the STE 3) Invalidate the ATC 4) Remove the master from the old smmu_domain Move the nr_ats_masters adjustments to be close to the list manipulations. It is a count of the number of ATS enabled masters currently in the list. This is stricly before and after the STE/CD are revised, and done under the list's spin_lock. This is part of the bigger picture to allow changing the RID domain while a PASID is in use. If a SVA PASID is relying on ATS to function then changing the RID domain cannot just temporarily toggle ATS off without also wrecking the SVA PASID. The new infrastructure here is organized so that the PASID attach/detach flows will make use of it as well in following patches. Tested-by: Nicolin Chen Tested-by: Shameer Kolothum Reviewed-by: Nicolin Chen Reviewed-by: Michael Shavit Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/4-v9-5cd718286059+79186-smmuv3_newapi_p2b_jgg@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c | 5 +- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 237 +++++++++++++----- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 6 +- 3 files changed, 177 insertions(+), 71 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c index 315e487fd990..a460b71f5857 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c @@ -164,7 +164,7 @@ static void arm_smmu_test_make_cdtable_ste(struct arm_smmu_ste *ste, .smmu = &smmu, }; - arm_smmu_make_cdtable_ste(ste, &master); + arm_smmu_make_cdtable_ste(ste, &master, true); } static void arm_smmu_v3_write_ste_test_bypass_to_abort(struct kunit *test) @@ -231,7 +231,6 @@ static void arm_smmu_test_make_s2_ste(struct arm_smmu_ste *ste, { struct arm_smmu_master master = { .smmu = &smmu, - .ats_enabled = ats_enabled, }; struct io_pgtable io_pgtable = {}; struct arm_smmu_domain smmu_domain = { @@ -247,7 +246,7 @@ static void arm_smmu_test_make_s2_ste(struct arm_smmu_ste *ste, io_pgtable.cfg.arm_lpae_s2_cfg.vtcr.sl = 3; io_pgtable.cfg.arm_lpae_s2_cfg.vtcr.tsz = 4; - arm_smmu_make_s2_domain_ste(ste, &master, &smmu_domain); + arm_smmu_make_s2_domain_ste(ste, &master, &smmu_domain, ats_enabled); } static void arm_smmu_v3_write_ste_test_s2_to_abort(struct kunit *test) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 262c32132dba..47c781807b88 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1538,7 +1538,7 @@ EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_bypass_ste); VISIBLE_IF_KUNIT void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target, - struct arm_smmu_master *master) + struct arm_smmu_master *master, bool ats_enabled) { struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; struct arm_smmu_device *smmu = master->smmu; @@ -1561,7 +1561,7 @@ void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target, STRTAB_STE_1_S1STALLD : 0) | FIELD_PREP(STRTAB_STE_1_EATS, - master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0)); + ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0)); if (smmu->features & ARM_SMMU_FEAT_E2H) { /* @@ -1591,7 +1591,8 @@ EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_cdtable_ste); VISIBLE_IF_KUNIT void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, struct arm_smmu_master *master, - struct arm_smmu_domain *smmu_domain) + struct arm_smmu_domain *smmu_domain, + bool ats_enabled) { struct arm_smmu_s2_cfg *s2_cfg = &smmu_domain->s2_cfg; const struct io_pgtable_cfg *pgtbl_cfg = @@ -1608,7 +1609,7 @@ void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, target->data[1] = cpu_to_le64( FIELD_PREP(STRTAB_STE_1_EATS, - master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0)); + ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0)); if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR) target->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG, @@ -2461,22 +2462,16 @@ static bool arm_smmu_ats_supported(struct arm_smmu_master *master) return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev)); } -static void arm_smmu_enable_ats(struct arm_smmu_master *master, - struct arm_smmu_domain *smmu_domain) +static void arm_smmu_enable_ats(struct arm_smmu_master *master) { size_t stu; struct pci_dev *pdev; struct arm_smmu_device *smmu = master->smmu; - /* Don't enable ATS at the endpoint if it's not enabled in the STE */ - if (!master->ats_enabled) - return; - /* Smallest Translation Unit: log2 of the smallest supported granule */ stu = __ffs(smmu->pgsize_bitmap); pdev = to_pci_dev(master->dev); - atomic_inc(&smmu_domain->nr_ats_masters); /* * ATC invalidation of PASID 0 causes the entire ATC to be flushed. */ @@ -2485,22 +2480,6 @@ static void arm_smmu_enable_ats(struct arm_smmu_master *master, dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu); } -static void arm_smmu_disable_ats(struct arm_smmu_master *master, - struct arm_smmu_domain *smmu_domain) -{ - if (!master->ats_enabled) - return; - - pci_disable_ats(to_pci_dev(master->dev)); - /* - * Ensure ATS is disabled at the endpoint before we issue the - * ATC invalidation via the SMMU. - */ - wmb(); - arm_smmu_atc_inv_master(master); - atomic_dec(&smmu_domain->nr_ats_masters); -} - static int arm_smmu_enable_pasid(struct arm_smmu_master *master) { int ret; @@ -2564,46 +2543,181 @@ arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain, return NULL; } -static void arm_smmu_detach_dev(struct arm_smmu_master *master) +/* + * If the domain uses the smmu_domain->devices list return the arm_smmu_domain + * structure, otherwise NULL. These domains track attached devices so they can + * issue invalidations. + */ +static struct arm_smmu_domain * +to_smmu_domain_devices(struct iommu_domain *domain) +{ + /* The domain can be NULL only when processing the first attach */ + if (!domain) + return NULL; + if (domain->type & __IOMMU_DOMAIN_PAGING) + return to_smmu_domain(domain); + return NULL; +} + +static void arm_smmu_remove_master_domain(struct arm_smmu_master *master, + struct iommu_domain *domain) { - struct iommu_domain *domain = iommu_get_domain_for_dev(master->dev); + struct arm_smmu_domain *smmu_domain = to_smmu_domain_devices(domain); struct arm_smmu_master_domain *master_domain; - struct arm_smmu_domain *smmu_domain; unsigned long flags; - if (!domain || !(domain->type & __IOMMU_DOMAIN_PAGING)) + if (!smmu_domain) return; - smmu_domain = to_smmu_domain(domain); - arm_smmu_disable_ats(master, smmu_domain); - spin_lock_irqsave(&smmu_domain->devices_lock, flags); master_domain = arm_smmu_find_master_domain(smmu_domain, master); if (master_domain) { list_del(&master_domain->devices_elm); kfree(master_domain); + if (master->ats_enabled) + atomic_dec(&smmu_domain->nr_ats_masters); } spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); +} + +struct arm_smmu_attach_state { + /* Inputs */ + struct iommu_domain *old_domain; + struct arm_smmu_master *master; + /* Resulting state */ + bool ats_enabled; +}; + +/* + * Start the sequence to attach a domain to a master. The sequence contains three + * steps: + * arm_smmu_attach_prepare() + * arm_smmu_install_ste_for_dev() + * arm_smmu_attach_commit() + * + * If prepare succeeds then the sequence must be completed. The STE installed + * must set the STE.EATS field according to state.ats_enabled. + * + * If the device supports ATS then this determines if EATS should be enabled + * in the STE, and starts sequencing EATS disable if required. + * + * The change of the EATS in the STE and the PCI ATS config space is managed by + * this sequence to be in the right order so that if PCI ATS is enabled then + * STE.ETAS is enabled. + * + * new_domain can be a non-paging domain. In this case ATS will not be enabled, + * and invalidations won't be tracked. + */ +static int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, + struct iommu_domain *new_domain) +{ + struct arm_smmu_master *master = state->master; + struct arm_smmu_master_domain *master_domain; + struct arm_smmu_domain *smmu_domain = + to_smmu_domain_devices(new_domain); + unsigned long flags; + + /* + * arm_smmu_share_asid() must not see two domains pointing to the same + * arm_smmu_master_domain contents otherwise it could randomly write one + * or the other to the CD. + */ + lockdep_assert_held(&arm_smmu_asid_lock); + + if (smmu_domain) { + /* + * The SMMU does not support enabling ATS with bypass/abort. + * When the STE is in bypass (STE.Config[2:0] == 0b100), ATS + * Translation Requests and Translated transactions are denied + * as though ATS is disabled for the stream (STE.EATS == 0b00), + * causing F_BAD_ATS_TREQ and F_TRANSL_FORBIDDEN events + * (IHI0070Ea 5.2 Stream Table Entry). Thus ATS can only be + * enabled if we have arm_smmu_domain, those always have page + * tables. + */ + state->ats_enabled = arm_smmu_ats_supported(master); + + master_domain = kzalloc(sizeof(*master_domain), GFP_KERNEL); + if (!master_domain) + return -ENOMEM; + master_domain->master = master; - master->ats_enabled = false; + /* + * During prepare we want the current smmu_domain and new + * smmu_domain to be in the devices list before we change any + * HW. This ensures that both domains will send ATS + * invalidations to the master until we are done. + * + * It is tempting to make this list only track masters that are + * using ATS, but arm_smmu_share_asid() also uses this to change + * the ASID of a domain, unrelated to ATS. + * + * Notice if we are re-attaching the same domain then the list + * will have two identical entries and commit will remove only + * one of them. + */ + spin_lock_irqsave(&smmu_domain->devices_lock, flags); + if (state->ats_enabled) + atomic_inc(&smmu_domain->nr_ats_masters); + list_add(&master_domain->devices_elm, &smmu_domain->devices); + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); + } + + if (!state->ats_enabled && master->ats_enabled) { + pci_disable_ats(to_pci_dev(master->dev)); + /* + * This is probably overkill, but the config write for disabling + * ATS should complete before the STE is configured to generate + * UR to avoid AER noise. + */ + wmb(); + } + return 0; +} + +/* + * Commit is done after the STE/CD are configured with the EATS setting. It + * completes synchronizing the PCI device's ATC and finishes manipulating the + * smmu_domain->devices list. + */ +static void arm_smmu_attach_commit(struct arm_smmu_attach_state *state) +{ + struct arm_smmu_master *master = state->master; + + lockdep_assert_held(&arm_smmu_asid_lock); + + if (state->ats_enabled && !master->ats_enabled) { + arm_smmu_enable_ats(master); + } else if (master->ats_enabled) { + /* + * The translation has changed, flush the ATC. At this point the + * SMMU is translating for the new domain and both the old&new + * domain will issue invalidations. + */ + arm_smmu_atc_inv_master(master); + } + master->ats_enabled = state->ats_enabled; + + arm_smmu_remove_master_domain(master, state->old_domain); } static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) { int ret = 0; - unsigned long flags; struct arm_smmu_ste target; struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct arm_smmu_device *smmu; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - struct arm_smmu_master_domain *master_domain; + struct arm_smmu_attach_state state = { + .old_domain = iommu_get_domain_for_dev(dev), + }; struct arm_smmu_master *master; struct arm_smmu_cd *cdptr; if (!fwspec) return -ENOENT; - master = dev_iommu_priv_get(dev); + state.master = master = dev_iommu_priv_get(dev); smmu = master->smmu; /* @@ -2633,11 +2747,6 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) return -ENOMEM; } - master_domain = kzalloc(sizeof(*master_domain), GFP_KERNEL); - if (!master_domain) - return -ENOMEM; - master_domain->master = master; - /* * Prevent arm_smmu_share_asid() from trying to change the ASID * of either the old or new domain while we are working on it. @@ -2646,13 +2755,11 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) */ mutex_lock(&arm_smmu_asid_lock); - arm_smmu_detach_dev(master); - - master->ats_enabled = arm_smmu_ats_supported(master); - - spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_add(&master_domain->devices_elm, &smmu_domain->devices); - spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); + ret = arm_smmu_attach_prepare(&state, domain); + if (ret) { + mutex_unlock(&arm_smmu_asid_lock); + return ret; + } switch (smmu_domain->stage) { case ARM_SMMU_DOMAIN_S1: { @@ -2661,18 +2768,19 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) arm_smmu_make_s1_cd(&target_cd, master, smmu_domain); arm_smmu_write_cd_entry(master, IOMMU_NO_PASID, cdptr, &target_cd); - arm_smmu_make_cdtable_ste(&target, master); + arm_smmu_make_cdtable_ste(&target, master, state.ats_enabled); arm_smmu_install_ste_for_dev(master, &target); break; } case ARM_SMMU_DOMAIN_S2: - arm_smmu_make_s2_domain_ste(&target, master, smmu_domain); + arm_smmu_make_s2_domain_ste(&target, master, smmu_domain, + state.ats_enabled); arm_smmu_install_ste_for_dev(master, &target); arm_smmu_clear_cd(master, IOMMU_NO_PASID); break; } - arm_smmu_enable_ats(master, smmu_domain); + arm_smmu_attach_commit(&state); mutex_unlock(&arm_smmu_asid_lock); return 0; } @@ -2701,10 +2809,14 @@ void arm_smmu_remove_pasid(struct arm_smmu_master *master, arm_smmu_clear_cd(master, pasid); } -static int arm_smmu_attach_dev_ste(struct device *dev, - struct arm_smmu_ste *ste) +static int arm_smmu_attach_dev_ste(struct iommu_domain *domain, + struct device *dev, struct arm_smmu_ste *ste) { struct arm_smmu_master *master = dev_iommu_priv_get(dev); + struct arm_smmu_attach_state state = { + .master = master, + .old_domain = iommu_get_domain_for_dev(dev), + }; if (arm_smmu_master_sva_enabled(master)) return -EBUSY; @@ -2715,16 +2827,9 @@ static int arm_smmu_attach_dev_ste(struct device *dev, */ mutex_lock(&arm_smmu_asid_lock); - /* - * The SMMU does not support enabling ATS with bypass/abort. When the - * STE is in bypass (STE.Config[2:0] == 0b100), ATS Translation Requests - * and Translated transactions are denied as though ATS is disabled for - * the stream (STE.EATS == 0b00), causing F_BAD_ATS_TREQ and - * F_TRANSL_FORBIDDEN events (IHI0070Ea 5.2 Stream Table Entry). - */ - arm_smmu_detach_dev(master); - + arm_smmu_attach_prepare(&state, domain); arm_smmu_install_ste_for_dev(master, ste); + arm_smmu_attach_commit(&state); mutex_unlock(&arm_smmu_asid_lock); /* @@ -2743,7 +2848,7 @@ static int arm_smmu_attach_dev_identity(struct iommu_domain *domain, struct arm_smmu_master *master = dev_iommu_priv_get(dev); arm_smmu_make_bypass_ste(master->smmu, &ste); - return arm_smmu_attach_dev_ste(dev, &ste); + return arm_smmu_attach_dev_ste(domain, dev, &ste); } static const struct iommu_domain_ops arm_smmu_identity_ops = { @@ -2761,7 +2866,7 @@ static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain, struct arm_smmu_ste ste; arm_smmu_make_abort_ste(&ste); - return arm_smmu_attach_dev_ste(dev, &ste); + return arm_smmu_attach_dev_ste(domain, dev, &ste); } static const struct iommu_domain_ops arm_smmu_blocked_ops = { diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index aa40cba12da1..370b7315b47b 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -758,10 +758,12 @@ void arm_smmu_make_abort_ste(struct arm_smmu_ste *target); void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu, struct arm_smmu_ste *target); void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target, - struct arm_smmu_master *master); + struct arm_smmu_master *master, + bool ats_enabled); void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, struct arm_smmu_master *master, - struct arm_smmu_domain *smmu_domain); + struct arm_smmu_domain *smmu_domain, + bool ats_enabled); void arm_smmu_make_sva_cd(struct arm_smmu_cd *target, struct arm_smmu_master *master, struct mm_struct *mm, u16 asid); -- Gitee From 379979eadd5a51b0c557fb06a55b9f61228788c1 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 25 Jun 2024 09:37:36 -0300 Subject: [PATCH 084/143] iommu/arm-smmu-v3: Add ssid to struct arm_smmu_master_domain ANBZ: #13617 commit 64efb3def3a53effe01fa750eec6e7369f65e386 upstream. Prepare to allow a S1 domain to be attached to a PASID as well. Keep track of the SSID the domain is using on each master in the arm_smmu_master_domain. Tested-by: Nicolin Chen Tested-by: Shameer Kolothum Reviewed-by: Michael Shavit Reviewed-by: Nicolin Chen Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/5-v9-5cd718286059+79186-smmuv3_newapi_p2b_jgg@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 15 ++++--- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 42 +++++++++++++++---- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 5 ++- 3 files changed, 43 insertions(+), 19 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index cb3a0e4143c8..d31caceb5849 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -47,13 +47,12 @@ arm_smmu_update_s1_domain_cd_entry(struct arm_smmu_domain *smmu_domain) struct arm_smmu_master *master = master_domain->master; struct arm_smmu_cd *cdptr; - /* S1 domains only support RID attachment right now */ - cdptr = arm_smmu_get_cd_ptr(master, IOMMU_NO_PASID); + cdptr = arm_smmu_get_cd_ptr(master, master_domain->ssid); if (WARN_ON(!cdptr)) continue; arm_smmu_make_s1_cd(&target_cd, master, smmu_domain); - arm_smmu_write_cd_entry(master, IOMMU_NO_PASID, cdptr, + arm_smmu_write_cd_entry(master, master_domain->ssid, cdptr, &target_cd); } spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); @@ -294,8 +293,8 @@ static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, smmu_domain); } - arm_smmu_atc_inv_domain(smmu_domain, mm_get_enqcmd_pasid(mm), start, - size); + arm_smmu_atc_inv_domain_sva(smmu_domain, mm_get_enqcmd_pasid(mm), start, + size); } static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) @@ -332,7 +331,7 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_mn->cd->asid); - arm_smmu_atc_inv_domain(smmu_domain, mm_get_enqcmd_pasid(mm), 0, 0); + arm_smmu_atc_inv_domain_sva(smmu_domain, mm_get_enqcmd_pasid(mm), 0, 0); smmu_mn->cleared = true; mutex_unlock(&sva_lock); @@ -411,8 +410,8 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn) */ if (!smmu_mn->cleared) { arm_smmu_tlb_inv_asid(smmu_domain->smmu, cd->asid); - arm_smmu_atc_inv_domain(smmu_domain, mm_get_enqcmd_pasid(mm), 0, - 0); + arm_smmu_atc_inv_domain_sva(smmu_domain, + mm_get_enqcmd_pasid(mm), 0, 0); } /* Frees smmu_mn */ diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 47c781807b88..76bb49f64545 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2024,8 +2024,8 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master) return arm_smmu_cmdq_batch_submit(master->smmu, &cmds); } -int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid, - unsigned long iova, size_t size) +static int __arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, + ioasid_t ssid, unsigned long iova, size_t size) { struct arm_smmu_master_domain *master_domain; int i; @@ -2053,8 +2053,6 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid, if (!atomic_read(&smmu_domain->nr_ats_masters)) return 0; - arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd); - cmds.num = 0; spin_lock_irqsave(&smmu_domain->devices_lock, flags); @@ -2065,6 +2063,16 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid, if (!master->ats_enabled) continue; + /* + * Non-zero ssid means SVA is co-opting the S1 domain to issue + * invalidations for SVA PASIDs. + */ + if (ssid != IOMMU_NO_PASID) + arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd); + else + arm_smmu_atc_inv_to_cmd(master_domain->ssid, iova, size, + &cmd); + for (i = 0; i < master->num_streams; i++) { cmd.atc.sid = master->streams[i].id; arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd); @@ -2075,6 +2083,19 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid, return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds); } +static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, + unsigned long iova, size_t size) +{ + return __arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, iova, + size); +} + +int arm_smmu_atc_inv_domain_sva(struct arm_smmu_domain *smmu_domain, + ioasid_t ssid, unsigned long iova, size_t size) +{ + return __arm_smmu_atc_inv_domain(smmu_domain, ssid, iova, size); +} + /* IO_PGTABLE API */ static void arm_smmu_tlb_inv_context(void *cookie) { @@ -2096,7 +2117,7 @@ static void arm_smmu_tlb_inv_context(void *cookie) cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); } - arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0); + arm_smmu_atc_inv_domain(smmu_domain, 0, 0); } static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd, @@ -2194,7 +2215,7 @@ static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size, * Unfortunately, this can't be leaf-only since we may have * zapped an entire table. */ - arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, iova, size); + arm_smmu_atc_inv_domain(smmu_domain, iova, size); } void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, @@ -2529,7 +2550,8 @@ static void arm_smmu_disable_pasid(struct arm_smmu_master *master) static struct arm_smmu_master_domain * arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain, - struct arm_smmu_master *master) + struct arm_smmu_master *master, + ioasid_t ssid) { struct arm_smmu_master_domain *master_domain; @@ -2537,7 +2559,8 @@ arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain, list_for_each_entry(master_domain, &smmu_domain->devices, devices_elm) { - if (master_domain->master == master) + if (master_domain->master == master && + master_domain->ssid == ssid) return master_domain; } return NULL; @@ -2570,7 +2593,8 @@ static void arm_smmu_remove_master_domain(struct arm_smmu_master *master, return; spin_lock_irqsave(&smmu_domain->devices_lock, flags); - master_domain = arm_smmu_find_master_domain(smmu_domain, master); + master_domain = arm_smmu_find_master_domain(smmu_domain, master, + IOMMU_NO_PASID); if (master_domain) { list_del(&master_domain->devices_elm); kfree(master_domain); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 370b7315b47b..6330d50b5fd7 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -772,6 +772,7 @@ void arm_smmu_make_sva_cd(struct arm_smmu_cd *target, struct arm_smmu_master_domain { struct list_head devices_elm; struct arm_smmu_master *master; + ioasid_t ssid; }; static inline struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) @@ -803,8 +804,8 @@ void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, size_t granule, bool leaf, struct arm_smmu_domain *smmu_domain); bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd); -int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid, - unsigned long iova, size_t size); +int arm_smmu_atc_inv_domain_sva(struct arm_smmu_domain *smmu_domain, + ioasid_t ssid, unsigned long iova, size_t size); #ifdef CONFIG_ARM_SMMU_V3_SVA bool arm_smmu_sva_supported(struct arm_smmu_device *smmu); -- Gitee From 5de339b4e3eec18fe8f3c0b88aef7fe4820cb770 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 25 Jun 2024 09:37:37 -0300 Subject: [PATCH 085/143] iommu/arm-smmu-v3: Do not use master->sva_enable to restrict attaches ANBZ: #13617 commit be7c90de39fdebdba4f9cce7575b71c6b2506ea0 upstream. We no longer need a master->sva_enable to control what attaches are allowed. Instead we can tell if the attach is legal based on the current configuration of the master. Keep track of the number of valid CD entries for SSID's in the cd_table and if the cd_table has been installed in the STE directly so we know what the configuration is. The attach logic is then made into: - SVA bind, check if the CD is installed - RID attach of S2, block if SSIDs are used - RID attach of IDENTITY/BLOCKING, block if SSIDs are used arm_smmu_set_pasid() is already checking if it is possible to setup a CD entry, at this patch it means the RID path already set a STE pointing at the CD table. Tested-by: Nicolin Chen Reviewed-by: Nicolin Chen Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/6-v9-5cd718286059+79186-smmuv3_newapi_p2b_jgg@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 24 ++++++++++----------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 7 ++++++ 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 76bb49f64545..2f9b3c3b1e28 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1289,6 +1289,8 @@ void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid, struct arm_smmu_cd *cdptr, const struct arm_smmu_cd *target) { + bool target_valid = target->data[0] & cpu_to_le64(CTXDESC_CD_0_V); + bool cur_valid = cdptr->data[0] & cpu_to_le64(CTXDESC_CD_0_V); struct arm_smmu_cd_writer cd_writer = { .writer = { .ops = &arm_smmu_cd_writer_ops, @@ -1297,6 +1299,13 @@ void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid, .ssid = ssid, }; + if (ssid != IOMMU_NO_PASID && cur_valid != target_valid) { + if (cur_valid) + master->cd_table.used_ssids--; + else + master->cd_table.used_ssids++; + } + arm_smmu_write_entry(&cd_writer.writer, cdptr->data, target->data); } @@ -2744,16 +2753,6 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) state.master = master = dev_iommu_priv_get(dev); smmu = master->smmu; - /* - * Checking that SVA is disabled ensures that this device isn't bound to - * any mm, and can be safely detached from its old domain. Bonds cannot - * be removed concurrently since we're holding the group mutex. - */ - if (arm_smmu_master_sva_enabled(master)) { - dev_err(dev, "cannot attach - SVA enabled\n"); - return -EBUSY; - } - mutex_lock(&smmu_domain->init_mutex); if (!smmu_domain->smmu) { @@ -2769,7 +2768,8 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) cdptr = arm_smmu_alloc_cd_ptr(master, IOMMU_NO_PASID); if (!cdptr) return -ENOMEM; - } + } else if (arm_smmu_ssids_in_use(&master->cd_table)) + return -EBUSY; /* * Prevent arm_smmu_share_asid() from trying to change the ASID @@ -2842,7 +2842,7 @@ static int arm_smmu_attach_dev_ste(struct iommu_domain *domain, .old_domain = iommu_get_domain_for_dev(dev), }; - if (arm_smmu_master_sva_enabled(master)) + if (arm_smmu_ssids_in_use(&master->cd_table)) return -EBUSY; /* diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 6330d50b5fd7..2d5292a75a1c 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -602,12 +602,19 @@ struct arm_smmu_ctx_desc_cfg { dma_addr_t cdtab_dma; struct arm_smmu_l1_ctx_desc *l1_desc; unsigned int num_l1_ents; + unsigned int used_ssids; u8 in_ste; u8 s1fmt; /* log2 of the maximum number of CDs supported by this table */ u8 s1cdmax; }; +/* True if the cd table has SSIDS > 0 in use. */ +static inline bool arm_smmu_ssids_in_use(struct arm_smmu_ctx_desc_cfg *cd_table) +{ + return cd_table->used_ssids; +} + struct arm_smmu_s2_cfg { u16 vmid; }; -- Gitee From 6b3d3fad33600cdbdfd3d014a354d89bac4f9b16 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 25 Jun 2024 09:37:38 -0300 Subject: [PATCH 086/143] iommu/arm-smmu-v3: Thread SSID through the arm_smmu_attach_*() interface ANBZ: #13617 commit 1d5f34f0002f9f56d0ca153022cfdead07d45dc6 upstream. Allow creating and managing arm_smmu_mater_domain's with a non-zero SSID through the arm_smmu_attach_*() family of functions. This triggers ATC invalidation for the correct SSID in PASID cases and tracks the per-attachment SSID in the struct arm_smmu_master_domain. Generalize arm_smmu_attach_remove() to be able to remove SSID's as well by ensuring the ATC for the PASID is flushed properly. Tested-by: Nicolin Chen Tested-by: Shameer Kolothum Reviewed-by: Nicolin Chen Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/7-v9-5cd718286059+79186-smmuv3_newapi_p2b_jgg@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 26 ++++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 2f9b3c3b1e28..71b2c8498e67 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2016,13 +2016,14 @@ arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size, cmd->atc.size = log2_span; } -static int arm_smmu_atc_inv_master(struct arm_smmu_master *master) +static int arm_smmu_atc_inv_master(struct arm_smmu_master *master, + ioasid_t ssid) { int i; struct arm_smmu_cmdq_ent cmd; struct arm_smmu_cmdq_batch cmds; - arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd); + arm_smmu_atc_inv_to_cmd(ssid, 0, 0, &cmd); cmds.num = 0; for (i = 0; i < master->num_streams; i++) { @@ -2505,7 +2506,7 @@ static void arm_smmu_enable_ats(struct arm_smmu_master *master) /* * ATC invalidation of PASID 0 causes the entire ATC to be flushed. */ - arm_smmu_atc_inv_master(master); + arm_smmu_atc_inv_master(master, IOMMU_NO_PASID); if (pci_enable_ats(pdev, stu)) dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu); } @@ -2592,7 +2593,8 @@ to_smmu_domain_devices(struct iommu_domain *domain) } static void arm_smmu_remove_master_domain(struct arm_smmu_master *master, - struct iommu_domain *domain) + struct iommu_domain *domain, + ioasid_t ssid) { struct arm_smmu_domain *smmu_domain = to_smmu_domain_devices(domain); struct arm_smmu_master_domain *master_domain; @@ -2602,8 +2604,7 @@ static void arm_smmu_remove_master_domain(struct arm_smmu_master *master, return; spin_lock_irqsave(&smmu_domain->devices_lock, flags); - master_domain = arm_smmu_find_master_domain(smmu_domain, master, - IOMMU_NO_PASID); + master_domain = arm_smmu_find_master_domain(smmu_domain, master, ssid); if (master_domain) { list_del(&master_domain->devices_elm); kfree(master_domain); @@ -2617,6 +2618,7 @@ struct arm_smmu_attach_state { /* Inputs */ struct iommu_domain *old_domain; struct arm_smmu_master *master; + ioasid_t ssid; /* Resulting state */ bool ats_enabled; }; @@ -2674,6 +2676,7 @@ static int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, if (!master_domain) return -ENOMEM; master_domain->master = master; + master_domain->ssid = state->ssid; /* * During prepare we want the current smmu_domain and new @@ -2721,17 +2724,20 @@ static void arm_smmu_attach_commit(struct arm_smmu_attach_state *state) if (state->ats_enabled && !master->ats_enabled) { arm_smmu_enable_ats(master); - } else if (master->ats_enabled) { + } else if (state->ats_enabled && master->ats_enabled) { /* * The translation has changed, flush the ATC. At this point the * SMMU is translating for the new domain and both the old&new * domain will issue invalidations. */ - arm_smmu_atc_inv_master(master); + arm_smmu_atc_inv_master(master, state->ssid); + } else if (!state->ats_enabled && master->ats_enabled) { + /* ATS is being switched off, invalidate the entire ATC */ + arm_smmu_atc_inv_master(master, IOMMU_NO_PASID); } master->ats_enabled = state->ats_enabled; - arm_smmu_remove_master_domain(master, state->old_domain); + arm_smmu_remove_master_domain(master, state->old_domain, state->ssid); } static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) @@ -2743,6 +2749,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_attach_state state = { .old_domain = iommu_get_domain_for_dev(dev), + .ssid = IOMMU_NO_PASID, }; struct arm_smmu_master *master; struct arm_smmu_cd *cdptr; @@ -2840,6 +2847,7 @@ static int arm_smmu_attach_dev_ste(struct iommu_domain *domain, struct arm_smmu_attach_state state = { .master = master, .old_domain = iommu_get_domain_for_dev(dev), + .ssid = IOMMU_NO_PASID, }; if (arm_smmu_ssids_in_use(&master->cd_table)) -- Gitee From 884c9b4f93e338a6a90ed9f1d738d398e389aaa4 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 25 Jun 2024 09:37:39 -0300 Subject: [PATCH 087/143] iommu/arm-smmu-v3: Make SVA allocate a normal arm_smmu_domain ANBZ: #13617 commit d7b2d2ba1b84f4ae7cd94de22f74d6c6c5419de6 upstream. Currently the SVA domain is a naked struct iommu_domain, allocate a struct arm_smmu_domain instead. This is necessary to be able to use the struct arm_master_domain mechanism. Tested-by: Nicolin Chen Tested-by: Shameer Kolothum Reviewed-by: Michael Shavit Reviewed-by: Nicolin Chen Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/8-v9-5cd718286059+79186-smmuv3_newapi_p2b_jgg@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 21 ++++++++------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 27 +++++++++++++------ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 2 ++ 3 files changed, 33 insertions(+), 17 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index d31caceb5849..aa033cd65adc 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -639,7 +639,7 @@ static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain, } arm_smmu_make_sva_cd(&target, master, mm, bond->smmu_mn->cd->asid); - ret = arm_smmu_set_pasid(master, NULL, id, &target); + ret = arm_smmu_set_pasid(master, to_smmu_domain(domain), id, &target); if (ret) { list_del(&bond->list); arm_smmu_mmu_notifier_put(bond->smmu_mn); @@ -653,7 +653,7 @@ static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain, static void arm_smmu_sva_domain_free(struct iommu_domain *domain) { - kfree(domain); + kfree(to_smmu_domain(domain)); } static const struct iommu_domain_ops arm_smmu_sva_domain_ops = { @@ -664,13 +664,16 @@ static const struct iommu_domain_ops arm_smmu_sva_domain_ops = { struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev, struct mm_struct *mm) { - struct iommu_domain *domain; + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + struct arm_smmu_device *smmu = master->smmu; + struct arm_smmu_domain *smmu_domain; - domain = kzalloc(sizeof(*domain), GFP_KERNEL); - if (!domain) - return ERR_PTR(-ENOMEM); - domain->type = IOMMU_DOMAIN_SVA; - domain->ops = &arm_smmu_sva_domain_ops; + smmu_domain = arm_smmu_domain_alloc(); + if (IS_ERR(smmu_domain)) + return ERR_CAST(smmu_domain); + smmu_domain->domain.type = IOMMU_DOMAIN_SVA; + smmu_domain->domain.ops = &arm_smmu_sva_domain_ops; + smmu_domain->smmu = smmu; - return domain; + return &smmu_domain->domain; } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 71b2c8498e67..df88fed5f2a6 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2283,15 +2283,10 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) } } -static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) +struct arm_smmu_domain *arm_smmu_domain_alloc(void) { struct arm_smmu_domain *smmu_domain; - /* - * Allocate the domain and initialise some of its data structures. - * We can't really do anything meaningful until we've added a - * master. - */ smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL); if (!smmu_domain) return ERR_PTR(-ENOMEM); @@ -2301,6 +2296,22 @@ static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) spin_lock_init(&smmu_domain->devices_lock); INIT_LIST_HEAD(&smmu_domain->mmu_notifiers); + return smmu_domain; +} + +static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) +{ + struct arm_smmu_domain *smmu_domain; + + /* + * Allocate the domain and initialise some of its data structures. + * We can't really do anything meaningful until we've added a + * master. + */ + smmu_domain = arm_smmu_domain_alloc(); + if (IS_ERR(smmu_domain)) + return ERR_CAST(smmu_domain); + if (dev) { struct arm_smmu_master *master = dev_iommu_priv_get(dev); int ret; @@ -2314,7 +2325,7 @@ static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) return &smmu_domain->domain; } -static void arm_smmu_domain_free(struct iommu_domain *domain) +static void arm_smmu_domain_free_paging(struct iommu_domain *domain) { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_device *smmu = smmu_domain->smmu; @@ -3310,7 +3321,7 @@ static struct iommu_ops arm_smmu_ops = { .iotlb_sync = arm_smmu_iotlb_sync, .iova_to_phys = arm_smmu_iova_to_phys, .enable_nesting = arm_smmu_enable_nesting, - .free = arm_smmu_domain_free, + .free = arm_smmu_domain_free_paging, } }; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 2d5292a75a1c..d6a7a3404ea8 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -790,6 +790,8 @@ static inline struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) extern struct xarray arm_smmu_asid_xa; extern struct mutex arm_smmu_asid_lock; +struct arm_smmu_domain *arm_smmu_domain_alloc(void); + void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid); struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid); -- Gitee From 1ddde5d758ad98949c7478f1ad0779cf883a3e31 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 25 Jun 2024 09:37:40 -0300 Subject: [PATCH 088/143] iommu/arm-smmu-v3: Keep track of arm_smmu_master_domain for SVA ANBZ: #13617 commit 49db2ed23c52f8371c12ab8646df23fa1daad4b2 upstream. Fill in the smmu_domain->devices list in the new struct arm_smmu_domain that SVA allocates. Keep track of every SSID and master that is using the domain reusing the logic for the RID attach. This is the first step to making the SVA invalidation follow the same design as S1/S2 invalidation. At present nothing will read this list. Tested-by: Nicolin Chen Tested-by: Shameer Kolothum Reviewed-by: Nicolin Chen Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/9-v9-5cd718286059+79186-smmuv3_newapi_p2b_jgg@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 30 +++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index df88fed5f2a6..ae43c37d1aa2 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2598,7 +2598,8 @@ to_smmu_domain_devices(struct iommu_domain *domain) /* The domain can be NULL only when processing the first attach */ if (!domain) return NULL; - if (domain->type & __IOMMU_DOMAIN_PAGING) + if ((domain->type & __IOMMU_DOMAIN_PAGING) || + domain->type == IOMMU_DOMAIN_SVA) return to_smmu_domain(domain); return NULL; } @@ -2831,7 +2832,16 @@ int arm_smmu_set_pasid(struct arm_smmu_master *master, struct arm_smmu_domain *smmu_domain, ioasid_t pasid, const struct arm_smmu_cd *cd) { + struct arm_smmu_attach_state state = { + .master = master, + /* + * For now the core code prevents calling this when a domain is + * already attached, no need to set old_domain. + */ + .ssid = pasid, + }; struct arm_smmu_cd *cdptr; + int ret; /* The core code validates pasid */ @@ -2841,14 +2851,30 @@ int arm_smmu_set_pasid(struct arm_smmu_master *master, cdptr = arm_smmu_alloc_cd_ptr(master, pasid); if (!cdptr) return -ENOMEM; + + mutex_lock(&arm_smmu_asid_lock); + ret = arm_smmu_attach_prepare(&state, &smmu_domain->domain); + if (ret) + goto out_unlock; + arm_smmu_write_cd_entry(master, pasid, cdptr, cd); - return 0; + + arm_smmu_attach_commit(&state); + +out_unlock: + mutex_unlock(&arm_smmu_asid_lock); + return ret; } void arm_smmu_remove_pasid(struct arm_smmu_master *master, struct arm_smmu_domain *smmu_domain, ioasid_t pasid) { + mutex_lock(&arm_smmu_asid_lock); arm_smmu_clear_cd(master, pasid); + if (master->ats_enabled) + arm_smmu_atc_inv_master(master, pasid); + arm_smmu_remove_master_domain(master, &smmu_domain->domain, pasid); + mutex_unlock(&arm_smmu_asid_lock); } static int arm_smmu_attach_dev_ste(struct iommu_domain *domain, -- Gitee From 83c056388dca3b7bad5d719c9b208009b296d8b9 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 25 Jun 2024 09:37:41 -0300 Subject: [PATCH 089/143] iommu/arm-smmu-v3: Put the SVA mmu notifier in the smmu_domain ANBZ: #13617 commit d38c28dbefeee03d7dd02004ad80d9676ac54d86 upstream. This removes all the notifier de-duplication logic in the driver and relies on the core code to de-duplicate and allocate only one SVA domain per mm per smmu instance. This naturally gives a 1:1 relationship between SVA domain and mmu notifier. It is a significant simplication of the flow, as we end up with a single struct arm_smmu_domain for each MM and the invalidation can then be shifted to properly use the masters list like S1/S2 do. Remove all of the previous mmu_notifier, bond, shared cd, and cd refcount logic entirely. The logic here is tightly wound together with the unusued BTM support. Since the BTM logic requires holding all the iommu_domains in a global ASID xarray it conflicts with the design to have a single SVA domain per PASID, as multiple SMMU instances will need to have different domains. Following patches resolve this by making the ASID xarray per-instance instead of global. However, converting the BTM code over to this methodology requires many changes. Thus, since ARM_SMMU_FEAT_BTM is never enabled, remove the parts of the BTM support for ASID sharing that interact with SVA as well. A followup series is already working on fully enabling the BTM support, that requires iommufd's VIOMMU feature to bring in the KVM's VMID as well. It will come with an already written patch to bring back the ASID sharing using a per-instance ASID xarray. https://lore.kernel.org/linux-iommu/20240208151837.35068-1-shameerali.kolothum.thodi@huawei.com/ https://lore.kernel.org/linux-iommu/26-v6-228e7adf25eb+4155-smmuv3_newapi_p2_jgg@nvidia.com/ Tested-by: Nicolin Chen Tested-by: Shameer Kolothum Reviewed-by: Nicolin Chen Reviewed-by: Michael Shavit Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/10-v9-5cd718286059+79186-smmuv3_newapi_p2b_jgg@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 395 +++--------------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 69 +-- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 15 +- 3 files changed, 86 insertions(+), 393 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index aa033cd65adc..a7c36654dee5 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -13,29 +13,9 @@ #include "arm-smmu-v3.h" #include "../../io-pgtable-arm.h" -struct arm_smmu_mmu_notifier { - struct mmu_notifier mn; - struct arm_smmu_ctx_desc *cd; - bool cleared; - refcount_t refs; - struct list_head list; - struct arm_smmu_domain *domain; -}; - -#define mn_to_smmu(mn) container_of(mn, struct arm_smmu_mmu_notifier, mn) - -struct arm_smmu_bond { - struct mm_struct *mm; - struct arm_smmu_mmu_notifier *smmu_mn; - struct list_head list; -}; - -#define sva_to_bond(handle) \ - container_of(handle, struct arm_smmu_bond, sva) - static DEFINE_MUTEX(sva_lock); -static void +static void __maybe_unused arm_smmu_update_s1_domain_cd_entry(struct arm_smmu_domain *smmu_domain) { struct arm_smmu_master_domain *master_domain; @@ -58,58 +38,6 @@ arm_smmu_update_s1_domain_cd_entry(struct arm_smmu_domain *smmu_domain) spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); } -/* - * Check if the CPU ASID is available on the SMMU side. If a private context - * descriptor is using it, try to replace it. - */ -static struct arm_smmu_ctx_desc * -arm_smmu_share_asid(struct mm_struct *mm, u16 asid) -{ - int ret; - u32 new_asid; - struct arm_smmu_ctx_desc *cd; - struct arm_smmu_device *smmu; - struct arm_smmu_domain *smmu_domain; - - cd = xa_load(&arm_smmu_asid_xa, asid); - if (!cd) - return NULL; - - if (cd->mm) { - if (WARN_ON(cd->mm != mm)) - return ERR_PTR(-EINVAL); - /* All devices bound to this mm use the same cd struct. */ - refcount_inc(&cd->refs); - return cd; - } - - smmu_domain = container_of(cd, struct arm_smmu_domain, cd); - smmu = smmu_domain->smmu; - - ret = xa_alloc(&arm_smmu_asid_xa, &new_asid, cd, - XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL); - if (ret) - return ERR_PTR(-ENOSPC); - /* - * Race with unmap: TLB invalidations will start targeting the new ASID, - * which isn't assigned yet. We'll do an invalidate-all on the old ASID - * later, so it doesn't matter. - */ - cd->asid = new_asid; - /* - * Update ASID and invalidate CD in all associated masters. There will - * be some overlap between use of both ASIDs, until we invalidate the - * TLB. - */ - arm_smmu_update_s1_domain_cd_entry(smmu_domain); - - /* Invalidate TLB entries previously associated with that context */ - arm_smmu_tlb_inv_asid(smmu, asid); - - xa_erase(&arm_smmu_asid_xa, asid); - return NULL; -} - static u64 page_size_to_cd(void) { static_assert(PAGE_SIZE == SZ_4K || PAGE_SIZE == SZ_16K || @@ -187,69 +115,6 @@ void arm_smmu_make_sva_cd(struct arm_smmu_cd *target, } EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_sva_cd); -static struct arm_smmu_ctx_desc *arm_smmu_alloc_shared_cd(struct mm_struct *mm) -{ - u16 asid; - int err = 0; - struct arm_smmu_ctx_desc *cd; - struct arm_smmu_ctx_desc *ret = NULL; - - /* Don't free the mm until we release the ASID */ - mmgrab(mm); - - asid = arm64_mm_context_get(mm); - if (!asid) { - err = -ESRCH; - goto out_drop_mm; - } - - cd = kzalloc(sizeof(*cd), GFP_KERNEL); - if (!cd) { - err = -ENOMEM; - goto out_put_context; - } - - refcount_set(&cd->refs, 1); - - mutex_lock(&arm_smmu_asid_lock); - ret = arm_smmu_share_asid(mm, asid); - if (ret) { - mutex_unlock(&arm_smmu_asid_lock); - goto out_free_cd; - } - - err = xa_insert(&arm_smmu_asid_xa, asid, cd, GFP_KERNEL); - mutex_unlock(&arm_smmu_asid_lock); - - if (err) - goto out_free_asid; - - cd->asid = asid; - cd->mm = mm; - - return cd; - -out_free_asid: - arm_smmu_free_asid(cd); -out_free_cd: - kfree(cd); -out_put_context: - arm64_mm_context_put(mm); -out_drop_mm: - mmdrop(mm); - return err < 0 ? ERR_PTR(err) : ret; -} - -static void arm_smmu_free_shared_cd(struct arm_smmu_ctx_desc *cd) -{ - if (arm_smmu_free_asid(cd)) { - /* Unpin ASID */ - arm64_mm_context_put(cd->mm); - mmdrop(cd->mm); - kfree(cd); - } -} - /* * Cloned from the MAX_TLBI_OPS in arch/arm64/include/asm/tlbflush.h, this * is used as a threshold to replace per-page TLBI commands to issue in the @@ -264,8 +129,8 @@ static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, unsigned long start, unsigned long end) { - struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn); - struct arm_smmu_domain *smmu_domain = smmu_mn->domain; + struct arm_smmu_domain *smmu_domain = + container_of(mn, struct arm_smmu_domain, mmu_notifier); size_t size; /* @@ -282,34 +147,22 @@ static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, size = 0; } - if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM)) { - if (!size) - arm_smmu_tlb_inv_asid(smmu_domain->smmu, - smmu_mn->cd->asid); - else - arm_smmu_tlb_inv_range_asid(start, size, - smmu_mn->cd->asid, - PAGE_SIZE, false, - smmu_domain); - } + if (!size) + arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_domain->cd.asid); + else + arm_smmu_tlb_inv_range_asid(start, size, smmu_domain->cd.asid, + PAGE_SIZE, false, smmu_domain); - arm_smmu_atc_inv_domain_sva(smmu_domain, mm_get_enqcmd_pasid(mm), start, - size); + arm_smmu_atc_inv_domain(smmu_domain, start, size); } static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) { - struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn); - struct arm_smmu_domain *smmu_domain = smmu_mn->domain; + struct arm_smmu_domain *smmu_domain = + container_of(mn, struct arm_smmu_domain, mmu_notifier); struct arm_smmu_master_domain *master_domain; unsigned long flags; - mutex_lock(&sva_lock); - if (smmu_mn->cleared) { - mutex_unlock(&sva_lock); - return; - } - /* * DMA may still be running. Keep the cd valid to avoid C_BAD_CD events, * but disable translation. @@ -321,25 +174,23 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) struct arm_smmu_cd target; struct arm_smmu_cd *cdptr; - cdptr = arm_smmu_get_cd_ptr(master, mm_get_enqcmd_pasid(mm)); + cdptr = arm_smmu_get_cd_ptr(master, master_domain->ssid); if (WARN_ON(!cdptr)) continue; - arm_smmu_make_sva_cd(&target, master, NULL, smmu_mn->cd->asid); - arm_smmu_write_cd_entry(master, mm_get_enqcmd_pasid(mm), cdptr, + arm_smmu_make_sva_cd(&target, master, NULL, + smmu_domain->cd.asid); + arm_smmu_write_cd_entry(master, master_domain->ssid, cdptr, &target); } spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); - arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_mn->cd->asid); - arm_smmu_atc_inv_domain_sva(smmu_domain, mm_get_enqcmd_pasid(mm), 0, 0); - - smmu_mn->cleared = true; - mutex_unlock(&sva_lock); + arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_domain->cd.asid); + arm_smmu_atc_inv_domain(smmu_domain, 0, 0); } static void arm_smmu_mmu_notifier_free(struct mmu_notifier *mn) { - kfree(mn_to_smmu(mn)); + kfree(container_of(mn, struct arm_smmu_domain, mmu_notifier)); } static const struct mmu_notifier_ops arm_smmu_mmu_notifier_ops = { @@ -348,115 +199,6 @@ static const struct mmu_notifier_ops arm_smmu_mmu_notifier_ops = { .free_notifier = arm_smmu_mmu_notifier_free, }; -/* Allocate or get existing MMU notifier for this {domain, mm} pair */ -static struct arm_smmu_mmu_notifier * -arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain, - struct mm_struct *mm) -{ - int ret; - struct arm_smmu_ctx_desc *cd; - struct arm_smmu_mmu_notifier *smmu_mn; - - list_for_each_entry(smmu_mn, &smmu_domain->mmu_notifiers, list) { - if (smmu_mn->mn.mm == mm) { - refcount_inc(&smmu_mn->refs); - return smmu_mn; - } - } - - cd = arm_smmu_alloc_shared_cd(mm); - if (IS_ERR(cd)) - return ERR_CAST(cd); - - smmu_mn = kzalloc(sizeof(*smmu_mn), GFP_KERNEL); - if (!smmu_mn) { - ret = -ENOMEM; - goto err_free_cd; - } - - refcount_set(&smmu_mn->refs, 1); - smmu_mn->cd = cd; - smmu_mn->domain = smmu_domain; - smmu_mn->mn.ops = &arm_smmu_mmu_notifier_ops; - - ret = mmu_notifier_register(&smmu_mn->mn, mm); - if (ret) { - kfree(smmu_mn); - goto err_free_cd; - } - - list_add(&smmu_mn->list, &smmu_domain->mmu_notifiers); - return smmu_mn; - -err_free_cd: - arm_smmu_free_shared_cd(cd); - return ERR_PTR(ret); -} - -static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn) -{ - struct mm_struct *mm = smmu_mn->mn.mm; - struct arm_smmu_ctx_desc *cd = smmu_mn->cd; - struct arm_smmu_domain *smmu_domain = smmu_mn->domain; - - if (!refcount_dec_and_test(&smmu_mn->refs)) - return; - - list_del(&smmu_mn->list); - - /* - * If we went through clear(), we've already invalidated, and no - * new TLB entry can have been formed. - */ - if (!smmu_mn->cleared) { - arm_smmu_tlb_inv_asid(smmu_domain->smmu, cd->asid); - arm_smmu_atc_inv_domain_sva(smmu_domain, - mm_get_enqcmd_pasid(mm), 0, 0); - } - - /* Frees smmu_mn */ - mmu_notifier_put(&smmu_mn->mn); - arm_smmu_free_shared_cd(cd); -} - -static struct arm_smmu_bond *__arm_smmu_sva_bind(struct device *dev, - struct mm_struct *mm) -{ - int ret; - struct arm_smmu_bond *bond; - struct arm_smmu_master *master = dev_iommu_priv_get(dev); - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct arm_smmu_domain *smmu_domain; - - if (!(domain->type & __IOMMU_DOMAIN_PAGING)) - return ERR_PTR(-ENODEV); - smmu_domain = to_smmu_domain(domain); - if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1) - return ERR_PTR(-ENODEV); - - if (!master || !master->sva_enabled) - return ERR_PTR(-ENODEV); - - bond = kzalloc(sizeof(*bond), GFP_KERNEL); - if (!bond) - return ERR_PTR(-ENOMEM); - - bond->mm = mm; - - bond->smmu_mn = arm_smmu_mmu_notifier_get(smmu_domain, mm); - if (IS_ERR(bond->smmu_mn)) { - ret = PTR_ERR(bond->smmu_mn); - goto err_free_bond; - } - - list_add(&bond->list, &master->bonds); - return bond; - -err_free_bond: - kfree(bond); - return ERR_PTR(ret); -} - bool arm_smmu_sva_supported(struct arm_smmu_device *smmu) { unsigned long reg, fld; @@ -573,11 +315,6 @@ int arm_smmu_master_enable_sva(struct arm_smmu_master *master) int arm_smmu_master_disable_sva(struct arm_smmu_master *master) { mutex_lock(&sva_lock); - if (!list_empty(&master->bonds)) { - dev_err(master->dev, "cannot disable SVA, device is bound\n"); - mutex_unlock(&sva_lock); - return -EBUSY; - } arm_smmu_master_sva_disable_iopf(master); master->sva_enabled = false; mutex_unlock(&sva_lock); @@ -594,66 +331,51 @@ void arm_smmu_sva_notifier_synchronize(void) mmu_notifier_synchronize(); } -void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, - struct device *dev, ioasid_t id) -{ - struct mm_struct *mm = domain->mm; - struct arm_smmu_bond *bond = NULL, *t; - struct arm_smmu_master *master = dev_iommu_priv_get(dev); - - arm_smmu_remove_pasid(master, to_smmu_domain(domain), id); - - mutex_lock(&sva_lock); - list_for_each_entry(t, &master->bonds, list) { - if (t->mm == mm) { - bond = t; - break; - } - } - - if (!WARN_ON(!bond)) { - list_del(&bond->list); - arm_smmu_mmu_notifier_put(bond->smmu_mn); - kfree(bond); - } - mutex_unlock(&sva_lock); -} - static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t id) { + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_master *master = dev_iommu_priv_get(dev); - struct mm_struct *mm = domain->mm; - struct arm_smmu_bond *bond; struct arm_smmu_cd target; int ret; - if (mm_get_enqcmd_pasid(mm) != id) + /* Prevent arm_smmu_mm_release from being called while we are attaching */ + if (!mmget_not_zero(domain->mm)) return -EINVAL; - mutex_lock(&sva_lock); - bond = __arm_smmu_sva_bind(dev, mm); - if (IS_ERR(bond)) { - mutex_unlock(&sva_lock); - return PTR_ERR(bond); - } + /* + * This does not need the arm_smmu_asid_lock because SVA domains never + * get reassigned + */ + arm_smmu_make_sva_cd(&target, master, domain->mm, smmu_domain->cd.asid); + ret = arm_smmu_set_pasid(master, smmu_domain, id, &target); - arm_smmu_make_sva_cd(&target, master, mm, bond->smmu_mn->cd->asid); - ret = arm_smmu_set_pasid(master, to_smmu_domain(domain), id, &target); - if (ret) { - list_del(&bond->list); - arm_smmu_mmu_notifier_put(bond->smmu_mn); - kfree(bond); - mutex_unlock(&sva_lock); - return ret; - } - mutex_unlock(&sva_lock); - return 0; + mmput(domain->mm); + return ret; } static void arm_smmu_sva_domain_free(struct iommu_domain *domain) { - kfree(to_smmu_domain(domain)); + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + + /* + * Ensure the ASID is empty in the iommu cache before allowing reuse. + */ + arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_domain->cd.asid); + + /* + * Notice that the arm_smmu_mm_arch_invalidate_secondary_tlbs op can + * still be called/running at this point. We allow the ASID to be + * reused, and if there is a race then it just suffers harmless + * unnecessary invalidation. + */ + xa_erase(&arm_smmu_asid_xa, smmu_domain->cd.asid); + + /* + * Actual free is defered to the SRCU callback + * arm_smmu_mmu_notifier_free() + */ + mmu_notifier_put(&smmu_domain->mmu_notifier); } static const struct iommu_domain_ops arm_smmu_sva_domain_ops = { @@ -667,6 +389,8 @@ struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev, struct arm_smmu_master *master = dev_iommu_priv_get(dev); struct arm_smmu_device *smmu = master->smmu; struct arm_smmu_domain *smmu_domain; + u32 asid; + int ret; smmu_domain = arm_smmu_domain_alloc(); if (IS_ERR(smmu_domain)) @@ -675,5 +399,22 @@ struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev, smmu_domain->domain.ops = &arm_smmu_sva_domain_ops; smmu_domain->smmu = smmu; + ret = xa_alloc(&arm_smmu_asid_xa, &asid, smmu_domain, + XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL); + if (ret) + goto err_free; + + smmu_domain->cd.asid = asid; + smmu_domain->mmu_notifier.ops = &arm_smmu_mmu_notifier_ops; + ret = mmu_notifier_register(&smmu_domain->mmu_notifier, mm); + if (ret) + goto err_asid; + return &smmu_domain->domain; + +err_asid: + xa_erase(&arm_smmu_asid_xa, smmu_domain->cd.asid); +err_free: + kfree(smmu_domain); + return ERR_PTR(ret); } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index ae43c37d1aa2..2325a3fad67e 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1439,22 +1439,6 @@ static void arm_smmu_free_cd_tables(struct arm_smmu_master *master) cd_table->cdtab = NULL; } -bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd) -{ - bool free; - struct arm_smmu_ctx_desc *old_cd; - - if (!cd->asid) - return false; - - free = refcount_dec_and_test(&cd->refs); - if (free) { - old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid); - WARN_ON(old_cd != cd); - } - return free; -} - /* Stream table manipulation functions */ static void arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc) @@ -2034,8 +2018,8 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master, return arm_smmu_cmdq_batch_submit(master->smmu, &cmds); } -static int __arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, - ioasid_t ssid, unsigned long iova, size_t size) +int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, + unsigned long iova, size_t size) { struct arm_smmu_master_domain *master_domain; int i; @@ -2073,15 +2057,7 @@ static int __arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, if (!master->ats_enabled) continue; - /* - * Non-zero ssid means SVA is co-opting the S1 domain to issue - * invalidations for SVA PASIDs. - */ - if (ssid != IOMMU_NO_PASID) - arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd); - else - arm_smmu_atc_inv_to_cmd(master_domain->ssid, iova, size, - &cmd); + arm_smmu_atc_inv_to_cmd(master_domain->ssid, iova, size, &cmd); for (i = 0; i < master->num_streams; i++) { cmd.atc.sid = master->streams[i].id; @@ -2093,19 +2069,6 @@ static int __arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds); } -static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, - unsigned long iova, size_t size) -{ - return __arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, iova, - size); -} - -int arm_smmu_atc_inv_domain_sva(struct arm_smmu_domain *smmu_domain, - ioasid_t ssid, unsigned long iova, size_t size) -{ - return __arm_smmu_atc_inv_domain(smmu_domain, ssid, iova, size); -} - /* IO_PGTABLE API */ static void arm_smmu_tlb_inv_context(void *cookie) { @@ -2294,7 +2257,6 @@ struct arm_smmu_domain *arm_smmu_domain_alloc(void) mutex_init(&smmu_domain->init_mutex); INIT_LIST_HEAD(&smmu_domain->devices); spin_lock_init(&smmu_domain->devices_lock); - INIT_LIST_HEAD(&smmu_domain->mmu_notifiers); return smmu_domain; } @@ -2336,7 +2298,7 @@ static void arm_smmu_domain_free_paging(struct iommu_domain *domain) if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { /* Prevent SVA from touching the CD while we're freeing it */ mutex_lock(&arm_smmu_asid_lock); - arm_smmu_free_asid(&smmu_domain->cd); + xa_erase(&arm_smmu_asid_xa, smmu_domain->cd.asid); mutex_unlock(&arm_smmu_asid_lock); } else { struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; @@ -2354,11 +2316,9 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_device *smmu, u32 asid = 0; struct arm_smmu_ctx_desc *cd = &smmu_domain->cd; - refcount_set(&cd->refs, 1); - /* Prevent SVA from modifying the ASID until it is written to the CD */ mutex_lock(&arm_smmu_asid_lock); - ret = xa_alloc(&arm_smmu_asid_xa, &asid, cd, + ret = xa_alloc(&arm_smmu_asid_xa, &asid, smmu_domain, XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL); cd->asid = (u16)asid; mutex_unlock(&arm_smmu_asid_lock); @@ -2845,6 +2805,9 @@ int arm_smmu_set_pasid(struct arm_smmu_master *master, /* The core code validates pasid */ + if (smmu_domain->smmu != master->smmu) + return -EINVAL; + if (!master->cd_table.in_ste) return -ENODEV; @@ -2866,9 +2829,14 @@ int arm_smmu_set_pasid(struct arm_smmu_master *master, return ret; } -void arm_smmu_remove_pasid(struct arm_smmu_master *master, - struct arm_smmu_domain *smmu_domain, ioasid_t pasid) +static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid, + struct iommu_domain *domain) { + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + struct arm_smmu_domain *smmu_domain; + + smmu_domain = to_smmu_domain(domain); + mutex_lock(&arm_smmu_asid_lock); arm_smmu_clear_cd(master, pasid); if (master->ats_enabled) @@ -3133,7 +3101,6 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev) master->dev = dev; master->smmu = smmu; - INIT_LIST_HEAD(&master->bonds); dev_iommu_priv_set(dev, master); ret = arm_smmu_insert_master(smmu, master); @@ -3315,12 +3282,6 @@ static int arm_smmu_def_domain_type(struct device *dev) return 0; } -static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid, - struct iommu_domain *domain) -{ - arm_smmu_sva_remove_dev_pasid(domain, dev, pasid); -} - static struct iommu_ops arm_smmu_ops = { .identity_domain = &arm_smmu_identity_domain, .blocked_domain = &arm_smmu_blocked_domain, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index d6a7a3404ea8..7347e842f8a8 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -587,9 +587,6 @@ struct arm_smmu_strtab_l1_desc { struct arm_smmu_ctx_desc { u16 asid; - - refcount_t refs; - struct mm_struct *mm; }; struct arm_smmu_l1_ctx_desc { @@ -712,7 +709,6 @@ struct arm_smmu_master { bool stall_enabled; bool sva_enabled; bool iopf_enabled; - struct list_head bonds; unsigned int ssid_bits; }; @@ -741,7 +737,7 @@ struct arm_smmu_domain { struct list_head devices; spinlock_t devices_lock; - struct list_head mmu_notifiers; + struct mmu_notifier mmu_notifier; }; /* The following are exposed for testing purposes. */ @@ -805,16 +801,13 @@ void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid, int arm_smmu_set_pasid(struct arm_smmu_master *master, struct arm_smmu_domain *smmu_domain, ioasid_t pasid, const struct arm_smmu_cd *cd); -void arm_smmu_remove_pasid(struct arm_smmu_master *master, - struct arm_smmu_domain *smmu_domain, ioasid_t pasid); void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid); void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, size_t granule, bool leaf, struct arm_smmu_domain *smmu_domain); -bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd); -int arm_smmu_atc_inv_domain_sva(struct arm_smmu_domain *smmu_domain, - ioasid_t ssid, unsigned long iova, size_t size); +int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, + unsigned long iova, size_t size); #ifdef CONFIG_ARM_SMMU_V3_SVA bool arm_smmu_sva_supported(struct arm_smmu_device *smmu); @@ -826,8 +819,6 @@ bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master); void arm_smmu_sva_notifier_synchronize(void); struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev, struct mm_struct *mm); -void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, - struct device *dev, ioasid_t id); #else /* CONFIG_ARM_SMMU_V3_SVA */ static inline bool arm_smmu_sva_supported(struct arm_smmu_device *smmu) { -- Gitee From 69c9a55f6d144a34ebe8682176e6b4cc4f1b164b Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 25 Jun 2024 09:37:42 -0300 Subject: [PATCH 090/143] iommu/arm-smmu-v3: Allow IDENTITY/BLOCKED to be set while PASID is used ANBZ: #13617 commit ce26ea9e6e12df01432bd2a1cb8cbfa025b8a977 upstream. The HW supports this, use the S1DSS bits to configure the behavior of SSID=0 which is the RID's translation. If SSID's are currently being used in the CD table then just update the S1DSS bits in the STE, remove the master_domain and leave ATS alone. For iommufd the driver design has a small problem that all the unused CD table entries are set with V=0 which will generate an event if VFIO userspace tries to use the CD entry. This patch extends this problem to include the RID as well if PASID is being used. For BLOCKED with used PASIDs the F_STREAM_DISABLED (STRTAB_STE_1_S1DSS_TERMINATE) event is generated on untagged traffic and a substream CD table entry with V=0 (removed pasid) will generate C_BAD_CD. Arguably there is no advantage to using S1DSS over the CD entry 0 with V=0. As we don't yet support PASID in iommufd this is a problem to resolve later, possibly by using EPD0 for unused CD table entries instead of V=0, and not using S1DSS for BLOCKED. Tested-by: Nicolin Chen Tested-by: Shameer Kolothum Reviewed-by: Nicolin Chen Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/11-v9-5cd718286059+79186-smmuv3_newapi_p2b_jgg@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c | 2 +- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 60 +++++++++++++++---- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 4 +- 3 files changed, 50 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c index a460b71f5857..d7e022bb9df5 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c @@ -164,7 +164,7 @@ static void arm_smmu_test_make_cdtable_ste(struct arm_smmu_ste *ste, .smmu = &smmu, }; - arm_smmu_make_cdtable_ste(ste, &master, true); + arm_smmu_make_cdtable_ste(ste, &master, true, STRTAB_STE_1_S1DSS_SSID0); } static void arm_smmu_v3_write_ste_test_bypass_to_abort(struct kunit *test) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 2325a3fad67e..931da9b327fe 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -991,6 +991,14 @@ void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits) STRTAB_STE_1_S1STALLD | STRTAB_STE_1_STRW | STRTAB_STE_1_EATS); used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID); + + /* + * See 13.5 Summary of attribute/permission configuration fields + * for the SHCFG behavior. + */ + if (FIELD_GET(STRTAB_STE_1_S1DSS, le64_to_cpu(ent[1])) == + STRTAB_STE_1_S1DSS_BYPASS) + used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG); } /* S2 translates */ @@ -1531,7 +1539,8 @@ EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_bypass_ste); VISIBLE_IF_KUNIT void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target, - struct arm_smmu_master *master, bool ats_enabled) + struct arm_smmu_master *master, bool ats_enabled, + unsigned int s1dss) { struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; struct arm_smmu_device *smmu = master->smmu; @@ -1545,7 +1554,7 @@ void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target, FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax)); target->data[1] = cpu_to_le64( - FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) | + FIELD_PREP(STRTAB_STE_1_S1DSS, s1dss) | FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) | FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) | FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) | @@ -1556,6 +1565,11 @@ void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target, FIELD_PREP(STRTAB_STE_1_EATS, ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0)); + if ((smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR) && + s1dss == STRTAB_STE_1_S1DSS_BYPASS) + target->data[1] |= cpu_to_le64(FIELD_PREP( + STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING)); + if (smmu->features & ARM_SMMU_FEAT_E2H) { /* * To support BTM the streamworld needs to match the @@ -2590,6 +2604,7 @@ struct arm_smmu_attach_state { /* Inputs */ struct iommu_domain *old_domain; struct arm_smmu_master *master; + bool cd_needs_ats; ioasid_t ssid; /* Resulting state */ bool ats_enabled; @@ -2631,7 +2646,7 @@ static int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, */ lockdep_assert_held(&arm_smmu_asid_lock); - if (smmu_domain) { + if (smmu_domain || state->cd_needs_ats) { /* * The SMMU does not support enabling ATS with bypass/abort. * When the STE is in bypass (STE.Config[2:0] == 0b100), ATS @@ -2643,7 +2658,9 @@ static int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, * tables. */ state->ats_enabled = arm_smmu_ats_supported(master); + } + if (smmu_domain) { master_domain = kzalloc(sizeof(*master_domain), GFP_KERNEL); if (!master_domain) return -ENOMEM; @@ -2771,7 +2788,8 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) arm_smmu_make_s1_cd(&target_cd, master, smmu_domain); arm_smmu_write_cd_entry(master, IOMMU_NO_PASID, cdptr, &target_cd); - arm_smmu_make_cdtable_ste(&target, master, state.ats_enabled); + arm_smmu_make_cdtable_ste(&target, master, state.ats_enabled, + STRTAB_STE_1_S1DSS_SSID0); arm_smmu_install_ste_for_dev(master, &target); break; } @@ -2845,8 +2863,10 @@ static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid, mutex_unlock(&arm_smmu_asid_lock); } -static int arm_smmu_attach_dev_ste(struct iommu_domain *domain, - struct device *dev, struct arm_smmu_ste *ste) +static void arm_smmu_attach_dev_ste(struct iommu_domain *domain, + struct device *dev, + struct arm_smmu_ste *ste, + unsigned int s1dss) { struct arm_smmu_master *master = dev_iommu_priv_get(dev); struct arm_smmu_attach_state state = { @@ -2855,16 +2875,28 @@ static int arm_smmu_attach_dev_ste(struct iommu_domain *domain, .ssid = IOMMU_NO_PASID, }; - if (arm_smmu_ssids_in_use(&master->cd_table)) - return -EBUSY; - /* * Do not allow any ASID to be changed while are working on the STE, * otherwise we could miss invalidations. */ mutex_lock(&arm_smmu_asid_lock); - arm_smmu_attach_prepare(&state, domain); + /* + * If the CD table is not in use we can use the provided STE, otherwise + * we use a cdtable STE with the provided S1DSS. + */ + if (arm_smmu_ssids_in_use(&master->cd_table)) { + /* + * If a CD table has to be present then we need to run with ATS + * on even though the RID will fail ATS queries with UR. This is + * because we have no idea what the PASID's need. + */ + state.cd_needs_ats = true; + arm_smmu_attach_prepare(&state, domain); + arm_smmu_make_cdtable_ste(ste, master, state.ats_enabled, s1dss); + } else { + arm_smmu_attach_prepare(&state, domain); + } arm_smmu_install_ste_for_dev(master, ste); arm_smmu_attach_commit(&state); mutex_unlock(&arm_smmu_asid_lock); @@ -2875,7 +2907,6 @@ static int arm_smmu_attach_dev_ste(struct iommu_domain *domain, * descriptor from arm_smmu_share_asid(). */ arm_smmu_clear_cd(master, IOMMU_NO_PASID); - return 0; } static int arm_smmu_attach_dev_identity(struct iommu_domain *domain, @@ -2885,7 +2916,8 @@ static int arm_smmu_attach_dev_identity(struct iommu_domain *domain, struct arm_smmu_master *master = dev_iommu_priv_get(dev); arm_smmu_make_bypass_ste(master->smmu, &ste); - return arm_smmu_attach_dev_ste(domain, dev, &ste); + arm_smmu_attach_dev_ste(domain, dev, &ste, STRTAB_STE_1_S1DSS_BYPASS); + return 0; } static const struct iommu_domain_ops arm_smmu_identity_ops = { @@ -2903,7 +2935,9 @@ static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain, struct arm_smmu_ste ste; arm_smmu_make_abort_ste(&ste); - return arm_smmu_attach_dev_ste(domain, dev, &ste); + arm_smmu_attach_dev_ste(domain, dev, &ste, + STRTAB_STE_1_S1DSS_TERMINATE); + return 0; } static const struct iommu_domain_ops arm_smmu_blocked_ops = { diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 7347e842f8a8..e096552a7e26 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -761,8 +761,8 @@ void arm_smmu_make_abort_ste(struct arm_smmu_ste *target); void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu, struct arm_smmu_ste *target); void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target, - struct arm_smmu_master *master, - bool ats_enabled); + struct arm_smmu_master *master, bool ats_enabled, + unsigned int s1dss); void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, struct arm_smmu_master *master, struct arm_smmu_domain *smmu_domain, -- Gitee From 8013e6b201406435a01f18ddffbd5af8c42657fe Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 25 Jun 2024 09:37:43 -0300 Subject: [PATCH 091/143] iommu/arm-smmu-v3: Test the STE S1DSS functionality ANBZ: #13617 commit 3b5302cbb06af6b62022360066944a1ff6aea0d1 upstream. S1DSS brings in quite a few new transition pairs that are interesting. Test to/from S1DSS_BYPASS <-> S1DSS_SSID0, and BYPASS <-> S1DSS_SSID0. Test a contrived non-hitless flow to make sure that the logic works. Tested-by: Nicolin Chen Signed-off-by: Michael Shavit Reviewed-by: Nicolin Chen Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/12-v9-5cd718286059+79186-smmuv3_newapi_p2b_jgg@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c | 113 +++++++++++++++++- 1 file changed, 108 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c index d7e022bb9df5..e0fce31eba54 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c @@ -144,6 +144,14 @@ static void arm_smmu_v3_test_ste_expect_transition( KUNIT_EXPECT_MEMEQ(test, target->data, cur_copy.data, sizeof(cur_copy)); } +static void arm_smmu_v3_test_ste_expect_non_hitless_transition( + struct kunit *test, const struct arm_smmu_ste *cur, + const struct arm_smmu_ste *target, unsigned int num_syncs_expected) +{ + arm_smmu_v3_test_ste_expect_transition(test, cur, target, + num_syncs_expected, false); +} + static void arm_smmu_v3_test_ste_expect_hitless_transition( struct kunit *test, const struct arm_smmu_ste *cur, const struct arm_smmu_ste *target, unsigned int num_syncs_expected) @@ -155,6 +163,7 @@ static void arm_smmu_v3_test_ste_expect_hitless_transition( static const dma_addr_t fake_cdtab_dma_addr = 0xF0F0F0F0F0F0; static void arm_smmu_test_make_cdtable_ste(struct arm_smmu_ste *ste, + unsigned int s1dss, const dma_addr_t dma_addr) { struct arm_smmu_master master = { @@ -164,7 +173,7 @@ static void arm_smmu_test_make_cdtable_ste(struct arm_smmu_ste *ste, .smmu = &smmu, }; - arm_smmu_make_cdtable_ste(ste, &master, true, STRTAB_STE_1_S1DSS_SSID0); + arm_smmu_make_cdtable_ste(ste, &master, true, s1dss); } static void arm_smmu_v3_write_ste_test_bypass_to_abort(struct kunit *test) @@ -194,7 +203,8 @@ static void arm_smmu_v3_write_ste_test_cdtable_to_abort(struct kunit *test) { struct arm_smmu_ste ste; - arm_smmu_test_make_cdtable_ste(&ste, fake_cdtab_dma_addr); + arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr); arm_smmu_v3_test_ste_expect_hitless_transition(test, &ste, &abort_ste, NUM_EXPECTED_SYNCS(2)); } @@ -203,7 +213,8 @@ static void arm_smmu_v3_write_ste_test_abort_to_cdtable(struct kunit *test) { struct arm_smmu_ste ste; - arm_smmu_test_make_cdtable_ste(&ste, fake_cdtab_dma_addr); + arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr); arm_smmu_v3_test_ste_expect_hitless_transition(test, &abort_ste, &ste, NUM_EXPECTED_SYNCS(2)); } @@ -212,7 +223,8 @@ static void arm_smmu_v3_write_ste_test_cdtable_to_bypass(struct kunit *test) { struct arm_smmu_ste ste; - arm_smmu_test_make_cdtable_ste(&ste, fake_cdtab_dma_addr); + arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr); arm_smmu_v3_test_ste_expect_hitless_transition(test, &ste, &bypass_ste, NUM_EXPECTED_SYNCS(3)); } @@ -221,11 +233,54 @@ static void arm_smmu_v3_write_ste_test_bypass_to_cdtable(struct kunit *test) { struct arm_smmu_ste ste; - arm_smmu_test_make_cdtable_ste(&ste, fake_cdtab_dma_addr); + arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr); arm_smmu_v3_test_ste_expect_hitless_transition(test, &bypass_ste, &ste, NUM_EXPECTED_SYNCS(3)); } +static void arm_smmu_v3_write_ste_test_cdtable_s1dss_change(struct kunit *test) +{ + struct arm_smmu_ste ste; + struct arm_smmu_ste s1dss_bypass; + + arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr); + arm_smmu_test_make_cdtable_ste(&s1dss_bypass, STRTAB_STE_1_S1DSS_BYPASS, + fake_cdtab_dma_addr); + + /* + * Flipping s1dss on a CD table STE only involves changes to the second + * qword of an STE and can be done in a single write. + */ + arm_smmu_v3_test_ste_expect_hitless_transition( + test, &ste, &s1dss_bypass, NUM_EXPECTED_SYNCS(1)); + arm_smmu_v3_test_ste_expect_hitless_transition( + test, &s1dss_bypass, &ste, NUM_EXPECTED_SYNCS(1)); +} + +static void +arm_smmu_v3_write_ste_test_s1dssbypass_to_stebypass(struct kunit *test) +{ + struct arm_smmu_ste s1dss_bypass; + + arm_smmu_test_make_cdtable_ste(&s1dss_bypass, STRTAB_STE_1_S1DSS_BYPASS, + fake_cdtab_dma_addr); + arm_smmu_v3_test_ste_expect_hitless_transition( + test, &s1dss_bypass, &bypass_ste, NUM_EXPECTED_SYNCS(2)); +} + +static void +arm_smmu_v3_write_ste_test_stebypass_to_s1dssbypass(struct kunit *test) +{ + struct arm_smmu_ste s1dss_bypass; + + arm_smmu_test_make_cdtable_ste(&s1dss_bypass, STRTAB_STE_1_S1DSS_BYPASS, + fake_cdtab_dma_addr); + arm_smmu_v3_test_ste_expect_hitless_transition( + test, &bypass_ste, &s1dss_bypass, NUM_EXPECTED_SYNCS(2)); +} + static void arm_smmu_test_make_s2_ste(struct arm_smmu_ste *ste, bool ats_enabled) { @@ -285,6 +340,48 @@ static void arm_smmu_v3_write_ste_test_bypass_to_s2(struct kunit *test) NUM_EXPECTED_SYNCS(2)); } +static void arm_smmu_v3_write_ste_test_s1_to_s2(struct kunit *test) +{ + struct arm_smmu_ste s1_ste; + struct arm_smmu_ste s2_ste; + + arm_smmu_test_make_cdtable_ste(&s1_ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr); + arm_smmu_test_make_s2_ste(&s2_ste, true); + arm_smmu_v3_test_ste_expect_hitless_transition(test, &s1_ste, &s2_ste, + NUM_EXPECTED_SYNCS(3)); +} + +static void arm_smmu_v3_write_ste_test_s2_to_s1(struct kunit *test) +{ + struct arm_smmu_ste s1_ste; + struct arm_smmu_ste s2_ste; + + arm_smmu_test_make_cdtable_ste(&s1_ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr); + arm_smmu_test_make_s2_ste(&s2_ste, true); + arm_smmu_v3_test_ste_expect_hitless_transition(test, &s2_ste, &s1_ste, + NUM_EXPECTED_SYNCS(3)); +} + +static void arm_smmu_v3_write_ste_test_non_hitless(struct kunit *test) +{ + struct arm_smmu_ste ste; + struct arm_smmu_ste ste_2; + + /* + * Although no flow resembles this in practice, one way to force an STE + * update to be non-hitless is to change its CD table pointer as well as + * s1 dss field in the same update. + */ + arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr); + arm_smmu_test_make_cdtable_ste(&ste_2, STRTAB_STE_1_S1DSS_BYPASS, + 0x4B4B4b4B4B); + arm_smmu_v3_test_ste_expect_non_hitless_transition( + test, &ste, &ste_2, NUM_EXPECTED_SYNCS(3)); +} + static void arm_smmu_v3_test_cd_expect_transition( struct kunit *test, const struct arm_smmu_cd *cur, const struct arm_smmu_cd *target, unsigned int num_syncs_expected, @@ -438,10 +535,16 @@ static struct kunit_case arm_smmu_v3_test_cases[] = { KUNIT_CASE(arm_smmu_v3_write_ste_test_abort_to_cdtable), KUNIT_CASE(arm_smmu_v3_write_ste_test_cdtable_to_bypass), KUNIT_CASE(arm_smmu_v3_write_ste_test_bypass_to_cdtable), + KUNIT_CASE(arm_smmu_v3_write_ste_test_cdtable_s1dss_change), + KUNIT_CASE(arm_smmu_v3_write_ste_test_s1dssbypass_to_stebypass), + KUNIT_CASE(arm_smmu_v3_write_ste_test_stebypass_to_s1dssbypass), KUNIT_CASE(arm_smmu_v3_write_ste_test_s2_to_abort), KUNIT_CASE(arm_smmu_v3_write_ste_test_abort_to_s2), KUNIT_CASE(arm_smmu_v3_write_ste_test_s2_to_bypass), KUNIT_CASE(arm_smmu_v3_write_ste_test_bypass_to_s2), + KUNIT_CASE(arm_smmu_v3_write_ste_test_s1_to_s2), + KUNIT_CASE(arm_smmu_v3_write_ste_test_s2_to_s1), + KUNIT_CASE(arm_smmu_v3_write_ste_test_non_hitless), KUNIT_CASE(arm_smmu_v3_write_cd_test_s1_clear), KUNIT_CASE(arm_smmu_v3_write_cd_test_s1_change_asid), KUNIT_CASE(arm_smmu_v3_write_cd_test_sva_clear), -- Gitee From 80331bf032dcbfd4016e60900066bc23e6a76003 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 25 Jun 2024 09:37:44 -0300 Subject: [PATCH 092/143] iommu/arm-smmu-v3: Allow a PASID to be set when RID is IDENTITY/BLOCKED ANBZ: #13617 commit 8ee9175c25827240dd84a7adffbfa9c16938ac5d upstream. If the STE doesn't point to the CD table we can upgrade it by reprogramming the STE with the appropriate S1DSS. We may also need to turn on ATS at the same time. Keep track if the installed STE is pointing at the cd_table and the ATS state to trigger this path. Tested-by: Nicolin Chen Tested-by: Shameer Kolothum Reviewed-by: Nicolin Chen Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/13-v9-5cd718286059+79186-smmuv3_newapi_p2b_jgg@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 49 ++++++++++++++++++++- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 +- 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 931da9b327fe..576554a489fe 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2446,6 +2446,9 @@ static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master, master->cd_table.in_ste = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(target->data[0])) == STRTAB_STE_0_CFG_S1_TRANS; + master->ste_ats_enabled = + FIELD_GET(STRTAB_STE_1_EATS, le64_to_cpu(target->data[1])) == + STRTAB_STE_1_EATS_TRANS; for (i = 0; i < master->num_streams; ++i) { u32 sid = master->streams[i].id; @@ -2806,10 +2809,36 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) return 0; } +static void arm_smmu_update_ste(struct arm_smmu_master *master, + struct iommu_domain *sid_domain, + bool ats_enabled) +{ + unsigned int s1dss = STRTAB_STE_1_S1DSS_TERMINATE; + struct arm_smmu_ste ste; + + if (master->cd_table.in_ste && master->ste_ats_enabled == ats_enabled) + return; + + if (sid_domain->type == IOMMU_DOMAIN_IDENTITY) + s1dss = STRTAB_STE_1_S1DSS_BYPASS; + else + WARN_ON(sid_domain->type != IOMMU_DOMAIN_BLOCKED); + + /* + * Change the STE into a cdtable one with SID IDENTITY/BLOCKED behavior + * using s1dss if necessary. If the cd_table is already installed then + * the S1DSS is correct and this will just update the EATS. Otherwise it + * installs the entire thing. This will be hitless. + */ + arm_smmu_make_cdtable_ste(&ste, master, ats_enabled, s1dss); + arm_smmu_install_ste_for_dev(master, &ste); +} + int arm_smmu_set_pasid(struct arm_smmu_master *master, struct arm_smmu_domain *smmu_domain, ioasid_t pasid, const struct arm_smmu_cd *cd) { + struct iommu_domain *sid_domain = iommu_get_domain_for_dev(master->dev); struct arm_smmu_attach_state state = { .master = master, /* @@ -2826,8 +2855,10 @@ int arm_smmu_set_pasid(struct arm_smmu_master *master, if (smmu_domain->smmu != master->smmu) return -EINVAL; - if (!master->cd_table.in_ste) - return -ENODEV; + if (!master->cd_table.in_ste && + sid_domain->type != IOMMU_DOMAIN_IDENTITY && + sid_domain->type != IOMMU_DOMAIN_BLOCKED) + return -EINVAL; cdptr = arm_smmu_alloc_cd_ptr(master, pasid); if (!cdptr) @@ -2839,6 +2870,7 @@ int arm_smmu_set_pasid(struct arm_smmu_master *master, goto out_unlock; arm_smmu_write_cd_entry(master, pasid, cdptr, cd); + arm_smmu_update_ste(master, sid_domain, state.ats_enabled); arm_smmu_attach_commit(&state); @@ -2861,6 +2893,19 @@ static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid, arm_smmu_atc_inv_master(master, pasid); arm_smmu_remove_master_domain(master, &smmu_domain->domain, pasid); mutex_unlock(&arm_smmu_asid_lock); + + /* + * When the last user of the CD table goes away downgrade the STE back + * to a non-cd_table one. + */ + if (!arm_smmu_ssids_in_use(&master->cd_table)) { + struct iommu_domain *sid_domain = + iommu_get_domain_for_dev(master->dev); + + if (sid_domain->type == IOMMU_DOMAIN_IDENTITY || + sid_domain->type == IOMMU_DOMAIN_BLOCKED) + sid_domain->ops->attach_dev(sid_domain, dev); + } } static void arm_smmu_attach_dev_ste(struct iommu_domain *domain, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index e096552a7e26..3cddb6256428 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -705,7 +705,8 @@ struct arm_smmu_master { /* Locked by the iommu core using the group mutex */ struct arm_smmu_ctx_desc_cfg cd_table; unsigned int num_streams; - bool ats_enabled; + bool ats_enabled : 1; + bool ste_ats_enabled : 1; bool stall_enabled; bool sva_enabled; bool iopf_enabled; -- Gitee From 61f6159295b2ca1444bceb36c974947bf579e868 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 25 Jun 2024 09:37:45 -0300 Subject: [PATCH 093/143] iommu/arm-smmu-v3: Allow setting a S1 domain to a PASID ANBZ: #13617 commit f3b273b7c7e42ff7ef5b6063834d768d33c7ba79 upstream. The SVA cleanup made the SSID logic entirely general so all we need to do is call it with the correct cd table entry for a S1 domain. This is slightly tricky because of the ASID and how the locking works, the simple fix is to just update the ASID once we get the right locks. Tested-by: Nicolin Chen Tested-by: Shameer Kolothum Reviewed-by: Nicolin Chen Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/14-v9-5cd718286059+79186-smmuv3_newapi_p2b_jgg@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 41 ++++++++++++++++++++- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 2 +- 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 576554a489fe..40f759c58824 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2809,6 +2809,36 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) return 0; } +static int arm_smmu_s1_set_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t id) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + struct arm_smmu_device *smmu = master->smmu; + struct arm_smmu_cd target_cd; + int ret = 0; + + mutex_lock(&smmu_domain->init_mutex); + if (!smmu_domain->smmu) + ret = arm_smmu_domain_finalise(smmu_domain, smmu); + else if (smmu_domain->smmu != smmu) + ret = -EINVAL; + mutex_unlock(&smmu_domain->init_mutex); + if (ret) + return ret; + + if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1) + return -EINVAL; + + /* + * We can read cd.asid outside the lock because arm_smmu_set_pasid() + * will fix it + */ + arm_smmu_make_s1_cd(&target_cd, master, smmu_domain); + return arm_smmu_set_pasid(master, to_smmu_domain(domain), id, + &target_cd); +} + static void arm_smmu_update_ste(struct arm_smmu_master *master, struct iommu_domain *sid_domain, bool ats_enabled) @@ -2836,7 +2866,7 @@ static void arm_smmu_update_ste(struct arm_smmu_master *master, int arm_smmu_set_pasid(struct arm_smmu_master *master, struct arm_smmu_domain *smmu_domain, ioasid_t pasid, - const struct arm_smmu_cd *cd) + struct arm_smmu_cd *cd) { struct iommu_domain *sid_domain = iommu_get_domain_for_dev(master->dev); struct arm_smmu_attach_state state = { @@ -2869,6 +2899,14 @@ int arm_smmu_set_pasid(struct arm_smmu_master *master, if (ret) goto out_unlock; + /* + * We don't want to obtain to the asid_lock too early, so fix up the + * caller set ASID under the lock in case it changed. + */ + cd->data[0] &= ~cpu_to_le64(CTXDESC_CD_0_ASID); + cd->data[0] |= cpu_to_le64( + FIELD_PREP(CTXDESC_CD_0_ASID, smmu_domain->cd.asid)); + arm_smmu_write_cd_entry(master, pasid, cdptr, cd); arm_smmu_update_ste(master, sid_domain, state.ats_enabled); @@ -3381,6 +3419,7 @@ static struct iommu_ops arm_smmu_ops = { .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = arm_smmu_attach_dev, + .set_dev_pasid = arm_smmu_s1_set_dev_pasid, .map_pages = arm_smmu_map_pages, .unmap_pages = arm_smmu_unmap_pages, .flush_iotlb_all = arm_smmu_flush_iotlb_all, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 3cddb6256428..2880d4b18a34 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -801,7 +801,7 @@ void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid, int arm_smmu_set_pasid(struct arm_smmu_master *master, struct arm_smmu_domain *smmu_domain, ioasid_t pasid, - const struct arm_smmu_cd *cd); + struct arm_smmu_cd *cd); void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid); void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, -- Gitee From a21de4053fff208934c86e4f25ffb5167a0bb59c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 10 Jun 2024 21:31:10 -0300 Subject: [PATCH 094/143] iommu/arm-smmu-v3: Do not zero the strtab twice ANBZ: #13617 commit c84c5ab76c9c04b5f1c8cc66ee9313198e89bb11 upstream. dmam_alloc_coherent() already returns zero'd memory so cfg->strtab.l1_desc (the list of DMA addresses for the L2 entries) is already zero'd. arm_smmu_init_l1_strtab() goes through and calls arm_smmu_write_strtab_l1_desc() on the newly allocated (and zero'd) struct arm_smmu_strtab_l1_desc, which ends up computing 'val = 0' and zeroing it again. Remove arm_smmu_init_l1_strtab() and just call devm_kcalloc() from arm_smmu_init_strtab_2lvl to allocate the companion struct. Tested-by: Nicolin Chen Reviewed-by: Mostafa Saleh Signed-off-by: Jason Gunthorpe Reviewed-by: Nicolin Chen Link: https://lore.kernel.org/r/1-v2-318ed5f6983b+198f-smmuv3_tidy_jgg@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 26 +++++---------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 40f759c58824..a0dad3335913 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3528,25 +3528,6 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu) PRIQ_ENT_DWORDS, "priq"); } -static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu) -{ - unsigned int i; - struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; - void *strtab = smmu->strtab_cfg.strtab; - - cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents, - sizeof(*cfg->l1_desc), GFP_KERNEL); - if (!cfg->l1_desc) - return -ENOMEM; - - for (i = 0; i < cfg->num_l1_ents; ++i) { - arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]); - strtab += STRTAB_L1_DESC_DWORDS << 3; - } - - return 0; -} - static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) { void *strtab; @@ -3582,7 +3563,12 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT); cfg->strtab_base_cfg = reg; - return arm_smmu_init_l1_strtab(smmu); + cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents, + sizeof(*cfg->l1_desc), GFP_KERNEL); + if (!cfg->l1_desc) + return -ENOMEM; + + return 0; } static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu) -- Gitee From 2a5622b45874aee14fc2c423e54e42e9e3c61188 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 10 Jun 2024 21:31:11 -0300 Subject: [PATCH 095/143] iommu/arm-smmu-v3: Shrink the strtab l1_desc array ANBZ: #13617 commit a4d75360f7a6d979edd66af577847b0f4dbf4377 upstream. The top of the 2 level stream table is (at most) 128k entries big, and two high order allocations are required. One of __le64 which is programmed into the HW (1M), and one of struct arm_smmu_strtab_l1_desc which holds the CPU pointer (3M). There is no reason to store the l2ptr_dma as nothing reads it. devm stores a copy of it and the DMA memory will be freed via devm mechanisms. span is a constant of 8+1. Remove both. This removes 16 bytes from each arm_smmu_l1_ctx_desc and saves up to 2M of memory per iommu instance. Tested-by: Nicolin Chen Reviewed-by: Mostafa Saleh Signed-off-by: Jason Gunthorpe Reviewed-by: Nicolin Chen Link: https://lore.kernel.org/r/2-v2-318ed5f6983b+198f-smmuv3_tidy_jgg@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 13 ++++++------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 --- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index a0dad3335913..5f74d4d8dd6b 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1448,13 +1448,12 @@ static void arm_smmu_free_cd_tables(struct arm_smmu_master *master) } /* Stream table manipulation functions */ -static void -arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc) +static void arm_smmu_write_strtab_l1_desc(__le64 *dst, dma_addr_t l2ptr_dma) { u64 val = 0; - val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span); - val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK; + val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, STRTAB_SPLIT + 1); + val |= l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK; /* The HW has 64 bit atomicity with stores to the L2 STE table */ WRITE_ONCE(*dst, cpu_to_le64(val)); @@ -1663,6 +1662,7 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) { size_t size; void *strtab; + dma_addr_t l2ptr_dma; struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT]; @@ -1672,8 +1672,7 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3); strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS]; - desc->span = STRTAB_SPLIT + 1; - desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma, + desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &l2ptr_dma, GFP_KERNEL); if (!desc->l2ptr) { dev_err(smmu->dev, @@ -1683,7 +1682,7 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) } arm_smmu_init_initial_stes(desc->l2ptr, 1 << STRTAB_SPLIT); - arm_smmu_write_strtab_l1_desc(strtab, desc); + arm_smmu_write_strtab_l1_desc(strtab, l2ptr_dma); return 0; } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 2880d4b18a34..60d2372935c5 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -579,10 +579,7 @@ struct arm_smmu_priq { /* High-level stream table and context descriptor structures */ struct arm_smmu_strtab_l1_desc { - u8 span; - struct arm_smmu_ste *l2ptr; - dma_addr_t l2ptr_dma; }; struct arm_smmu_ctx_desc { -- Gitee From 045a6b5191ece60af37336cf2e9056e6210330ff Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Thu, 13 Jun 2024 12:44:17 -0700 Subject: [PATCH 096/143] iommu/arm-smmu-v3: add missing MODULE_DESCRIPTION() macro ANBZ: #13617 commit a35f443d837ffcd5e73b64c13a46d12701839213 upstream. With ARCH=arm64, make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.o Add the missing invocation of the MODULE_DESCRIPTION() macro. Signed-off-by: Jeff Johnson Fixes: da55da5a42d4 ("iommu/arm-smmu-v3: Make the kunit into a module") Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240613-md-arm64-drivers-iommu-arm-arm-smmu-v3-v1-1-0e9f7584a5c8@quicinc.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c index e0fce31eba54..cceb737a7001 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c @@ -567,4 +567,5 @@ static struct kunit_suite arm_smmu_v3_test_module = { kunit_test_suites(&arm_smmu_v3_test_module); MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); +MODULE_DESCRIPTION("KUnit tests for arm-smmu-v3 driver"); MODULE_LICENSE("GPL v2"); -- Gitee From 21054f48b564d4c2bc9c266184d43f162dee7a0a Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 1 Jul 2024 09:20:10 -0700 Subject: [PATCH 097/143] iommu/arm-smmu: Add CB prefix to register bitfields ANBZ: #13617 commit d0166022be375ce72e7b220d688740b1c4424ad5 upstream. For consistency, add the "CB" prefix to the bitfield defines for context registers. Signed-off-by: Rob Clark Reviewed-by: Pranjal Shrivastava Link: https://lore.kernel.org/r/20240701162025.375134-2-robdclark@gmail.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c | 2 +- .../iommu/arm/arm-smmu/arm-smmu-qcom-debug.c | 18 +++---- drivers/iommu/arm/arm-smmu/arm-smmu.c | 8 +-- drivers/iommu/arm/arm-smmu/arm-smmu.h | 50 +++++++++---------- drivers/iommu/arm/arm-smmu/qcom_iommu.c | 4 +- 5 files changed, 41 insertions(+), 41 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c index 957d988b6d83..4b2994b6126d 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c @@ -200,7 +200,7 @@ static irqreturn_t nvidia_smmu_context_fault_bank(int irq, void __iomem *cb_base = nvidia_smmu_page(smmu, inst, smmu->numpage + idx); fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR); - if (!(fsr & ARM_SMMU_FSR_FAULT)) + if (!(fsr & ARM_SMMU_CB_FSR_FAULT)) return IRQ_NONE; fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0); diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c index 552199cbd9e2..e4ee78fb6a66 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c @@ -141,7 +141,7 @@ static int qcom_tbu_halt(struct qcom_tbu *tbu, struct arm_smmu_domain *smmu_doma writel_relaxed(val, tbu->base + DEBUG_SID_HALT_REG); fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); - if ((fsr & ARM_SMMU_FSR_FAULT) && (fsr & ARM_SMMU_FSR_SS)) { + if ((fsr & ARM_SMMU_CB_FSR_FAULT) && (fsr & ARM_SMMU_CB_FSR_SS)) { u32 sctlr_orig, sctlr; /* @@ -298,7 +298,7 @@ static phys_addr_t qcom_iova_to_phys(struct arm_smmu_domain *smmu_domain, arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr); fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); - if (fsr & ARM_SMMU_FSR_FAULT) { + if (fsr & ARM_SMMU_CB_FSR_FAULT) { /* Clear pending interrupts */ arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr); @@ -306,7 +306,7 @@ static phys_addr_t qcom_iova_to_phys(struct arm_smmu_domain *smmu_domain, * TBU halt takes care of resuming any stalled transcation. * Kept it here for completeness sake. */ - if (fsr & ARM_SMMU_FSR_SS) + if (fsr & ARM_SMMU_CB_FSR_SS) arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME, ARM_SMMU_RESUME_TERMINATE); } @@ -320,11 +320,11 @@ static phys_addr_t qcom_iova_to_phys(struct arm_smmu_domain *smmu_domain, phys = qcom_tbu_trigger_atos(smmu_domain, tbu, iova, sid); fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); - if (fsr & ARM_SMMU_FSR_FAULT) { + if (fsr & ARM_SMMU_CB_FSR_FAULT) { /* Clear pending interrupts */ arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr); - if (fsr & ARM_SMMU_FSR_SS) + if (fsr & ARM_SMMU_CB_FSR_SS) arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME, ARM_SMMU_RESUME_TERMINATE); } @@ -394,7 +394,7 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev) DEFAULT_RATELIMIT_BURST); fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); - if (!(fsr & ARM_SMMU_FSR_FAULT)) + if (!(fsr & ARM_SMMU_CB_FSR_FAULT)) return IRQ_NONE; fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0); @@ -403,7 +403,7 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev) if (list_empty(&tbu_list)) { ret = report_iommu_fault(&smmu_domain->domain, NULL, iova, - fsynr & ARM_SMMU_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); + fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); if (ret == -ENOSYS) dev_err_ratelimited(smmu->dev, @@ -417,7 +417,7 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev) phys_soft = ops->iova_to_phys(ops, iova); tmp = report_iommu_fault(&smmu_domain->domain, NULL, iova, - fsynr & ARM_SMMU_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); + fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); if (!tmp || tmp == -EBUSY) { dev_dbg(smmu->dev, "Context fault handled by client: iova=0x%08lx, fsr=0x%x, fsynr=0x%x, cb=%d\n", @@ -481,7 +481,7 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev) arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr); /* Retry or terminate any stalled transactions */ - if (fsr & ARM_SMMU_FSR_SS) + if (fsr & ARM_SMMU_CB_FSR_SS) arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME, resume); } diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index bd09113cdbc6..094345dca818 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -420,7 +420,7 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev) int ret; fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); - if (!(fsr & ARM_SMMU_FSR_FAULT)) + if (!(fsr & ARM_SMMU_CB_FSR_FAULT)) return IRQ_NONE; fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0); @@ -428,7 +428,7 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev) cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx)); ret = report_iommu_fault(&smmu_domain->domain, NULL, iova, - fsynr & ARM_SMMU_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); + fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); if (ret == -ENOSYS) dev_err_ratelimited(smmu->dev, @@ -1311,7 +1311,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va); reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR; - if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ARM_SMMU_ATSR_ACTIVE), + if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ARM_SMMU_CB_ATSR_ACTIVE), 5, 50)) { spin_unlock_irqrestore(&smmu_domain->cb_lock, flags); dev_err(dev, @@ -1676,7 +1676,7 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu) /* Make sure all context banks are disabled and clear CB_FSR */ for (i = 0; i < smmu->num_context_banks; ++i) { arm_smmu_write_context_bank(smmu, i); - arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT); + arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, ARM_SMMU_CB_FSR_FAULT); } /* Invalidate the TLB, just in case */ diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h index 4765c6945c34..b04a00126a12 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h @@ -196,34 +196,34 @@ enum arm_smmu_cbar_type { #define ARM_SMMU_CB_PAR_F BIT(0) #define ARM_SMMU_CB_FSR 0x58 -#define ARM_SMMU_FSR_MULTI BIT(31) -#define ARM_SMMU_FSR_SS BIT(30) -#define ARM_SMMU_FSR_UUT BIT(8) -#define ARM_SMMU_FSR_ASF BIT(7) -#define ARM_SMMU_FSR_TLBLKF BIT(6) -#define ARM_SMMU_FSR_TLBMCF BIT(5) -#define ARM_SMMU_FSR_EF BIT(4) -#define ARM_SMMU_FSR_PF BIT(3) -#define ARM_SMMU_FSR_AFF BIT(2) -#define ARM_SMMU_FSR_TF BIT(1) - -#define ARM_SMMU_FSR_IGN (ARM_SMMU_FSR_AFF | \ - ARM_SMMU_FSR_ASF | \ - ARM_SMMU_FSR_TLBMCF | \ - ARM_SMMU_FSR_TLBLKF) - -#define ARM_SMMU_FSR_FAULT (ARM_SMMU_FSR_MULTI | \ - ARM_SMMU_FSR_SS | \ - ARM_SMMU_FSR_UUT | \ - ARM_SMMU_FSR_EF | \ - ARM_SMMU_FSR_PF | \ - ARM_SMMU_FSR_TF | \ - ARM_SMMU_FSR_IGN) +#define ARM_SMMU_CB_FSR_MULTI BIT(31) +#define ARM_SMMU_CB_FSR_SS BIT(30) +#define ARM_SMMU_CB_FSR_UUT BIT(8) +#define ARM_SMMU_CB_FSR_ASF BIT(7) +#define ARM_SMMU_CB_FSR_TLBLKF BIT(6) +#define ARM_SMMU_CB_FSR_TLBMCF BIT(5) +#define ARM_SMMU_CB_FSR_EF BIT(4) +#define ARM_SMMU_CB_FSR_PF BIT(3) +#define ARM_SMMU_CB_FSR_AFF BIT(2) +#define ARM_SMMU_CB_FSR_TF BIT(1) + +#define ARM_SMMU_CB_FSR_IGN (ARM_SMMU_CB_FSR_AFF | \ + ARM_SMMU_CB_FSR_ASF | \ + ARM_SMMU_CB_FSR_TLBMCF | \ + ARM_SMMU_CB_FSR_TLBLKF) + +#define ARM_SMMU_CB_FSR_FAULT (ARM_SMMU_CB_FSR_MULTI | \ + ARM_SMMU_CB_FSR_SS | \ + ARM_SMMU_CB_FSR_UUT | \ + ARM_SMMU_CB_FSR_EF | \ + ARM_SMMU_CB_FSR_PF | \ + ARM_SMMU_CB_FSR_TF | \ + ARM_SMMU_CB_FSR_IGN) #define ARM_SMMU_CB_FAR 0x60 #define ARM_SMMU_CB_FSYNR0 0x68 -#define ARM_SMMU_FSYNR0_WNR BIT(4) +#define ARM_SMMU_CB_FSYNR0_WNR BIT(4) #define ARM_SMMU_CB_FSYNR1 0x6c @@ -237,7 +237,7 @@ enum arm_smmu_cbar_type { #define ARM_SMMU_CB_ATS1PR 0x800 #define ARM_SMMU_CB_ATSR 0x8f0 -#define ARM_SMMU_ATSR_ACTIVE BIT(0) +#define ARM_SMMU_CB_ATSR_ACTIVE BIT(0) #define ARM_SMMU_RESUME_TERMINATE BIT(0) diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index e079bb7a993e..b98a7a598b89 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -194,7 +194,7 @@ static irqreturn_t qcom_iommu_fault(int irq, void *dev) fsr = iommu_readl(ctx, ARM_SMMU_CB_FSR); - if (!(fsr & ARM_SMMU_FSR_FAULT)) + if (!(fsr & ARM_SMMU_CB_FSR_FAULT)) return IRQ_NONE; fsynr = iommu_readl(ctx, ARM_SMMU_CB_FSYNR0); @@ -274,7 +274,7 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain, /* Clear context bank fault address fault status registers */ iommu_writel(ctx, ARM_SMMU_CB_FAR, 0); - iommu_writel(ctx, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT); + iommu_writel(ctx, ARM_SMMU_CB_FSR, ARM_SMMU_CB_FSR_FAULT); /* TTBRs */ iommu_writeq(ctx, ARM_SMMU_CB_TTBR0, -- Gitee From dfb1d5e6e88072bf7ec0722455a17cdefba1ed18 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 1 Jul 2024 09:20:11 -0700 Subject: [PATCH 098/143] iommu/arm-smmu-qcom-debug: Do not print for handled faults ANBZ: #13617 commit 55089781ff7724dd10040231a6d8b791cf24afcd upstream. Handled faults can be "normal", don't spam dmesg about them. Signed-off-by: Rob Clark Reviewed-by: Pranjal Shrivastava Link: https://lore.kernel.org/r/20240701162025.375134-3-robdclark@gmail.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c index e4ee78fb6a66..681fbdfc325d 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c @@ -419,10 +419,6 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev) tmp = report_iommu_fault(&smmu_domain->domain, NULL, iova, fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); if (!tmp || tmp == -EBUSY) { - dev_dbg(smmu->dev, - "Context fault handled by client: iova=0x%08lx, fsr=0x%x, fsynr=0x%x, cb=%d\n", - iova, fsr, fsynr, idx); - dev_dbg(smmu->dev, "soft iova-to-phys=%pa\n", &phys_soft); ret = IRQ_HANDLED; resume = ARM_SMMU_RESUME_TERMINATE; } else { -- Gitee From 27724e6283b5e2d85974950fabd44acd34e4dc3a Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 1 Jul 2024 09:20:12 -0700 Subject: [PATCH 099/143] iommu/arm-smmu: Pretty-print context fault related regs ANBZ: #13617 commit d525b0af0c3b8275e6f83fa0c0640338ed90661a upstream. Parse out the bitfields for easier-to-read fault messages. Signed-off-by: Rob Clark Reviewed-by: Pranjal Shrivastava Link: https://lore.kernel.org/r/20240701162025.375134-4-robdclark@gmail.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- .../iommu/arm/arm-smmu/arm-smmu-qcom-debug.c | 52 +++++--------- drivers/iommu/arm/arm-smmu/arm-smmu.c | 70 +++++++++++++++---- drivers/iommu/arm/arm-smmu/arm-smmu.h | 21 ++++++ 3 files changed, 92 insertions(+), 51 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c index 681fbdfc325d..ef93f825f11f 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c @@ -383,64 +383,44 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev) struct arm_smmu_domain *smmu_domain = dev; struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; struct arm_smmu_device *smmu = smmu_domain->smmu; - u32 fsr, fsynr, cbfrsynra, resume = 0; + struct arm_smmu_context_fault_info cfi; + u32 resume = 0; int idx = smmu_domain->cfg.cbndx; phys_addr_t phys_soft; - unsigned long iova; int ret, tmp; static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); - fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); - if (!(fsr & ARM_SMMU_CB_FSR_FAULT)) - return IRQ_NONE; + arm_smmu_read_context_fault_info(smmu, idx, &cfi); - fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0); - iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR); - cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx)); + if (!(cfi.fsr & ARM_SMMU_CB_FSR_FAULT)) + return IRQ_NONE; if (list_empty(&tbu_list)) { - ret = report_iommu_fault(&smmu_domain->domain, NULL, iova, - fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); + ret = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova, + cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); if (ret == -ENOSYS) - dev_err_ratelimited(smmu->dev, - "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n", - fsr, iova, fsynr, cbfrsynra, idx); + arm_smmu_print_context_fault_info(smmu, idx, &cfi); - arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr); + arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, cfi.fsr); return IRQ_HANDLED; } - phys_soft = ops->iova_to_phys(ops, iova); + phys_soft = ops->iova_to_phys(ops, cfi.iova); - tmp = report_iommu_fault(&smmu_domain->domain, NULL, iova, - fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); + tmp = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova, + cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); if (!tmp || tmp == -EBUSY) { ret = IRQ_HANDLED; resume = ARM_SMMU_RESUME_TERMINATE; } else { - phys_addr_t phys_atos = qcom_smmu_verify_fault(smmu_domain, iova, fsr); + phys_addr_t phys_atos = qcom_smmu_verify_fault(smmu_domain, cfi.iova, cfi.fsr); if (__ratelimit(&_rs)) { - dev_err(smmu->dev, - "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n", - fsr, iova, fsynr, cbfrsynra, idx); - dev_err(smmu->dev, - "FSR = %08x [%s%s%s%s%s%s%s%s%s], SID=0x%x\n", - fsr, - (fsr & 0x02) ? "TF " : "", - (fsr & 0x04) ? "AFF " : "", - (fsr & 0x08) ? "PF " : "", - (fsr & 0x10) ? "EF " : "", - (fsr & 0x20) ? "TLBMCF " : "", - (fsr & 0x40) ? "TLBLKF " : "", - (fsr & 0x80) ? "MHF " : "", - (fsr & 0x40000000) ? "SS " : "", - (fsr & 0x80000000) ? "MULTI " : "", - cbfrsynra); + arm_smmu_print_context_fault_info(smmu, idx, &cfi); dev_err(smmu->dev, "soft iova-to-phys=%pa\n", &phys_soft); @@ -474,10 +454,10 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev) */ if (tmp != -EBUSY) { /* Clear the faulting FSR */ - arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr); + arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, cfi.fsr); /* Retry or terminate any stalled transactions */ - if (fsr & ARM_SMMU_CB_FSR_SS) + if (cfi.fsr & ARM_SMMU_CB_FSR_SS) arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME, resume); } diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 094345dca818..5d7feb43f094 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -410,32 +410,72 @@ static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = { .tlb_add_page = arm_smmu_tlb_add_page_s2_v1, }; + +void arm_smmu_read_context_fault_info(struct arm_smmu_device *smmu, int idx, + struct arm_smmu_context_fault_info *cfi) +{ + cfi->iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR); + cfi->fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); + cfi->fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0); + cfi->cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx)); +} + +void arm_smmu_print_context_fault_info(struct arm_smmu_device *smmu, int idx, + const struct arm_smmu_context_fault_info *cfi) +{ + dev_dbg(smmu->dev, + "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n", + cfi->fsr, cfi->iova, cfi->fsynr, cfi->cbfrsynra, idx); + + dev_err(smmu->dev, "FSR = %08x [%s%sFormat=%u%s%s%s%s%s%s%s%s], SID=0x%x\n", + cfi->fsr, + (cfi->fsr & ARM_SMMU_CB_FSR_MULTI) ? "MULTI " : "", + (cfi->fsr & ARM_SMMU_CB_FSR_SS) ? "SS " : "", + (u32)FIELD_GET(ARM_SMMU_CB_FSR_FORMAT, cfi->fsr), + (cfi->fsr & ARM_SMMU_CB_FSR_UUT) ? " UUT" : "", + (cfi->fsr & ARM_SMMU_CB_FSR_ASF) ? " ASF" : "", + (cfi->fsr & ARM_SMMU_CB_FSR_TLBLKF) ? " TLBLKF" : "", + (cfi->fsr & ARM_SMMU_CB_FSR_TLBMCF) ? " TLBMCF" : "", + (cfi->fsr & ARM_SMMU_CB_FSR_EF) ? " EF" : "", + (cfi->fsr & ARM_SMMU_CB_FSR_PF) ? " PF" : "", + (cfi->fsr & ARM_SMMU_CB_FSR_AFF) ? " AFF" : "", + (cfi->fsr & ARM_SMMU_CB_FSR_TF) ? " TF" : "", + cfi->cbfrsynra); + + dev_err(smmu->dev, "FSYNR0 = %08x [S1CBNDX=%u%s%s%s%s%s%s PLVL=%u]\n", + cfi->fsynr, + (u32)FIELD_GET(ARM_SMMU_CB_FSYNR0_S1CBNDX, cfi->fsynr), + (cfi->fsynr & ARM_SMMU_CB_FSYNR0_AFR) ? " AFR" : "", + (cfi->fsynr & ARM_SMMU_CB_FSYNR0_PTWF) ? " PTWF" : "", + (cfi->fsynr & ARM_SMMU_CB_FSYNR0_NSATTR) ? " NSATTR" : "", + (cfi->fsynr & ARM_SMMU_CB_FSYNR0_IND) ? " IND" : "", + (cfi->fsynr & ARM_SMMU_CB_FSYNR0_PNU) ? " PNU" : "", + (cfi->fsynr & ARM_SMMU_CB_FSYNR0_WNR) ? " WNR" : "", + (u32)FIELD_GET(ARM_SMMU_CB_FSYNR0_PLVL, cfi->fsynr)); +} + static irqreturn_t arm_smmu_context_fault(int irq, void *dev) { - u32 fsr, fsynr, cbfrsynra; - unsigned long iova; + struct arm_smmu_context_fault_info cfi; struct arm_smmu_domain *smmu_domain = dev; struct arm_smmu_device *smmu = smmu_domain->smmu; + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); int idx = smmu_domain->cfg.cbndx; int ret; - fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); - if (!(fsr & ARM_SMMU_CB_FSR_FAULT)) - return IRQ_NONE; + arm_smmu_read_context_fault_info(smmu, idx, &cfi); - fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0); - iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR); - cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx)); + if (!(cfi.fsr & ARM_SMMU_CB_FSR_FAULT)) + return IRQ_NONE; - ret = report_iommu_fault(&smmu_domain->domain, NULL, iova, - fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); + ret = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova, + cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); - if (ret == -ENOSYS) - dev_err_ratelimited(smmu->dev, - "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n", - fsr, iova, fsynr, cbfrsynra, idx); + if (ret == -ENOSYS && __ratelimit(&rs)) + arm_smmu_print_context_fault_info(smmu, idx, &cfi); - arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr); + arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, cfi.fsr); return IRQ_HANDLED; } diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h index b04a00126a12..e2aeb511ae90 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h @@ -198,6 +198,7 @@ enum arm_smmu_cbar_type { #define ARM_SMMU_CB_FSR 0x58 #define ARM_SMMU_CB_FSR_MULTI BIT(31) #define ARM_SMMU_CB_FSR_SS BIT(30) +#define ARM_SMMU_CB_FSR_FORMAT GENMASK(10, 9) #define ARM_SMMU_CB_FSR_UUT BIT(8) #define ARM_SMMU_CB_FSR_ASF BIT(7) #define ARM_SMMU_CB_FSR_TLBLKF BIT(6) @@ -223,7 +224,14 @@ enum arm_smmu_cbar_type { #define ARM_SMMU_CB_FAR 0x60 #define ARM_SMMU_CB_FSYNR0 0x68 +#define ARM_SMMU_CB_FSYNR0_PLVL GENMASK(1, 0) #define ARM_SMMU_CB_FSYNR0_WNR BIT(4) +#define ARM_SMMU_CB_FSYNR0_PNU BIT(5) +#define ARM_SMMU_CB_FSYNR0_IND BIT(6) +#define ARM_SMMU_CB_FSYNR0_NSATTR BIT(8) +#define ARM_SMMU_CB_FSYNR0_PTWF BIT(10) +#define ARM_SMMU_CB_FSYNR0_AFR BIT(11) +#define ARM_SMMU_CB_FSYNR0_S1CBNDX GENMASK(23, 16) #define ARM_SMMU_CB_FSYNR1 0x6c @@ -533,4 +541,17 @@ struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu); void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx); int arm_mmu500_reset(struct arm_smmu_device *smmu); +struct arm_smmu_context_fault_info { + unsigned long iova; + u32 fsr; + u32 fsynr; + u32 cbfrsynra; +}; + +void arm_smmu_read_context_fault_info(struct arm_smmu_device *smmu, int idx, + struct arm_smmu_context_fault_info *cfi); + +void arm_smmu_print_context_fault_info(struct arm_smmu_device *smmu, int idx, + const struct arm_smmu_context_fault_info *cfi); + #endif /* _ARM_SMMU_H */ -- Gitee From ac824c291af77e1e5971213be4aa57da284f8c9f Mon Sep 17 00:00:00 2001 From: Zhenhua Huang Date: Tue, 2 Jul 2024 17:01:10 +0800 Subject: [PATCH 100/143] iommu/arm-smmu-qcom: record reason for deferring probe ANBZ: #13617 commit 9796cf9b3eb9a0b9502dfe0b3acf63610090ef44 upstream. To avoid deferring probe smmu driver silently, record reason for it. It can be checked through ../debugfs/devices_deferred as well: /sys/kernel/debug# cat devices_deferred 15000000.iommu arm-smmu: qcom_scm not ready Signed-off-by: Zhenhua Huang Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/1719910870-25079-1-git-send-email-quic_zhenhuah@quicinc.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index ee318cd0e6f1..3a71d710e288 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -497,7 +497,8 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu, /* Check to make sure qcom_scm has finished probing */ if (!qcom_scm_is_available()) - return ERR_PTR(-EPROBE_DEFER); + return ERR_PTR(dev_err_probe(smmu->dev, -EPROBE_DEFER, + "qcom_scm not ready\n")); qsmmu = devm_krealloc(smmu->dev, smmu, sizeof(*qsmmu), GFP_KERNEL); if (!qsmmu) -- Gitee From 411b536fb02a7b386583a9db3e44e0170f44a529 Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Wed, 3 Jul 2024 11:16:00 +0100 Subject: [PATCH 101/143] iommu/arm-smmu-v3: Add support for domain_alloc_user fn ANBZ: #13617 commit 52acd7d8a4130ad4dda6540dbbb821a92e1c0138 upstream. This will be used by iommufd for allocating usr managed domains and is also required when we add support for iommufd based dirty tracking support. Reviewed-by: Jason Gunthorpe Reviewed-by: Nicolin Chen Reviewed-by: Kevin Tian Signed-off-by: Shameer Kolothum Link: https://lore.kernel.org/r/20240703101604.2576-2-shameerali.kolothum.thodi@huawei.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 33 +++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 5f74d4d8dd6b..2dedcfc65c82 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -36,6 +36,8 @@ module_param(disable_msipolling, bool, 0444); MODULE_PARM_DESC(disable_msipolling, "Disable MSI-based polling for CMD_SYNC completion."); +static struct iommu_ops arm_smmu_ops; + enum arm_smmu_msi_index { EVTQ_MSI_INDEX, GERROR_MSI_INDEX, @@ -3031,6 +3033,34 @@ static struct iommu_domain arm_smmu_blocked_domain = { .ops = &arm_smmu_blocked_ops, }; +static struct iommu_domain * +arm_smmu_domain_alloc_user(struct device *dev, u32 flags, + struct iommu_domain *parent, + const struct iommu_user_data *user_data) +{ + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + struct arm_smmu_domain *smmu_domain; + int ret; + + if (flags || parent || user_data) + return ERR_PTR(-EOPNOTSUPP); + + smmu_domain = arm_smmu_domain_alloc(); + if (!smmu_domain) + return ERR_PTR(-ENOMEM); + + smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED; + smmu_domain->domain.ops = arm_smmu_ops.default_domain_ops; + ret = arm_smmu_domain_finalise(smmu_domain, master->smmu); + if (ret) + goto err_free; + return &smmu_domain->domain; + +err_free: + kfree(smmu_domain); + return ERR_PTR(ret); +} + static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) @@ -3195,8 +3225,6 @@ static void arm_smmu_remove_master(struct arm_smmu_master *master) kfree(master->streams); } -static struct iommu_ops arm_smmu_ops; - static struct iommu_device *arm_smmu_probe_device(struct device *dev) { int ret; @@ -3404,6 +3432,7 @@ static struct iommu_ops arm_smmu_ops = { .capable = arm_smmu_capable, .domain_alloc_paging = arm_smmu_domain_alloc_paging, .domain_alloc_sva = arm_smmu_sva_domain_alloc, + .domain_alloc_user = arm_smmu_domain_alloc_user, .probe_device = arm_smmu_probe_device, .release_device = arm_smmu_release_device, .device_group = arm_smmu_device_group, -- Gitee From 55fc40ef07cc8bafb61597f817451c36d5d2aa62 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 3 Jul 2024 11:16:01 +0100 Subject: [PATCH 102/143] iommu/arm-smmu-v3: Add feature detection for HTTU ANBZ: #13617 commit 2f8d6178b4fe3e2f50782fa640921a9ee46b6d6f upstream. If the SMMU supports it and the kernel was built with HTTU support, Probe support for Hardware Translation Table Update (HTTU) which is essentially to enable hardware update of access and dirty flags. Probe and set the smmu::features for Hardware Dirty and Hardware Access bits. This is in preparation, to enable it on the context descriptors of stage 1 format. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Joao Martins Reviewed-by: Jason Gunthorpe Reviewed-by: Ryan Roberts Reviewed-by: Kevin Tian Reviewed-by: Nicolin Chen Signed-off-by: Shameer Kolothum Link: https://lore.kernel.org/r/20240703101604.2576-3-shameerali.kolothum.thodi@huawei.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 32 +++++++++++++++++++++ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 5 ++++ 2 files changed, 37 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 2dedcfc65c82..b133970bd87f 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -4019,6 +4019,28 @@ static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu) } } +static void arm_smmu_get_httu(struct arm_smmu_device *smmu, u32 reg) +{ + u32 fw_features = smmu->features & (ARM_SMMU_FEAT_HA | ARM_SMMU_FEAT_HD); + u32 hw_features = 0; + + switch (FIELD_GET(IDR0_HTTU, reg)) { + case IDR0_HTTU_ACCESS_DIRTY: + hw_features |= ARM_SMMU_FEAT_HD; + fallthrough; + case IDR0_HTTU_ACCESS: + hw_features |= ARM_SMMU_FEAT_HA; + } + + if (smmu->dev->of_node) + smmu->features |= hw_features; + else if (hw_features != fw_features) + /* ACPI IORT sets the HTTU bits */ + dev_warn(smmu->dev, + "IDR0.HTTU features(0x%x) overridden by FW configuration (0x%x)\n", + hw_features, fw_features); +} + static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) { u32 reg; @@ -4079,6 +4101,8 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) smmu->features |= ARM_SMMU_FEAT_E2H; } + arm_smmu_get_httu(smmu, reg); + /* * The coherency feature as set by FW is used in preference to the ID * register, but warn on mismatch. @@ -4274,6 +4298,14 @@ static int arm_smmu_device_acpi_probe(struct platform_device *pdev, if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE) smmu->features |= ARM_SMMU_FEAT_COHERENCY; + switch (FIELD_GET(ACPI_IORT_SMMU_V3_HTTU_OVERRIDE, iort_smmu->flags)) { + case IDR0_HTTU_ACCESS_DIRTY: + smmu->features |= ARM_SMMU_FEAT_HD; + fallthrough; + case IDR0_HTTU_ACCESS: + smmu->features |= ARM_SMMU_FEAT_HA; + } + return 0; } #else diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 60d2372935c5..5c577839d86d 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -33,6 +33,9 @@ #define IDR0_ASID16 (1 << 12) #define IDR0_ATS (1 << 10) #define IDR0_HYP (1 << 9) +#define IDR0_HTTU GENMASK(7, 6) +#define IDR0_HTTU_ACCESS 1 +#define IDR0_HTTU_ACCESS_DIRTY 2 #define IDR0_COHACC (1 << 4) #define IDR0_TTF GENMASK(3, 2) #define IDR0_TTF_AARCH64 2 @@ -650,6 +653,8 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_E2H (1 << 18) #define ARM_SMMU_FEAT_NESTING (1 << 19) #define ARM_SMMU_FEAT_ATTR_TYPES_OVR (1 << 20) +#define ARM_SMMU_FEAT_HA (1 << 21) +#define ARM_SMMU_FEAT_HD (1 << 22) u32 features; #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) -- Gitee From 29b90667effc2475e2b1f484ee84a50c6afed85f Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Wed, 3 Jul 2024 11:16:02 +0100 Subject: [PATCH 103/143] iommu/io-pgtable-arm: Add read_and_clear_dirty() support ANBZ: #13617 commit 4fe88fd8b4aecb7f9680bf898811db76b94095a9 upstream. .read_and_clear_dirty() IOMMU domain op takes care of reading the dirty bits (i.e. PTE has DBM set and AP[2] clear) and marshalling into a bitmap of a given page size. While reading the dirty bits we also set the PTE AP[2] bit to mark it as writeable-clean depending on read_and_clear_dirty() flags. PTE states with respect to DBM bit: DBM bit AP[2]("RDONLY" bit) 1. writable_clean 1 1 2. writable_dirty 1 0 3. read-only 0 1 Reviewed-by: Ryan Roberts Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Shameer Kolothum Link: https://lore.kernel.org/r/20240703101604.2576-4-shameerali.kolothum.thodi@huawei.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/io-pgtable-arm.c | 114 ++++++++++++++++++++++++++++++++- 1 file changed, 112 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 38ebd192e5db..b521e656d4ae 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -76,6 +76,7 @@ #define ARM_LPAE_PTE_NSTABLE (((arm_lpae_iopte)1) << 63) #define ARM_LPAE_PTE_XN (((arm_lpae_iopte)3) << 53) +#define ARM_LPAE_PTE_DBM (((arm_lpae_iopte)1) << 51) #define ARM_LPAE_PTE_AF (((arm_lpae_iopte)1) << 10) #define ARM_LPAE_PTE_SH_NS (((arm_lpae_iopte)0) << 8) #define ARM_LPAE_PTE_SH_OS (((arm_lpae_iopte)2) << 8) @@ -85,7 +86,7 @@ #define ARM_LPAE_PTE_ATTR_LO_MASK (((arm_lpae_iopte)0x3ff) << 2) /* Ignore the contiguous bit for block splitting */ -#define ARM_LPAE_PTE_ATTR_HI_MASK (((arm_lpae_iopte)6) << 52) +#define ARM_LPAE_PTE_ATTR_HI_MASK (ARM_LPAE_PTE_XN | ARM_LPAE_PTE_DBM) #define ARM_LPAE_PTE_ATTR_MASK (ARM_LPAE_PTE_ATTR_LO_MASK | \ ARM_LPAE_PTE_ATTR_HI_MASK) /* Software bit for solving coherency races */ @@ -93,7 +94,11 @@ /* Stage-1 PTE */ #define ARM_LPAE_PTE_AP_UNPRIV (((arm_lpae_iopte)1) << 6) -#define ARM_LPAE_PTE_AP_RDONLY (((arm_lpae_iopte)2) << 6) +#define ARM_LPAE_PTE_AP_RDONLY_BIT 7 +#define ARM_LPAE_PTE_AP_RDONLY (((arm_lpae_iopte)1) << \ + ARM_LPAE_PTE_AP_RDONLY_BIT) +#define ARM_LPAE_PTE_AP_WR_CLEAN_MASK (ARM_LPAE_PTE_AP_RDONLY | \ + ARM_LPAE_PTE_DBM) #define ARM_LPAE_PTE_ATTRINDX_SHIFT 2 #define ARM_LPAE_PTE_nG (((arm_lpae_iopte)1) << 11) @@ -139,6 +144,12 @@ #define iopte_prot(pte) ((pte) & ARM_LPAE_PTE_ATTR_MASK) +#define iopte_writeable_dirty(pte) \ + (((pte) & ARM_LPAE_PTE_AP_WR_CLEAN_MASK) == ARM_LPAE_PTE_DBM) + +#define iopte_set_writeable_clean(ptep) \ + set_bit(ARM_LPAE_PTE_AP_RDONLY_BIT, (unsigned long *)(ptep)) + struct arm_lpae_io_pgtable { struct io_pgtable iop; @@ -160,6 +171,13 @@ static inline bool iopte_leaf(arm_lpae_iopte pte, int lvl, return iopte_type(pte) == ARM_LPAE_PTE_TYPE_BLOCK; } +static inline bool iopte_table(arm_lpae_iopte pte, int lvl) +{ + if (lvl == (ARM_LPAE_MAX_LEVELS - 1)) + return false; + return iopte_type(pte) == ARM_LPAE_PTE_TYPE_TABLE; +} + static arm_lpae_iopte paddr_to_iopte(phys_addr_t paddr, struct arm_lpae_io_pgtable *data) { @@ -737,6 +755,97 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, return iopte_to_paddr(pte, data) | iova; } +struct io_pgtable_walk_data { + struct iommu_dirty_bitmap *dirty; + unsigned long flags; + u64 addr; + const u64 end; +}; + +static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data, + struct io_pgtable_walk_data *walk_data, + arm_lpae_iopte *ptep, + int lvl); + +static int io_pgtable_visit_dirty(struct arm_lpae_io_pgtable *data, + struct io_pgtable_walk_data *walk_data, + arm_lpae_iopte *ptep, int lvl) +{ + struct io_pgtable *iop = &data->iop; + arm_lpae_iopte pte = READ_ONCE(*ptep); + + if (iopte_leaf(pte, lvl, iop->fmt)) { + size_t size = ARM_LPAE_BLOCK_SIZE(lvl, data); + + if (iopte_writeable_dirty(pte)) { + iommu_dirty_bitmap_record(walk_data->dirty, + walk_data->addr, size); + if (!(walk_data->flags & IOMMU_DIRTY_NO_CLEAR)) + iopte_set_writeable_clean(ptep); + } + walk_data->addr += size; + return 0; + } + + if (WARN_ON(!iopte_table(pte, lvl))) + return -EINVAL; + + ptep = iopte_deref(pte, data); + return __arm_lpae_iopte_walk_dirty(data, walk_data, ptep, lvl + 1); +} + +static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data, + struct io_pgtable_walk_data *walk_data, + arm_lpae_iopte *ptep, + int lvl) +{ + u32 idx; + int max_entries, ret; + + if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS)) + return -EINVAL; + + if (lvl == data->start_level) + max_entries = ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte); + else + max_entries = ARM_LPAE_PTES_PER_TABLE(data); + + for (idx = ARM_LPAE_LVL_IDX(walk_data->addr, lvl, data); + (idx < max_entries) && (walk_data->addr < walk_data->end); ++idx) { + ret = io_pgtable_visit_dirty(data, walk_data, ptep + idx, lvl); + if (ret) + return ret; + } + + return 0; +} + +static int arm_lpae_read_and_clear_dirty(struct io_pgtable_ops *ops, + unsigned long iova, size_t size, + unsigned long flags, + struct iommu_dirty_bitmap *dirty) +{ + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); + struct io_pgtable_cfg *cfg = &data->iop.cfg; + struct io_pgtable_walk_data walk_data = { + .dirty = dirty, + .flags = flags, + .addr = iova, + .end = iova + size, + }; + arm_lpae_iopte *ptep = data->pgd; + int lvl = data->start_level; + + if (WARN_ON(!size)) + return -EINVAL; + if (WARN_ON((iova + size - 1) & ~(BIT(cfg->ias) - 1))) + return -EINVAL; + if (data->iop.fmt != ARM_64_LPAE_S1) + return -EINVAL; + + return __arm_lpae_iopte_walk_dirty(data, &walk_data, ptep, lvl); +} + static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg) { unsigned long granule, page_sizes; @@ -815,6 +924,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg) .map_pages = arm_lpae_map_pages, .unmap_pages = arm_lpae_unmap_pages, .iova_to_phys = arm_lpae_iova_to_phys, + .read_and_clear_dirty = arm_lpae_read_and_clear_dirty, }; return data; -- Gitee From 5dc38e85e23d430a3559089365c6907919570f76 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Wed, 3 Jul 2024 11:16:03 +0100 Subject: [PATCH 104/143] iommu/arm-smmu-v3: Add support for dirty tracking in domain alloc ANBZ: #13617 commit eb054d67b21a53f6ccf3af49a62fb99397b48fc2 upstream. This provides all the infrastructure to enable dirty tracking if the hardware has the capability and domain alloc request for it. Also, add a device_iommu_capable() check in iommufd core for IOMMU_CAP_DIRTY_TRACKING before we request a user domain with dirty tracking support. Please note, we still report no support for IOMMU_CAP_DIRTY_TRACKING as it will finally be enabled in a subsequent patch. Signed-off-by: Joao Martins Reviewed-by: Ryan Roberts Reviewed-by: Jason Gunthorpe Reviewed-by: Nicolin Chen Reviewed-by: Kevin Tian Signed-off-by: Shameer Kolothum Link: https://lore.kernel.org/r/20240703101604.2576-5-shameerali.kolothum.thodi@huawei.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 84 +++++++++++++++------ drivers/iommu/iommufd/hw_pagetable.c | 3 + include/linux/io-pgtable.h | 3 + 3 files changed, 67 insertions(+), 23 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index b133970bd87f..6be65381df0c 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "arm-smmu-v3.h" #include "../../dma-iommu.h" @@ -37,6 +38,7 @@ MODULE_PARM_DESC(disable_msipolling, "Disable MSI-based polling for CMD_SYNC completion."); static struct iommu_ops arm_smmu_ops; +static struct iommu_dirty_ops arm_smmu_dirty_ops; enum arm_smmu_msi_index { EVTQ_MSI_INDEX, @@ -82,7 +84,7 @@ static struct arm_smmu_option_prop arm_smmu_options[] = { }; static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, - struct arm_smmu_device *smmu); + struct arm_smmu_device *smmu, u32 flags); static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master); static void parse_driver_options(struct arm_smmu_device *smmu) @@ -2293,7 +2295,7 @@ static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) struct arm_smmu_master *master = dev_iommu_priv_get(dev); int ret; - ret = arm_smmu_domain_finalise(smmu_domain, master->smmu); + ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, 0); if (ret) { kfree(smmu_domain); return ERR_PTR(ret); @@ -2357,15 +2359,15 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_device *smmu, } static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, - struct arm_smmu_device *smmu) + struct arm_smmu_device *smmu, u32 flags) { int ret; - unsigned long ias, oas; enum io_pgtable_fmt fmt; struct io_pgtable_cfg pgtbl_cfg; struct io_pgtable_ops *pgtbl_ops; int (*finalise_stage_fn)(struct arm_smmu_device *smmu, struct arm_smmu_domain *smmu_domain); + bool enable_dirty = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING; /* Restrict the stage to what we can actually support */ if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) @@ -2373,17 +2375,31 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2)) smmu_domain->stage = ARM_SMMU_DOMAIN_S1; + pgtbl_cfg = (struct io_pgtable_cfg) { + .pgsize_bitmap = smmu->pgsize_bitmap, + .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY, + .tlb = &arm_smmu_flush_ops, + .iommu_dev = smmu->dev, + }; + switch (smmu_domain->stage) { - case ARM_SMMU_DOMAIN_S1: - ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48; - ias = min_t(unsigned long, ias, VA_BITS); - oas = smmu->ias; + case ARM_SMMU_DOMAIN_S1: { + unsigned long ias = (smmu->features & + ARM_SMMU_FEAT_VAX) ? 52 : 48; + + pgtbl_cfg.ias = min_t(unsigned long, ias, VA_BITS); + pgtbl_cfg.oas = smmu->ias; + if (enable_dirty) + pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD; fmt = ARM_64_LPAE_S1; finalise_stage_fn = arm_smmu_domain_finalise_s1; break; + } case ARM_SMMU_DOMAIN_S2: - ias = smmu->ias; - oas = smmu->oas; + if (enable_dirty) + return -EOPNOTSUPP; + pgtbl_cfg.ias = smmu->ias; + pgtbl_cfg.oas = smmu->oas; fmt = ARM_64_LPAE_S2; finalise_stage_fn = arm_smmu_domain_finalise_s2; break; @@ -2391,15 +2407,6 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, return -EINVAL; } - pgtbl_cfg = (struct io_pgtable_cfg) { - .pgsize_bitmap = smmu->pgsize_bitmap, - .ias = ias, - .oas = oas, - .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY, - .tlb = &arm_smmu_flush_ops, - .iommu_dev = smmu->dev, - }; - pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); if (!pgtbl_ops) return -ENOMEM; @@ -2407,6 +2414,8 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, smmu_domain->domain.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; smmu_domain->domain.geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1; smmu_domain->domain.geometry.force_aperture = true; + if (enable_dirty && smmu_domain->stage == ARM_SMMU_DOMAIN_S1) + smmu_domain->domain.dirty_ops = &arm_smmu_dirty_ops; ret = finalise_stage_fn(smmu, smmu_domain); if (ret < 0) { @@ -2756,7 +2765,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) mutex_lock(&smmu_domain->init_mutex); if (!smmu_domain->smmu) { - ret = arm_smmu_domain_finalise(smmu_domain, smmu); + ret = arm_smmu_domain_finalise(smmu_domain, smmu, 0); } else if (smmu_domain->smmu != smmu) ret = -EINVAL; @@ -2821,7 +2830,7 @@ static int arm_smmu_s1_set_dev_pasid(struct iommu_domain *domain, mutex_lock(&smmu_domain->init_mutex); if (!smmu_domain->smmu) - ret = arm_smmu_domain_finalise(smmu_domain, smmu); + ret = arm_smmu_domain_finalise(smmu_domain, smmu, 0); else if (smmu_domain->smmu != smmu) ret = -EINVAL; mutex_unlock(&smmu_domain->init_mutex); @@ -3039,10 +3048,13 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags, const struct iommu_user_data *user_data) { struct arm_smmu_master *master = dev_iommu_priv_get(dev); + const u32 PAGING_FLAGS = IOMMU_HWPT_ALLOC_DIRTY_TRACKING; struct arm_smmu_domain *smmu_domain; int ret; - if (flags || parent || user_data) + if (flags & ~PAGING_FLAGS) + return ERR_PTR(-EOPNOTSUPP); + if (parent || user_data) return ERR_PTR(-EOPNOTSUPP); smmu_domain = arm_smmu_domain_alloc(); @@ -3051,7 +3063,7 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags, smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED; smmu_domain->domain.ops = arm_smmu_ops.default_domain_ops; - ret = arm_smmu_domain_finalise(smmu_domain, master->smmu); + ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, flags); if (ret) goto err_free; return &smmu_domain->domain; @@ -3300,6 +3312,27 @@ static void arm_smmu_release_device(struct device *dev) kfree(master); } +static int arm_smmu_read_and_clear_dirty(struct iommu_domain *domain, + unsigned long iova, size_t size, + unsigned long flags, + struct iommu_dirty_bitmap *dirty) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; + + return ops->read_and_clear_dirty(ops, iova, size, flags, dirty); +} + +static int arm_smmu_set_dirty_tracking(struct iommu_domain *domain, + bool enabled) +{ + /* + * Always enabled and the dirty bitmap is cleared prior to + * set_dirty_tracking(). + */ + return 0; +} + static struct iommu_group *arm_smmu_device_group(struct device *dev) { struct iommu_group *group; @@ -3458,6 +3491,11 @@ static struct iommu_ops arm_smmu_ops = { } }; +static struct iommu_dirty_ops arm_smmu_dirty_ops = { + .read_and_clear_dirty = arm_smmu_read_and_clear_dirty, + .set_dirty_tracking = arm_smmu_set_dirty_tracking, +}; + /* Probing and initialisation functions */ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, struct arm_smmu_queue *q, diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index 33d142f8057d..6d5b2fffeea0 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -114,6 +114,9 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, return ERR_PTR(-EOPNOTSUPP); if (flags & ~valid_flags) return ERR_PTR(-EOPNOTSUPP); + if ((flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) && + !device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING)) + return ERR_PTR(-EOPNOTSUPP); hwpt_paging = __iommufd_object_alloc( ictx, hwpt_paging, IOMMUFD_OBJ_HWPT_PAGING, common.obj); diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 86cf1f7ae389..f9a81761bfce 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -85,6 +85,8 @@ struct io_pgtable_cfg { * * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the outer-cacheability * attributes set in the TCR for a non-coherent page-table walker. + * + * IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking in stage 1 pagetable. */ #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) @@ -92,6 +94,7 @@ struct io_pgtable_cfg { #define IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT BIT(4) #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6) + #define IO_PGTABLE_QUIRK_ARM_HD BIT(7) unsigned long quirks; unsigned long pgsize_bitmap; unsigned int ias; -- Gitee From 3fa71420b5a5ba35757638dff09f592bda9b4e5f Mon Sep 17 00:00:00 2001 From: Kunkun Jiang Date: Wed, 3 Jul 2024 11:16:04 +0100 Subject: [PATCH 105/143] iommu/arm-smmu-v3: Enable HTTU for stage1 with io-pgtable mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #13617 commit 25c776dd03b3e3ee16ad3402feabe20d811c7cb2 upstream. If io-pgtable quirk flag indicates support for hardware update of dirty state, enable HA/HD bits in the SMMU CD and also set the DBM bit in the page descriptor. Now report the dirty page tracking capability of SMMUv3 and select IOMMUFD_DRIVER for ARM_SMMU_V3 if IOMMUFD is enabled. Co-developed-by: Keqian Zhu Signed-off-by: Keqian Zhu Signed-off-by: Kunkun Jiang Signed-off-by: Joao Martins Reviewed-by: Ryan Roberts Reviewed-by: Jason Gunthorpe Reviewed-by: Nicolin Chen Signed-off-by: Shameer Kolothum Link: https://lore.kernel.org/r/20240703101604.2576-6-shameerali.kolothum.thodi@huawei.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/Kconfig | 1 + drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 15 +++++++++++++++ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 +++ drivers/iommu/io-pgtable-arm.c | 5 ++++- 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 0197014056ab..2d92a58c76e2 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -395,6 +395,7 @@ config ARM_SMMU_V3 select IOMMU_API select IOMMU_IO_PGTABLE_LPAE select GENERIC_MSI_IRQ + select IOMMUFD_DRIVER if IOMMUFD help Support for implementations of the ARM System MMU architecture version 3 providing translation support to a PCIe root complex. diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 6be65381df0c..2478178dfda7 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1352,6 +1352,12 @@ void arm_smmu_make_s1_cd(struct arm_smmu_cd *target, CTXDESC_CD_0_ASET | FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) ); + + /* To enable dirty flag update, set both Access flag and dirty state update */ + if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_HD) + target->data[0] |= cpu_to_le64(CTXDESC_CD_0_TCR_HA | + CTXDESC_CD_0_TCR_HD); + target->data[1] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.ttbr & CTXDESC_CD_1_TTB0_MASK); target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.mair); @@ -2246,6 +2252,13 @@ static const struct iommu_flush_ops arm_smmu_flush_ops = { .tlb_add_page = arm_smmu_tlb_inv_page_nosync, }; +static bool arm_smmu_dbm_capable(struct arm_smmu_device *smmu) +{ + u32 features = (ARM_SMMU_FEAT_HD | ARM_SMMU_FEAT_COHERENCY); + + return (smmu->features & features) == features; +} + /* IOMMU API */ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) { @@ -2258,6 +2271,8 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) case IOMMU_CAP_NOEXEC: case IOMMU_CAP_DEFERRED_FLUSH: return true; + case IOMMU_CAP_DIRTY_TRACKING: + return arm_smmu_dbm_capable(master->smmu); default: return false; } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 5c577839d86d..b332536a00d4 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -304,6 +304,9 @@ struct arm_smmu_cd { #define CTXDESC_CD_0_TCR_IPS GENMASK_ULL(34, 32) #define CTXDESC_CD_0_TCR_TBI0 (1ULL << 38) +#define CTXDESC_CD_0_TCR_HA (1UL << 43) +#define CTXDESC_CD_0_TCR_HD (1UL << 42) + #define CTXDESC_CD_0_AA64 (1UL << 41) #define CTXDESC_CD_0_S (1UL << 44) #define CTXDESC_CD_0_R (1UL << 45) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index b521e656d4ae..fd9a5c136b5d 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -452,6 +452,8 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, pte = ARM_LPAE_PTE_nG; if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ)) pte |= ARM_LPAE_PTE_AP_RDONLY; + else if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_HD) + pte |= ARM_LPAE_PTE_DBM; if (!(prot & IOMMU_PRIV)) pte |= ARM_LPAE_PTE_AP_UNPRIV; } else { @@ -940,7 +942,8 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_ARM_TTBR1 | - IO_PGTABLE_QUIRK_ARM_OUTER_WBWA)) + IO_PGTABLE_QUIRK_ARM_OUTER_WBWA | + IO_PGTABLE_QUIRK_ARM_HD)) return NULL; data = arm_lpae_alloc_pgtable(cfg); -- Gitee From 7c2dfa0cbc9bbc8c30fb5bb664ca98cc4d410cb8 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 21:08:34 +0800 Subject: [PATCH 106/143] iommu/vt-d: Remove comment for def_domain_type ANBZ: #13617 commit 5fbf97371dc0a48794280445bc94e2e15dd81a63 upstream. The comment for def_domain_type is outdated. Part of it is irrelevant. Furthermore, it could just be deleted since the iommu_ops::def_domain_type callback is properly documented in iommu.h, so individual implementations shouldn't need to repeat that. Remove it to avoid confusion. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240624024327.234979-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20240702130839.108139-3-baolu.lu@linux.intel.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/intel/iommu.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index b217d4f3646b..d121a889fa5a 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2189,17 +2189,6 @@ static bool device_rmrr_is_relaxable(struct device *dev) return false; } -/* - * Return the required default domain type for a specific device. - * - * @dev: the device in query - * @startup: true if this is during early boot - * - * Returns: - * - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain - * - IOMMU_DOMAIN_IDENTITY: device requires an identical mapping domain - * - 0: both identity and dynamic domains work for this device - */ static int device_def_domain_type(struct device *dev) { if (dev_is_pci(dev)) { -- Gitee From f86b9d8ba029b94fc827a64f1b477dfd643850b0 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 21:08:35 +0800 Subject: [PATCH 107/143] iommu/vt-d: Remove control over Execute-Requested requests ANBZ: #13617 commit e995fcde6070f0981e083c1e2e17e401e6c17ad9 upstream. The VT-d specification has removed architectural support of the requests with pasid with a value of 1 for Execute-Requested (ER). And the NXE bit in the pasid table entry and XD bit in the first-stage paging Entries are deprecated accordingly. Remove the programming of these bits to make it consistent with the spec. Suggested-by: Jacob Pan Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240624032351.249858-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20240702130839.108139-4-baolu.lu@linux.intel.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/intel/iommu.c | 4 ++-- drivers/iommu/intel/iommu.h | 6 ++---- drivers/iommu/intel/pasid.c | 1 - drivers/iommu/intel/pasid.h | 10 ---------- 4 files changed, 4 insertions(+), 17 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index d121a889fa5a..27230a2a3a9a 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -866,7 +866,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE); pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE; if (domain->use_first_level) - pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS; + pteval |= DMA_FL_PTE_US | DMA_FL_PTE_ACCESS; tmp = 0ULL; if (!try_cmpxchg64(&pte->val, &tmp, pteval)) @@ -1884,7 +1884,7 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP); attr |= DMA_FL_PTE_PRESENT; if (domain->use_first_level) { - attr |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS; + attr |= DMA_FL_PTE_US | DMA_FL_PTE_ACCESS; if (prot & DMA_PTE_WRITE) attr |= DMA_FL_PTE_DIRTY; } diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 83d394f166a7..e590a2cd5e57 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -49,7 +49,6 @@ #define DMA_FL_PTE_US BIT_ULL(2) #define DMA_FL_PTE_ACCESS BIT_ULL(5) #define DMA_FL_PTE_DIRTY BIT_ULL(6) -#define DMA_FL_PTE_XD BIT_ULL(63) #define DMA_SL_PTE_DIRTY_BIT 9 #define DMA_SL_PTE_DIRTY BIT_ULL(DMA_SL_PTE_DIRTY_BIT) @@ -831,11 +830,10 @@ static inline void dma_clear_pte(struct dma_pte *pte) static inline u64 dma_pte_addr(struct dma_pte *pte) { #ifdef CONFIG_64BIT - return pte->val & VTD_PAGE_MASK & (~DMA_FL_PTE_XD); + return pte->val & VTD_PAGE_MASK; #else /* Must have a full atomic 64-bit read */ - return __cmpxchg64(&pte->val, 0ULL, 0ULL) & - VTD_PAGE_MASK & (~DMA_FL_PTE_XD); + return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK; #endif } diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 9bf45bc4b967..ffac7a75be95 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -336,7 +336,6 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, pasid_set_domain_id(pte, did); pasid_set_address_width(pte, iommu->agaw); pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); - pasid_set_nxe(pte); /* Setup Present and PASID Granular Transfer Type: */ pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY); diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index da9978fef7ac..dde6d3ba5ae0 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -247,16 +247,6 @@ static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value) pasid_set_bits(&pe->val[1], 1 << 23, value << 23); } -/* - * Setup No Execute Enable bit (Bit 133) of a scalable mode PASID - * entry. It is required when XD bit of the first level page table - * entry is about to be set. - */ -static inline void pasid_set_nxe(struct pasid_entry *pe) -{ - pasid_set_bits(&pe->val[2], 1 << 5, 1 << 5); -} - /* * Setup the Page Snoop (PGSNP) field (Bit 88) of a scalable mode * PASID entry. -- Gitee From 488e35d91a205336ed309579810993549ce1ea4b Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 21:08:36 +0800 Subject: [PATCH 108/143] iommu/vt-d: Downgrade warning for pre-enabled IR ANBZ: #13617 commit 804f98e224e41c16e3b70f97790f84894745a392 upstream. Emitting a warning is overkill in intel_setup_irq_remapping() since the interrupt remapping is pre-enabled. For example, there's no guarantee that kexec will explicitly disable interrupt remapping before booting a new kernel. As a result, users are seeing warning messages like below when they kexec boot a kernel, though there is nothing wrong: DMAR-IR: IRQ remapping was enabled on dmar18 but we are not in kdump mode DMAR-IR: IRQ remapping was enabled on dmar17 but we are not in kdump mode DMAR-IR: IRQ remapping was enabled on dmar16 but we are not in kdump mode ... ... Downgrade the severity of this message to avoid user confusion. CC: Paul Menzel Link: https://lore.kernel.org/linux-iommu/5517f76a-94ad-452c-bae6-34ecc0ec4831@molgen.mpg.de/ Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240625043912.258036-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20240702130839.108139-5-baolu.lu@linux.intel.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/intel/irq_remapping.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index 2d6e6cbba680..e59403767fae 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -601,8 +601,8 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) if (ir_pre_enabled(iommu)) { if (!is_kdump_kernel()) { - pr_warn("IRQ remapping was enabled on %s but we are not in kdump mode\n", - iommu->name); + pr_info_once("IRQ remapping was enabled on %s but we are not in kdump mode\n", + iommu->name); clear_ir_pre_enabled(iommu); iommu_disable_irq_remapping(iommu); } else if (iommu_load_old_irte(iommu)) -- Gitee From e2d2d70e1da4bce9ed5eea8eaeec0267d565bd8d Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 21:08:37 +0800 Subject: [PATCH 109/143] iommu/vt-d: Add helper to allocate paging domain ANBZ: #13617 commit 2b989ab9bc89b29dd4b5509408b8fa42337eda56 upstream. The domain_alloc_user operation is currently implemented by allocating a paging domain using iommu_domain_alloc(). This is because it needs to fully initialize the domain before return. Add a helper to do this to avoid using iommu_domain_alloc(). Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240610085555.88197-16-baolu.lu@linux.intel.com Reviewed-by: Yi Liu Link: https://lore.kernel.org/r/20240702130839.108139-6-baolu.lu@linux.intel.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/intel/iommu.c | 90 +++++++++++++++++++++++++++++++++---- 1 file changed, 81 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 27230a2a3a9a..5c74059f21b0 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3695,6 +3695,79 @@ static struct iommu_domain blocking_domain = { } }; +static int iommu_superpage_capability(struct intel_iommu *iommu, bool first_stage) +{ + if (!intel_iommu_superpage) + return 0; + + if (first_stage) + return cap_fl1gp_support(iommu->cap) ? 2 : 1; + + return fls(cap_super_page_val(iommu->cap)); +} + +static struct dmar_domain *paging_domain_alloc(struct device *dev, bool first_stage) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + struct dmar_domain *domain; + int addr_width; + + domain = kzalloc(sizeof(*domain), GFP_KERNEL); + if (!domain) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&domain->devices); + INIT_LIST_HEAD(&domain->dev_pasids); + INIT_LIST_HEAD(&domain->cache_tags); + spin_lock_init(&domain->lock); + spin_lock_init(&domain->cache_lock); + xa_init(&domain->iommu_array); + + domain->nid = dev_to_node(dev); + domain->has_iotlb_device = info->ats_enabled; + domain->use_first_level = first_stage; + + /* calculate the address width */ + addr_width = agaw_to_width(iommu->agaw); + if (addr_width > cap_mgaw(iommu->cap)) + addr_width = cap_mgaw(iommu->cap); + domain->gaw = addr_width; + domain->agaw = iommu->agaw; + domain->max_addr = __DOMAIN_MAX_ADDR(addr_width); + + /* iommu memory access coherency */ + domain->iommu_coherency = iommu_paging_structure_coherency(iommu); + + /* pagesize bitmap */ + domain->domain.pgsize_bitmap = SZ_4K; + domain->iommu_superpage = iommu_superpage_capability(iommu, first_stage); + domain->domain.pgsize_bitmap |= domain_super_pgsize_bitmap(domain); + + /* + * IOVA aperture: First-level translation restricts the input-address + * to a canonical address (i.e., address bits 63:N have the same value + * as address bit [N-1], where N is 48-bits with 4-level paging and + * 57-bits with 5-level paging). Hence, skip bit [N-1]. + */ + domain->domain.geometry.force_aperture = true; + domain->domain.geometry.aperture_start = 0; + if (first_stage) + domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw - 1); + else + domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw); + + /* always allocate the top pgd */ + domain->pgd = iommu_alloc_page_node(domain->nid, GFP_KERNEL); + if (!domain->pgd) { + kfree(domain); + return ERR_PTR(-ENOMEM); + } + domain_flush_cache(domain, domain->pgd, PAGE_SIZE); + + return domain; +} + static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) { struct dmar_domain *dmar_domain; @@ -3757,15 +3830,14 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags, if (user_data || (dirty_tracking && !ssads_supported(iommu))) return ERR_PTR(-EOPNOTSUPP); - /* - * domain_alloc_user op needs to fully initialize a domain before - * return, so uses iommu_domain_alloc() here for simple. - */ - domain = iommu_domain_alloc(dev->bus); - if (!domain) - return ERR_PTR(-ENOMEM); - - dmar_domain = to_dmar_domain(domain); + /* Do not use first stage for user domain translation. */ + dmar_domain = paging_domain_alloc(dev, false); + if (IS_ERR(dmar_domain)) + return ERR_CAST(dmar_domain); + domain = &dmar_domain->domain; + domain->type = IOMMU_DOMAIN_UNMANAGED; + domain->owner = &intel_iommu_ops; + domain->ops = intel_iommu_ops.default_domain_ops; if (nested_parent) { dmar_domain->nested_parent = true; -- Gitee From 91ec19b7895ccbd9f39870b6fcf6cc36e022ecc1 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 21:08:38 +0800 Subject: [PATCH 110/143] iommu/vt-d: Add helper to flush caches for context change ANBZ: #13617 commit f90584f4beb84211c4d21b319cc13f391fe9f3c2 upstream. This helper is used to flush the related caches following a change in a context table entry that was previously present. The VT-d specification provides guidance for such invalidations in section 6.5.3.3. This helper replaces the existing open code in the code paths where a present context entry is being torn down. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240701112317.94022-2-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20240702130839.108139-7-baolu.lu@linux.intel.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/intel/iommu.c | 32 +---------- drivers/iommu/intel/iommu.h | 4 ++ drivers/iommu/intel/pasid.c | 106 +++++++++++++++++++++++++++++------- 3 files changed, 92 insertions(+), 50 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 5c74059f21b0..de392ed38970 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1371,21 +1371,6 @@ static void iommu_disable_pci_caps(struct device_domain_info *info) } } -static void __iommu_flush_dev_iotlb(struct device_domain_info *info, - u64 addr, unsigned int mask) -{ - u16 sid, qdep; - - if (!info || !info->ats_enabled) - return; - - sid = info->bus << 8 | info->devfn; - qdep = info->ats_qdep; - qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, - qdep, addr, mask); - quirk_extra_dev_tlb_flush(info, addr, mask, IOMMU_NO_PASID, qdep); -} - static void intel_flush_iotlb_all(struct iommu_domain *domain) { cache_tag_flush_all(to_dmar_domain(domain)); @@ -1971,7 +1956,6 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 { struct intel_iommu *iommu = info->iommu; struct context_entry *context; - u16 did_old; spin_lock(&iommu->lock); context = iommu_context_addr(iommu, bus, devfn, 0); @@ -1980,24 +1964,10 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 return; } - did_old = context_domain_id(context); - context_clear_entry(context); __iommu_flush_cache(iommu, context, sizeof(*context)); spin_unlock(&iommu->lock); - iommu->flush.flush_context(iommu, - did_old, - (((u16)bus) << 8) | devfn, - DMA_CCMD_MASK_NOBIT, - DMA_CCMD_DEVICE_INVL); - - iommu->flush.flush_iotlb(iommu, - did_old, - 0, - 0, - DMA_TLB_DSI_FLUSH); - - __iommu_flush_dev_iotlb(info, 0, MAX_AGAW_PFN_WIDTH); + intel_context_flush_present(info, context, true); } static int domain_setup_first_level(struct intel_iommu *iommu, diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index e590a2cd5e57..1fe4d81b56a2 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -1143,6 +1143,10 @@ void cache_tag_flush_all(struct dmar_domain *domain); void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start, unsigned long end); +void intel_context_flush_present(struct device_domain_info *info, + struct context_entry *context, + bool affect_domains); + #ifdef CONFIG_INTEL_IOMMU_SVM void intel_svm_check(struct intel_iommu *iommu); int intel_svm_enable_prq(struct intel_iommu *iommu); diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index ffac7a75be95..3d23cc6d3214 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -694,25 +694,7 @@ static void device_pasid_table_teardown(struct device *dev, u8 bus, u8 devfn) context_clear_entry(context); __iommu_flush_cache(iommu, context, sizeof(*context)); spin_unlock(&iommu->lock); - - /* - * Cache invalidation for changes to a scalable-mode context table - * entry. - * - * Section 6.5.3.3 of the VT-d spec: - * - Device-selective context-cache invalidation; - * - Domain-selective PASID-cache invalidation to affected domains - * (can be skipped if all PASID entries were not-present); - * - Domain-selective IOTLB invalidation to affected domains; - * - Global Device-TLB invalidation to affected functions. - * - * The iommu has been parked in the blocking state. All domains have - * been detached from the device or PASID. The PASID and IOTLB caches - * have been invalidated during the domain detach path. - */ - iommu->flush.flush_context(iommu, 0, PCI_DEVID(bus, devfn), - DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL); - devtlb_invalidation_with_pasid(iommu, dev, IOMMU_NO_PASID); + intel_context_flush_present(info, context, false); } static int pci_pasid_table_teardown(struct pci_dev *pdev, u16 alias, void *data) @@ -874,3 +856,89 @@ int intel_pasid_setup_sm_context(struct device *dev) return pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_setup, dev); } + +/* + * Global Device-TLB invalidation following changes in a context entry which + * was present. + */ +static void __context_flush_dev_iotlb(struct device_domain_info *info) +{ + if (!info->ats_enabled) + return; + + qi_flush_dev_iotlb(info->iommu, PCI_DEVID(info->bus, info->devfn), + info->pfsid, info->ats_qdep, 0, MAX_AGAW_PFN_WIDTH); + + /* + * There is no guarantee that the device DMA is stopped when it reaches + * here. Therefore, always attempt the extra device TLB invalidation + * quirk. The impact on performance is acceptable since this is not a + * performance-critical path. + */ + quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH, IOMMU_NO_PASID, + info->ats_qdep); +} + +/* + * Cache invalidations after change in a context table entry that was present + * according to the Spec 6.5.3.3 (Guidance to Software for Invalidations). If + * IOMMU is in scalable mode and all PASID table entries of the device were + * non-present, set flush_domains to false. Otherwise, true. + */ +void intel_context_flush_present(struct device_domain_info *info, + struct context_entry *context, + bool flush_domains) +{ + struct intel_iommu *iommu = info->iommu; + u16 did = context_domain_id(context); + struct pasid_entry *pte; + int i; + + /* + * Device-selective context-cache invalidation. The Domain-ID field + * of the Context-cache Invalidate Descriptor is ignored by hardware + * when operating in scalable mode. Therefore the @did value doesn't + * matter in scalable mode. + */ + iommu->flush.flush_context(iommu, did, PCI_DEVID(info->bus, info->devfn), + DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL); + + /* + * For legacy mode: + * - Domain-selective IOTLB invalidation + * - Global Device-TLB invalidation to all affected functions + */ + if (!sm_supported(iommu)) { + iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); + __context_flush_dev_iotlb(info); + + return; + } + + /* + * For scalable mode: + * - Domain-selective PASID-cache invalidation to affected domains + * - Domain-selective IOTLB invalidation to affected domains + * - Global Device-TLB invalidation to affected functions + */ + if (flush_domains) { + /* + * If the IOMMU is running in scalable mode and there might + * be potential PASID translations, the caller should hold + * the lock to ensure that context changes and cache flushes + * are atomic. + */ + assert_spin_locked(&iommu->lock); + for (i = 0; i < info->pasid_table->max_pasid; i++) { + pte = intel_pasid_get_entry(info->dev, i); + if (!pte || !pasid_pte_is_present(pte)) + continue; + + did = pasid_get_domain_id(pte); + qi_flush_pasid_cache(iommu, did, QI_PC_ALL_PASIDS, 0); + iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); + } + } + + __context_flush_dev_iotlb(info); +} -- Gitee From af3ed1efc857d604327e750647b8d3fc3647fa52 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 21:08:39 +0800 Subject: [PATCH 111/143] iommu/vt-d: Refactor PCI PRI enabling/disabling callbacks ANBZ: #13617 commit 3753311c9190f833963fb47336dfe17221d93706 upstream. Commit 0095bf83554f8 ("iommu: Improve iopf_queue_remove_device()") specified the flow for disabling the PRI on a device. Refactor the PRI callbacks in the intel iommu driver to better manage PRI enabling and disabling and align it with the device queue interfaces in the iommu core. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240701112317.94022-3-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20240702130839.108139-8-baolu.lu@linux.intel.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/intel/iommu.c | 57 +++++++++++++++++++++++++++++++++---- drivers/iommu/intel/iommu.h | 9 ++++++ drivers/iommu/intel/pasid.c | 2 -- 3 files changed, 61 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index de392ed38970..2e2bd8c1bcf5 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4317,6 +4317,37 @@ static int intel_iommu_enable_sva(struct device *dev) return 0; } +static int context_flip_pri(struct device_domain_info *info, bool enable) +{ + struct intel_iommu *iommu = info->iommu; + u8 bus = info->bus, devfn = info->devfn; + struct context_entry *context; + + spin_lock(&iommu->lock); + if (context_copied(iommu, bus, devfn)) { + spin_unlock(&iommu->lock); + return -EINVAL; + } + + context = iommu_context_addr(iommu, bus, devfn, false); + if (!context || !context_present(context)) { + spin_unlock(&iommu->lock); + return -ENODEV; + } + + if (enable) + context_set_sm_pre(context); + else + context_clear_sm_pre(context); + + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(context, sizeof(*context)); + intel_context_flush_present(info, context, true); + spin_unlock(&iommu->lock); + + return 0; +} + static int intel_iommu_enable_iopf(struct device *dev) { struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL; @@ -4346,15 +4377,23 @@ static int intel_iommu_enable_iopf(struct device *dev) if (ret) return ret; + ret = context_flip_pri(info, true); + if (ret) + goto err_remove_device; + ret = pci_enable_pri(pdev, PRQ_DEPTH); - if (ret) { - iopf_queue_remove_device(iommu->iopf_queue, dev); - return ret; - } + if (ret) + goto err_clear_pri; info->pri_enabled = 1; return 0; +err_clear_pri: + context_flip_pri(info, false); +err_remove_device: + iopf_queue_remove_device(iommu->iopf_queue, dev); + + return ret; } static int intel_iommu_disable_iopf(struct device *dev) @@ -4365,6 +4404,15 @@ static int intel_iommu_disable_iopf(struct device *dev) if (!info->pri_enabled) return -EINVAL; + /* Disable new PRI reception: */ + context_flip_pri(info, false); + + /* + * Remove device from fault queue and acknowledge all outstanding + * PRQs to the device: + */ + iopf_queue_remove_device(iommu->iopf_queue, dev); + /* * PCIe spec states that by clearing PRI enable bit, the Page * Request Interface will not issue new page requests, but has @@ -4375,7 +4423,6 @@ static int intel_iommu_disable_iopf(struct device *dev) */ pci_disable_pri(to_pci_dev(dev)); info->pri_enabled = 0; - iopf_queue_remove_device(iommu->iopf_queue, dev); return 0; } diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 1fe4d81b56a2..2086688743f0 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -1045,6 +1045,15 @@ static inline void context_set_sm_pre(struct context_entry *context) context->lo |= BIT_ULL(4); } +/* + * Clear the PRE(Page Request Enable) field of a scalable mode context + * entry. + */ +static inline void context_clear_sm_pre(struct context_entry *context) +{ + context->lo &= ~BIT_ULL(4); +} + /* Returns a number of VTD pages, but aligned to MM page size */ static inline unsigned long aligned_nrpages(unsigned long host_addr, size_t size) { diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 3d23cc6d3214..5792c817cefa 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -752,8 +752,6 @@ static int context_entry_set_pasid_table(struct context_entry *context, if (info->ats_supported) context_set_sm_dte(context); - if (info->pri_supported) - context_set_sm_pre(context); if (info->pasid_supported) context_set_pasid(context); -- Gitee From 0849b3df67c7d7142ebc971e18b313463c0804c0 Mon Sep 17 00:00:00 2001 From: Shivaprasad G Bhat Date: Thu, 4 Jul 2024 10:22:37 +0000 Subject: [PATCH 112/143] powerpc/pseries/iommu: Define spapr_tce_table_group_ops only with CONFIG_IOMMU_API ANBZ: #13617 commit af199e6ca29c77fb8fea9a78f18dfb165af37cd7 upstream. The patch fixes the below warning: arch/powerpc/platforms/pseries/iommu.c:1824:37: warning: 'spapr_tce_table_group_ops' defined but not used Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202407020357.Hz8kQkKf-lkp@intel.com/ Fixes: b09c031d9433 ("powerpc/iommu: Move pSeries specific functions to pseries/iommu.c") Signed-off-by: Shivaprasad G Bhat Signed-off-by: Michael Ellerman Link: https://msgid.link/172008854222.784.13666247605789409729.stgit@linux.ibm.com Signed-off-by: Shuai Xue --- arch/powerpc/platforms/pseries/iommu.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 183c1fada5dc..23c800c88a48 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -68,7 +68,9 @@ static struct iommu_table *iommu_pseries_alloc_table(int node) return tbl; } +#ifdef CONFIG_IOMMU_API static struct iommu_table_group_ops spapr_tce_table_group_ops; +#endif static struct iommu_table_group *iommu_pseries_alloc_group(int node) { @@ -165,6 +167,7 @@ static unsigned long tce_get_pseries(struct iommu_table *tbl, long index) return be64_to_cpu(*tcep); } +#ifdef CONFIG_IOMMU_API static long pseries_tce_iommu_userspace_view_alloc(struct iommu_table *tbl) { unsigned long cb = ALIGN(sizeof(tbl->it_userspace[0]) * tbl->it_size, PAGE_SIZE); @@ -183,6 +186,7 @@ static long pseries_tce_iommu_userspace_view_alloc(struct iommu_table *tbl) return 0; } +#endif static void tce_iommu_userspace_view_free(struct iommu_table *tbl) { @@ -738,6 +742,7 @@ struct iommu_table_ops iommu_table_lpar_multi_ops = { .free = tce_free_pSeries }; +#ifdef CONFIG_IOMMU_API /* * When the DMA window properties might have been removed, * the parent node has the table_group setup on it. @@ -757,6 +762,7 @@ static struct device_node *pci_dma_find_parent_node(struct pci_dev *dev, return NULL; } +#endif /* * Find nearest ibm,dma-window (default DMA window) or direct DMA window or @@ -1808,6 +1814,7 @@ static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask) return false; } +#ifdef CONFIG_IOMMU_API /* * A simple iommu_table_group_ops which only allows reusing the existing * iommu_table. This handles VFIO for POWER7 or the nested KVM. @@ -2290,6 +2297,7 @@ static struct iommu_table_group_ops spapr_tce_table_group_ops = { .take_ownership = spapr_tce_take_ownership, .release_ownership = spapr_tce_release_ownership, }; +#endif static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action, void *data) -- Gitee From 6419f5496672d7aa57814f7ddbae799e0c59f268 Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Wed, 3 Jul 2024 18:07:59 -0700 Subject: [PATCH 113/143] iommu/arm-smmu-qcom: Register the TBU driver in qcom_smmu_impl_init ANBZ: #13617 commit 0b4eeee2876f2b08442eb32081451bf130e01a4c upstream. Currently the TBU driver will only probe when CONFIG_ARM_SMMU_QCOM_DEBUG is enabled. The driver not probing would prevent the platform to reach sync_state and the system will remain in sub-optimal power consumption mode while waiting for all consumer drivers to probe. To address this, let's register the TBU driver in qcom_smmu_impl_init(), so that it can probe, but still enable its functionality only when the debug option in Kconfig is enabled. Reported-by: Dmitry Baryshkov Closes: https://lore.kernel.org/r/CAA8EJppcXVu72OSo+OiYEiC1HQjP3qCwKMumOsUhcn6Czj0URg@mail.gmail.com Fixes: 414ecb030870 ("iommu/arm-smmu-qcom-debug: Add support for TBUs") Signed-off-by: Georgi Djakov Link: https://lore.kernel.org/r/20240704010759.507798-1-quic_c_gdjako@quicinc.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- .../iommu/arm/arm-smmu/arm-smmu-qcom-debug.c | 17 +------- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 39 +++++++++++++++++++ drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h | 2 + 3 files changed, 42 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c index ef93f825f11f..548783f3f8e8 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c @@ -464,7 +464,7 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev) return ret; } -static int qcom_tbu_probe(struct platform_device *pdev) +int qcom_tbu_probe(struct platform_device *pdev) { struct of_phandle_args args = { .args_count = 2 }; struct device_node *np = pdev->dev.of_node; @@ -506,18 +506,3 @@ static int qcom_tbu_probe(struct platform_device *pdev) return 0; } - -static const struct of_device_id qcom_tbu_of_match[] = { - { .compatible = "qcom,sc7280-tbu" }, - { .compatible = "qcom,sdm845-tbu" }, - { } -}; - -static struct platform_driver qcom_tbu_driver = { - .driver = { - .name = "qcom_tbu", - .of_match_table = qcom_tbu_of_match, - }, - .probe = qcom_tbu_probe, -}; -builtin_platform_driver(qcom_tbu_driver); diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 3a71d710e288..601fb878d0ef 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include "arm-smmu.h" #include "arm-smmu-qcom.h" @@ -591,10 +593,47 @@ static struct acpi_platform_list qcom_acpi_platlist[] = { }; #endif +static int qcom_smmu_tbu_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + int ret; + + if (IS_ENABLED(CONFIG_ARM_SMMU_QCOM_DEBUG)) { + ret = qcom_tbu_probe(pdev); + if (ret) + return ret; + } + + if (dev->pm_domain) { + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + } + + return 0; +} + +static const struct of_device_id qcom_smmu_tbu_of_match[] = { + { .compatible = "qcom,sc7280-tbu" }, + { .compatible = "qcom,sdm845-tbu" }, + { } +}; + +static struct platform_driver qcom_smmu_tbu_driver = { + .driver = { + .name = "qcom_tbu", + .of_match_table = qcom_smmu_tbu_of_match, + }, + .probe = qcom_smmu_tbu_probe, +}; + struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu) { const struct device_node *np = smmu->dev->of_node; const struct of_device_id *match; + static u8 tbu_registered; + + if (!tbu_registered++) + platform_driver_register(&qcom_smmu_tbu_driver); #ifdef CONFIG_ACPI if (np == NULL) { diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h index 9bb3ae7d62da..3c134d1a6277 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h @@ -34,8 +34,10 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev); #ifdef CONFIG_ARM_SMMU_QCOM_DEBUG void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu); +int qcom_tbu_probe(struct platform_device *pdev); #else static inline void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu) { } +static inline int qcom_tbu_probe(struct platform_device *pdev) { return -EINVAL; } #endif #endif /* _ARM_SMMU_QCOM_H */ -- Gitee From f614a6ba7f9437345ff203b8dee415cbcfc39e3d Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 10 Jun 2024 16:55:35 +0800 Subject: [PATCH 114/143] iommu: Add iommu_paging_domain_alloc() interface ANBZ: #13617 commit a27bf2743cb80d3b36b5b43e8e2e702412c41668 upstream. Commit <17de3f5fdd35> ("iommu: Retire bus ops") removes iommu ops from bus. The iommu subsystem no longer relies on bus for operations. So the bus parameter in iommu_domain_alloc() is no longer relevant. Add a new interface named iommu_paging_domain_alloc(), which explicitly indicates the allocation of a paging domain for DMA managed by a kernel driver. The new interface takes a device pointer as its parameter, that better aligns with the current iommu subsystem. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Vasant Hegde Link: https://lore.kernel.org/r/20240610085555.88197-2-baolu.lu@linux.intel.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/iommu.c | 20 ++++++++++++++++++++ include/linux/iommu.h | 6 ++++++ 2 files changed, 26 insertions(+) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index c41ebd98410c..11c91a58ea0e 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2027,6 +2027,10 @@ static int __iommu_domain_alloc_dev(struct device *dev, void *data) return 0; } +/* + * The iommu ops in bus has been retired. Do not use this interface in + * new drivers. + */ struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus) { const struct iommu_ops *ops = NULL; @@ -2043,6 +2047,22 @@ struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus) } EXPORT_SYMBOL_GPL(iommu_domain_alloc); +/** + * iommu_paging_domain_alloc() - Allocate a paging domain + * @dev: device for which the domain is allocated + * + * Allocate a paging domain which will be managed by a kernel driver. Return + * allocated domain if successful, or a ERR pointer for failure. + */ +struct iommu_domain *iommu_paging_domain_alloc(struct device *dev) +{ + if (!dev_has_iommu(dev)) + return ERR_PTR(-ENODEV); + + return __iommu_domain_alloc(dev_iommu_ops(dev), dev, IOMMU_DOMAIN_UNMANAGED); +} +EXPORT_SYMBOL_GPL(iommu_paging_domain_alloc); + void iommu_domain_free(struct iommu_domain *domain) { if (domain->type == IOMMU_DOMAIN_SVA) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 9afdafdc0e8b..fab03591cef0 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -797,6 +797,7 @@ extern bool iommu_present(const struct bus_type *bus); extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap); extern bool iommu_group_has_isolated_msi(struct iommu_group *group); extern struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus); +struct iommu_domain *iommu_paging_domain_alloc(struct device *dev); extern void iommu_domain_free(struct iommu_domain *domain); extern int iommu_attach_device(struct iommu_domain *domain, struct device *dev); @@ -1118,6 +1119,11 @@ static inline struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus return NULL; } +static inline struct iommu_domain *iommu_paging_domain_alloc(struct device *dev) +{ + return ERR_PTR(-ENODEV); +} + static inline void iommu_domain_free(struct iommu_domain *domain) { } -- Gitee From 42445ebd4d084d1b11fbb0f58096b9b24de457c1 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 10 Jun 2024 16:55:36 +0800 Subject: [PATCH 115/143] iommufd: Use iommu_paging_domain_alloc() ANBZ: #13617 commit 26a581606fab44ff76b394f0ba44cd19c6ec0a6e upstream. If the iommu driver doesn't implement its domain_alloc_user callback, iommufd_hwpt_paging_alloc() rolls back to allocate an iommu paging domain. Replace iommu_domain_alloc() with iommu_user_domain_alloc() to pass the device pointer along the path. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240610085555.88197-3-baolu.lu@linux.intel.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/iommufd/hw_pagetable.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index 6d5b2fffeea0..9020da52c10f 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -140,9 +140,10 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, } hwpt->domain->owner = ops; } else { - hwpt->domain = iommu_domain_alloc(idev->dev->bus); - if (!hwpt->domain) { - rc = -ENOMEM; + hwpt->domain = iommu_paging_domain_alloc(idev->dev); + if (IS_ERR(hwpt->domain)) { + rc = PTR_ERR(hwpt->domain); + hwpt->domain = NULL; goto out_abort; } } -- Gitee From 5990e695b389d176348ae48098741df624aaedbd Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 25 Mar 2024 14:32:57 +0200 Subject: [PATCH 116/143] ACPI: bus: Make container_of() no-op where it makes sense ANBZ: #13617 commit 32666d9cb3ddcf0041b6377cbab68f4c2d7c4171 upstream. Move list head node to be the first member in a few data structures in order to make container_of() no-op at compile time. On x86_64 with a custom (default + a few dozens of drivers enabled) configuration: add/remove: 0/0 grow/shrink: 5/12 up/down: 21/-124 (-103) ... Total: Before=39924675, After=39924572, chg -0.00% Signed-off-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki Signed-off-by: Shuai Xue --- include/acpi/acpi_bus.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index f4932790dae1..e9c8c6478ad4 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -122,8 +122,8 @@ static inline struct acpi_hotplug_profile *to_acpi_hotplug_profile( } struct acpi_scan_handler { - const struct acpi_device_id *ids; struct list_head list_node; + const struct acpi_device_id *ids; bool (*match)(const char *idstr, const struct acpi_device_id **matchid); int (*attach)(struct acpi_device *dev, const struct acpi_device_id *id); void (*detach)(struct acpi_device *dev); @@ -267,6 +267,7 @@ struct acpi_device_power_flags { }; struct acpi_device_power_state { + struct list_head resources; /* Power resources referenced */ struct { u8 valid:1; u8 explicit_set:1; /* _PSx present? */ @@ -274,7 +275,6 @@ struct acpi_device_power_state { } flags; int power; /* % Power (compared to D0) */ int latency; /* Dx->D0 time (microseconds) */ - struct list_head resources; /* Power resources referenced */ }; struct acpi_device_power { @@ -342,16 +342,16 @@ struct acpi_device_wakeup { }; struct acpi_device_physical_node { - unsigned int node_id; struct list_head node; struct device *dev; + unsigned int node_id; bool put_online:1; }; struct acpi_device_properties { + struct list_head list; const guid_t *guid; union acpi_object *properties; - struct list_head list; void **bufs; }; @@ -413,12 +413,12 @@ struct acpi_device { /* Non-device subnode */ struct acpi_data_node { + struct list_head sibling; const char *name; acpi_handle handle; struct fwnode_handle fwnode; struct fwnode_handle *parent; struct acpi_device_data data; - struct list_head sibling; struct kobject kobj; struct completion kobj_done; }; -- Gitee From 5c9c083e11d237e4142e3f49a3883767f226ad1a Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 12 Aug 2024 15:16:05 +0800 Subject: [PATCH 117/143] gpu: host1x: Use iommu_paging_domain_alloc() ANBZ: #13617 commit 9719c7b8f33bc0268cf76656cfb6244f37586066 upstream. An iommu domain is allocated in host1x_iommu_attach() and is attached to host->dev. Use iommu_paging_domain_alloc() to make it explicit. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240610085555.88197-8-baolu.lu@linux.intel.com Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20240812071605.9513-1-baolu.lu@linux.intel.com Signed-off-by: Jay Chen --- drivers/gpu/host1x/dev.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 7c6699aed7d2..5b4d41ed8e95 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -379,9 +379,10 @@ static struct iommu_domain *host1x_iommu_attach(struct host1x *host) if (err < 0) goto put_group; - host->domain = iommu_domain_alloc(&platform_bus_type); - if (!host->domain) { - err = -ENOMEM; + host->domain = iommu_paging_domain_alloc(host->dev); + if (IS_ERR(host->domain)) { + err = PTR_ERR(host->domain); + host->domain = NULL; goto put_cache; } -- Gitee From 13f299ba5bacb33f62e402e605f47ed202735bdb Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 25 Mar 2024 14:32:58 +0200 Subject: [PATCH 118/143] ACPI: bus: Don't use "proxy" headers ANBZ: #13617 commit 336153053293a090400cfdd4ae724b6b8a4beb98 upstream. Update header inclusions to follow the IWYU (Include What You Use) principle. Signed-off-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki Signed-off-by: Shuai Xue --- include/acpi/acpi_bus.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index e9c8c6478ad4..67e5e8179fa4 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -9,8 +9,13 @@ #ifndef __ACPI_BUS_H__ #define __ACPI_BUS_H__ +#include +#include #include +#include +#include #include +#include /* TBD: Make dynamic */ #define ACPI_MAX_HANDLES 10 -- Gitee From bc59f9f039146cbfa55fb8983b6493dc13e6c87e Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 10 Jun 2024 16:55:39 +0800 Subject: [PATCH 119/143] drm/msm: Use iommu_paging_domain_alloc() ANBZ: #13617 commit 45acf35af200b305d1e6119ca9de47aa4c3c45b9 upstream. The domain allocated in msm_iommu_new() is for the @dev. Replace iommu_domain_alloc() with iommu_paging_domain_alloc() to make it explicit. Signed-off-by: Lu Baolu Acked-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20240610085555.88197-6-baolu.lu@linux.intel.com Signed-off-by: Will Deacon Signed-off-by: Jay Chen --- drivers/gpu/drm/msm/msm_iommu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index d5512037c38b..2a94e82316f9 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -407,10 +407,13 @@ struct msm_mmu *msm_iommu_new(struct device *dev, unsigned long quirks) struct msm_iommu *iommu; int ret; - domain = iommu_domain_alloc(dev->bus); - if (!domain) + if (!device_iommu_mapped(dev)) return NULL; + domain = iommu_paging_domain_alloc(dev); + if (IS_ERR(domain)) + return ERR_CAST(domain); + iommu_set_pgtable_quirks(domain, quirks); iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); -- Gitee From 6b8304a8899a3c61600281c3f6a2621a272057de Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 25 Mar 2024 14:33:00 +0200 Subject: [PATCH 120/143] ACPI: scan: Use list_first_entry_or_null() in acpi_device_hid() ANBZ: #13617 commit 2649a0f29a39f455ff39eb2296269e20df3bba0d upstream. To replace list_empty() + list_first_entry() pair to simplify code. Signed-off-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki Signed-off-by: Shuai Xue --- drivers/acpi/scan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index ff7078afb9d5..363472bc4bb0 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1262,10 +1262,10 @@ const char *acpi_device_hid(struct acpi_device *device) { struct acpi_hardware_id *hid; - if (list_empty(&device->pnp.ids)) + hid = list_first_entry_or_null(&device->pnp.ids, struct acpi_hardware_id, list); + if (!hid) return dummy_hid; - hid = list_first_entry(&device->pnp.ids, struct acpi_hardware_id, list); return hid->id; } EXPORT_SYMBOL(acpi_device_hid); -- Gitee From 81346ad174f7f4985f5826a6fc73ab9d7fc4d8a3 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 25 Mar 2024 14:33:01 +0200 Subject: [PATCH 121/143] ACPI: scan: Move misleading comment to acpi_dma_configure_id() ANBZ: #13617 commit e80d4122df9c707ebc9cfe20ab60be302fc9b833 upstream. The acpi_iommu_configure_id() implementation has a misleading comment since after it the flow does something different to what it states. Move the commit to the caller and with that unshadow the error code inside acpi_iommu_configure_id(). Signed-off-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki Signed-off-by: Shuai Xue --- drivers/acpi/scan.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 363472bc4bb0..284cc0463310 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1590,12 +1590,11 @@ static int acpi_iommu_configure_id(struct device *dev, const u32 *id_in) if (!err && dev->bus) err = iommu_probe_device(dev); - /* Ignore all other errors apart from EPROBE_DEFER */ - if (err == -EPROBE_DEFER) { + if (err == -EPROBE_DEFER) return err; - } else if (err) { + if (err) { dev_dbg(dev, "Adding to IOMMU failed: %d\n", err); - return -ENODEV; + return err; } if (!acpi_iommu_fwspec_ops(dev)) return -ENODEV; @@ -1636,13 +1635,14 @@ int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, acpi_arch_dma_setup(dev); + /* Ignore all other errors apart from EPROBE_DEFER */ ret = acpi_iommu_configure_id(dev, input_id); if (ret == -EPROBE_DEFER) return -EPROBE_DEFER; /* * Historically this routine doesn't fail driver probing due to errors - * in acpi_iommu_configure_id() + * in acpi_iommu_configure_id(). */ arch_setup_dma_ops(dev, 0, U64_MAX, attr == DEV_DMA_COHERENT); -- Gitee From de117933e8b2805b552755620b540a85a0b7b334 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 25 Mar 2024 14:33:02 +0200 Subject: [PATCH 122/143] ACPI: scan: Use standard error checking pattern ANBZ: #13617 commit 602401e32847f90c513df4a34bcac4dd6b02dd8d upstream. Check for an error and return it as it's the usual way to handle this. Signed-off-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki Signed-off-by: Shuai Xue --- drivers/acpi/scan.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 284cc0463310..47d58d30ebcb 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1546,12 +1546,13 @@ int acpi_iommu_fwspec_init(struct device *dev, u32 id, struct fwnode_handle *fwnode, const struct iommu_ops *ops) { - int ret = iommu_fwspec_init(dev, fwnode, ops); + int ret; - if (!ret) - ret = iommu_fwspec_add_ids(dev, &id, 1); + ret = iommu_fwspec_init(dev, fwnode, ops); + if (ret) + return ret; - return ret; + return iommu_fwspec_add_ids(dev, &id, 1); } static inline const struct iommu_ops *acpi_iommu_fwspec_ops(struct device *dev) -- Gitee From 18524443839dcd3cf605c7dbb0fa1b9b546c3543 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 25 Mar 2024 14:33:03 +0200 Subject: [PATCH 123/143] ACPI: scan: Introduce typedef:s for struct acpi_hotplug_context members ANBZ: #13617 commit f5c519fc3628915c2eccd65f86d7ee2ce5cc8645 upstream. Follow the struct acpi_device_ops approach and introduce typedef:s for the members. It makes code less verbose and more particular on what parameters we take or types we use. Signed-off-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki Signed-off-by: Shuai Xue --- drivers/acpi/dock.c | 48 +++++++++++++++-------------------------- drivers/acpi/scan.c | 5 ++--- include/acpi/acpi_bus.h | 13 ++++++----- 3 files changed, 27 insertions(+), 39 deletions(-) diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c index a89bdbe00184..c6e95bb8ccca 100644 --- a/drivers/acpi/dock.c +++ b/drivers/acpi/dock.c @@ -88,43 +88,29 @@ static void dock_hotplug_event(struct dock_dependent_device *dd, u32 event, enum dock_callback_type cb_type) { struct acpi_device *adev = dd->adev; + acpi_hp_fixup fixup = NULL; + acpi_hp_uevent uevent = NULL; + acpi_hp_notify notify = NULL; acpi_lock_hp_context(); - if (!adev->hp) - goto out; - - if (cb_type == DOCK_CALL_FIXUP) { - void (*fixup)(struct acpi_device *); - - fixup = adev->hp->fixup; - if (fixup) { - acpi_unlock_hp_context(); - fixup(adev); - return; - } - } else if (cb_type == DOCK_CALL_UEVENT) { - void (*uevent)(struct acpi_device *, u32); - - uevent = adev->hp->uevent; - if (uevent) { - acpi_unlock_hp_context(); - uevent(adev, event); - return; - } - } else { - int (*notify)(struct acpi_device *, u32); - - notify = adev->hp->notify; - if (notify) { - acpi_unlock_hp_context(); - notify(adev, event); - return; - } + if (adev->hp) { + if (cb_type == DOCK_CALL_FIXUP) + fixup = adev->hp->fixup; + else if (cb_type == DOCK_CALL_UEVENT) + uevent = adev->hp->uevent; + else + notify = adev->hp->notify; } - out: acpi_unlock_hp_context(); + + if (fixup) + fixup(adev); + else if (uevent) + uevent(adev, event); + else if (notify) + notify(adev, event); } static struct dock_station *find_dock_station(acpi_handle handle) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 47d58d30ebcb..9fa0a9072a3e 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -73,8 +73,7 @@ void acpi_unlock_hp_context(void) void acpi_initialize_hp_context(struct acpi_device *adev, struct acpi_hotplug_context *hp, - int (*notify)(struct acpi_device *, u32), - void (*uevent)(struct acpi_device *, u32)) + acpi_hp_notify notify, acpi_hp_uevent uevent) { acpi_lock_hp_context(); hp->notify = notify; @@ -390,7 +389,7 @@ void acpi_device_hotplug(struct acpi_device *adev, u32 src) } else if (adev->flags.hotplug_notify) { error = acpi_generic_hotplug_event(adev, src); } else { - int (*notify)(struct acpi_device *, u32); + acpi_hp_notify notify; acpi_lock_hp_context(); notify = adev->hp ? adev->hp->notify : NULL; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 67e5e8179fa4..5fef4530197f 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -142,11 +142,15 @@ struct acpi_scan_handler { * -------------------- */ +typedef int (*acpi_hp_notify) (struct acpi_device *, u32); +typedef void (*acpi_hp_uevent) (struct acpi_device *, u32); +typedef void (*acpi_hp_fixup) (struct acpi_device *); + struct acpi_hotplug_context { struct acpi_device *self; - int (*notify)(struct acpi_device *, u32); - void (*uevent)(struct acpi_device *, u32); - void (*fixup)(struct acpi_device *); + acpi_hp_notify notify; + acpi_hp_uevent uevent; + acpi_hp_fixup fixup; }; /* @@ -508,8 +512,7 @@ static inline void acpi_set_hp_context(struct acpi_device *adev, void acpi_initialize_hp_context(struct acpi_device *adev, struct acpi_hotplug_context *hp, - int (*notify)(struct acpi_device *, u32), - void (*uevent)(struct acpi_device *, u32)); + acpi_hp_notify notify, acpi_hp_uevent uevent); /* acpi_device.dev.bus == &acpi_bus_type */ extern struct bus_type acpi_bus_type; -- Gitee From 98d15c5b48ab3509782d29fde4dd95bd62c2b8cb Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 19 Apr 2024 17:54:46 +0100 Subject: [PATCH 124/143] dma-mapping: Simplify arch_setup_dma_ops() ANBZ: #13617 commit f091e93306e0429ebb7589b9874590b6a9705e64 upstream. The dma_base, size and iommu arguments are only used by ARM, and can now easily be deduced from the device itself, so there's no need to pass them through the callchain as well. Acked-by: Rob Herring Reviewed-by: Christoph Hellwig Reviewed-by: Michael Kelley # For Hyper-V Reviewed-by: Jason Gunthorpe Tested-by: Hanjun Guo Signed-off-by: Robin Murphy Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/5291c2326eab405b1aa7693aa964e8d3cb7193de.1713523152.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel [ Shuai Xue: Conflicts: drivers/acpi/scan.c minro: comments in acpi_dma_configure_id with a dot, just remove it. ] Signed-off-by: Shuai Xue --- arch/arc/mm/dma.c | 3 +-- arch/arm/mm/dma-mapping-nommu.c | 3 +-- arch/arm/mm/dma-mapping.c | 16 +++++++++------- arch/arm64/mm/dma-mapping.c | 3 +-- arch/mips/mm/dma-noncoherent.c | 3 +-- arch/riscv/mm/dma-noncoherent.c | 3 +-- drivers/acpi/scan.c | 7 +------ drivers/hv/hv_common.c | 6 +----- drivers/of/device.c | 4 +--- include/linux/dma-map-ops.h | 6 ++---- 10 files changed, 19 insertions(+), 35 deletions(-) diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 197707bc7658..6b85e94f3275 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -90,8 +90,7 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, /* * Plug in direct dma map ops. */ -void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - bool coherent) +void arch_setup_dma_ops(struct device *dev, bool coherent) { /* * IOC hardware snoops all DMA traffic keeping the caches consistent diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c index b94850b57995..97db5397c320 100644 --- a/arch/arm/mm/dma-mapping-nommu.c +++ b/arch/arm/mm/dma-mapping-nommu.c @@ -33,8 +33,7 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, } } -void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - bool coherent) +void arch_setup_dma_ops(struct device *dev, bool coherent) { if (IS_ENABLED(CONFIG_CPU_V7M)) { /* diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 6c359a3af8d9..5ac482d7ff94 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1712,11 +1712,15 @@ void arm_iommu_detach_device(struct device *dev) } EXPORT_SYMBOL_GPL(arm_iommu_detach_device); -static void arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size, - bool coherent) +static void arm_setup_iommu_dma_ops(struct device *dev) { struct dma_iommu_mapping *mapping; + u64 dma_base = 0, size = 1ULL << 32; + if (dev->dma_range_map) { + dma_base = dma_range_map_min(dev->dma_range_map); + size = dma_range_map_max(dev->dma_range_map) - dma_base; + } mapping = arm_iommu_create_mapping(dev->bus, dma_base, size); if (IS_ERR(mapping)) { pr_warn("Failed to create %llu-byte IOMMU mapping for device %s\n", @@ -1747,8 +1751,7 @@ static void arm_teardown_iommu_dma_ops(struct device *dev) #else -static void arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size, - bool coherent) +static void arm_setup_iommu_dma_ops(struct device *dev) { } @@ -1756,8 +1759,7 @@ static void arm_teardown_iommu_dma_ops(struct device *dev) { } #endif /* CONFIG_ARM_DMA_USE_IOMMU */ -void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - bool coherent) +void arch_setup_dma_ops(struct device *dev, bool coherent) { /* * Due to legacy code that sets the ->dma_coherent flag from a bus @@ -1777,7 +1779,7 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, return; if (device_iommu_mapped(dev)) - arm_setup_iommu_dma_ops(dev, dma_base, size, coherent); + arm_setup_iommu_dma_ops(dev); xen_setup_dma_ops(dev); dev->archdata.dma_ops_setup = true; diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 313d8938a2f0..0b320a25a471 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -46,8 +46,7 @@ void arch_teardown_dma_ops(struct device *dev) } #endif -void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - bool coherent) +void arch_setup_dma_ops(struct device *dev, bool coherent) { int cls = cache_line_size_of_cpu(); diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c index 0f3cec663a12..ab4f2a75a7d0 100644 --- a/arch/mips/mm/dma-noncoherent.c +++ b/arch/mips/mm/dma-noncoherent.c @@ -137,8 +137,7 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, #endif #ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS -void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - bool coherent) +void arch_setup_dma_ops(struct device *dev, bool coherent) { dev->dma_coherent = coherent; } diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c index 32031a7d96d4..cf19010b1172 100644 --- a/arch/riscv/mm/dma-noncoherent.c +++ b/arch/riscv/mm/dma-noncoherent.c @@ -129,8 +129,7 @@ void arch_dma_prep_coherent(struct page *page, size_t size) ALT_CMO_OP_VPA(flush, flush_addr, paddr, size, riscv_cbom_block_size); } -void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - bool coherent) +void arch_setup_dma_ops(struct device *dev, bool coherent) { WARN_TAINT(!coherent && riscv_cbom_block_size > ARCH_DMA_MINALIGN, TAINT_CPU_OUT_OF_SPEC, diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 9fa0a9072a3e..e4a6591fe5db 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1640,12 +1640,7 @@ int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, if (ret == -EPROBE_DEFER) return -EPROBE_DEFER; - /* - * Historically this routine doesn't fail driver probing due to errors - * in acpi_iommu_configure_id(). - */ - - arch_setup_dma_ops(dev, 0, U64_MAX, attr == DEV_DMA_COHERENT); + arch_setup_dma_ops(dev, attr == DEV_DMA_COHERENT); return 0; } diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index fd938b6dfa7e..5561714313d7 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -485,11 +485,7 @@ EXPORT_SYMBOL_GPL(hv_query_ext_cap); void hv_setup_dma_ops(struct device *dev, bool coherent) { - /* - * Hyper-V does not offer a vIOMMU in the guest - * VM, so pass 0/NULL for the IOMMU settings - */ - arch_setup_dma_ops(dev, 0, 0, coherent); + arch_setup_dma_ops(dev, coherent); } EXPORT_SYMBOL_GPL(hv_setup_dma_ops); diff --git a/drivers/of/device.c b/drivers/of/device.c index 0681c220d114..24b15d3fd449 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -95,7 +95,6 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, { const struct bus_dma_region *map = NULL; struct device_node *bus_np; - u64 dma_start = 0; u64 mask, end = 0; bool coherent; int iommu_ret; @@ -118,7 +117,6 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, return ret == -ENODEV ? 0 : ret; } else { /* Determine the overall bounds of all DMA regions */ - dma_start = dma_range_map_min(map); end = dma_range_map_max(map); } @@ -175,7 +173,7 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, } else dev_dbg(dev, "device is behind an iommu\n"); - arch_setup_dma_ops(dev, dma_start, end - dma_start + 1, coherent); + arch_setup_dma_ops(dev, coherent); if (iommu_ret) of_dma_set_restricted_buffer(dev, np); diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 80b8f56e57be..cc848e7380f7 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -433,11 +433,9 @@ bool arch_dma_unmap_sg_direct(struct device *dev, struct scatterlist *sg, #endif #ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS -void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - bool coherent); +void arch_setup_dma_ops(struct device *dev, bool coherent); #else -static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base, - u64 size, bool coherent) +static inline void arch_setup_dma_ops(struct device *dev, bool coherent) { } #endif /* CONFIG_ARCH_HAS_SETUP_DMA_OPS */ -- Gitee From 7fbf6c32baf624532c342a919f3a4b18203ed938 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 30 Apr 2024 11:22:53 +0100 Subject: [PATCH 125/143] arm64: Properly clean up iommu-dma remnants ANBZ: #13617 commit 8b80549f1bc692cf9130af8555b6c89cec24e1a6 upstream. Thanks to the somewhat asymmetrical nature, while removing iommu_setup_dma_ops() from the arch_setup_dma_ops() flow, I managed to forget that arm64's teardown path was also specific to iommu-dma. Clean that up to match, otherwise probe deferral will lead to the arch code erroneously removing DMA ops set elsewhere. Reported-by: Dmitry Baryshkov Link: https://lore.kernel.org/linux-iommu/Zi_LV28TR-P-PzXi@eriador.lumag.spb.ru/ Fixes: b67483b3c44e ("iommu/dma: Centralise iommu_setup_dma_ops()") Signed-off-by: Robin Murphy Tested-by: Dmitry Baryshkov Acked-by: Catalin Marinas Reviewed-by: Konrad Dybcio Acked-by: Will Deacon Tested-by: Nicolin Chen Link: https://lore.kernel.org/r/d4cc20cbb0c45175e98dd76bf187e2ad6421296d.1714472573.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- arch/arm64/Kconfig | 1 - arch/arm64/mm/dma-mapping.c | 8 -------- 2 files changed, 9 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b8d7b905b8a2..2e2ff6f4b44b 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -49,7 +49,6 @@ config ARM64 select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYSCALL_WRAPPER - select ARCH_HAS_TEARDOWN_DMA_OPS if IOMMU_SUPPORT select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_ZONE_DMA_SET if EXPERT select ARCH_HAVE_ELF_PROT diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 0b320a25a471..b2b5792b2caa 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include @@ -39,13 +38,6 @@ void arch_dma_prep_coherent(struct page *page, size_t size) dcache_clean_poc(start, start + size); } -#ifdef CONFIG_IOMMU_DMA -void arch_teardown_dma_ops(struct device *dev) -{ - dev->dma_ops = NULL; -} -#endif - void arch_setup_dma_ops(struct device *dev, bool coherent) { int cls = cache_line_size_of_cpu(); -- Gitee From 5e6b6159e34e91ea7756db4530d8413b3f3aa7b1 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 2 Jul 2024 12:40:47 +0100 Subject: [PATCH 126/143] iommu/mediatek-v1: Clean up redundant fwspec checks ANBZ: #13617 commit e7acc36f26b0b1a71dd068f4afd33d871352d67d upstream. The driver explicitly clears any existing fwspec before calling mtk_iommu_v1_create_mapping(), but even if it didn't, the checks it's doing there duplicate what iommu_fwspec_init() would do anyway. Clean them up. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202407020415.KKnhPTUj-lkp@intel.com/ Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/7d6ebec135483f889af00eb376aa31c012efc3b2.1719919669.git.robin.murphy@arm.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/mtk_iommu_v1.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index d6e4002200bd..2b64ea46318f 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -401,7 +401,6 @@ static const struct iommu_ops mtk_iommu_v1_ops; static int mtk_iommu_v1_create_mapping(struct device *dev, const struct of_phandle_args *args) { - struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct mtk_iommu_v1_data *data; struct platform_device *m4updev; struct dma_iommu_mapping *mtk_mapping; @@ -413,14 +412,9 @@ static int mtk_iommu_v1_create_mapping(struct device *dev, return -EINVAL; } - if (!fwspec) { - ret = iommu_fwspec_init(dev, &args->np->fwnode, &mtk_iommu_v1_ops); - if (ret) - return ret; - fwspec = dev_iommu_fwspec_get(dev); - } else if (dev_iommu_fwspec_get(dev)->ops != &mtk_iommu_v1_ops) { - return -EINVAL; - } + ret = iommu_fwspec_init(dev, &args->np->fwnode, &mtk_iommu_v1_ops); + if (ret) + return ret; if (!dev_iommu_priv_get(dev)) { /* Get the m4u device */ -- Gitee From b1faff78947f7c33e24a2f187edb5ab979603a21 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 2 Jul 2024 12:40:48 +0100 Subject: [PATCH 127/143] iommu: Resolve fwspec ops automatically ANBZ: #13617 commit 3f7c320916282c26812d70cfe8830abb9e4dc696 upstream. There's no real need for callers to resolve ops from a fwnode in order to then pass both to iommu_fwspec_init() - it's simpler and more sensible for that to resolve the ops itself. This in turn means we can centralise the notion of checking for a present driver, and enforce that fwspecs aren't allocated unless and until we know they will be usable. Also use this opportunity to modernise with some "new" helpers that arrived shortly after this code was first written; the generic fwnode_handle_get() clears up that ugly get/put mismatch, while of_fwnode_handle() can now abstract those open-coded dereferences. Tested-by: Jean-Philippe Brucker Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/0e2727adeb8cd73274425322f2f793561bdc927e.1719919669.git.robin.murphy@arm.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/acpi/arm64/iort.c | 19 +++++-------------- drivers/acpi/scan.c | 8 +++----- drivers/acpi/viot.c | 11 ++--------- drivers/iommu/arm/arm-smmu/arm-smmu.c | 3 +-- drivers/iommu/iommu-priv.h | 2 ++ drivers/iommu/iommu.c | 9 ++++++--- drivers/iommu/mtk_iommu_v1.c | 2 +- drivers/iommu/of_iommu.c | 19 ++++++------------- drivers/iommu/tegra-smmu.c | 2 +- include/acpi/acpi_bus.h | 3 +-- include/linux/iommu.h | 13 ++----------- 11 files changed, 30 insertions(+), 61 deletions(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index fd567b5efa5f..ab91b4f591c8 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1221,10 +1221,10 @@ static bool iort_pci_rc_supports_ats(struct acpi_iort_node *node) static int iort_iommu_xlate(struct device *dev, struct acpi_iort_node *node, u32 streamid) { - const struct iommu_ops *ops; struct fwnode_handle *iort_fwnode; - if (!node) + /* If there's no SMMU driver at all, give up now */ + if (!node || !iort_iommu_driver_enabled(node->type)) return -ENODEV; iort_fwnode = iort_get_fwnode(node); @@ -1232,19 +1232,10 @@ static int iort_iommu_xlate(struct device *dev, struct acpi_iort_node *node, return -ENODEV; /* - * If the ops look-up fails, this means that either - * the SMMU drivers have not been probed yet or that - * the SMMU drivers are not built in the kernel; - * Depending on whether the SMMU drivers are built-in - * in the kernel or not, defer the IOMMU configuration - * or just abort it. + * If the SMMU drivers are enabled but not loaded/probed + * yet, this will defer. */ - ops = iommu_ops_from_fwnode(iort_fwnode); - if (!ops) - return iort_iommu_driver_enabled(node->type) ? - -EPROBE_DEFER : -ENODEV; - - return acpi_iommu_fwspec_init(dev, streamid, iort_fwnode, ops); + return acpi_iommu_fwspec_init(dev, streamid, iort_fwnode); } struct iort_pci_alias_info { diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index e4a6591fe5db..043bea32ce0e 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1542,12 +1542,11 @@ int acpi_dma_get_range(struct device *dev, const struct bus_dma_region **map) #ifdef CONFIG_IOMMU_API int acpi_iommu_fwspec_init(struct device *dev, u32 id, - struct fwnode_handle *fwnode, - const struct iommu_ops *ops) + struct fwnode_handle *fwnode) { int ret; - ret = iommu_fwspec_init(dev, fwnode, ops); + ret = iommu_fwspec_init(dev, fwnode); if (ret) return ret; @@ -1604,8 +1603,7 @@ static int acpi_iommu_configure_id(struct device *dev, const u32 *id_in) #else /* !CONFIG_IOMMU_API */ int acpi_iommu_fwspec_init(struct device *dev, u32 id, - struct fwnode_handle *fwnode, - const struct iommu_ops *ops) + struct fwnode_handle *fwnode) { return -ENODEV; } diff --git a/drivers/acpi/viot.c b/drivers/acpi/viot.c index c8025921c129..2aa69a2fba73 100644 --- a/drivers/acpi/viot.c +++ b/drivers/acpi/viot.c @@ -307,21 +307,14 @@ void __init acpi_viot_init(void) static int viot_dev_iommu_init(struct device *dev, struct viot_iommu *viommu, u32 epid) { - const struct iommu_ops *ops; - - if (!viommu) + if (!viommu || !IS_ENABLED(CONFIG_VIRTIO_IOMMU)) return -ENODEV; /* We're not translating ourself */ if (device_match_fwnode(dev, viommu->fwnode)) return -EINVAL; - ops = iommu_ops_from_fwnode(viommu->fwnode); - if (!ops) - return IS_ENABLED(CONFIG_VIRTIO_IOMMU) ? - -EPROBE_DEFER : -ENODEV; - - return acpi_iommu_fwspec_init(dev, epid, viommu->fwnode, ops); + return acpi_iommu_fwspec_init(dev, epid, viommu->fwnode); } static int viot_pci_dev_iommu_init(struct pci_dev *pdev, u16 dev_id, void *data) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 5d7feb43f094..a7f7c69a5627 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -183,8 +183,7 @@ static int arm_smmu_register_legacy_master(struct device *dev, it.cur_count = 1; } - err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode, - &arm_smmu_ops); + err = iommu_fwspec_init(dev, NULL); if (err) return err; diff --git a/drivers/iommu/iommu-priv.h b/drivers/iommu/iommu-priv.h index 5f731d994803..078cafcf49b4 100644 --- a/drivers/iommu/iommu-priv.h +++ b/drivers/iommu/iommu-priv.h @@ -17,6 +17,8 @@ static inline const struct iommu_ops *dev_iommu_ops(struct device *dev) return dev->iommu->iommu_dev->ops; } +const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode); + int iommu_group_replace_domain(struct iommu_group *group, struct iommu_domain *new_domain); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 11c91a58ea0e..598fedddb07b 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2855,11 +2855,14 @@ const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode return ops; } -int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, - const struct iommu_ops *ops) +int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode) { + const struct iommu_ops *ops = iommu_ops_from_fwnode(iommu_fwnode); struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + if (!ops) + return -EPROBE_DEFER; + if (fwspec) return ops == fwspec->ops ? 0 : -EINVAL; @@ -2871,7 +2874,7 @@ int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, if (!fwspec) return -ENOMEM; - of_node_get(to_of_node(iommu_fwnode)); + fwnode_handle_get(iommu_fwnode); fwspec->iommu_fwnode = iommu_fwnode; fwspec->ops = ops; dev_iommu_fwspec_set(dev, fwspec); diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 2b64ea46318f..c6ea5b4baff3 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -412,7 +412,7 @@ static int mtk_iommu_v1_create_mapping(struct device *dev, return -EINVAL; } - ret = iommu_fwspec_init(dev, &args->np->fwnode, &mtk_iommu_v1_ops); + ret = iommu_fwspec_init(dev, of_fwnode_handle(args->np)); if (ret) return ret; diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 3afe0b48a48d..08c523ad55ad 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -21,26 +21,19 @@ static int of_iommu_xlate(struct device *dev, struct of_phandle_args *iommu_spec) { const struct iommu_ops *ops; - struct fwnode_handle *fwnode = &iommu_spec->np->fwnode; int ret; - ops = iommu_ops_from_fwnode(fwnode); - if ((ops && !ops->of_xlate) || - !of_device_is_available(iommu_spec->np)) + if (!of_device_is_available(iommu_spec->np)) return -ENODEV; - ret = iommu_fwspec_init(dev, fwnode, ops); + ret = iommu_fwspec_init(dev, of_fwnode_handle(iommu_spec->np)); + if (ret == -EPROBE_DEFER) + return driver_deferred_probe_check_state(dev); if (ret) return ret; - /* - * The otherwise-empty fwspec handily serves to indicate the specific - * IOMMU device we're waiting for, which will be useful if we ever get - * a proper probe-ordering dependency mechanism in future. - */ - if (!ops) - return driver_deferred_probe_check_state(dev); - if (!try_module_get(ops->owner)) + ops = dev_iommu_fwspec_get(dev)->ops; + if (!ops->of_xlate || !try_module_get(ops->owner)) return -ENODEV; ret = ops->of_xlate(dev, iommu_spec); diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index f86c7ae91814..4365d9936e68 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -837,7 +837,7 @@ static int tegra_smmu_configure(struct tegra_smmu *smmu, struct device *dev, const struct iommu_ops *ops = smmu->iommu.ops; int err; - err = iommu_fwspec_init(dev, &dev->of_node->fwnode, ops); + err = iommu_fwspec_init(dev, of_fwnode_handle(dev->of_node)); if (err < 0) { dev_err(dev, "failed to initialize fwspec: %d\n", err); return err; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 5fef4530197f..997eb429b088 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -657,8 +657,7 @@ struct iommu_ops; bool acpi_dma_supported(const struct acpi_device *adev); enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev); int acpi_iommu_fwspec_init(struct device *dev, u32 id, - struct fwnode_handle *fwnode, - const struct iommu_ops *ops); + struct fwnode_handle *fwnode); int acpi_dma_get_range(struct device *dev, const struct bus_dma_region **map); int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, const u32 *input_id); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index fab03591cef0..0f94774b20fd 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1038,11 +1038,9 @@ struct iommu_mm_data { struct list_head sva_handles; }; -int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, - const struct iommu_ops *ops); +int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode); void iommu_fwspec_free(struct device *dev); int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids); -const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode); static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev) { @@ -1353,8 +1351,7 @@ static inline void iommu_device_unlink(struct device *dev, struct device *link) } static inline int iommu_fwspec_init(struct device *dev, - struct fwnode_handle *iommu_fwnode, - const struct iommu_ops *ops) + struct fwnode_handle *iommu_fwnode) { return -ENODEV; } @@ -1369,12 +1366,6 @@ static inline int iommu_fwspec_add_ids(struct device *dev, u32 *ids, return -ENODEV; } -static inline -const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode) -{ - return NULL; -} - static inline int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) { -- Gitee From 7af318c2fbd82cb87c7b7ba853d3c86f11c76e8a Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 2 Jul 2024 12:40:49 +0100 Subject: [PATCH 128/143] ACPI: Retire acpi_iommu_fwspec_ops() ANBZ: #13617 commit 78596b5c321c9d74eeef1ad51c964563a4081f79 upstream. Now that iommu_fwspec_init() can signal for probe deferral directly, acpi_iommu_fwspec_ops() is unneeded and can be cleaned up. Acked-by: Rafael J. Wysocki Tested-by: Jean-Philippe Brucker Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/011e39e275aba3ad451c5a1965ca8ddf20ed36c2.1719919669.git.robin.murphy@arm.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/acpi/scan.c | 28 +++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 043bea32ce0e..be25e7015523 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1553,26 +1553,14 @@ int acpi_iommu_fwspec_init(struct device *dev, u32 id, return iommu_fwspec_add_ids(dev, &id, 1); } -static inline const struct iommu_ops *acpi_iommu_fwspec_ops(struct device *dev) -{ - struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - - return fwspec ? fwspec->ops : NULL; -} - static int acpi_iommu_configure_id(struct device *dev, const u32 *id_in) { int err; - const struct iommu_ops *ops; /* Serialise to make dev->iommu stable under our potential fwspec */ mutex_lock(&iommu_probe_device_lock); - /* - * If we already translated the fwspec there is nothing left to do, - * return the iommu_ops. - */ - ops = acpi_iommu_fwspec_ops(dev); - if (ops) { + /* If we already translated the fwspec there is nothing left to do */ + if (dev_iommu_fwspec_get(dev)) { mutex_unlock(&iommu_probe_device_lock); return 0; } @@ -1589,15 +1577,7 @@ static int acpi_iommu_configure_id(struct device *dev, const u32 *id_in) if (!err && dev->bus) err = iommu_probe_device(dev); - if (err == -EPROBE_DEFER) - return err; - if (err) { - dev_dbg(dev, "Adding to IOMMU failed: %d\n", err); - return err; - } - if (!acpi_iommu_fwspec_ops(dev)) - return -ENODEV; - return 0; + return err; } #else /* !CONFIG_IOMMU_API */ @@ -1637,6 +1617,8 @@ int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, ret = acpi_iommu_configure_id(dev, input_id); if (ret == -EPROBE_DEFER) return -EPROBE_DEFER; + if (ret) + dev_dbg(dev, "Adding to IOMMU failed: %d\n", ret); arch_setup_dma_ops(dev, attr == DEV_DMA_COHERENT); -- Gitee From 9d15497fd5edf616b84be1f605c7cb1a5f3cebdd Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 2 Jul 2024 12:40:50 +0100 Subject: [PATCH 129/143] OF: Simplify of_iommu_configure() ANBZ: #13617 commit 5f937bc48a6aa63970648c54fb40ea8f96b633dc upstream. We no longer have a notion of partially-initialised fwspecs existing, and we also no longer need to use an iommu_ops pointer to return status to of_dma_configure(). Clean up the remains of those, which lends itself to clarifying the logic around the dma_range_map allocation as well. Acked-by: Rob Herring (Arm) Tested-by: Jean-Philippe Brucker Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/61972f88e31a6eda8bf5852f0853951164279a3c.1719919669.git.robin.murphy@arm.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/of_iommu.c | 29 ++++++++++------------------- drivers/of/device.c | 30 +++++++++++------------------- 2 files changed, 21 insertions(+), 38 deletions(-) diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 08c523ad55ad..c946521a5906 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -108,7 +108,6 @@ static int of_iommu_configure_device(struct device_node *master_np, int of_iommu_configure(struct device *dev, struct device_node *master_np, const u32 *id) { - struct iommu_fwspec *fwspec; int err; if (!master_np) @@ -116,14 +115,9 @@ int of_iommu_configure(struct device *dev, struct device_node *master_np, /* Serialise to make dev->iommu stable under our potential fwspec */ mutex_lock(&iommu_probe_device_lock); - fwspec = dev_iommu_fwspec_get(dev); - if (fwspec) { - if (fwspec->ops) { - mutex_unlock(&iommu_probe_device_lock); - return 0; - } - /* In the deferred case, start again from scratch */ - iommu_fwspec_free(dev); + if (dev_iommu_fwspec_get(dev)) { + mutex_unlock(&iommu_probe_device_lock); + return 0; } /* @@ -143,20 +137,17 @@ int of_iommu_configure(struct device *dev, struct device_node *master_np, } else { err = of_iommu_configure_device(master_np, dev, id); } - mutex_unlock(&iommu_probe_device_lock); - if (err == -ENODEV || err == -EPROBE_DEFER) - return err; if (err) - goto err_log; + iommu_fwspec_free(dev); + mutex_unlock(&iommu_probe_device_lock); - err = iommu_probe_device(dev); - if (err) - goto err_log; - return 0; + if (!err && dev->bus) + err = iommu_probe_device(dev); + + if (err && err != -EPROBE_DEFER) + dev_dbg(dev, "Adding to IOMMU failed: %d\n", err); -err_log: - dev_dbg(dev, "Adding to IOMMU failed: %pe\n", ERR_PTR(err)); return err; } diff --git a/drivers/of/device.c b/drivers/of/device.c index 24b15d3fd449..dc98aae05677 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -96,8 +96,7 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, const struct bus_dma_region *map = NULL; struct device_node *bus_np; u64 mask, end = 0; - bool coherent; - int iommu_ret; + bool coherent, set_map = false; int ret; if (np == dev->of_node) @@ -118,6 +117,7 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, } else { /* Determine the overall bounds of all DMA regions */ end = dma_range_map_max(map); + set_map = true; } /* @@ -144,7 +144,7 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, dev->coherent_dma_mask &= mask; *dev->dma_mask &= mask; /* ...but only set bus limit and range map if we found valid dma-ranges earlier */ - if (!ret) { + if (set_map) { dev->bus_dma_limit = end; dev->dma_range_map = map; } @@ -153,29 +153,21 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, dev_dbg(dev, "device is%sdma coherent\n", coherent ? " " : " not "); - iommu_ret = of_iommu_configure(dev, np, id); - if (iommu_ret == -EPROBE_DEFER) { + ret = of_iommu_configure(dev, np, id); + if (ret == -EPROBE_DEFER) { /* Don't touch range map if it wasn't set from a valid dma-ranges */ - if (!ret) + if (set_map) dev->dma_range_map = NULL; kfree(map); return -EPROBE_DEFER; - } else if (iommu_ret == -ENODEV) { - dev_dbg(dev, "device is not behind an iommu\n"); - } else if (iommu_ret) { - dev_err(dev, "iommu configuration for device failed with %pe\n", - ERR_PTR(iommu_ret)); - - /* - * Historically this routine doesn't fail driver probing - * due to errors in of_iommu_configure() - */ - } else - dev_dbg(dev, "device is behind an iommu\n"); + } + /* Take all other IOMMU errors to mean we'll just carry on without it */ + dev_dbg(dev, "device is%sbehind an iommu\n", + !ret ? " " : " not "); arch_setup_dma_ops(dev, coherent); - if (iommu_ret) + if (ret) of_dma_set_restricted_buffer(dev, np); return 0; -- Gitee From 451f67b609d4cd9229369ef4347eb3863726b04a Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 2 Jul 2024 12:40:51 +0100 Subject: [PATCH 130/143] iommu: Remove iommu_fwspec ops ANBZ: #13617 commit 3e36c15fc1cce65cccc93ed16f86d8ff9d2f9992 upstream. The ops in iommu_fwspec are only needed for the early configuration and probe process, and by now are easy enough to derive on-demand in those couple of places which need them, so remove the redundant stored copy. Tested-by: Jean-Philippe Brucker Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/55c1410b2cd09531eab4f8e2f18f92a0faa0ea75.1719919669.git.robin.murphy@arm.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/iommu-priv.h | 5 +++++ drivers/iommu/iommu.c | 11 ++--------- drivers/iommu/of_iommu.c | 4 +++- include/linux/iommu.h | 2 -- 4 files changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/iommu-priv.h b/drivers/iommu/iommu-priv.h index 078cafcf49b4..a34efed2884b 100644 --- a/drivers/iommu/iommu-priv.h +++ b/drivers/iommu/iommu-priv.h @@ -19,6 +19,11 @@ static inline const struct iommu_ops *dev_iommu_ops(struct device *dev) const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode); +static inline const struct iommu_ops *iommu_fwspec_ops(struct iommu_fwspec *fwspec) +{ + return iommu_ops_from_fwnode(fwspec ? fwspec->iommu_fwnode : NULL); +} + int iommu_group_replace_domain(struct iommu_group *group, struct iommu_domain *new_domain); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 598fedddb07b..0b5dc2e2d826 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -510,7 +510,6 @@ DEFINE_MUTEX(iommu_probe_device_lock); static int __iommu_probe_device(struct device *dev, struct list_head *group_list) { const struct iommu_ops *ops; - struct iommu_fwspec *fwspec; struct iommu_group *group; struct group_device *gdev; int ret; @@ -523,12 +522,7 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list * be present, and that any of their registered instances has suitable * ops for probing, and thus cheekily co-opt the same mechanism. */ - fwspec = dev_iommu_fwspec_get(dev); - if (fwspec && fwspec->ops) - ops = fwspec->ops; - else - ops = iommu_ops_from_fwnode(NULL); - + ops = iommu_fwspec_ops(dev_iommu_fwspec_get(dev)); if (!ops) return -ENODEV; /* @@ -2864,7 +2858,7 @@ int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode) return -EPROBE_DEFER; if (fwspec) - return ops == fwspec->ops ? 0 : -EINVAL; + return ops == iommu_fwspec_ops(fwspec) ? 0 : -EINVAL; if (!dev_iommu_get(dev)) return -ENOMEM; @@ -2876,7 +2870,6 @@ int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode) fwnode_handle_get(iommu_fwnode); fwspec->iommu_fwnode = iommu_fwnode; - fwspec->ops = ops; dev_iommu_fwspec_set(dev, fwspec); return 0; } diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index c946521a5906..559c5db78edb 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -17,6 +17,8 @@ #include #include +#include "iommu-priv.h" + static int of_iommu_xlate(struct device *dev, struct of_phandle_args *iommu_spec) { @@ -32,7 +34,7 @@ static int of_iommu_xlate(struct device *dev, if (ret) return ret; - ops = dev_iommu_fwspec_get(dev)->ops; + ops = iommu_ops_from_fwnode(&iommu_spec->np->fwnode); if (!ops->of_xlate || !try_module_get(ops->owner)) return -ENODEV; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 0f94774b20fd..ea4bc93c617f 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1001,7 +1001,6 @@ extern struct iommu_group *generic_single_device_group(struct device *dev); /** * struct iommu_fwspec - per-device IOMMU instance data - * @ops: ops for this device's IOMMU * @iommu_fwnode: firmware handle for this device's IOMMU * @flags: IOMMU_FWSPEC_* flags * @num_ids: number of associated device IDs @@ -1012,7 +1011,6 @@ extern struct iommu_group *generic_single_device_group(struct device *dev); * consumers. */ struct iommu_fwspec { - const struct iommu_ops *ops; struct fwnode_handle *iommu_fwnode; u32 flags; unsigned int num_ids; -- Gitee From 7c066332128060507f7df89d03cae4e49e328ede Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 7 Jun 2024 11:54:14 +0100 Subject: [PATCH 131/143] dt-bindings: PCI: generic: Add ats-supported property ANBZ: #13617 commit 40929e8e5449a18bc98baf7a907dd6674bd60049 upstream. Add a way for firmware to tell the OS that ATS is supported by the PCI root complex. An endpoint with ATS enabled may send Translation Requests and Translated Memory Requests, which look just like Normal Memory Requests with a non-zero AT field. So a root controller that ignores the AT field may simply forward the request to the IOMMU as a Normal Memory Request, which could end badly. In any case, the endpoint will be unusable. The ats-supported property allows the OS to only enable ATS in endpoints if the root controller can handle ATS requests. Only add the property to pcie-host-ecam-generic for the moment. For non-generic root controllers, availability of ATS can be inferred from the compatible string. Reviewed-by: Rob Herring Reviewed-by: Liviu Dudau Signed-off-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20240607105415.2501934-3-jean-philippe@linaro.org Signed-off-by: Will Deacon Signed-off-by: Jay Chen --- Documentation/devicetree/bindings/pci/host-generic-pci.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/devicetree/bindings/pci/host-generic-pci.yaml b/Documentation/devicetree/bindings/pci/host-generic-pci.yaml index d25423aa7167..94d4a4914a48 100644 --- a/Documentation/devicetree/bindings/pci/host-generic-pci.yaml +++ b/Documentation/devicetree/bindings/pci/host-generic-pci.yaml @@ -110,6 +110,12 @@ properties: iommu-map-mask: true msi-parent: true + ats-supported: + description: + Indicates that a PCIe host controller supports ATS, and can handle Memory + Requests with Address Type (AT). + type: boolean + required: - compatible - reg -- Gitee From 2762fce0532aca481de7547330b83b59673782e5 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 7 Jun 2024 11:54:15 +0100 Subject: [PATCH 132/143] iommu/of: Support ats-supported device-tree property ANBZ: #13617 commit 86e02a88bedc1072beb5445d408e379674b0b7f3 upstream. Device-tree declares whether a PCI root-complex supports ATS by setting the "ats-supported" property. Copy this flag into device fwspec to let IOMMU drivers quickly check if they can enable ATS for a device. Tested-by: Ketan Patil Reviewed-by: Jason Gunthorpe Reviewed-by: Liviu Dudau Reviewed-by: Robin Murphy Reviewed-by: Nicolin Chen Signed-off-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20240607105415.2501934-4-jean-philippe@linaro.org Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/of_iommu.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 559c5db78edb..78d61da75257 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -100,6 +100,14 @@ static int of_iommu_configure_device(struct device_node *master_np, of_iommu_configure_dev(master_np, dev); } +static void of_pci_check_device_ats(struct device *dev, struct device_node *np) +{ + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + + if (fwspec && of_property_read_bool(np, "ats-supported")) + fwspec->flags |= IOMMU_FWSPEC_PCI_RC_ATS; +} + /* * Returns: * 0 on success, an iommu was configured @@ -136,6 +144,7 @@ int of_iommu_configure(struct device *dev, struct device_node *master_np, pci_request_acs(); err = pci_for_each_dma_alias(to_pci_dev(dev), of_pci_iommu_init, &info); + of_pci_check_device_ats(dev, master_np); } else { err = of_iommu_configure_device(master_np, dev, id); } -- Gitee From 8b6c8aff511f3ff0d3cf5d0a78ca4ea6da17b7c5 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 7 Jun 2024 11:54:16 +0100 Subject: [PATCH 133/143] arm64: dts: fvp: Enable PCIe ATS for Base RevC FVP ANBZ: #13617 commit 6bac3388889cec379ecb06e5557dd6f31a31544e upstream. Declare that the host controller supports ATS, so the OS can enable it for ATS-capable PCIe endpoints. Acked-by: Sudeep Holla Signed-off-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20240607105415.2501934-5-jean-philippe@linaro.org Signed-off-by: Will Deacon Signed-off-by: Jay Chen --- arch/arm64/boot/dts/arm/fvp-base-revc.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/arm/fvp-base-revc.dts b/arch/arm64/boot/dts/arm/fvp-base-revc.dts index 60472d65a355..85f1c15cc65d 100644 --- a/arch/arm64/boot/dts/arm/fvp-base-revc.dts +++ b/arch/arm64/boot/dts/arm/fvp-base-revc.dts @@ -243,6 +243,7 @@ pci: pci@40000000 { iommu-map = <0x0 &smmu 0x0 0x10000>; dma-coherent; + ats-supported; }; smmu: iommu@2b400000 { -- Gitee From 561fa4bb7eea43c600ed067712a413950a939be8 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 14:34:35 +0800 Subject: [PATCH 134/143] iommu: Introduce domain attachment handle ANBZ: #13617 commit 14678219cf4093e897ab353fd78eab7994d1be7d upstream. Currently, when attaching a domain to a device or its PASID, domain is stored within the iommu group. It could be retrieved for use during the window between attachment and detachment. With new features introduced, there's a need to store more information than just a domain pointer. This information essentially represents the association between a domain and a device. For example, the SVA code already has a custom struct iommu_sva which represents a bond between sva domain and a PASID of a device. Looking forward, the IOMMUFD needs a place to store the iommufd_device pointer in the core, so that the device object ID could be quickly retrieved in the critical fault handling path. Introduce domain attachment handle that explicitly represents the attachment relationship between a domain and a device or its PASID. Co-developed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240702063444.105814-2-baolu.lu@linux.intel.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/dma/idxd/init.c | 2 +- drivers/iommu/iommu-sva.c | 13 ++++++++----- drivers/iommu/iommu.c | 26 ++++++++++++++++---------- include/linux/iommu.h | 18 +++++++++++++++--- 4 files changed, 40 insertions(+), 19 deletions(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 25c557660215..20136d00c098 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -662,7 +662,7 @@ static int idxd_enable_system_pasid(struct idxd_device *idxd) * DMA domain is owned by the driver, it should support all valid * types such as DMA-FQ, identity, etc. */ - ret = iommu_attach_device_pasid(domain, dev, pasid); + ret = iommu_attach_device_pasid(domain, dev, pasid, NULL); if (ret) { dev_err(dev, "failed to attach device pasid %d, domain type %d", pasid, domain->type); diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index 25e581299226..1a737ab0a73a 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -101,7 +101,9 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm /* Search for an existing domain. */ list_for_each_entry(domain, &mm->iommu_mm->sva_domains, next) { - ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid); + handle->handle.domain = domain; + ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid, + &handle->handle); if (!ret) { domain->users++; goto out; @@ -115,7 +117,9 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm goto out_free_handle; } - ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid); + handle->handle.domain = domain; + ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid, + &handle->handle); if (ret) goto out_free_domain; domain->users = 1; @@ -126,7 +130,6 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm list_add(&handle->handle_item, &mm->iommu_mm->sva_handles); mutex_unlock(&iommu_sva_lock); handle->dev = dev; - handle->domain = domain; return handle; out_free_domain: @@ -149,7 +152,7 @@ EXPORT_SYMBOL_GPL(iommu_sva_bind_device); */ void iommu_sva_unbind_device(struct iommu_sva *handle) { - struct iommu_domain *domain = handle->domain; + struct iommu_domain *domain = handle->handle.domain; struct iommu_mm_data *iommu_mm = domain->mm->iommu_mm; struct device *dev = handle->dev; @@ -172,7 +175,7 @@ EXPORT_SYMBOL_GPL(iommu_sva_unbind_device); u32 iommu_sva_get_pasid(struct iommu_sva *handle) { - struct iommu_domain *domain = handle->domain; + struct iommu_domain *domain = handle->handle.domain; return mm_get_enqcmd_pasid(domain->mm); } diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 0b5dc2e2d826..0834dfff4ee5 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -3386,16 +3386,17 @@ static void __iommu_remove_group_pasid(struct iommu_group *group, * @domain: the iommu domain. * @dev: the attached device. * @pasid: the pasid of the device. + * @handle: the attach handle. * * Return: 0 on success, or an error. */ int iommu_attach_device_pasid(struct iommu_domain *domain, - struct device *dev, ioasid_t pasid) + struct device *dev, ioasid_t pasid, + struct iommu_attach_handle *handle) { /* Caller must be a probed driver on dev */ struct iommu_group *group = dev->iommu_group; struct group_device *device; - void *curr; int ret; if (!domain->ops->set_dev_pasid) @@ -3416,11 +3417,12 @@ int iommu_attach_device_pasid(struct iommu_domain *domain, } } - curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, GFP_KERNEL); - if (curr) { - ret = xa_err(curr) ? : -EBUSY; + if (handle) + handle->domain = domain; + + ret = xa_insert(&group->pasid_array, pasid, handle, GFP_KERNEL); + if (ret) goto out_unlock; - } ret = __iommu_set_group_pasid(domain, group, pasid); if (ret) @@ -3448,7 +3450,7 @@ void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, mutex_lock(&group->mutex); __iommu_remove_group_pasid(group, pasid, domain); - WARN_ON(xa_erase(&group->pasid_array, pasid) != domain); + xa_erase(&group->pasid_array, pasid); mutex_unlock(&group->mutex); } EXPORT_SYMBOL_GPL(iommu_detach_device_pasid); @@ -3473,15 +3475,19 @@ struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev, { /* Caller must be a probed driver on dev */ struct iommu_group *group = dev->iommu_group; - struct iommu_domain *domain; + struct iommu_attach_handle *handle; + struct iommu_domain *domain = NULL; if (!group) return NULL; xa_lock(&group->pasid_array); - domain = xa_load(&group->pasid_array, pasid); + handle = xa_load(&group->pasid_array, pasid); + if (handle) + domain = handle->domain; + if (type && domain && domain->type != type) - domain = ERR_PTR(-EBUSY); + domain = NULL; xa_unlock(&group->pasid_array); return domain; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index ea4bc93c617f..50f6df912952 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1020,12 +1020,22 @@ struct iommu_fwspec { /* ATS is supported */ #define IOMMU_FWSPEC_PCI_RC_ATS (1 << 0) +/* + * An iommu attach handle represents a relationship between an iommu domain + * and a PASID or RID of a device. It is allocated and managed by the component + * that manages the domain and is stored in the iommu group during the time the + * domain is attached. + */ +struct iommu_attach_handle { + struct iommu_domain *domain; +}; + /** * struct iommu_sva - handle to a device-mm bond */ struct iommu_sva { + struct iommu_attach_handle handle; struct device *dev; - struct iommu_domain *domain; struct list_head handle_item; refcount_t users; }; @@ -1081,7 +1091,8 @@ int iommu_device_claim_dma_owner(struct device *dev, void *owner); void iommu_device_release_dma_owner(struct device *dev); int iommu_attach_device_pasid(struct iommu_domain *domain, - struct device *dev, ioasid_t pasid); + struct device *dev, ioasid_t pasid, + struct iommu_attach_handle *handle); void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); struct iommu_domain * @@ -1415,7 +1426,8 @@ static inline int iommu_device_claim_dma_owner(struct device *dev, void *owner) } static inline int iommu_attach_device_pasid(struct iommu_domain *domain, - struct device *dev, ioasid_t pasid) + struct device *dev, ioasid_t pasid, + struct iommu_attach_handle *handle) { return -ENODEV; } -- Gitee From 533e5c0d6f0d4844def94c4cc0525d493bb54d46 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 14:34:36 +0800 Subject: [PATCH 135/143] iommu: Remove sva handle list ANBZ: #13617 commit 3e7f57d1ef3f5fbed58974fae38d35e430f57d35 upstream. The struct sva_iommu represents an association between an SVA domain and a PASID of a device. It's stored in the iommu group's pasid array and also tracked by a list in the per-mm data structure. Removes duplicate tracking of sva_iommu by eliminating the list. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240702063444.105814-3-baolu.lu@linux.intel.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/iommu-priv.h | 3 +++ drivers/iommu/iommu-sva.c | 30 ++++++++++++++++++++---------- drivers/iommu/iommu.c | 31 +++++++++++++++++++++++++++++++ include/linux/iommu.h | 2 -- 4 files changed, 54 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/iommu-priv.h b/drivers/iommu/iommu-priv.h index a34efed2884b..a8e32b9c1e61 100644 --- a/drivers/iommu/iommu-priv.h +++ b/drivers/iommu/iommu-priv.h @@ -35,4 +35,7 @@ void iommu_device_unregister_bus(struct iommu_device *iommu, const struct bus_type *bus, struct notifier_block *nb); +struct iommu_attach_handle *iommu_attach_handle_get(struct iommu_group *group, + ioasid_t pasid, + unsigned int type); #endif /* __LINUX_IOMMU_PRIV_H */ diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index 1a737ab0a73a..8d1b8c897b60 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -43,7 +43,6 @@ static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct de } iommu_mm->pasid = pasid; INIT_LIST_HEAD(&iommu_mm->sva_domains); - INIT_LIST_HEAD(&iommu_mm->sva_handles); /* * Make sure the write to mm->iommu_mm is not reordered in front of * initialization to iommu_mm fields. If it does, readers may see a @@ -71,11 +70,16 @@ static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct de */ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm) { + struct iommu_group *group = dev->iommu_group; + struct iommu_attach_handle *attach_handle; struct iommu_mm_data *iommu_mm; struct iommu_domain *domain; struct iommu_sva *handle; int ret; + if (!group) + return ERR_PTR(-ENODEV); + mutex_lock(&iommu_sva_lock); /* Allocate mm->pasid if necessary. */ @@ -85,12 +89,22 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm goto out_unlock; } - list_for_each_entry(handle, &mm->iommu_mm->sva_handles, handle_item) { - if (handle->dev == dev) { - refcount_inc(&handle->users); - mutex_unlock(&iommu_sva_lock); - return handle; + /* A bond already exists, just take a reference`. */ + attach_handle = iommu_attach_handle_get(group, iommu_mm->pasid, IOMMU_DOMAIN_SVA); + if (!IS_ERR(attach_handle)) { + handle = container_of(attach_handle, struct iommu_sva, handle); + if (attach_handle->domain->mm != mm) { + ret = -EBUSY; + goto out_unlock; } + refcount_inc(&handle->users); + mutex_unlock(&iommu_sva_lock); + return handle; + } + + if (PTR_ERR(attach_handle) != -ENOENT) { + ret = PTR_ERR(attach_handle); + goto out_unlock; } handle = kzalloc(sizeof(*handle), GFP_KERNEL); @@ -101,7 +115,6 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm /* Search for an existing domain. */ list_for_each_entry(domain, &mm->iommu_mm->sva_domains, next) { - handle->handle.domain = domain; ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid, &handle->handle); if (!ret) { @@ -117,7 +130,6 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm goto out_free_handle; } - handle->handle.domain = domain; ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid, &handle->handle); if (ret) @@ -127,7 +139,6 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm out: refcount_set(&handle->users, 1); - list_add(&handle->handle_item, &mm->iommu_mm->sva_handles); mutex_unlock(&iommu_sva_lock); handle->dev = dev; return handle; @@ -161,7 +172,6 @@ void iommu_sva_unbind_device(struct iommu_sva *handle) mutex_unlock(&iommu_sva_lock); return; } - list_del(&handle->handle_item); iommu_detach_device_pasid(domain, dev, iommu_mm->pasid); if (--domain->users == 0) { diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 0834dfff4ee5..93883cd6feda 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -3520,3 +3520,34 @@ void iommu_free_global_pasid(ioasid_t pasid) ida_free(&iommu_global_pasid_ida, pasid); } EXPORT_SYMBOL_GPL(iommu_free_global_pasid); + +/** + * iommu_attach_handle_get - Return the attach handle + * @group: the iommu group that domain was attached to + * @pasid: the pasid within the group + * @type: matched domain type, 0 for any match + * + * Return handle or ERR_PTR(-ENOENT) on none, ERR_PTR(-EBUSY) on mismatch. + * + * Return the attach handle to the caller. The life cycle of an iommu attach + * handle is from the time when the domain is attached to the time when the + * domain is detached. Callers are required to synchronize the call of + * iommu_attach_handle_get() with domain attachment and detachment. The attach + * handle can only be used during its life cycle. + */ +struct iommu_attach_handle * +iommu_attach_handle_get(struct iommu_group *group, ioasid_t pasid, unsigned int type) +{ + struct iommu_attach_handle *handle; + + xa_lock(&group->pasid_array); + handle = xa_load(&group->pasid_array, pasid); + if (!handle) + handle = ERR_PTR(-ENOENT); + else if (type && handle->domain->type != type) + handle = ERR_PTR(-EBUSY); + xa_unlock(&group->pasid_array); + + return handle; +} +EXPORT_SYMBOL_NS_GPL(iommu_attach_handle_get, IOMMUFD_INTERNAL); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 50f6df912952..b32e089b09d2 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1036,14 +1036,12 @@ struct iommu_attach_handle { struct iommu_sva { struct iommu_attach_handle handle; struct device *dev; - struct list_head handle_item; refcount_t users; }; struct iommu_mm_data { u32 pasid; struct list_head sva_domains; - struct list_head sva_handles; }; int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode); -- Gitee From 81a207e58d5870ba9fe1186ab2cbffa069619f2f Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 14:34:37 +0800 Subject: [PATCH 136/143] iommu: Add attach handle to struct iopf_group ANBZ: #13617 commit 06cdcc32d65759d42c6340700796e2906045b6a5 upstream. Previously, the domain that a page fault targets is stored in an iopf_group, which represents a minimal set of page faults. With the introduction of attach handle, replace the domain with the handle so that the fault handler can obtain more information as needed when handling the faults. iommu_report_device_fault() is currently used for SVA page faults, which handles the page fault in an internal cycle. The domain is retrieved with iommu_get_domain_for_dev_pasid() if the pasid in the fault message is valid. This doesn't work in IOMMUFD case, where if the pasid table of a device is wholly managed by user space, there is no domain attached to the PASID of the device, and all page faults are forwarded through a NESTING domain attaching to RID. Add a static flag in iommu ops, which indicates if the IOMMU driver supports user-managed PASID tables. In the iopf deliver path, if no attach handle found for the iopf PASID, roll back to RID domain when the IOMMU driver supports this capability. iommu_get_domain_for_dev_pasid() is no longer used and can be removed. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240702063444.105814-4-baolu.lu@linux.intel.com Signed-off-by: Will Deacon [ Shuai Xue: Conflicts: include/linux/iommu.h conflict with CK_KABI_RESERVE field in struct iommu_ops, just add user_pasid_table to struct iommu_ops ] Signed-off-by: Shuai Xue --- drivers/iommu/io-pgfault.c | 61 +++++++++++++++++++++----------------- drivers/iommu/iommu-sva.c | 3 +- drivers/iommu/iommu.c | 39 ------------------------ include/linux/iommu.h | 17 ++++------- 4 files changed, 42 insertions(+), 78 deletions(-) diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index 06d78fcc79fd..7c9011992d3f 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -59,30 +59,6 @@ void iopf_free_group(struct iopf_group *group) } EXPORT_SYMBOL_GPL(iopf_free_group); -static struct iommu_domain *get_domain_for_iopf(struct device *dev, - struct iommu_fault *fault) -{ - struct iommu_domain *domain; - - if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) { - domain = iommu_get_domain_for_dev_pasid(dev, fault->prm.pasid, 0); - if (IS_ERR(domain)) - domain = NULL; - } else { - domain = iommu_get_domain_for_dev(dev); - } - - if (!domain || !domain->iopf_handler) { - dev_warn_ratelimited(dev, - "iopf (pasid %d) without domain attached or handler installed\n", - fault->prm.pasid); - - return NULL; - } - - return domain; -} - /* Non-last request of a group. Postpone until the last one. */ static int report_partial_fault(struct iommu_fault_param *fault_param, struct iommu_fault *fault) @@ -206,20 +182,51 @@ void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) if (group == &abort_group) goto err_abort; - group->domain = get_domain_for_iopf(dev, fault); - if (!group->domain) + if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) { + group->attach_handle = iommu_attach_handle_get(dev->iommu_group, + fault->prm.pasid, + 0); + if (IS_ERR(group->attach_handle)) { + const struct iommu_ops *ops = dev_iommu_ops(dev); + + if (!ops->user_pasid_table) + goto err_abort; + + /* + * The iommu driver for this device supports user- + * managed PASID table. Therefore page faults for + * any PASID should go through the NESTING domain + * attached to the device RID. + */ + group->attach_handle = + iommu_attach_handle_get(dev->iommu_group, + IOMMU_NO_PASID, + IOMMU_DOMAIN_NESTED); + if (IS_ERR(group->attach_handle)) + goto err_abort; + } + } else { + group->attach_handle = + iommu_attach_handle_get(dev->iommu_group, IOMMU_NO_PASID, 0); + if (IS_ERR(group->attach_handle)) + goto err_abort; + } + + if (!group->attach_handle->domain->iopf_handler) goto err_abort; /* * On success iopf_handler must call iopf_group_response() and * iopf_free_group() */ - if (group->domain->iopf_handler(group)) + if (group->attach_handle->domain->iopf_handler(group)) goto err_abort; return; err_abort: + dev_warn_ratelimited(dev, "iopf with pasid %d aborted\n", + fault->prm.pasid); iopf_group_response(group, IOMMU_PAGE_RESP_FAILURE); if (group == &abort_group) __iopf_free_group(group); diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index 8d1b8c897b60..503c5d23c1ea 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -274,7 +274,8 @@ static void iommu_sva_handle_iopf(struct work_struct *work) if (status != IOMMU_PAGE_RESP_SUCCESS) break; - status = iommu_sva_handle_mm(&iopf->fault, group->domain->mm); + status = iommu_sva_handle_mm(&iopf->fault, + group->attach_handle->domain->mm); } iopf_group_response(group, status); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 93883cd6feda..7df629c303f1 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -3455,45 +3455,6 @@ void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, } EXPORT_SYMBOL_GPL(iommu_detach_device_pasid); -/* - * iommu_get_domain_for_dev_pasid() - Retrieve domain for @pasid of @dev - * @dev: the queried device - * @pasid: the pasid of the device - * @type: matched domain type, 0 for any match - * - * This is a variant of iommu_get_domain_for_dev(). It returns the existing - * domain attached to pasid of a device. Callers must hold a lock around this - * function, and both iommu_attach/detach_dev_pasid() whenever a domain of - * type is being manipulated. This API does not internally resolve races with - * attach/detach. - * - * Return: attached domain on success, NULL otherwise. - */ -struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev, - ioasid_t pasid, - unsigned int type) -{ - /* Caller must be a probed driver on dev */ - struct iommu_group *group = dev->iommu_group; - struct iommu_attach_handle *handle; - struct iommu_domain *domain = NULL; - - if (!group) - return NULL; - - xa_lock(&group->pasid_array); - handle = xa_load(&group->pasid_array, pasid); - if (handle) - domain = handle->domain; - - if (type && domain && domain->type != type) - domain = NULL; - xa_unlock(&group->pasid_array); - - return domain; -} -EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev_pasid); - ioasid_t iommu_alloc_global_pasid(struct device *dev) { int ret; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index b32e089b09d2..b09ca81364f5 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -127,7 +127,7 @@ struct iopf_group { /* list node for iommu_fault_param::faults */ struct list_head pending_node; struct work_struct work; - struct iommu_domain *domain; + struct iommu_attach_handle *attach_handle; /* The device's fault data parameter. */ struct iommu_fault_param *fault_param; }; @@ -555,6 +555,10 @@ static inline int __iommu_copy_struct_from_user_array( * @default_domain: If not NULL this will always be set as the default domain. * This should be an IDENTITY/BLOCKED/PLATFORM domain. * Do not use in new drivers. + * @user_pasid_table: IOMMU driver supports user-managed PASID table. There is + * no user domain for each PASID and the I/O page faults are + * forwarded through the user domain attached to the device + * RID. */ struct iommu_ops { bool (*capable)(struct device *dev, enum iommu_cap); @@ -598,6 +602,7 @@ struct iommu_ops { struct iommu_domain *blocked_domain; struct iommu_domain *release_domain; struct iommu_domain *default_domain; + u8 user_pasid_table:1; CK_KABI_RESERVE(1) CK_KABI_RESERVE(2) @@ -1093,9 +1098,6 @@ int iommu_attach_device_pasid(struct iommu_domain *domain, struct iommu_attach_handle *handle); void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); -struct iommu_domain * -iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid, - unsigned int type); ioasid_t iommu_alloc_global_pasid(struct device *dev); void iommu_free_global_pasid(ioasid_t pasid); #else /* CONFIG_IOMMU_API */ @@ -1435,13 +1437,6 @@ static inline void iommu_detach_device_pasid(struct iommu_domain *domain, { } -static inline struct iommu_domain * -iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid, - unsigned int type) -{ - return NULL; -} - static inline ioasid_t iommu_alloc_global_pasid(struct device *dev) { return IOMMU_PASID_INVALID; -- Gitee From 4e38e173c77678515691764a953f196327ffe9ba Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 14:34:38 +0800 Subject: [PATCH 137/143] iommu: Extend domain attach group with handle support ANBZ: #13617 commit 8519e689834a3ecf9a36332a9abb1844bd34e459 upstream. Unlike the SVA case where each PASID of a device has an SVA domain attached to it, the I/O page faults are handled by the fault handler of the SVA domain. The I/O page faults for a user page table might be handled by the domain attached to RID or the domain attached to the PASID, depending on whether the PASID table is managed by user space or kernel. As a result, there is a need for the domain attach group interfaces to have attach handle support. The attach handle will be forwarded to the fault handler of the user domain. Add some variants of the domain attaching group interfaces so that they could support the attach handle and export them for use in IOMMUFD. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240702063444.105814-5-baolu.lu@linux.intel.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/iommu-priv.h | 8 +++ drivers/iommu/iommu.c | 103 +++++++++++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+) diff --git a/drivers/iommu/iommu-priv.h b/drivers/iommu/iommu-priv.h index a8e32b9c1e61..de5b54eaa8bf 100644 --- a/drivers/iommu/iommu-priv.h +++ b/drivers/iommu/iommu-priv.h @@ -38,4 +38,12 @@ void iommu_device_unregister_bus(struct iommu_device *iommu, struct iommu_attach_handle *iommu_attach_handle_get(struct iommu_group *group, ioasid_t pasid, unsigned int type); +int iommu_attach_group_handle(struct iommu_domain *domain, + struct iommu_group *group, + struct iommu_attach_handle *handle); +void iommu_detach_group_handle(struct iommu_domain *domain, + struct iommu_group *group); +int iommu_replace_group_handle(struct iommu_group *group, + struct iommu_domain *new_domain, + struct iommu_attach_handle *handle); #endif /* __LINUX_IOMMU_PRIV_H */ diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 7df629c303f1..6b0f42ea9218 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -3512,3 +3512,106 @@ iommu_attach_handle_get(struct iommu_group *group, ioasid_t pasid, unsigned int return handle; } EXPORT_SYMBOL_NS_GPL(iommu_attach_handle_get, IOMMUFD_INTERNAL); + +/** + * iommu_attach_group_handle - Attach an IOMMU domain to an IOMMU group + * @domain: IOMMU domain to attach + * @group: IOMMU group that will be attached + * @handle: attach handle + * + * Returns 0 on success and error code on failure. + * + * This is a variant of iommu_attach_group(). It allows the caller to provide + * an attach handle and use it when the domain is attached. This is currently + * used by IOMMUFD to deliver the I/O page faults. + */ +int iommu_attach_group_handle(struct iommu_domain *domain, + struct iommu_group *group, + struct iommu_attach_handle *handle) +{ + int ret; + + if (handle) + handle->domain = domain; + + mutex_lock(&group->mutex); + ret = xa_insert(&group->pasid_array, IOMMU_NO_PASID, handle, GFP_KERNEL); + if (ret) + goto err_unlock; + + ret = __iommu_attach_group(domain, group); + if (ret) + goto err_erase; + mutex_unlock(&group->mutex); + + return 0; +err_erase: + xa_erase(&group->pasid_array, IOMMU_NO_PASID); +err_unlock: + mutex_unlock(&group->mutex); + return ret; +} +EXPORT_SYMBOL_NS_GPL(iommu_attach_group_handle, IOMMUFD_INTERNAL); + +/** + * iommu_detach_group_handle - Detach an IOMMU domain from an IOMMU group + * @domain: IOMMU domain to attach + * @group: IOMMU group that will be attached + * + * Detach the specified IOMMU domain from the specified IOMMU group. + * It must be used in conjunction with iommu_attach_group_handle(). + */ +void iommu_detach_group_handle(struct iommu_domain *domain, + struct iommu_group *group) +{ + mutex_lock(&group->mutex); + __iommu_group_set_core_domain(group); + xa_erase(&group->pasid_array, IOMMU_NO_PASID); + mutex_unlock(&group->mutex); +} +EXPORT_SYMBOL_NS_GPL(iommu_detach_group_handle, IOMMUFD_INTERNAL); + +/** + * iommu_replace_group_handle - replace the domain that a group is attached to + * @group: IOMMU group that will be attached to the new domain + * @new_domain: new IOMMU domain to replace with + * @handle: attach handle + * + * This is a variant of iommu_group_replace_domain(). It allows the caller to + * provide an attach handle for the new domain and use it when the domain is + * attached. + */ +int iommu_replace_group_handle(struct iommu_group *group, + struct iommu_domain *new_domain, + struct iommu_attach_handle *handle) +{ + void *curr; + int ret; + + if (!new_domain) + return -EINVAL; + + mutex_lock(&group->mutex); + if (handle) { + ret = xa_reserve(&group->pasid_array, IOMMU_NO_PASID, GFP_KERNEL); + if (ret) + goto err_unlock; + } + + ret = __iommu_group_set_domain(group, new_domain); + if (ret) + goto err_release; + + curr = xa_store(&group->pasid_array, IOMMU_NO_PASID, handle, GFP_KERNEL); + WARN_ON(xa_is_err(curr)); + + mutex_unlock(&group->mutex); + + return 0; +err_release: + xa_release(&group->pasid_array, IOMMU_NO_PASID); +err_unlock: + mutex_unlock(&group->mutex); + return ret; +} +EXPORT_SYMBOL_NS_GPL(iommu_replace_group_handle, IOMMUFD_INTERNAL); -- Gitee From 3adcc824be382d5640d1836e5a20cb14dfd08341 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 14:34:39 +0800 Subject: [PATCH 138/143] iommufd: Add fault and response message definitions ANBZ: #13617 commit c714f15860fcca02fe0fd7c3f1f1fc35b1768ac1 upstream. iommu_hwpt_pgfaults represent fault messages that the userspace can retrieve. Multiple iommu_hwpt_pgfaults might be put in an iopf group, with the IOMMU_PGFAULT_FLAGS_LAST_PAGE flag set only for the last iommu_hwpt_pgfault. An iommu_hwpt_page_response is a response message that the userspace should send to the kernel after finishing handling a group of fault messages. The @dev_id, @pasid, and @grpid fields in the message identify an outstanding iopf group for a device. The @cookie field, which matches the cookie field of the last fault in the group, will be used by the kernel to look up the pending message. Link: https://lore.kernel.org/r/20240702063444.105814-6-baolu.lu@linux.intel.com Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Signed-off-by: Shuai Xue --- include/uapi/linux/iommufd.h | 83 ++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 1dfeaa2e649e..4d89ed97b533 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -692,4 +692,87 @@ struct iommu_hwpt_invalidate { __u32 __reserved; }; #define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE) + +/** + * enum iommu_hwpt_pgfault_flags - flags for struct iommu_hwpt_pgfault + * @IOMMU_PGFAULT_FLAGS_PASID_VALID: The pasid field of the fault data is + * valid. + * @IOMMU_PGFAULT_FLAGS_LAST_PAGE: It's the last fault of a fault group. + */ +enum iommu_hwpt_pgfault_flags { + IOMMU_PGFAULT_FLAGS_PASID_VALID = (1 << 0), + IOMMU_PGFAULT_FLAGS_LAST_PAGE = (1 << 1), +}; + +/** + * enum iommu_hwpt_pgfault_perm - perm bits for struct iommu_hwpt_pgfault + * @IOMMU_PGFAULT_PERM_READ: request for read permission + * @IOMMU_PGFAULT_PERM_WRITE: request for write permission + * @IOMMU_PGFAULT_PERM_EXEC: (PCIE 10.4.1) request with a PASID that has the + * Execute Requested bit set in PASID TLP Prefix. + * @IOMMU_PGFAULT_PERM_PRIV: (PCIE 10.4.1) request with a PASID that has the + * Privileged Mode Requested bit set in PASID TLP + * Prefix. + */ +enum iommu_hwpt_pgfault_perm { + IOMMU_PGFAULT_PERM_READ = (1 << 0), + IOMMU_PGFAULT_PERM_WRITE = (1 << 1), + IOMMU_PGFAULT_PERM_EXEC = (1 << 2), + IOMMU_PGFAULT_PERM_PRIV = (1 << 3), +}; + +/** + * struct iommu_hwpt_pgfault - iommu page fault data + * @flags: Combination of enum iommu_hwpt_pgfault_flags + * @dev_id: id of the originated device + * @pasid: Process Address Space ID + * @grpid: Page Request Group Index + * @perm: Combination of enum iommu_hwpt_pgfault_perm + * @addr: Fault address + * @length: a hint of how much data the requestor is expecting to fetch. For + * example, if the PRI initiator knows it is going to do a 10MB + * transfer, it could fill in 10MB and the OS could pre-fault in + * 10MB of IOVA. It's default to 0 if there's no such hint. + * @cookie: kernel-managed cookie identifying a group of fault messages. The + * cookie number encoded in the last page fault of the group should + * be echoed back in the response message. + */ +struct iommu_hwpt_pgfault { + __u32 flags; + __u32 dev_id; + __u32 pasid; + __u32 grpid; + __u32 perm; + __u64 addr; + __u32 length; + __u32 cookie; +}; + +/** + * enum iommufd_page_response_code - Return status of fault handlers + * @IOMMUFD_PAGE_RESP_SUCCESS: Fault has been handled and the page tables + * populated, retry the access. This is the + * "Success" defined in PCI 10.4.2.1. + * @IOMMUFD_PAGE_RESP_INVALID: Could not handle this fault, don't retry the + * access. This is the "Invalid Request" in PCI + * 10.4.2.1. + * @IOMMUFD_PAGE_RESP_FAILURE: General error. Drop all subsequent faults from + * this device if possible. This is the "Response + * Failure" in PCI 10.4.2.1. + */ +enum iommufd_page_response_code { + IOMMUFD_PAGE_RESP_SUCCESS = 0, + IOMMUFD_PAGE_RESP_INVALID, + IOMMUFD_PAGE_RESP_FAILURE, +}; + +/** + * struct iommu_hwpt_page_response - IOMMU page fault response + * @cookie: The kernel-managed cookie reported in the fault message. + * @code: One of response code in enum iommufd_page_response_code. + */ +struct iommu_hwpt_page_response { + __u32 cookie; + __u32 code; +}; #endif -- Gitee From bd23a9a839d7bf6a94cc6c61185656e3cc3d552b Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 14:34:40 +0800 Subject: [PATCH 139/143] iommufd: Add iommufd fault object ANBZ: #13617 commit 07838f7fd529c8a6de44b601d4b7057e6c8d36ed upstream. An iommufd fault object provides an interface for delivering I/O page faults to user space. These objects are created and destroyed by user space, and they can be associated with or dissociated from hardware page table objects during page table allocation or destruction. User space interacts with the fault object through a file interface. This interface offers a straightforward and efficient way for user space to handle page faults. It allows user space to read fault messages sequentially and respond to them by writing to the same file. The file interface supports reading messages in poll mode, so it's recommended that user space applications use io_uring to enhance read and write efficiency. A fault object can be associated with any iopf-capable iommufd_hw_pgtable during the pgtable's allocation. All I/O page faults triggered by devices when accessing the I/O addresses of an iommufd_hw_pgtable are routed through the fault object to user space. Similarly, user space's responses to these page faults are routed back to the iommu device driver through the same fault object. Link: https://lore.kernel.org/r/20240702063444.105814-7-baolu.lu@linux.intel.com Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Signed-off-by: Shuai Xue --- drivers/iommu/io-pgfault.c | 2 + drivers/iommu/iommufd/Makefile | 1 + drivers/iommu/iommufd/fault.c | 226 ++++++++++++++++++++++++ drivers/iommu/iommufd/iommufd_private.h | 30 ++++ drivers/iommu/iommufd/main.c | 6 + include/linux/iommu.h | 4 + include/uapi/linux/iommufd.h | 18 ++ 7 files changed, 287 insertions(+) create mode 100644 drivers/iommu/iommufd/fault.c diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index 7c9011992d3f..cd679c13752e 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -110,6 +110,8 @@ static struct iopf_group *iopf_group_alloc(struct iommu_fault_param *iopf_param, list_add(&group->pending_node, &iopf_param->faults); mutex_unlock(&iopf_param->lock); + group->fault_count = list_count_nodes(&group->faults); + return group; } diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile index 34b446146961..cf4605962bea 100644 --- a/drivers/iommu/iommufd/Makefile +++ b/drivers/iommu/iommufd/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only iommufd-y := \ device.o \ + fault.o \ hw_pagetable.o \ io_pagetable.o \ ioas.o \ diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c new file mode 100644 index 000000000000..68ff94671d48 --- /dev/null +++ b/drivers/iommu/iommufd/fault.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2024 Intel Corporation + */ +#define pr_fmt(fmt) "iommufd: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../iommu-priv.h" +#include "iommufd_private.h" + +void iommufd_fault_destroy(struct iommufd_object *obj) +{ + struct iommufd_fault *fault = container_of(obj, struct iommufd_fault, obj); + struct iopf_group *group, *next; + + /* + * The iommufd object's reference count is zero at this point. + * We can be confident that no other threads are currently + * accessing this pointer. Therefore, acquiring the mutex here + * is unnecessary. + */ + list_for_each_entry_safe(group, next, &fault->deliver, node) { + list_del(&group->node); + iopf_group_response(group, IOMMU_PAGE_RESP_INVALID); + iopf_free_group(group); + } +} + +static void iommufd_compose_fault_message(struct iommu_fault *fault, + struct iommu_hwpt_pgfault *hwpt_fault, + struct iommufd_device *idev, + u32 cookie) +{ + hwpt_fault->flags = fault->prm.flags; + hwpt_fault->dev_id = idev->obj.id; + hwpt_fault->pasid = fault->prm.pasid; + hwpt_fault->grpid = fault->prm.grpid; + hwpt_fault->perm = fault->prm.perm; + hwpt_fault->addr = fault->prm.addr; + hwpt_fault->length = 0; + hwpt_fault->cookie = cookie; +} + +static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf, + size_t count, loff_t *ppos) +{ + size_t fault_size = sizeof(struct iommu_hwpt_pgfault); + struct iommufd_fault *fault = filep->private_data; + struct iommu_hwpt_pgfault data; + struct iommufd_device *idev; + struct iopf_group *group; + struct iopf_fault *iopf; + size_t done = 0; + int rc = 0; + + if (*ppos || count % fault_size) + return -ESPIPE; + + mutex_lock(&fault->mutex); + while (!list_empty(&fault->deliver) && count > done) { + group = list_first_entry(&fault->deliver, + struct iopf_group, node); + + if (group->fault_count * fault_size > count - done) + break; + + rc = xa_alloc(&fault->response, &group->cookie, group, + xa_limit_32b, GFP_KERNEL); + if (rc) + break; + + idev = to_iommufd_handle(group->attach_handle)->idev; + list_for_each_entry(iopf, &group->faults, list) { + iommufd_compose_fault_message(&iopf->fault, + &data, idev, + group->cookie); + if (copy_to_user(buf + done, &data, fault_size)) { + xa_erase(&fault->response, group->cookie); + rc = -EFAULT; + break; + } + done += fault_size; + } + + list_del(&group->node); + } + mutex_unlock(&fault->mutex); + + return done == 0 ? rc : done; +} + +static ssize_t iommufd_fault_fops_write(struct file *filep, const char __user *buf, + size_t count, loff_t *ppos) +{ + size_t response_size = sizeof(struct iommu_hwpt_page_response); + struct iommufd_fault *fault = filep->private_data; + struct iommu_hwpt_page_response response; + struct iopf_group *group; + size_t done = 0; + int rc = 0; + + if (*ppos || count % response_size) + return -ESPIPE; + + mutex_lock(&fault->mutex); + while (count > done) { + rc = copy_from_user(&response, buf + done, response_size); + if (rc) + break; + + group = xa_erase(&fault->response, response.cookie); + if (!group) { + rc = -EINVAL; + break; + } + + iopf_group_response(group, response.code); + iopf_free_group(group); + done += response_size; + } + mutex_unlock(&fault->mutex); + + return done == 0 ? rc : done; +} + +static __poll_t iommufd_fault_fops_poll(struct file *filep, + struct poll_table_struct *wait) +{ + struct iommufd_fault *fault = filep->private_data; + __poll_t pollflags = EPOLLOUT; + + poll_wait(filep, &fault->wait_queue, wait); + mutex_lock(&fault->mutex); + if (!list_empty(&fault->deliver)) + pollflags |= EPOLLIN | EPOLLRDNORM; + mutex_unlock(&fault->mutex); + + return pollflags; +} + +static int iommufd_fault_fops_release(struct inode *inode, struct file *filep) +{ + struct iommufd_fault *fault = filep->private_data; + + refcount_dec(&fault->obj.users); + iommufd_ctx_put(fault->ictx); + return 0; +} + +static const struct file_operations iommufd_fault_fops = { + .owner = THIS_MODULE, + .open = nonseekable_open, + .read = iommufd_fault_fops_read, + .write = iommufd_fault_fops_write, + .poll = iommufd_fault_fops_poll, + .release = iommufd_fault_fops_release, + .llseek = no_llseek, +}; + +int iommufd_fault_alloc(struct iommufd_ucmd *ucmd) +{ + struct iommu_fault_alloc *cmd = ucmd->cmd; + struct iommufd_fault *fault; + struct file *filep; + int fdno; + int rc; + + if (cmd->flags) + return -EOPNOTSUPP; + + fault = iommufd_object_alloc(ucmd->ictx, fault, IOMMUFD_OBJ_FAULT); + if (IS_ERR(fault)) + return PTR_ERR(fault); + + fault->ictx = ucmd->ictx; + INIT_LIST_HEAD(&fault->deliver); + xa_init_flags(&fault->response, XA_FLAGS_ALLOC1); + mutex_init(&fault->mutex); + init_waitqueue_head(&fault->wait_queue); + + filep = anon_inode_getfile("[iommufd-pgfault]", &iommufd_fault_fops, + fault, O_RDWR); + if (IS_ERR(filep)) { + rc = PTR_ERR(filep); + goto out_abort; + } + + refcount_inc(&fault->obj.users); + iommufd_ctx_get(fault->ictx); + fault->filep = filep; + + fdno = get_unused_fd_flags(O_CLOEXEC); + if (fdno < 0) { + rc = fdno; + goto out_fput; + } + + cmd->out_fault_id = fault->obj.id; + cmd->out_fault_fd = fdno; + + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + if (rc) + goto out_put_fdno; + iommufd_object_finalize(ucmd->ictx, &fault->obj); + + fd_install(fdno, fault->filep); + + return 0; +out_put_fdno: + put_unused_fd(fdno); +out_fput: + fput(filep); + refcount_dec(&fault->obj.users); + iommufd_ctx_put(fault->ictx); +out_abort: + iommufd_object_abort_and_destroy(ucmd->ictx, &fault->obj); + + return rc; +} diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 991f864d1f9b..c8a4519f1405 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -128,6 +128,7 @@ enum iommufd_object_type { IOMMUFD_OBJ_HWPT_NESTED, IOMMUFD_OBJ_IOAS, IOMMUFD_OBJ_ACCESS, + IOMMUFD_OBJ_FAULT, #ifdef CONFIG_IOMMUFD_TEST IOMMUFD_OBJ_SELFTEST, #endif @@ -426,6 +427,35 @@ void iopt_remove_access(struct io_pagetable *iopt, u32 iopt_access_list_id); void iommufd_access_destroy_object(struct iommufd_object *obj); +/* + * An iommufd_fault object represents an interface to deliver I/O page faults + * to the user space. These objects are created/destroyed by the user space and + * associated with hardware page table objects during page-table allocation. + */ +struct iommufd_fault { + struct iommufd_object obj; + struct iommufd_ctx *ictx; + struct file *filep; + + /* The lists of outstanding faults protected by below mutex. */ + struct mutex mutex; + struct list_head deliver; + struct xarray response; + + struct wait_queue_head wait_queue; +}; + +struct iommufd_attach_handle { + struct iommu_attach_handle handle; + struct iommufd_device *idev; +}; + +/* Convert an iommu attach handle to iommufd handle. */ +#define to_iommufd_handle(hdl) container_of(hdl, struct iommufd_attach_handle, handle) + +int iommufd_fault_alloc(struct iommufd_ucmd *ucmd); +void iommufd_fault_destroy(struct iommufd_object *obj); + #ifdef CONFIG_IOMMUFD_TEST int iommufd_test(struct iommufd_ucmd *ucmd); void iommufd_selftest_destroy(struct iommufd_object *obj); diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 39b32932c61e..83bbd7c5d160 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -319,6 +319,7 @@ static int iommufd_option(struct iommufd_ucmd *ucmd) union ucmd_buffer { struct iommu_destroy destroy; + struct iommu_fault_alloc fault; struct iommu_hw_info info; struct iommu_hwpt_alloc hwpt; struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap; @@ -355,6 +356,8 @@ struct iommufd_ioctl_op { } static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { IOCTL_OP(IOMMU_DESTROY, iommufd_destroy, struct iommu_destroy, id), + IOCTL_OP(IOMMU_FAULT_QUEUE_ALLOC, iommufd_fault_alloc, struct iommu_fault_alloc, + out_fault_fd), IOCTL_OP(IOMMU_GET_HW_INFO, iommufd_get_hw_info, struct iommu_hw_info, __reserved), IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc, @@ -513,6 +516,9 @@ static const struct iommufd_object_ops iommufd_object_ops[] = { .destroy = iommufd_hwpt_nested_destroy, .abort = iommufd_hwpt_nested_abort, }, + [IOMMUFD_OBJ_FAULT] = { + .destroy = iommufd_fault_destroy, + }, #ifdef CONFIG_IOMMUFD_TEST [IOMMUFD_OBJ_SELFTEST] = { .destroy = iommufd_selftest_destroy, diff --git a/include/linux/iommu.h b/include/linux/iommu.h index b09ca81364f5..32d0d7f7cb08 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -124,12 +124,16 @@ struct iopf_fault { struct iopf_group { struct iopf_fault last_fault; struct list_head faults; + size_t fault_count; /* list node for iommu_fault_param::faults */ struct list_head pending_node; struct work_struct work; struct iommu_attach_handle *attach_handle; /* The device's fault data parameter. */ struct iommu_fault_param *fault_param; + /* Used by handler provider to hook the group on its own lists. */ + struct list_head node; + u32 cookie; }; /** diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 4d89ed97b533..70b8a38fcd46 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -50,6 +50,7 @@ enum { IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING, IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP, IOMMUFD_CMD_HWPT_INVALIDATE, + IOMMUFD_CMD_FAULT_QUEUE_ALLOC, }; /** @@ -775,4 +776,21 @@ struct iommu_hwpt_page_response { __u32 cookie; __u32 code; }; + +/** + * struct iommu_fault_alloc - ioctl(IOMMU_FAULT_QUEUE_ALLOC) + * @size: sizeof(struct iommu_fault_alloc) + * @flags: Must be 0 + * @out_fault_id: The ID of the new FAULT + * @out_fault_fd: The fd of the new FAULT + * + * Explicitly allocate a fault handling object. + */ +struct iommu_fault_alloc { + __u32 size; + __u32 flags; + __u32 out_fault_id; + __u32 out_fault_fd; +}; +#define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC) #endif -- Gitee From 3770f789ef8227b1b21391f6240af710adb6aa72 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 14:34:41 +0800 Subject: [PATCH 140/143] iommufd: Fault-capable hwpt attach/detach/replace ANBZ: #13617 commit b7d8833677baad8c80ed1aac8c396d687e64a376 upstream. Add iopf-capable hw page table attach/detach/replace helpers. The pointer to iommufd_device is stored in the domain attachment handle, so that it can be echo'ed back in the iopf_group. The iopf-capable hw page tables can only be attached to devices that support the IOMMU_DEV_FEAT_IOPF feature. On the first attachment of an iopf-capable hw_pagetable to the device, the IOPF feature is enabled on the device. Similarly, after the last iopf-capable hwpt is detached from the device, the IOPF feature is disabled on the device. The current implementation allows a replacement between iopf-capable and non-iopf-capable hw page tables. This matches the nested translation use case, where a parent domain is attached by default and can then be replaced with a nested user domain with iopf support. Link: https://lore.kernel.org/r/20240702063444.105814-8-baolu.lu@linux.intel.com Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Signed-off-by: Shuai Xue --- drivers/iommu/iommufd/device.c | 7 +- drivers/iommu/iommufd/fault.c | 190 ++++++++++++++++++++++++ drivers/iommu/iommufd/iommufd_private.h | 41 +++++ 3 files changed, 235 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 68ac138d925c..59ae9a4ad017 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -215,6 +215,7 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, refcount_inc(&idev->obj.users); /* igroup refcount moves into iommufd_device */ idev->igroup = igroup; + mutex_init(&idev->iopf_lock); /* * If the caller fails after this success it must call @@ -376,7 +377,7 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, * attachment. */ if (list_empty(&idev->igroup->device_list)) { - rc = iommu_attach_group(hwpt->domain, idev->igroup->group); + rc = iommufd_hwpt_attach_device(hwpt, idev); if (rc) goto err_unresv; idev->igroup->hwpt = hwpt; @@ -402,7 +403,7 @@ iommufd_hw_pagetable_detach(struct iommufd_device *idev) mutex_lock(&idev->igroup->lock); list_del(&idev->group_item); if (list_empty(&idev->igroup->device_list)) { - iommu_detach_group(hwpt->domain, idev->igroup->group); + iommufd_hwpt_detach_device(hwpt, idev); idev->igroup->hwpt = NULL; } if (hwpt_is_paging(hwpt)) @@ -513,7 +514,7 @@ iommufd_device_do_replace(struct iommufd_device *idev, goto err_unlock; } - rc = iommu_group_replace_domain(igroup->group, hwpt->domain); + rc = iommufd_hwpt_replace_device(idev, hwpt, old_hwpt); if (rc) goto err_unresv; diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c index 68ff94671d48..4934ae572638 100644 --- a/drivers/iommu/iommufd/fault.c +++ b/drivers/iommu/iommufd/fault.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -15,6 +16,195 @@ #include "../iommu-priv.h" #include "iommufd_private.h" +static int iommufd_fault_iopf_enable(struct iommufd_device *idev) +{ + struct device *dev = idev->dev; + int ret; + + /* + * Once we turn on PCI/PRI support for VF, the response failure code + * should not be forwarded to the hardware due to PRI being a shared + * resource between PF and VFs. There is no coordination for this + * shared capability. This waits for a vPRI reset to recover. + */ + if (dev_is_pci(dev) && to_pci_dev(dev)->is_virtfn) + return -EINVAL; + + mutex_lock(&idev->iopf_lock); + /* Device iopf has already been on. */ + if (++idev->iopf_enabled > 1) { + mutex_unlock(&idev->iopf_lock); + return 0; + } + + ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_IOPF); + if (ret) + --idev->iopf_enabled; + mutex_unlock(&idev->iopf_lock); + + return ret; +} + +static void iommufd_fault_iopf_disable(struct iommufd_device *idev) +{ + mutex_lock(&idev->iopf_lock); + if (!WARN_ON(idev->iopf_enabled == 0)) { + if (--idev->iopf_enabled == 0) + iommu_dev_disable_feature(idev->dev, IOMMU_DEV_FEAT_IOPF); + } + mutex_unlock(&idev->iopf_lock); +} + +static int __fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt, + struct iommufd_device *idev) +{ + struct iommufd_attach_handle *handle; + int ret; + + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (!handle) + return -ENOMEM; + + handle->idev = idev; + ret = iommu_attach_group_handle(hwpt->domain, idev->igroup->group, + &handle->handle); + if (ret) + kfree(handle); + + return ret; +} + +int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt, + struct iommufd_device *idev) +{ + int ret; + + if (!hwpt->fault) + return -EINVAL; + + ret = iommufd_fault_iopf_enable(idev); + if (ret) + return ret; + + ret = __fault_domain_attach_dev(hwpt, idev); + if (ret) + iommufd_fault_iopf_disable(idev); + + return ret; +} + +static void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt, + struct iommufd_attach_handle *handle) +{ + struct iommufd_fault *fault = hwpt->fault; + struct iopf_group *group, *next; + unsigned long index; + + if (!fault) + return; + + mutex_lock(&fault->mutex); + list_for_each_entry_safe(group, next, &fault->deliver, node) { + if (group->attach_handle != &handle->handle) + continue; + list_del(&group->node); + iopf_group_response(group, IOMMU_PAGE_RESP_INVALID); + iopf_free_group(group); + } + + xa_for_each(&fault->response, index, group) { + if (group->attach_handle != &handle->handle) + continue; + xa_erase(&fault->response, index); + iopf_group_response(group, IOMMU_PAGE_RESP_INVALID); + iopf_free_group(group); + } + mutex_unlock(&fault->mutex); +} + +static struct iommufd_attach_handle * +iommufd_device_get_attach_handle(struct iommufd_device *idev) +{ + struct iommu_attach_handle *handle; + + handle = iommu_attach_handle_get(idev->igroup->group, IOMMU_NO_PASID, 0); + if (!handle) + return NULL; + + return to_iommufd_handle(handle); +} + +void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt, + struct iommufd_device *idev) +{ + struct iommufd_attach_handle *handle; + + handle = iommufd_device_get_attach_handle(idev); + iommu_detach_group_handle(hwpt->domain, idev->igroup->group); + iommufd_auto_response_faults(hwpt, handle); + iommufd_fault_iopf_disable(idev); + kfree(handle); +} + +static int __fault_domain_replace_dev(struct iommufd_device *idev, + struct iommufd_hw_pagetable *hwpt, + struct iommufd_hw_pagetable *old) +{ + struct iommufd_attach_handle *handle, *curr = NULL; + int ret; + + if (old->fault) + curr = iommufd_device_get_attach_handle(idev); + + if (hwpt->fault) { + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (!handle) + return -ENOMEM; + + handle->handle.domain = hwpt->domain; + handle->idev = idev; + ret = iommu_replace_group_handle(idev->igroup->group, + hwpt->domain, &handle->handle); + } else { + ret = iommu_replace_group_handle(idev->igroup->group, + hwpt->domain, NULL); + } + + if (!ret && curr) { + iommufd_auto_response_faults(old, curr); + kfree(curr); + } + + return ret; +} + +int iommufd_fault_domain_replace_dev(struct iommufd_device *idev, + struct iommufd_hw_pagetable *hwpt, + struct iommufd_hw_pagetable *old) +{ + bool iopf_off = !hwpt->fault && old->fault; + bool iopf_on = hwpt->fault && !old->fault; + int ret; + + if (iopf_on) { + ret = iommufd_fault_iopf_enable(idev); + if (ret) + return ret; + } + + ret = __fault_domain_replace_dev(idev, hwpt, old); + if (ret) { + if (iopf_on) + iommufd_fault_iopf_disable(idev); + return ret; + } + + if (iopf_off) + iommufd_fault_iopf_disable(idev); + + return 0; +} + void iommufd_fault_destroy(struct iommufd_object *obj) { struct iommufd_fault *fault = container_of(obj, struct iommufd_fault, obj); diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index c8a4519f1405..aa4c26c87cb9 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -11,6 +11,7 @@ #include #include #include +#include "../iommu-priv.h" struct iommu_domain; struct iommu_group; @@ -293,6 +294,7 @@ int iommufd_check_iova_range(struct io_pagetable *iopt, struct iommufd_hw_pagetable { struct iommufd_object obj; struct iommu_domain *domain; + struct iommufd_fault *fault; }; struct iommufd_hwpt_paging { @@ -396,6 +398,9 @@ struct iommufd_device { /* always the physical device */ struct device *dev; bool enforce_cache_coherency; + /* protect iopf_enabled counter */ + struct mutex iopf_lock; + unsigned int iopf_enabled; }; static inline struct iommufd_device * @@ -456,6 +461,42 @@ struct iommufd_attach_handle { int iommufd_fault_alloc(struct iommufd_ucmd *ucmd); void iommufd_fault_destroy(struct iommufd_object *obj); +int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt, + struct iommufd_device *idev); +void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt, + struct iommufd_device *idev); +int iommufd_fault_domain_replace_dev(struct iommufd_device *idev, + struct iommufd_hw_pagetable *hwpt, + struct iommufd_hw_pagetable *old); + +static inline int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt, + struct iommufd_device *idev) +{ + if (hwpt->fault) + return iommufd_fault_domain_attach_dev(hwpt, idev); + + return iommu_attach_group(hwpt->domain, idev->igroup->group); +} + +static inline void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt, + struct iommufd_device *idev) +{ + if (hwpt->fault) + iommufd_fault_domain_detach_dev(hwpt, idev); + + iommu_detach_group(hwpt->domain, idev->igroup->group); +} + +static inline int iommufd_hwpt_replace_device(struct iommufd_device *idev, + struct iommufd_hw_pagetable *hwpt, + struct iommufd_hw_pagetable *old) +{ + if (old->fault || hwpt->fault) + return iommufd_fault_domain_replace_dev(idev, hwpt, old); + + return iommu_group_replace_domain(idev->igroup->group, hwpt->domain); +} + #ifdef CONFIG_IOMMUFD_TEST int iommufd_test(struct iommufd_ucmd *ucmd); void iommufd_selftest_destroy(struct iommufd_object *obj); -- Gitee From 296e4d3f33cacc69e9d3701621ae8e2140295b94 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 14:34:42 +0800 Subject: [PATCH 141/143] iommufd: Associate fault object with iommufd_hw_pgtable ANBZ: #13617 commit 34765cbc679c59ea5d952d738d2d16bf4aadc497 upstream. When allocating a user iommufd_hw_pagetable, the user space is allowed to associate a fault object with the hw_pagetable by specifying the fault object ID in the page table allocation data and setting the IOMMU_HWPT_FAULT_ID_VALID flag bit. On a successful return of hwpt allocation, the user can retrieve and respond to page faults by reading and writing the file interface of the fault object. Once a fault object has been associated with a hwpt, the hwpt is iopf-capable, indicated by hwpt->fault is non NULL. Attaching, detaching, or replacing an iopf-capable hwpt to an RID or PASID will differ from those that are not iopf-capable. Link: https://lore.kernel.org/r/20240702063444.105814-9-baolu.lu@linux.intel.com Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Signed-off-by: Shuai Xue --- drivers/iommu/iommufd/fault.c | 17 +++++++++++ drivers/iommu/iommufd/hw_pagetable.c | 38 +++++++++++++++++++------ drivers/iommu/iommufd/iommufd_private.h | 9 ++++++ include/uapi/linux/iommufd.h | 8 ++++++ 4 files changed, 64 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c index 4934ae572638..54d6cd20a673 100644 --- a/drivers/iommu/iommufd/fault.c +++ b/drivers/iommu/iommufd/fault.c @@ -414,3 +414,20 @@ int iommufd_fault_alloc(struct iommufd_ucmd *ucmd) return rc; } + +int iommufd_fault_iopf_handler(struct iopf_group *group) +{ + struct iommufd_hw_pagetable *hwpt; + struct iommufd_fault *fault; + + hwpt = group->attach_handle->domain->fault_data; + fault = hwpt->fault; + + mutex_lock(&fault->mutex); + list_add_tail(&group->node, &fault->deliver); + mutex_unlock(&fault->mutex); + + wake_up_interruptible(&fault->wait_queue); + + return 0; +} diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index 9020da52c10f..5ea1e6e79dff 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -8,6 +8,15 @@ #include "../iommu-priv.h" #include "iommufd_private.h" +static void __iommufd_hwpt_destroy(struct iommufd_hw_pagetable *hwpt) +{ + if (hwpt->domain) + iommu_domain_free(hwpt->domain); + + if (hwpt->fault) + refcount_dec(&hwpt->fault->obj.users); +} + void iommufd_hwpt_paging_destroy(struct iommufd_object *obj) { struct iommufd_hwpt_paging *hwpt_paging = @@ -22,9 +31,7 @@ void iommufd_hwpt_paging_destroy(struct iommufd_object *obj) hwpt_paging->common.domain); } - if (hwpt_paging->common.domain) - iommu_domain_free(hwpt_paging->common.domain); - + __iommufd_hwpt_destroy(&hwpt_paging->common); refcount_dec(&hwpt_paging->ioas->obj.users); } @@ -49,9 +56,7 @@ void iommufd_hwpt_nested_destroy(struct iommufd_object *obj) struct iommufd_hwpt_nested *hwpt_nested = container_of(obj, struct iommufd_hwpt_nested, common.obj); - if (hwpt_nested->common.domain) - iommu_domain_free(hwpt_nested->common.domain); - + __iommufd_hwpt_destroy(&hwpt_nested->common); refcount_dec(&hwpt_nested->parent->common.obj.users); } @@ -217,7 +222,8 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx, struct iommufd_hw_pagetable *hwpt; int rc; - if (flags || !user_data->len || !ops->domain_alloc_user) + if ((flags & ~IOMMU_HWPT_FAULT_ID_VALID) || + !user_data->len || !ops->domain_alloc_user) return ERR_PTR(-EOPNOTSUPP); if (parent->auto_domain || !parent->nest_parent) return ERR_PTR(-EINVAL); @@ -231,7 +237,8 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx, refcount_inc(&parent->common.obj.users); hwpt_nested->parent = parent; - hwpt->domain = ops->domain_alloc_user(idev->dev, flags, + hwpt->domain = ops->domain_alloc_user(idev->dev, + flags & ~IOMMU_HWPT_FAULT_ID_VALID, parent->common.domain, user_data); if (IS_ERR(hwpt->domain)) { rc = PTR_ERR(hwpt->domain); @@ -312,6 +319,21 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) goto out_put_pt; } + if (cmd->flags & IOMMU_HWPT_FAULT_ID_VALID) { + struct iommufd_fault *fault; + + fault = iommufd_get_fault(ucmd, cmd->fault_id); + if (IS_ERR(fault)) { + rc = PTR_ERR(fault); + goto out_hwpt; + } + hwpt->fault = fault; + hwpt->domain->iopf_handler = iommufd_fault_iopf_handler; + hwpt->domain->fault_data = hwpt; + refcount_inc(&fault->obj.users); + iommufd_put_object(ucmd->ictx, &fault->obj); + } + cmd->out_hwpt_id = hwpt->obj.id; rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); if (rc) diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index aa4c26c87cb9..92efe30a8f0d 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -458,8 +458,17 @@ struct iommufd_attach_handle { /* Convert an iommu attach handle to iommufd handle. */ #define to_iommufd_handle(hdl) container_of(hdl, struct iommufd_attach_handle, handle) +static inline struct iommufd_fault * +iommufd_get_fault(struct iommufd_ucmd *ucmd, u32 id) +{ + return container_of(iommufd_get_object(ucmd->ictx, id, + IOMMUFD_OBJ_FAULT), + struct iommufd_fault, obj); +} + int iommufd_fault_alloc(struct iommufd_ucmd *ucmd); void iommufd_fault_destroy(struct iommufd_object *obj); +int iommufd_fault_iopf_handler(struct iopf_group *group); int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt, struct iommufd_device *idev); diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 70b8a38fcd46..ede2b464a761 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -357,10 +357,13 @@ struct iommu_vfio_ioas { * the parent HWPT in a nesting configuration. * @IOMMU_HWPT_ALLOC_DIRTY_TRACKING: Dirty tracking support for device IOMMU is * enforced on device attachment + * @IOMMU_HWPT_FAULT_ID_VALID: The fault_id field of hwpt allocation data is + * valid. */ enum iommufd_hwpt_alloc_flags { IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0, IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1, + IOMMU_HWPT_FAULT_ID_VALID = 1 << 2, }; /** @@ -412,6 +415,9 @@ enum iommu_hwpt_data_type { * @data_type: One of enum iommu_hwpt_data_type * @data_len: Length of the type specific data * @data_uptr: User pointer to the type specific data + * @fault_id: The ID of IOMMUFD_FAULT object. Valid only if flags field of + * IOMMU_HWPT_FAULT_ID_VALID is set. + * @__reserved2: Padding to 64-bit alignment. Must be 0. * * Explicitly allocate a hardware page table object. This is the same object * type that is returned by iommufd_device_attach() and represents the @@ -442,6 +448,8 @@ struct iommu_hwpt_alloc { __u32 data_type; __u32 data_len; __aligned_u64 data_uptr; + __u32 fault_id; + __u32 __reserved2; }; #define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC) -- Gitee From 1e175cbba6757f85a24d7d17ed0d89759d62d9f3 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 14:34:43 +0800 Subject: [PATCH 142/143] iommufd/selftest: Add IOPF support for mock device ANBZ: #13617 commit ddee19971081b42615d62f4fdada21274708ed4d upstream. Extend the selftest mock device to support generating and responding to an IOPF. Also add an ioctl interface to userspace applications to trigger the IOPF on the mock device. This would allow userspace applications to test the IOMMUFD's handling of IOPFs without having to rely on any real hardware. Link: https://lore.kernel.org/r/20240702063444.105814-10-baolu.lu@linux.intel.com Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Signed-off-by: Shuai Xue --- drivers/iommu/iommufd/iommufd_test.h | 8 ++++ drivers/iommu/iommufd/selftest.c | 64 ++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) diff --git a/drivers/iommu/iommufd/iommufd_test.h b/drivers/iommu/iommufd/iommufd_test.h index e854d3f67205..acbbba1c6671 100644 --- a/drivers/iommu/iommufd/iommufd_test.h +++ b/drivers/iommu/iommufd/iommufd_test.h @@ -22,6 +22,7 @@ enum { IOMMU_TEST_OP_MOCK_DOMAIN_FLAGS, IOMMU_TEST_OP_DIRTY, IOMMU_TEST_OP_MD_CHECK_IOTLB, + IOMMU_TEST_OP_TRIGGER_IOPF, }; enum { @@ -127,6 +128,13 @@ struct iommu_test_cmd { __u32 id; __u32 iotlb; } check_iotlb; + struct { + __u32 dev_id; + __u32 pasid; + __u32 grpid; + __u32 perm; + __u64 addr; + } trigger_iopf; }; __u32 last; }; diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index 7a70a3e0fee6..f95e32e29133 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -504,6 +504,8 @@ static bool mock_domain_capable(struct device *dev, enum iommu_cap cap) return false; } +static struct iopf_queue *mock_iommu_iopf_queue; + static struct iommu_device mock_iommu_device = { }; @@ -514,6 +516,29 @@ static struct iommu_device *mock_probe_device(struct device *dev) return &mock_iommu_device; } +static void mock_domain_page_response(struct device *dev, struct iopf_fault *evt, + struct iommu_page_response *msg) +{ +} + +static int mock_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) +{ + if (feat != IOMMU_DEV_FEAT_IOPF || !mock_iommu_iopf_queue) + return -ENODEV; + + return iopf_queue_add_device(mock_iommu_iopf_queue, dev); +} + +static int mock_dev_disable_feat(struct device *dev, enum iommu_dev_features feat) +{ + if (feat != IOMMU_DEV_FEAT_IOPF || !mock_iommu_iopf_queue) + return -ENODEV; + + iopf_queue_remove_device(mock_iommu_iopf_queue, dev); + + return 0; +} + static const struct iommu_ops mock_ops = { /* * IOMMU_DOMAIN_BLOCKED cannot be returned from def_domain_type() @@ -529,6 +554,10 @@ static const struct iommu_ops mock_ops = { .capable = mock_domain_capable, .device_group = generic_device_group, .probe_device = mock_probe_device, + .page_response = mock_domain_page_response, + .dev_enable_feat = mock_dev_enable_feat, + .dev_disable_feat = mock_dev_disable_feat, + .user_pasid_table = true, .default_domain_ops = &(struct iommu_domain_ops){ .free = mock_domain_free, @@ -1375,6 +1404,31 @@ static int iommufd_test_dirty(struct iommufd_ucmd *ucmd, unsigned int mockpt_id, return rc; } +static int iommufd_test_trigger_iopf(struct iommufd_ucmd *ucmd, + struct iommu_test_cmd *cmd) +{ + struct iopf_fault event = { }; + struct iommufd_device *idev; + + idev = iommufd_get_device(ucmd, cmd->trigger_iopf.dev_id); + if (IS_ERR(idev)) + return PTR_ERR(idev); + + event.fault.prm.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE; + if (cmd->trigger_iopf.pasid != IOMMU_NO_PASID) + event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; + event.fault.type = IOMMU_FAULT_PAGE_REQ; + event.fault.prm.addr = cmd->trigger_iopf.addr; + event.fault.prm.pasid = cmd->trigger_iopf.pasid; + event.fault.prm.grpid = cmd->trigger_iopf.grpid; + event.fault.prm.perm = cmd->trigger_iopf.perm; + + iommu_report_device_fault(idev->dev, &event); + iommufd_put_object(ucmd->ictx, &idev->obj); + + return 0; +} + void iommufd_selftest_destroy(struct iommufd_object *obj) { struct selftest_obj *sobj = container_of(obj, struct selftest_obj, obj); @@ -1450,6 +1504,8 @@ int iommufd_test(struct iommufd_ucmd *ucmd) cmd->dirty.page_size, u64_to_user_ptr(cmd->dirty.uptr), cmd->dirty.flags); + case IOMMU_TEST_OP_TRIGGER_IOPF: + return iommufd_test_trigger_iopf(ucmd, cmd); default: return -EOPNOTSUPP; } @@ -1491,6 +1547,9 @@ int __init iommufd_test_init(void) &iommufd_mock_bus_type.nb); if (rc) goto err_sysfs; + + mock_iommu_iopf_queue = iopf_queue_alloc("mock-iopfq"); + return 0; err_sysfs: @@ -1506,6 +1565,11 @@ int __init iommufd_test_init(void) void iommufd_test_exit(void) { + if (mock_iommu_iopf_queue) { + iopf_queue_free(mock_iommu_iopf_queue); + mock_iommu_iopf_queue = NULL; + } + iommu_device_sysfs_remove(&mock_iommu_device); iommu_device_unregister_bus(&mock_iommu_device, &iommufd_mock_bus_type.bus, -- Gitee From 37239c711119f7c62c50c191da801eed9e4cadd1 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 14:34:44 +0800 Subject: [PATCH 143/143] iommufd/selftest: Add coverage for IOPF test ANBZ: #13617 commit d1211768b62d02e27b46a3ff78f739c4776a0f03 upstream. Extend the selftest tool to add coverage of testing IOPF handling. This would include the following tests: - Allocating and destroying an iommufd fault object. - Allocating and destroying an IOPF-capable HWPT. - Attaching/detaching/replacing an IOPF-capable HWPT on a device. - Triggering an IOPF on the mock device. - Retrieving and responding to the IOPF through the file interface. Link: https://lore.kernel.org/r/20240702063444.105814-11-baolu.lu@linux.intel.com Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Signed-off-by: Shuai Xue --- tools/testing/selftests/iommu/iommufd.c | 22 +++++ .../selftests/iommu/iommufd_fail_nth.c | 2 +- tools/testing/selftests/iommu/iommufd_utils.h | 86 +++++++++++++++++-- 3 files changed, 104 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index d79bd5fc08db..4927b9add5ad 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -279,6 +279,9 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested) uint32_t parent_hwpt_id = 0; uint32_t parent_hwpt_id_not_work = 0; uint32_t test_hwpt_id = 0; + uint32_t iopf_hwpt_id; + uint32_t fault_id; + uint32_t fault_fd; if (self->device_id) { /* Negative tests */ @@ -326,6 +329,7 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested) sizeof(data)); /* Allocate two nested hwpts sharing one common parent hwpt */ + test_ioctl_fault_alloc(&fault_id, &fault_fd); test_cmd_hwpt_alloc_nested(self->device_id, parent_hwpt_id, 0, &nested_hwpt_id[0], IOMMU_HWPT_DATA_SELFTEST, &data, @@ -334,6 +338,14 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested) &nested_hwpt_id[1], IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data)); + test_err_hwpt_alloc_iopf(ENOENT, self->device_id, parent_hwpt_id, + UINT32_MAX, IOMMU_HWPT_FAULT_ID_VALID, + &iopf_hwpt_id, IOMMU_HWPT_DATA_SELFTEST, + &data, sizeof(data)); + test_cmd_hwpt_alloc_iopf(self->device_id, parent_hwpt_id, fault_id, + IOMMU_HWPT_FAULT_ID_VALID, &iopf_hwpt_id, + IOMMU_HWPT_DATA_SELFTEST, &data, + sizeof(data)); test_cmd_hwpt_check_iotlb_all(nested_hwpt_id[0], IOMMU_TEST_IOTLB_DEFAULT); test_cmd_hwpt_check_iotlb_all(nested_hwpt_id[1], @@ -504,14 +516,24 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested) _test_ioctl_destroy(self->fd, nested_hwpt_id[1])); test_ioctl_destroy(nested_hwpt_id[0]); + /* Switch from nested_hwpt_id[1] to iopf_hwpt_id */ + test_cmd_mock_domain_replace(self->stdev_id, iopf_hwpt_id); + EXPECT_ERRNO(EBUSY, + _test_ioctl_destroy(self->fd, iopf_hwpt_id)); + /* Trigger an IOPF on the device */ + test_cmd_trigger_iopf(self->device_id, fault_fd); + /* Detach from nested_hwpt_id[1] and destroy it */ test_cmd_mock_domain_replace(self->stdev_id, parent_hwpt_id); test_ioctl_destroy(nested_hwpt_id[1]); + test_ioctl_destroy(iopf_hwpt_id); /* Detach from the parent hw_pagetable and destroy it */ test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id); test_ioctl_destroy(parent_hwpt_id); test_ioctl_destroy(parent_hwpt_id_not_work); + close(fault_fd); + test_ioctl_destroy(fault_id); } else { test_err_hwpt_alloc(ENOENT, self->device_id, self->ioas_id, 0, &parent_hwpt_id); diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c index f590417cd67a..c5d5e69452b0 100644 --- a/tools/testing/selftests/iommu/iommufd_fail_nth.c +++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c @@ -615,7 +615,7 @@ TEST_FAIL_NTH(basic_fail_nth, device) if (_test_cmd_get_hw_info(self->fd, idev_id, &info, sizeof(info), NULL)) return -1; - if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, &hwpt_id, + if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, 0, &hwpt_id, IOMMU_HWPT_DATA_NONE, 0, 0)) return -1; diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index c612fbf0195b..40f6f14ce136 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -155,7 +155,7 @@ static int _test_cmd_mock_domain_replace(int fd, __u32 stdev_id, __u32 pt_id, EXPECT_ERRNO(_errno, _test_cmd_mock_domain_replace(self->fd, stdev_id, \ pt_id, NULL)) -static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, +static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, __u32 ft_id, __u32 flags, __u32 *hwpt_id, __u32 data_type, void *data, size_t data_len) { @@ -167,6 +167,7 @@ static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, .data_type = data_type, .data_len = data_len, .data_uptr = (uint64_t)data, + .fault_id = ft_id, }; int ret; @@ -179,24 +180,36 @@ static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, } #define test_cmd_hwpt_alloc(device_id, pt_id, flags, hwpt_id) \ - ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, flags, \ + ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, 0, flags, \ hwpt_id, IOMMU_HWPT_DATA_NONE, NULL, \ 0)) #define test_err_hwpt_alloc(_errno, device_id, pt_id, flags, hwpt_id) \ EXPECT_ERRNO(_errno, _test_cmd_hwpt_alloc( \ - self->fd, device_id, pt_id, flags, \ + self->fd, device_id, pt_id, 0, flags, \ hwpt_id, IOMMU_HWPT_DATA_NONE, NULL, 0)) #define test_cmd_hwpt_alloc_nested(device_id, pt_id, flags, hwpt_id, \ data_type, data, data_len) \ - ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, flags, \ + ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, 0, flags, \ hwpt_id, data_type, data, data_len)) #define test_err_hwpt_alloc_nested(_errno, device_id, pt_id, flags, hwpt_id, \ data_type, data, data_len) \ EXPECT_ERRNO(_errno, \ - _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, flags, \ + _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, 0, flags, \ hwpt_id, data_type, data, data_len)) +#define test_cmd_hwpt_alloc_iopf(device_id, pt_id, fault_id, flags, hwpt_id, \ + data_type, data, data_len) \ + ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, fault_id, \ + flags, hwpt_id, data_type, data, \ + data_len)) +#define test_err_hwpt_alloc_iopf(_errno, device_id, pt_id, fault_id, flags, \ + hwpt_id, data_type, data, data_len) \ + EXPECT_ERRNO(_errno, \ + _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, fault_id, \ + flags, hwpt_id, data_type, data, \ + data_len)) + #define test_cmd_hwpt_check_iotlb(hwpt_id, iotlb_id, expected) \ ({ \ struct iommu_test_cmd test_cmd = { \ @@ -686,3 +699,66 @@ static int _test_cmd_get_hw_info(int fd, __u32 device_id, void *data, #define test_cmd_get_hw_capabilities(device_id, caps, mask) \ ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, NULL, 0, &caps)) + +static int _test_ioctl_fault_alloc(int fd, __u32 *fault_id, __u32 *fault_fd) +{ + struct iommu_fault_alloc cmd = { + .size = sizeof(cmd), + }; + int ret; + + ret = ioctl(fd, IOMMU_FAULT_QUEUE_ALLOC, &cmd); + if (ret) + return ret; + *fault_id = cmd.out_fault_id; + *fault_fd = cmd.out_fault_fd; + return 0; +} + +#define test_ioctl_fault_alloc(fault_id, fault_fd) \ + ({ \ + ASSERT_EQ(0, _test_ioctl_fault_alloc(self->fd, fault_id, \ + fault_fd)); \ + ASSERT_NE(0, *(fault_id)); \ + ASSERT_NE(0, *(fault_fd)); \ + }) + +static int _test_cmd_trigger_iopf(int fd, __u32 device_id, __u32 fault_fd) +{ + struct iommu_test_cmd trigger_iopf_cmd = { + .size = sizeof(trigger_iopf_cmd), + .op = IOMMU_TEST_OP_TRIGGER_IOPF, + .trigger_iopf = { + .dev_id = device_id, + .pasid = 0x1, + .grpid = 0x2, + .perm = IOMMU_PGFAULT_PERM_READ | IOMMU_PGFAULT_PERM_WRITE, + .addr = 0xdeadbeaf, + }, + }; + struct iommu_hwpt_page_response response = { + .code = IOMMUFD_PAGE_RESP_SUCCESS, + }; + struct iommu_hwpt_pgfault fault = {}; + ssize_t bytes; + int ret; + + ret = ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_TRIGGER_IOPF), &trigger_iopf_cmd); + if (ret) + return ret; + + bytes = read(fault_fd, &fault, sizeof(fault)); + if (bytes <= 0) + return -EIO; + + response.cookie = fault.cookie; + + bytes = write(fault_fd, &response, sizeof(response)); + if (bytes <= 0) + return -EIO; + + return 0; +} + +#define test_cmd_trigger_iopf(device_id, fault_fd) \ + ASSERT_EQ(0, _test_cmd_trigger_iopf(self->fd, device_id, fault_fd)) -- Gitee