From 9574555c96e58f5e009c7ff2026d9429f5eb6eeb Mon Sep 17 00:00:00 2001
From: Jay Chen
Date: Wed, 27 Aug 2025 10:56:06 +0800
Subject: [PATCH 01/99] anolis: Revert "anolis: phytium: pswiotlb: Add
 PSWIOTLB mechanism to improve DMA performance"

ANBZ: #13617

This reverts commit 985517480ea330869ed27510db7af06a07cc8846.

Signed-off-by: Jay Chen
---
 arch/arm64/mm/init.c                  |   11 -
 drivers/base/core.c                   |    9 -
 drivers/pci/pci.c                     |   12 -
 drivers/pci/probe.c                   |   10 -
 include/linux/device.h                |   10 -
 include/linux/page-flags.h            |    7 -
 include/linux/pswiotlb.h              |  333 -----
 include/trace/events/pswiotlb.h       |   44 -
 kernel/dma/Kconfig                    |    2 -
 kernel/dma/Makefile                   |    1 -
 kernel/dma/contiguous.c               |   12 -
 kernel/dma/mapping.c                  |   60 +-
 kernel/dma/phytium/Kconfig            |   10 -
 kernel/dma/phytium/Makefile           |    6 -
 kernel/dma/phytium/pswiotlb-direct.c  |  146 ---
 kernel/dma/phytium/pswiotlb-dma.h     |  334 -----
 kernel/dma/phytium/pswiotlb-iommu.c   | 1214 -----------------
 kernel/dma/phytium/pswiotlb-mapping.c |  157 ---
 kernel/dma/phytium/pswiotlb.c         | 1736 -------------------------
 19 files changed, 9 insertions(+), 4105 deletions(-)
 delete mode 100644 include/linux/pswiotlb.h
 delete mode 100644 include/trace/events/pswiotlb.h
 delete mode 100644 kernel/dma/phytium/Kconfig
 delete mode 100644 kernel/dma/phytium/Makefile
 delete mode 100644 kernel/dma/phytium/pswiotlb-direct.c
 delete mode 100644 kernel/dma/phytium/pswiotlb-dma.h
 delete mode 100644 kernel/dma/phytium/pswiotlb-iommu.c
 delete mode 100644 kernel/dma/phytium/pswiotlb-mapping.c
 delete mode 100644 kernel/dma/phytium/pswiotlb.c

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 1463dc657a98..8a0f8604348b 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -32,10 +32,6 @@
 #include
 #include

-#ifdef CONFIG_PSWIOTLB
-#include <linux/pswiotlb.h>
-#endif
-
 #include
 #include
 #include
@@ -502,13 +498,6 @@ void __init mem_init(void)

 	swiotlb_init(swiotlb, SWIOTLB_VERBOSE);

-#ifdef CONFIG_PSWIOTLB
-	/* enable pswiotlb default */
-	if ((pswiotlb_force_disable != true) &&
-		is_phytium_ps_socs())
-		pswiotlb_init(1, PSWIOTLB_VERBOSE);
-#endif
-
 	/* this will put all unused low memory onto the freelists */
 	memblock_free_all();

diff --git a/drivers/base/core.c b/drivers/base/core.c
index a31a7997b7c4..732787134416 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -32,10 +32,6 @@
 #include
 #include /* for dma_default_coherent */

-#ifdef CONFIG_PSWIOTLB
-#include <linux/pswiotlb.h>
-#endif
-
 #include "base.h"
 #include "physical_location.h"
 #include "power/power.h"
@@ -3173,11 +3169,6 @@ void device_initialize(struct device *dev)
 	dev->dma_coherent = dma_default_coherent;
 #endif
 	swiotlb_dev_init(dev);
-#ifdef CONFIG_PSWIOTLB
-	if ((pswiotlb_force_disable != true) &&
-		is_phytium_ps_socs())
-		pswiotlb_dev_init(dev);
-#endif
 }
 EXPORT_SYMBOL_GPL(device_initialize);

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 0935b77baf7d..244941814f97 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -36,9 +36,6 @@
 #include
 #endif
 #include "pci.h"
-#ifdef CONFIG_PSWIOTLB
-#include <linux/pswiotlb.h>
-#endif

 DEFINE_MUTEX(pci_slot_mutex);

@@ -4565,15 +4562,6 @@ void __weak pcibios_set_master(struct pci_dev *dev)
  */
 void pci_set_master(struct pci_dev *dev)
 {
-#ifdef CONFIG_PSWIOTLB
-	if ((pswiotlb_force_disable != true) &&
-		is_phytium_ps_socs()) {
-		dev->dev.can_use_pswiotlb = pswiotlb_is_dev_in_passthroughlist(dev);
-		dev_info(&dev->dev, "The device %s use pswiotlb because vendor 0x%04x %s in pswiotlb passthroughlist\n",
-			dev->dev.can_use_pswiotlb ? "would" : "would NOT",
-			dev->vendor, dev->dev.can_use_pswiotlb ? "is NOT" : "is");
-	}
-#endif
 	__pci_set_master(dev, true);
 	pcibios_set_master(dev);
 }

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index bfab2fd8c18f..534d4ab7642a 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -21,9 +21,6 @@
 #include
 #include
 #include "pci.h"
-#ifdef CONFIG_PSWIOTLB
-#include <linux/pswiotlb.h>
-#endif

 #define CARDBUS_LATENCY_TIMER	176	/* secondary latency timer */
 #define CARDBUS_RESERVE_BUSNR	3
@@ -2569,13 +2566,6 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
 	dma_set_max_seg_size(&dev->dev, 65536);
 	dma_set_seg_boundary(&dev->dev, 0xffffffff);

-#ifdef CONFIG_PSWIOTLB
-	if ((pswiotlb_force_disable != true) &&
-		is_phytium_ps_socs()) {
-		pswiotlb_store_local_node(dev, bus);
-		dma_set_seg_boundary(&dev->dev, 0xffffffffffff);
-	}
-#endif

 	pcie_failed_link_retrain(dev);

diff --git a/include/linux/device.h b/include/linux/device.h
index cc04849e5ed6..b0b0b3056d4b 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -654,8 +654,6 @@ struct device_physical_location {
 * @dma_io_tlb_pools:	List of transient swiotlb memory pools.
 * @dma_io_tlb_lock:	Protects changes to the list of active pools.
 * @dma_uses_io_tlb: %true if device has used the software IO TLB.
- * @dma_p_io_tlb_mem: Phytium Software IO TLB allocator. Not for driver use.
- * @dma_uses_p_io_tlb: %true if device has used the Phytium software IO TLB.
 * @archdata:	For arch-specific additions.
 * @of_node:	Associated device tree node.
 * @fwnode:	Associated device node supplied by platform firmware.
@@ -762,11 +760,6 @@ struct device {
 #ifdef CONFIG_SWIOTLB
 	struct io_tlb_mem *dma_io_tlb_mem;
 #endif
-#ifdef CONFIG_PSWIOTLB
-	struct p_io_tlb_mem *dma_p_io_tlb_mem;
-	bool dma_uses_p_io_tlb;
-	bool can_use_pswiotlb;
-#endif
 #ifdef CONFIG_SWIOTLB_DYNAMIC
 	struct list_head dma_io_tlb_pools;
 	spinlock_t dma_io_tlb_lock;
@@ -781,9 +774,6 @@ struct device {
 #ifdef CONFIG_NUMA
 	int		numa_node;	/* NUMA node this device is close to */
 	nodemask_t	gi_node;	/* GPU gi node the device is close to */
-#ifdef CONFIG_PSWIOTLB
-	int		local_node;	/* NUMA node this device is really belong to */
-#endif
 #endif
 	dev_t			devt;	/* dev_t, creates the sysfs "dev" */
 	u32			id;	/* device instance */

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index ce61f32fdacc..5e60fa517878 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -203,13 +203,6 @@ enum pageflags {
 	/* Reuse PG_dirty to indicate whether the duplicate page is a master or slave */
 	PG_dup_slave = PG_dirty,
 #endif
-
-#ifdef CONFIG_PSWIOTLB
-	/* check if pswiotlb is sync already */
-	PG_pswiotlbsync = __NR_PAGEFLAGS + 1,
-	/* check if the page is used for pswiotlb */
-	PG_pswiotlb,
-#endif
 };

 #define PAGEFLAGS_MASK		((1UL << NR_PAGEFLAGS) - 1)

diff --git a/include/linux/pswiotlb.h b/include/linux/pswiotlb.h
deleted file mode 100644
index 548a54730fed..000000000000
--- a/include/linux/pswiotlb.h
+++ /dev/null
@@ -1,333 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __LINUX_PSWIOTLB_H
-#define __LINUX_PSWIOTLB_H
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-struct device;
-struct page;
-struct scatterlist;
-extern bool pswiotlb_force_disable;
-struct p_io_tlb_pool;
-
-#define SOC_ID_PS23064	0x8
-#define SOC_ID_PS24080	0x6
-#define MIDR_PS		0x700F8620
-#define SYS_AIDR_EL1	sys_reg(3, 1, 0, 0, 7)
-#define PSWIOTLB_VERBOSE	(1 << 0) /* verbose initialization */
-#define PSWIOTLB_FORCEOFF	(1 << 1) /* force phytium bounce buffering off */
-#define PSWIOTLB_ANY		(1 << 2) /* allow any memory for the buffer */
-#define PSWIOTLB_FREE_THRESHOLD	30
-static bool is_ps_socs;
-
-/*
- * Maximum allowable number of contiguous slabs to map,
- * must be a power of 2. What is the appropriate value ?
- * The complexity of {map,unmap}_single is linearly dependent on this value.
- */
-#define P_IO_TLB_SEGSIZE	1024
-
-/*
- * log of the size of each Phytium IO TLB slab. The number of slabs is command line
- * controllable.
- */
-#define P_IO_TLB_SHIFT 11
-#define P_IO_TLB_SIZE (1 << P_IO_TLB_SHIFT)
-
-/* default to 256MB */
-#define P_IO_TLB_DEFAULT_SIZE (256UL<<20)
-#define P_IO_TLB_INC_THR (64UL<<20)
-#define P_IO_TLB_EXT_WATERMARK (80)
-
-/* passthroughlist which incompatible with pswiotlb temporarily */
-#define BL_PCI_VENDOR_ID_NVIDIA 0x10de
-#define BL_PCI_VENDOR_ID_ILUVATAR 0x1E3E
-#define BL_PCI_VENDOR_ID_METAX 0x9999
-
-unsigned long pswiotlb_size_or_default(void);
-void __init pswiotlb_init_remap(bool addressing_limit, int nid, unsigned int flags,
-	int (*remap)(void *tlb, unsigned long nslabs));
-
-phys_addr_t pswiotlb_tbl_map_single(struct device *hwdev, int nid, phys_addr_t phys,
-	size_t mapping_size, size_t alloc_size, unsigned int alloc_align_mask,
-	enum dma_data_direction dir,
-	unsigned long attrs);
-
-extern void pswiotlb_tbl_unmap_single(struct device *hwdev,
-				int nid,
-				phys_addr_t tlb_addr,
-				size_t offset,
-				size_t mapping_size,
-				enum dma_data_direction dir,
-				unsigned long attrs,
-				struct p_io_tlb_pool *pool);
-
-void pswiotlb_sync_single_for_device(struct device *dev, int nid, phys_addr_t tlb_addr,
-		size_t size, enum dma_data_direction dir, struct p_io_tlb_pool *pool);
-void pswiotlb_sync_single_for_cpu(struct device *dev, int nid, phys_addr_t tlb_addr,
-		size_t size, enum dma_data_direction dir, struct p_io_tlb_pool *pool);
-dma_addr_t pswiotlb_map(struct device *dev, int nid, phys_addr_t phys,
-		size_t size, enum dma_data_direction dir, unsigned long attrs);
-void pswiotlb_store_local_node(struct pci_dev *dev, struct pci_bus *bus);
-void iommu_dma_unmap_sg_pswiotlb(struct device *dev, struct scatterlist *sg, unsigned long iova,
-		size_t mapped, int nents, enum dma_data_direction dir, unsigned long attrs);
-#ifdef CONFIG_PSWIOTLB
-struct pswiotlb_passthroughlist {
-	struct list_head node;
-	unsigned short vendor;
-	unsigned short device;
-	bool from_grub;
-};
-/**
- * struct p_io_tlb_pool - Phytium IO TLB memory pool descriptor
- * @start:	The start address of the pswiotlb memory pool. Used to do a quick
- *		range check to see if the memory was in fact allocated by this
- *		API.
- * @end:	The end address of the pswiotlb memory pool. Used to do a quick
- *		range check to see if the memory was in fact allocated by this
- *		API.
- * @nslabs:	The number of Phytium IO TLB blocks (in groups of 64) between @start and
- *		@end. For default pswiotlb, this is command line adjustable via
- *		setup_io_tlb_npages.
- * @used:	The number of used Phytium IO TLB block.
- * @list:	The free list describing the number of free entries available
- *		from each index.
- * @index:	The index to start searching in the next round.
- * @orig_addr:	The original address corresponding to a mapped entry.
- * @alloc_size:	Size of the allocated buffer.
- * @lock:	The lock to protect the above data structures in the map and
- *		unmap calls.
- * @vaddr:	The vaddr of the pswiotlb memory pool. The pswiotlb memory pool
- *		may be remapped in the memory encrypted case and store virtual
- *		address for bounce buffer operation.
- * @nslabs:	The number of Phytium IO TLB slots between @start and @end. For the
- *		default pswiotlb, this can be adjusted with a boot parameter,
- *		see setup_io_tlb_npages().
- * @late_alloc:	%true if allocated using the page allocator.
- * @nareas:	Number of areas in the pool.
- * @area_nslabs: Number of slots in each area.
- * @areas:	Array of memory area descriptors.
- * @slots:	Array of slot descriptors.
- * @node:	Member of the Phytium IO TLB memory pool list.
- * @rcu:	RCU head for pswiotlb_dyn_free().
- * @transient:	%true if transient memory pool.
- * @busy_flag:	%true if the pool is used by devices.
- * @free_cnt:	Counters every time the pool is free when checked by monitor.
- * @free_th:	Free threshold determine when to free the pool to memory.
- * @busy_recode: Bitmap to record the busy status of the areas in the pool.
- * @node_min_addr: Minimum physical address of the numa node.
- * @numa_max_addr: Maximum physical address of the numa node.
- * @numa_node_id: Numa node id the pool belong to.
- */
-struct p_io_tlb_pool {
-	phys_addr_t start;
-	phys_addr_t end;
-	void *vaddr;
-	unsigned long nslabs;
-	bool late_alloc;
-	unsigned int nareas;
-	unsigned int area_nslabs;
-	struct p_io_tlb_area *areas;
-	struct p_io_tlb_slot *slots;
-	struct list_head node;
-	struct rcu_head rcu;
-	bool transient;
-	bool busy_flag;
-	unsigned int free_cnt;
-	unsigned int free_th;
-	unsigned long *busy_record;
-	phys_addr_t node_min_addr;
-	phys_addr_t node_max_addr;
-	int numa_node_id;
-};
-
-/**
- * struct p_io_tlb_mem - Phytium Software IO TLB allocator
- * @defpool:	Default (initial) Phytium IO TLB memory pool descriptor.
- * @pool:	Phytium IO TLB memory pool descriptor (if not dynamic).
- * @nslabs:	Total number of Phytium IO TLB slabs in all pools.
- * @debugfs:	The dentry to debugfs.
- * @force_bounce: %true if pswiotlb bouncing is forced
- * @for_alloc:	%true if the pool is used for memory allocation
- * @can_grow:	%true if more pools can be allocated dynamically.
- * @phys_limit:	Maximum allowed physical address.
- * @pool_addr:	Array where all the pools stored.
- * @capacity:	Number of pools which could be allocated.
- * @whole_size:	Number of pools which stored in the pool array.
- * @lock:	Lock to synchronize changes to the list.
- * @pools:	List of Phytium IO TLB memory pool descriptors (if dynamic).
- * @dyn_alloc:	Dynamic Phytium IO TLB pool allocation work.
- * @total_used:	The total number of slots in the pool that are currently used
- *		across all areas. Used only for calculating used_hiwater in
- *		debugfs.
- * @used_hiwater: The high water mark for total_used. Used only for reporting
- *		in debugfs.
- * @node_min_addr: Minimum physical address of the numa node.
- * @numa_max_addr: Maximum physical address of the numa node.
- * @numa_node_id: Numa node id the mem belong to.
- */
-struct p_io_tlb_mem {
-	struct p_io_tlb_pool defpool;
-	unsigned long nslabs;
-	struct dentry *debugfs;
-	bool force_bounce;
-	bool for_alloc;
-	bool can_grow;
-	u64 phys_limit;
-	struct p_io_tlb_pool *pool_addr[64*1024/8];
-	int capacity;
-	int whole_size;
-	spinlock_t lock;
-	struct list_head pools;
-	struct work_struct dyn_alloc;
-#ifdef CONFIG_DEBUG_FS
-	atomic_long_t total_used;
-	atomic_long_t used_hiwater;
-#endif
-	phys_addr_t node_min_addr;
-	phys_addr_t node_max_addr;
-	unsigned long node_total_mem;
-	int numa_node_id;
-};
-
-extern struct p_io_tlb_mem p_io_tlb_default_mem[MAX_NUMNODES];
-
-struct p_io_tlb_pool *pswiotlb_find_pool(struct device *dev, int nid, phys_addr_t paddr);
-
-static inline bool is_phytium_ps_socs(void)
-{
-	unsigned int soc_id;
-	unsigned int midr;
-
-	if (likely(is_ps_socs))
-		return true;
-
-	soc_id = read_sysreg_s(SYS_AIDR_EL1);
-	midr = read_cpuid_id();
-	if ((soc_id == SOC_ID_PS23064 || soc_id == SOC_ID_PS24080)
-			&& midr == MIDR_PS) {
-		is_ps_socs = true;
-		return true;
-	} else
-		return false;
-}
-
-static inline bool is_pswiotlb_buffer(struct device *dev, int nid, phys_addr_t paddr,
-		struct p_io_tlb_pool **pool)
-{
-	struct p_io_tlb_mem *mem = &dev->dma_p_io_tlb_mem[nid];
-	struct page *page;
-
-	if (!paddr)
-		return false;
-
-	page = pfn_to_page(PFN_DOWN(paddr));
-
-	if (test_bit(PG_pswiotlb, &page->flags) == false)
-		return false;
-
-	if (!mem)
-		return false;
-
-	/*
-	 * All PSWIOTLB buffer addresses must have been returned by
-	 * pswiotlb_tbl_map_single() and passed to a device driver.
-	 * If a PSWIOTLB address is checked on another CPU, then it was
-	 * presumably loaded by the device driver from an unspecified private
-	 * data structure. Make sure that this load is ordered before reading
-	 * dev->dma_uses_p_io_tlb here and mem->pools in pswiotlb_find_pool().
-	 *
-	 * This barrier pairs with smp_mb() in pswiotlb_find_slots().
-	 */
-	smp_rmb();
-
-	*pool = pswiotlb_find_pool(dev, nid, paddr);
-	if (READ_ONCE(dev->dma_uses_p_io_tlb) && *pool)
-		return true;
-
-	return false;
-}
-
-static inline bool dma_is_in_local_node(struct device *dev, int nid, dma_addr_t addr, size_t size)
-{
-	dma_addr_t end = addr + size - 1;
-	struct p_io_tlb_mem *mem = &p_io_tlb_default_mem[nid];
-
-	if (addr >= mem->node_min_addr && end <= mem->node_max_addr)
-		return true;
-
-	return false;
-}
-
-void pswiotlb_init(bool addressing_limited, unsigned int flags);
-void pswiotlb_dev_init(struct device *dev);
-size_t pswiotlb_max_mapping_size(struct device *dev);
-bool is_pswiotlb_allocated(struct device *dev);
-bool is_pswiotlb_active(struct device *dev);
-void __init pswiotlb_adjust_size(unsigned long size);
-phys_addr_t default_pswiotlb_base(struct device *dev);
-phys_addr_t default_pswiotlb_limit(struct device *dev);
-bool pswiotlb_is_dev_in_passthroughlist(struct pci_dev *dev);
-#else
-static inline void pswiotlb_init(bool addressing_limited, unsigned int flags)
-{
-}
-
-static inline void pswiotlb_dev_init(struct device *dev)
-{
-}
-static inline bool is_pswiotlb_buffer(struct device *dev, int nid, phys_addr_t paddr,
-		struct p_io_tlb_pool **pool)
-{
-	return false;
-}
-static inline bool dma_is_in_local_node(struct device *dev, int nid, dma_addr_t addr, size_t size)
-{
-	return false;
-}
-static inline size_t pswiotlb_max_mapping_size(struct device *dev)
-{
-	return SIZE_MAX;
-}
-
-static inline bool is_pswiotlb_allocated(struct device *dev)
-{
-	return false;
-}
-static inline bool is_pswiotlb_active(struct device *dev)
-{
-	return false;
-}
-
-static inline void pswiotlb_adjust_size(unsigned long size)
-{
-}
-
-static inline phys_addr_t default_pswiotlb_base(struct device *dev)
-{
-	return 0;
-}
-
-static inline phys_addr_t default_pswiotlb_limit(struct device *dev)
-{
-	return 0;
-}
-
-static inline bool pswiotlb_is_dev_in_passthroughlist(struct pci_dev *dev)
-{
-	return false;
-}
-#endif /* CONFIG_PSWIOTLB */
-
-extern void pswiotlb_print_info(int);
-extern bool pswiotlb_dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size);
-
-#endif /* __LINUX_PSWIOTLB_H */

diff --git a/include/trace/events/pswiotlb.h b/include/trace/events/pswiotlb.h
deleted file mode 100644
index ed26c41a4046..000000000000
--- a/include/trace/events/pswiotlb.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM pswiotlb
-
-#if !defined(_TRACE_PSWIOTLB_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_PSWIOTLB_H
-
-#include <linux/tracepoint.h>
-
-TRACE_EVENT(pswiotlb_bounced,
-
-	TP_PROTO(struct device *dev,
-		 dma_addr_t dev_addr,
-		 size_t size),
-
-	TP_ARGS(dev, dev_addr, size),
-
-	TP_STRUCT__entry(
-		__string(dev_name, dev_name(dev))
-		__field(u64, dma_mask)
-		__field(dma_addr_t, dev_addr)
-		__field(size_t, size)
-		__field(bool, force)
-	),
-
-	TP_fast_assign(
-		__assign_str(dev_name, dev_name(dev));
-		__entry->dma_mask = (dev->dma_mask ? *dev->dma_mask : 0);
-		__entry->dev_addr = dev_addr;
-		__entry->size = size;
-	),
-
-	TP_printk("dev_name: %s dma_mask=%llx dev_addr=%llx size=%zu %s",
-		__get_str(dev_name),
-		__entry->dma_mask,
-		(unsigned long long)__entry->dev_addr,
-		__entry->size,
"NORMAL" : "FORCEOFF") -); - -#endif /* _TRACE_PSWIOTLB_H */ - -/* This part must be outside protection */ -#include diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index f13515fc1384..f488997b0717 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -270,5 +270,3 @@ config DMA_MAP_BENCHMARK performance of dma_(un)map_page. See tools/testing/selftests/dma/dma_map_benchmark.c - -source "kernel/dma/phytium/Kconfig" diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile index c7c3cb4499e9..21926e46ef4f 100644 --- a/kernel/dma/Makefile +++ b/kernel/dma/Makefile @@ -10,4 +10,3 @@ obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_DMA_COHERENT_POOL) += pool.o obj-$(CONFIG_MMU) += remap.o obj-$(CONFIG_DMA_MAP_BENCHMARK) += map_benchmark.o -obj-$(CONFIG_PSWIOTLB) += phytium/ diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index 5bc41e33fa5c..a60081979963 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -52,10 +52,6 @@ #include #include -#ifdef CONFIG_PSWIOTLB -#include "./phytium/pswiotlb-dma.h" -#endif - #ifdef CONFIG_CMA_SIZE_MBYTES #define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES #else @@ -359,10 +355,6 @@ static struct page *cma_alloc_aligned(struct cma *cma, size_t size, gfp_t gfp) */ struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp) { -#ifdef CONFIG_PSWIOTLB - if (check_if_pswiotlb_is_applicable(dev)) - return NULL; -#endif #ifdef CONFIG_DMA_NUMA_CMA int nid = dev_to_node(dev); #endif @@ -415,10 +407,6 @@ void dma_free_contiguous(struct device *dev, struct page *page, size_t size) { unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; -#ifdef CONFIG_PSWIOTLB - if (check_if_pswiotlb_is_applicable(dev)) - __free_pages(page, get_order(size)); -#endif /* if dev has its own cma, free page from there */ if (dev->cma_area) { if (cma_release(dev->cma_area, page, count)) diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index f6af86aaa252..f1d9f01b283d 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -16,9 +16,6 @@ #include #include "debug.h" #include "direct.h" -#ifdef CONFIG_PSWIOTLB -#include "./phytium/pswiotlb-dma.h" -#endif #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ @@ -146,6 +143,7 @@ static inline bool dma_map_direct(struct device *dev, { return dma_go_direct(dev, *dev->dma_mask, ops); } + dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, size_t offset, size_t size, enum dma_data_direction dir, unsigned long attrs) @@ -158,12 +156,6 @@ dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, if (WARN_ON_ONCE(!dev->dma_mask)) return DMA_MAPPING_ERROR; -#ifdef CONFIG_PSWIOTLB - if (check_if_pswiotlb_is_applicable(dev)) { - addr = pswiotlb_dma_map_page_distribute(dev, page, offset, size, dir, attrs); - return addr; - } -#endif if (dma_map_direct(dev, ops) || arch_dma_map_page_direct(dev, page_to_phys(page) + offset + size)) addr = dma_direct_map_page(dev, page, offset, size, dir, attrs); @@ -175,18 +167,13 @@ dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, return addr; } EXPORT_SYMBOL(dma_map_page_attrs); + void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); -#ifdef CONFIG_PSWIOTLB - if (check_if_pswiotlb_is_applicable(dev)) { - pswiotlb_dma_unmap_page_attrs_distribute(dev, addr, size, dir, attrs); - return; - } -#endif if 
(dma_map_direct(dev, ops) || arch_dma_unmap_page_direct(dev, addr + size)) dma_direct_unmap_page(dev, addr, size, dir, attrs); @@ -195,6 +182,7 @@ void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size, debug_dma_unmap_page(dev, addr, size, dir); } EXPORT_SYMBOL(dma_unmap_page_attrs); + static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) { @@ -205,12 +193,7 @@ static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, if (WARN_ON_ONCE(!dev->dma_mask)) return 0; -#ifdef CONFIG_PSWIOTLB - if (check_if_pswiotlb_is_applicable(dev)) { - ents = pswiotlb_dma_map_sg_attrs_distribute(dev, sg, nents, dir, attrs); - return ents; - } -#endif + if (dma_map_direct(dev, ops) || arch_dma_map_sg_direct(dev, sg, nents)) ents = dma_direct_map_sg(dev, sg, nents, dir, attrs); @@ -296,6 +279,7 @@ int dma_map_sgtable(struct device *dev, struct sg_table *sgt, return 0; } EXPORT_SYMBOL_GPL(dma_map_sgtable); + void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) @@ -304,12 +288,6 @@ void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, BUG_ON(!valid_dma_direction(dir)); debug_dma_unmap_sg(dev, sg, nents, dir); -#ifdef CONFIG_PSWIOTLB - if (check_if_pswiotlb_is_applicable(dev)) { - pswiotlb_dma_unmap_sg_attrs_distribute(dev, sg, nents, dir, attrs); - return; - } -#endif if (dma_map_direct(dev, ops) || arch_dma_unmap_sg_direct(dev, sg, nents)) dma_direct_unmap_sg(dev, sg, nents, dir, attrs); @@ -350,18 +328,13 @@ void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size, debug_dma_unmap_resource(dev, addr, size, dir); } EXPORT_SYMBOL(dma_unmap_resource); + void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); -#ifdef CONFIG_PSWIOTLB - if (check_if_pswiotlb_is_applicable(dev)) { - pswiotlb_dma_sync_single_for_cpu_distribute(dev, addr, size, dir); - return; - } -#endif if (dma_map_direct(dev, ops)) dma_direct_sync_single_for_cpu(dev, addr, size, dir); else if (ops->sync_single_for_cpu) @@ -369,18 +342,13 @@ void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, debug_dma_sync_single_for_cpu(dev, addr, size, dir); } EXPORT_SYMBOL(dma_sync_single_for_cpu); + void dma_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); -#ifdef CONFIG_PSWIOTLB - if (check_if_pswiotlb_is_applicable(dev)) { - pswiotlb_dma_sync_single_for_device_distribute(dev, addr, size, dir); - return; - } -#endif if (dma_map_direct(dev, ops)) dma_direct_sync_single_for_device(dev, addr, size, dir); else if (ops->sync_single_for_device) @@ -388,18 +356,13 @@ void dma_sync_single_for_device(struct device *dev, dma_addr_t addr, debug_dma_sync_single_for_device(dev, addr, size, dir); } EXPORT_SYMBOL(dma_sync_single_for_device); + void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction dir) { const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); -#ifdef CONFIG_PSWIOTLB - if (check_if_pswiotlb_is_applicable(dev)) { - pswiotlb_dma_sync_sg_for_cpu_distribute(dev, sg, nelems, dir); - return; - } -#endif if (dma_map_direct(dev, ops)) dma_direct_sync_sg_for_cpu(dev, sg, 
nelems, dir); else if (ops->sync_sg_for_cpu) @@ -407,18 +370,13 @@ void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir); } EXPORT_SYMBOL(dma_sync_sg_for_cpu); + void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction dir) { const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); -#ifdef CONFIG_PSWIOTLB - if (check_if_pswiotlb_is_applicable(dev)) { - pswiotlb_dma_sync_sg_for_device_distribute(dev, sg, nelems, dir); - return; - } -#endif if (dma_map_direct(dev, ops)) dma_direct_sync_sg_for_device(dev, sg, nelems, dir); else if (ops->sync_sg_for_device) diff --git a/kernel/dma/phytium/Kconfig b/kernel/dma/phytium/Kconfig deleted file mode 100644 index 8553a65027ee..000000000000 --- a/kernel/dma/phytium/Kconfig +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -config PSWIOTLB - bool "Phytium software IO TLB" - select NEED_DMA_MAP_STATE - depends on ARCH_PHYTIUM && NUMA - help - This enables phytium software IO TLB. You can disable phytium software - IO TLB using "pswiotlb=forceoff" on the kernel command line if you do - not need it when PSWIOTLB is Y. diff --git a/kernel/dma/phytium/Makefile b/kernel/dma/phytium/Makefile deleted file mode 100644 index f94ea59e950f..000000000000 --- a/kernel/dma/phytium/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 - -obj-$(CONFIG_PSWIOTLB) += pswiotlb.o -obj-$(CONFIG_PSWIOTLB) += pswiotlb-mapping.o -obj-$(CONFIG_PSWIOTLB) += pswiotlb-direct.o -obj-$(CONFIG_PSWIOTLB) += pswiotlb-iommu.o diff --git a/kernel/dma/phytium/pswiotlb-direct.c b/kernel/dma/phytium/pswiotlb-direct.c deleted file mode 100644 index f5e1b62c67c9..000000000000 --- a/kernel/dma/phytium/pswiotlb-direct.c +++ /dev/null @@ -1,146 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * DMA operations based on Phytium software IO tlb that - * map physical memory directly without using an IOMMU. - * - * Copyright (c) 2024, Phytium Technology Co., Ltd. 
- */
-#include <linux/memblock.h> /* for max_pfn */
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include "pswiotlb-dma.h"
-
-/*
- * The following functions are ported from
- * ./drivers/dma/direct.c
- * static inline dma_addr_t phys_to_dma_direct(struct device *dev,
- *	phys_addr_t phys);
- */
-
-static inline dma_addr_t phys_to_dma_direct(struct device *dev,
-		phys_addr_t phys)
-{
-	if (force_dma_unencrypted(dev))
-		return phys_to_dma_unencrypted(dev, phys);
-	return phys_to_dma(dev, phys);
-}
-
-bool pswiotlb_dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
-{
-	dma_addr_t dma_addr = phys_to_dma_direct(dev, phys);
-
-	if (dma_addr == DMA_MAPPING_ERROR)
-		return false;
-	return dma_addr + size - 1 <=
-		min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit);
-}
-
-#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
-	defined(CONFIG_PSWIOTLB)
-void pswiotlb_dma_direct_sync_sg_for_device(struct device *dev,
-		struct scatterlist *sgl, int nents, enum dma_data_direction dir)
-{
-	struct scatterlist *sg;
-	int i;
-	int nid = dev->numa_node;
-	struct p_io_tlb_pool *pool;
-
-	for_each_sg(sgl, sg, nents, i) {
-		phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));
-
-		if (unlikely(is_swiotlb_buffer(dev, paddr)))
-			swiotlb_sync_single_for_device(dev, paddr, sg->length,
-						       dir);
-
-		if (is_pswiotlb_active(dev) &&
-		    unlikely(is_pswiotlb_buffer(dev, nid, paddr, &pool)))
-			pswiotlb_sync_single_for_device(dev, nid, paddr,
-							sg->length, dir, pool);
-
-		if (!dev_is_dma_coherent(dev))
-			arch_sync_dma_for_device(paddr, sg->length,
-						 dir);
-	}
-}
-#endif
-
-#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
-	defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \
-	defined(CONFIG_PSWIOTLB)
-void pswiotlb_dma_direct_sync_sg_for_cpu(struct device *dev,
-		struct scatterlist *sgl, int nents, enum dma_data_direction dir)
-{
-	struct scatterlist *sg;
-	int i;
-	int nid = dev->numa_node;
-	struct p_io_tlb_pool *pool;
-
-	for_each_sg(sgl, sg, nents, i) {
-		phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));
-
-		if (!dev_is_dma_coherent(dev))
-			arch_sync_dma_for_cpu(paddr, sg->length, dir);
-
-		if (unlikely(is_swiotlb_buffer(dev, paddr)))
-			swiotlb_sync_single_for_cpu(dev, paddr, sg->length,
-						    dir);
-
-		if (is_pswiotlb_active(dev) &&
-		    unlikely(is_pswiotlb_buffer(dev, nid, paddr, &pool)))
-			pswiotlb_sync_single_for_cpu(dev, nid, paddr,
-						     sg->length, dir, pool);
-
-		if (dir == DMA_FROM_DEVICE)
-			arch_dma_mark_clean(paddr, sg->length);
-	}
-
-	if (!dev_is_dma_coherent(dev))
-		arch_sync_dma_for_cpu_all();
-}
-
-/*
- * Unmaps segments, except for ones marked as pci_p2pdma which do not
- * require any further action as they contain a bus address.
- */
-void pswiotlb_dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
-		int nents, enum dma_data_direction dir, unsigned long attrs)
-{
-	struct scatterlist *sg;
-	int i;
-
-	for_each_sg(sgl, sg, nents, i)
-		pswiotlb_dma_direct_unmap_page(dev, sg->dma_address, sg_dma_len(sg), dir,
-			     attrs);
-}
-#endif
-
-int pswiotlb_dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
-		enum dma_data_direction dir, unsigned long attrs)
-{
-	struct scatterlist *sg;
-	int i, ret;
-
-	for_each_sg(sgl, sg, nents, i) {
-		sg->dma_address = pswiotlb_dma_direct_map_page(dev, sg_page(sg),
-				sg->offset, sg->length, dir, attrs);
-		if (sg->dma_address == DMA_MAPPING_ERROR) {
-			ret = -EIO;
-			goto out_unmap;
-		}
-		sg_dma_len(sg) = sg->length;
-	}
-
-	return nents;
-
-out_unmap:
-	pswiotlb_dma_direct_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
-	return ret;
-}

diff --git a/kernel/dma/phytium/pswiotlb-dma.h b/kernel/dma/phytium/pswiotlb-dma.h
deleted file mode 100644
index 98302401febf..000000000000
--- a/kernel/dma/phytium/pswiotlb-dma.h
+++ /dev/null
@@ -1,334 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * DMA operations based on Phytium software IO tlb that
- * map physical memory.
- *
- * Copyright (c) 2024, Phytium Technology Co., Ltd.
- */
-#ifndef _KERNEL_PSWIOTLB_DMA_DIRECT_H
-#define _KERNEL_PSWIOTLB_DMA_DIRECT_H
-
-#include
-#include
-#include
-
-extern bool pswiotlb_force_disable;
-#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
-	defined(CONFIG_PSWIOTLB)
-void pswiotlb_dma_direct_sync_sg_for_device(struct device *dev,
-		struct scatterlist *sgl, int nents, enum dma_data_direction dir);
-#else
-static inline void pswiotlb_dma_direct_sync_sg_for_device(struct device *dev,
-		struct scatterlist *sgl, int nents, enum dma_data_direction dir)
-{
-}
-#endif
-
-#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
-	defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \
-	defined(CONFIG_PSWIOTLB)
-void pswiotlb_dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
-		int nents, enum dma_data_direction dir, unsigned long attrs);
-void pswiotlb_dma_direct_sync_sg_for_cpu(struct device *dev,
-		struct scatterlist *sgl, int nents, enum dma_data_direction dir);
-#else
-static inline void pswiotlb_dma_direct_unmap_sg(struct device *dev,
-		struct scatterlist *sgl, int nents, enum dma_data_direction dir,
-		unsigned long attrs)
-{
-}
-static inline void pswiotlb_dma_direct_sync_sg_for_cpu(struct device *dev,
-		struct scatterlist *sgl, int nents, enum dma_data_direction dir)
-{
-}
-#endif
-
-#ifdef CONFIG_PSWIOTLB
-int pswiotlb_dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
-		int nents, enum dma_data_direction dir, unsigned long attrs);
-dma_addr_t pswiotlb_dma_map_page_distribute(struct device *dev, struct page *page,
-		size_t offset, size_t size, enum dma_data_direction dir,
-		unsigned long attrs);
-void pswiotlb_dma_unmap_page_attrs_distribute(struct device *dev, dma_addr_t addr,
-		size_t size, enum dma_data_direction dir, unsigned long attrs);
-int pswiotlb_dma_map_sg_attrs_distribute(struct device *dev, struct scatterlist *sg,
-		int nents, enum dma_data_direction dir, unsigned long attrs);
-void pswiotlb_dma_unmap_sg_attrs_distribute(struct device *dev, struct scatterlist *sg,
-		int nents, enum dma_data_direction dir, unsigned long attrs);
-void pswiotlb_dma_sync_single_for_cpu_distribute(struct device *dev, dma_addr_t addr,
-		size_t size, enum dma_data_direction dir);
-void pswiotlb_dma_sync_single_for_device_distribute(struct device *dev, dma_addr_t addr,
-		size_t size, enum dma_data_direction dir);
-void pswiotlb_dma_sync_sg_for_cpu_distribute(struct device *dev, struct scatterlist *sg,
-		int nelems, enum dma_data_direction dir);
-void pswiotlb_dma_sync_sg_for_device_distribute(struct device *dev, struct scatterlist *sg,
-		int nelems, enum dma_data_direction dir);
-dma_addr_t pswiotlb_iommu_dma_map_page(struct device *dev, struct page *page,
-		unsigned long offset, size_t size, enum dma_data_direction dir,
-		unsigned long attrs);
-void pswiotlb_iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
-		size_t size, enum dma_data_direction dir, unsigned long attrs);
-int pswiotlb_iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
-		int nents, enum dma_data_direction dir, unsigned long attrs);
-void pswiotlb_iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
-		int nents, enum dma_data_direction dir, unsigned long attrs);
-void pswiotlb_iommu_dma_sync_single_for_cpu(struct device *dev,
-		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir);
-void pswiotlb_iommu_dma_sync_single_for_device(struct device *dev,
-		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir);
-void pswiotlb_iommu_dma_sync_sg_for_cpu(struct device *dev,
-		struct scatterlist *sgl, int nelems, enum dma_data_direction dir);
-void pswiotlb_iommu_dma_sync_sg_for_device(struct device *dev,
-		struct scatterlist *sgl, int nelems, enum dma_data_direction dir);
-
-static inline bool check_if_pswiotlb_is_applicable(struct device *dev)
-{
-	if (dev->can_use_pswiotlb && is_phytium_ps_socs()
-			&& !pswiotlb_force_disable) {
-		if (dev->numa_node == NUMA_NO_NODE ||
-			dev->numa_node != dev->local_node)
-			dev->numa_node = dev->local_node;
-
-		if (dev_is_pci(dev) && (dev->numa_node != NUMA_NO_NODE))
-			return true;
-	}
-
-	return false;
-}
-
-static inline void pswiotlb_dma_direct_sync_single_for_device(struct device *dev,
-		dma_addr_t addr, size_t size, enum dma_data_direction dir)
-{
-	phys_addr_t paddr = dma_to_phys(dev, addr);
-	int nid = dev->numa_node;
-	struct p_io_tlb_pool *pool;
-
-	if (unlikely(is_swiotlb_buffer(dev, paddr)))
-		swiotlb_sync_single_for_device(dev, paddr, size, dir);
-
-	if (is_pswiotlb_active(dev)) {
-		if (unlikely(is_pswiotlb_buffer(dev, nid, paddr, &pool)))
-			pswiotlb_sync_single_for_device(dev, nid, paddr, size, dir, pool);
-	}
-
-	if (!dev_is_dma_coherent(dev))
-		arch_sync_dma_for_device(paddr, size, dir);
-}
-
-static inline void pswiotlb_dma_direct_sync_single_for_cpu(struct device *dev,
-		dma_addr_t addr, size_t size, enum dma_data_direction dir)
-{
-	phys_addr_t paddr = dma_to_phys(dev, addr);
-	int nid = dev->numa_node;
-	struct p_io_tlb_pool *pool;
-
-	if (!dev_is_dma_coherent(dev)) {
-		arch_sync_dma_for_cpu(paddr, size, dir);
-		arch_sync_dma_for_cpu_all();
-	}
-
-	if (unlikely(is_swiotlb_buffer(dev, paddr)))
-		swiotlb_sync_single_for_cpu(dev, paddr, size, dir);
-
-	if (is_pswiotlb_active(dev)) {
-		if (unlikely(is_pswiotlb_buffer(dev, nid, paddr, &pool)))
-			pswiotlb_sync_single_for_cpu(dev, nid, paddr, size, dir, pool);
-	}
-
-	if (dir == DMA_FROM_DEVICE)
-		arch_dma_mark_clean(paddr, size);
-}
-
-static inline dma_addr_t pswiotlb_dma_direct_map_page(struct device *dev,
-		struct page *page, unsigned long offset, size_t size,
-		enum dma_data_direction dir, unsigned long attrs)
-{
-	phys_addr_t phys = page_to_phys(page) + offset;
-	dma_addr_t dma_addr = phys_to_dma(dev, phys);
-	int nid = dev->numa_node;
-
-	if (is_swiotlb_force_bounce(dev))
-		return swiotlb_map(dev, phys, size, dir, attrs);
-
-	if (unlikely(!dma_capable(dev, dma_addr, size, true)) ||
-	    dma_kmalloc_needs_bounce(dev, size, dir)) {
-		if (is_swiotlb_active(dev))
-			return swiotlb_map(dev, phys, size, dir, attrs);
-
-		dev_WARN_ONCE(dev, 1,
-			"DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n",
-			&dma_addr, size, *dev->dma_mask, dev->bus_dma_limit);
-		return DMA_MAPPING_ERROR;
-	}
-
-	/* check whether dma addr is in local node */
-	if (is_pswiotlb_active(dev)) {
-		if (dir != DMA_TO_DEVICE) {
-			if (unlikely(!dma_is_in_local_node(dev, nid, dma_addr, size))) {
-				dma_addr = pswiotlb_map(dev, nid, phys, size, dir, attrs);
-				if (dma_addr == DMA_MAPPING_ERROR) {
-					dma_addr = phys_to_dma(dev, phys);
-					dev_warn_once(dev,
-						"Failed to allocate memory from pswiotlb, fall back to non-local dma\n");
-				} else
-					return dma_addr;
-			}
-		}
-	}
-
-	if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-		arch_sync_dma_for_device(phys, size, dir);
-	return dma_addr;
-}
-
-static inline void pswiotlb_dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
-		size_t size, enum dma_data_direction dir, unsigned long attrs)
-{
-	phys_addr_t phys = dma_to_phys(dev, addr);
-	int nid = dev->numa_node;
-	struct p_io_tlb_pool *pool;
-
-	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
-	    !dev_is_dma_coherent(dev)) {
-		arch_sync_dma_for_cpu(phys, size, dir);
-		arch_sync_dma_for_cpu_all();
-	}
-
-	if (unlikely(is_swiotlb_buffer(dev, phys)))
-		swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
-
-	if (is_pswiotlb_active(dev)) {
-		if (unlikely(is_pswiotlb_buffer(dev, nid, phys, &pool)))
-			pswiotlb_tbl_unmap_single(dev, nid, phys, 0, size, dir, attrs, pool);
-
-		if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && (dir == DMA_FROM_DEVICE))
-			arch_dma_mark_clean(phys, size);
-	}
-}
-#else
-static inline int pswiotlb_dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
-		int nents, enum dma_data_direction dir, unsigned long attrs)
-{
-	return 0;
-}
-
-static inline dma_addr_t pswiotlb_dma_map_page_distribute(struct device *dev,
-		struct page *page, size_t offset, size_t size, enum dma_data_direction dir,
-		unsigned long attrs)
-{
-	return 0;
-}
-
-static inline void pswiotlb_dma_unmap_page_attrs_distribute(struct device *dev,
-		dma_addr_t addr, size_t size, enum dma_data_direction dir,
-		unsigned long attrs)
-{
-}
-
-static inline int pswiotlb_dma_map_sg_attrs_distribute(struct device *dev,
-		struct scatterlist *sg, int nents, enum dma_data_direction dir,
-		unsigned long attrs)
-{
-	return 0;
-}
-
-static inline void pswiotlb_dma_unmap_sg_attrs_distribute(struct device *dev,
-		struct scatterlist *sg, int nents, enum dma_data_direction dir,
-		unsigned long attrs)
-{
-}
-
-static inline void pswiotlb_dma_sync_single_for_cpu_distribute(struct device *dev,
-		dma_addr_t addr, size_t size, enum dma_data_direction dir)
-{
-}
-
-static inline void pswiotlb_dma_sync_single_for_device_distribute(struct device *dev,
-		dma_addr_t addr, size_t size, enum dma_data_direction dir)
-{
-}
-
-static inline void pswiotlb_dma_sync_sg_for_cpu_distribute(struct device *dev,
-		struct scatterlist *sg, int nelems, enum dma_data_direction dir)
-{
-}
-
-static inline void pswiotlb_dma_sync_sg_for_device_distribute(struct device *dev,
-		struct scatterlist *sg, int nelems, enum dma_data_direction dir)
-{
-}
-
-static inline dma_addr_t pswiotlb_iommu_dma_map_page(struct device *dev,
-		struct page *page, unsigned long offset, size_t size,
-		enum dma_data_direction dir, unsigned long attrs)
-{
-	return 0;
-}
-
-static inline void pswiotlb_iommu_dma_unmap_page(struct device *dev,
-		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir,
-		unsigned long attrs)
-{
-}
-
-static inline int pswiotlb_iommu_dma_map_sg(struct device *dev,
-		struct scatterlist *sg, int nents, enum dma_data_direction dir,
-		unsigned long attrs)
-{
-	return 0;
-}
-
-static inline void pswiotlb_iommu_dma_unmap_sg(struct device *dev,
-		struct scatterlist *sg, int nents, enum dma_data_direction dir,
-		unsigned long attrs)
-{
-}
-
-static inline void pswiotlb_iommu_dma_sync_single_for_cpu(struct device *dev,
-		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
-{
-}
-
-static inline void pswiotlb_iommu_dma_sync_single_for_device(struct device *dev,
-		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
-{
-}
-
-static inline void pswiotlb_iommu_dma_sync_sg_for_cpu(struct device *dev,
-		struct scatterlist *sgl, int nelems, enum dma_data_direction dir)
-{
-}
-
-static inline void pswiotlb_iommu_dma_sync_sg_for_device(struct device *dev,
-		struct scatterlist *sgl, int nelems, enum dma_data_direction dir)
-{
-}
-
-static inline bool check_if_pswiotlb_is_applicable(struct device *dev)
-{
-	return false;
-}
-
-static inline void pswiotlb_dma_direct_sync_single_for_device(struct device *dev,
-		dma_addr_t addr, size_t size, enum dma_data_direction dir)
-{
-}
-
-static inline void pswiotlb_dma_direct_sync_single_for_cpu(struct device *dev,
-		dma_addr_t addr, size_t size, enum dma_data_direction dir)
-{
-}
-
-static inline dma_addr_t pswiotlb_dma_direct_map_page(struct device *dev,
-		struct page *page, unsigned long offset, size_t size,
-		enum dma_data_direction dir, unsigned long attrs)
-{
-	return 0;
-}
-
-static inline void pswiotlb_dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
-		size_t size, enum dma_data_direction dir, unsigned long attrs)
-{
-}
-#endif /* CONFIG_PSWIOTLB*/
-#endif /* _KERNEL_PSWIOTLB_DMA_DIRECT_H */

diff --git a/kernel/dma/phytium/pswiotlb-iommu.c b/kernel/dma/phytium/pswiotlb-iommu.c
deleted file mode 100644
index 4c3f6b9ac9f1..000000000000
--- a/kernel/dma/phytium/pswiotlb-iommu.c
+++ /dev/null
@@ -1,1214 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * DMA operations based on Phytium software IO tlb that
- * map physical memory indirectly with an IOMMU.
- *
- * Copyright (c) 2024, Phytium Technology Co., Ltd.
- */
-
-#define pr_fmt(fmt)	"pswiotlb iommu: " fmt
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#ifdef CONFIG_ARCH_PHYTIUM
-#include
-#endif
-
-#include "pswiotlb-dma.h"
-
-enum iommu_dma_cookie_type {
-	IOMMU_DMA_IOVA_COOKIE,
-	IOMMU_DMA_MSI_COOKIE,
-};
-
-struct iommu_dma_cookie {
-	enum iommu_dma_cookie_type	type;
-	union {
-		/* Full allocator for IOMMU_DMA_IOVA_COOKIE */
-		struct {
-			struct iova_domain	iovad;
-
-			struct iova_fq __percpu *fq;	/* Flush queue */
-			/* Number of TLB flushes that have been started */
-			atomic64_t		fq_flush_start_cnt;
-			/* Number of TLB flushes that have been finished */
-			atomic64_t		fq_flush_finish_cnt;
-			/* Timer to regularily empty the flush queues */
-			struct timer_list	fq_timer;
-			/* 1 when timer is active, 0 when not */
-			atomic_t		fq_timer_on;
-		};
-		/* Trivial linear page allocator for IOMMU_DMA_MSI_COOKIE */
-		dma_addr_t		msi_iova;
-	};
-	struct list_head	msi_page_list;
-
-	/* Domain for flush queue callback; NULL if flush queue not in use */
-	struct iommu_domain	*fq_domain;
-	struct mutex		mutex;
-};
-
-static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled);
-
-/* Number of entries per flush queue */
-#define IOVA_FQ_SIZE	256
-
-/* Timeout (in ms) after which entries are flushed from the queue */
-#define IOVA_FQ_TIMEOUT	10
-
-/* Flush queue entry for deferred flushing */
-struct iova_fq_entry {
-	unsigned long iova_pfn;
-	unsigned long pages;
-	struct list_head freelist;
-	u64 counter; /* Flush counter when this entry was added */
-};
-
-/* Per-CPU flush queue structure */
-struct iova_fq {
-	struct iova_fq_entry entries[IOVA_FQ_SIZE];
-	unsigned int head, tail;
-	spinlock_t lock;
-};
-
-#define fq_ring_for_each(i, fq) \
-	for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE)
-
-/*
- * The following functions are ported from
- * ./drivers/iommu/dma-iommu.c
- * ./drivers/iommu/iommu.c
- * static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
- *	phys_addr_t paddr, size_t size, size_t *count);
- * static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova,
- *	phys_addr_t paddr, size_t size, int prot,
- *	gfp_t gfp, size_t *mapped);
- * static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
- *	phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
- * static bool dev_is_untrusted(struct device *dev);
- * static int dma_info_to_prot(enum dma_data_direction dir, bool coherent,
- *	unsigned long attrs);
- * static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
- *	size_t size, u64 dma_limit, struct device *dev);
- * static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
- *	dma_addr_t iova, size_t size, struct iommu_iotlb_gather *gather);
- * static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr,
- *	size_t size);
- * static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
- *	size_t size, int prot, u64 dma_mask);
- * static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents,
- *	dma_addr_t dma_addr);
- * static void __invalidate_sg(struct scatterlist *sg, int nents);
- */
-
-static inline bool fq_full(struct iova_fq *fq)
-{
-	assert_spin_locked(&fq->lock);
-	return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head);
-}
-
-static inline unsigned int fq_ring_add(struct iova_fq *fq)
-{
-	unsigned int idx = fq->tail;
-
-	assert_spin_locked(&fq->lock);
-
-	fq->tail = (idx + 1) % IOVA_FQ_SIZE;
-
-	return idx;
-}
-
-static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq)
-{
-	u64 counter = atomic64_read(&cookie->fq_flush_finish_cnt);
-	unsigned int idx;
-
-	assert_spin_locked(&fq->lock);
-
-	fq_ring_for_each(idx, fq) {
-
-		if (fq->entries[idx].counter >= counter)
-			break;
-
-		put_pages_list(&fq->entries[idx].freelist);
-		free_iova_fast(&cookie->iovad,
-			       fq->entries[idx].iova_pfn,
-			       fq->entries[idx].pages);
-
-		fq->head = (fq->head + 1) % IOVA_FQ_SIZE;
-	}
-}
-
-static void fq_flush_iotlb(struct iommu_dma_cookie *cookie)
-{
-	atomic64_inc(&cookie->fq_flush_start_cnt);
-	cookie->fq_domain->ops->flush_iotlb_all(cookie->fq_domain);
-	atomic64_inc(&cookie->fq_flush_finish_cnt);
-}
-
-static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
-		phys_addr_t paddr, size_t size, size_t *count)
-{
-	unsigned int pgsize_idx, pgsize_idx_next;
-	unsigned long pgsizes;
-	size_t offset, pgsize, pgsize_next;
-	unsigned long addr_merge = paddr | iova;
-
-	/* Page sizes supported by the hardware and small enough for @size */
-	pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0);
-
-	/* Constrain the page sizes further based on the maximum alignment */
-	if (likely(addr_merge))
-		pgsizes &= GENMASK(__ffs(addr_merge), 0);
-
-	/* Make sure we have at least one suitable page size */
-	BUG_ON(!pgsizes);
-
-	/* Pick the biggest page size remaining */
-	pgsize_idx = __fls(pgsizes);
-	pgsize = BIT(pgsize_idx);
-	if (!count)
-		return pgsize;
-
-	/* Find the next biggest support page size, if it exists */
-	pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0);
-	if (!pgsizes)
-		goto out_set_count;
-
-	pgsize_idx_next = __ffs(pgsizes);
-	pgsize_next = BIT(pgsize_idx_next);
-
-	/*
-	 * There's no point trying a bigger page size unless the virtual
-	 * and physical addresses are similarly offset within the larger page.
-	 */
-	if ((iova ^ paddr) & (pgsize_next - 1))
-		goto out_set_count;
-
-	/* Calculate the offset to the next page size alignment boundary */
-	offset = pgsize_next - (addr_merge & (pgsize_next - 1));
-
-	/*
-	 * If size is big enough to accommodate the larger page, reduce
-	 * the number of smaller pages.
-	 */
-	if (offset + pgsize_next <= size)
-		size = offset;
-
-out_set_count:
-	*count = size >> pgsize_idx;
-	return pgsize;
-}
-
-static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova,
-		phys_addr_t paddr, size_t size, int prot,
-		gfp_t gfp, size_t *mapped)
-{
-	const struct iommu_domain_ops *ops = domain->ops;
-	size_t pgsize, count;
-	int ret = -EINVAL;
-
-	pgsize = iommu_pgsize(domain, iova, paddr, size, &count);
-
-	pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n",
-		 iova, &paddr, pgsize, count);
-
-	if (ops->map_pages)
-		ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot,
-				     gfp, mapped);
-
-	return ret;
-}
-
-static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
-		phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
-{
-	const struct iommu_domain_ops *ops = domain->ops;
-	unsigned long orig_iova = iova;
-	unsigned int min_pagesz;
-	size_t orig_size = size;
-	phys_addr_t orig_paddr = paddr;
-	int ret = 0;
-
-	if (unlikely(!ops->map_pages || domain->pgsize_bitmap == 0UL))
-		return -ENODEV;
-
-	if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
-		return -EINVAL;
-
-	/* find out the minimum page size supported */
-	min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
-
-	/*
-	 * both the virtual address and the physical one, as well as
-	 * the size of the mapping, must be aligned (at least) to the
-	 * size of the smallest page supported by the hardware
-	 */
-	if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
-		pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n",
-		       iova, &paddr, size, min_pagesz);
-		return -EINVAL;
-	}
-
-	pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);
-
-	while (size) {
-		size_t mapped = 0;
-
-		ret = __iommu_map_pages(domain, iova, paddr, size, prot, gfp,
-					&mapped);
-		/*
-		 * Some pages may have been mapped, even if an error occurred,
-		 * so we should account for those so they can be unmapped.
-		 */
-		size -= mapped;
-
-		if (ret)
-			break;
-
-		iova += mapped;
-		paddr += mapped;
-	}
-
-	/* unroll mapping in case something went wrong */
-	if (ret)
-		iommu_unmap(domain, orig_iova, orig_size - size);
-	else
-		trace_map(orig_iova, orig_paddr, orig_size);
-
-	return ret;
-}
-
-static ssize_t __iommu_map_sg_dma(struct device *dev, struct iommu_domain *domain,
-		unsigned long iova, struct scatterlist *sg, unsigned int nents,
-		int prot, gfp_t gfp, unsigned long attrs)
-{
-	const struct iommu_domain_ops *ops = domain->ops;
-	size_t mapped = 0;
-	int ret;
-	struct iommu_dma_cookie *cookie = domain->iova_cookie;
-	struct iova_domain *iovad = &cookie->iovad;
-	size_t aligned_size;
-	int nid = dev->numa_node;
-	enum dma_data_direction dir = prot & (DMA_TO_DEVICE | DMA_FROM_DEVICE | DMA_BIDIRECTIONAL);
-	struct scatterlist *sg_orig = sg;
-	struct scatterlist *s;
-	int i;
-
-	might_sleep_if(gfpflags_allow_blocking(gfp));
-
-	/* Discourage passing strange GFP flags */
-	if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 |
-				__GFP_HIGHMEM)))
-		return -EINVAL;
-
-	for_each_sg(sg, s, nents, i) {
-		phys_addr_t phys = page_to_phys(sg_page(s)) + s->offset;
-
-		/* check whether dma addr is in local node */
-		if (dir != DMA_TO_DEVICE) {
-			aligned_size = s->length;
-			if ((!dma_is_in_local_node(dev, nid, phys,
-				aligned_size)) && (pswiotlb_force_disable != true)) {
-				aligned_size = iova_align(iovad, s->length);
-				phys = pswiotlb_tbl_map_single(dev, nid,
-					phys, s->length, aligned_size, iova_mask(iovad), dir, attrs);
-				if (phys == DMA_MAPPING_ERROR) {
-					phys = page_to_phys(sg_page(s)) + s->offset;
-					dev_warn_once(dev,
-						"Failed to allocate memory from pswiotlb, fall back to non-local dma\n");
-				}
-			}
-		}
-		if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-			arch_sync_dma_for_device(phys, s->length, dir);
-
-		ret = __iommu_map(domain, iova + mapped, phys,
-				s->length, prot, gfp);
-		if (ret)
-			goto out_err;
-
-		mapped += s->length;
-	}
-
-	if (ops->iotlb_sync_map)
-		ops->iotlb_sync_map(domain, iova, mapped);
-	return mapped;
-
-out_err:
-	/* undo mappings already done */
-	iommu_dma_unmap_sg_pswiotlb(dev, sg_orig, iova,
-			mapped, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
-	iommu_unmap(domain, iova, mapped);
-
-	return ret;
-}
-
-static ssize_t pswiotlb_iommu_map_sg_atomic_dma(struct device *dev,
-		struct iommu_domain *domain, unsigned long iova,
-		struct scatterlist *sg, unsigned int nents, int prot,
-		unsigned long attrs)
-{
-	return __iommu_map_sg_dma(dev, domain, iova, sg, nents, prot, GFP_ATOMIC, attrs);
-}
-
-static bool dev_is_untrusted(struct device *dev)
-{
-	return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
-}
-
-static bool dev_use_swiotlb(struct device *dev, size_t size,
-		enum dma_data_direction dir)
-{
-	return IS_ENABLED(CONFIG_SWIOTLB) &&
-		(dev_is_untrusted(dev) ||
-		 dma_kmalloc_needs_bounce(dev, size, dir));
-}
-
-/**
- * dma_info_to_prot - Translate DMA API directions and attributes to IOMMU API
- *                    page flags.
- * @dir: Direction of DMA transfer
- * @coherent: Is the DMA master cache-coherent?
- * @attrs: DMA attributes for the mapping
- *
- * Return: corresponding IOMMU API page protection flags
- */
-static int dma_info_to_prot(enum dma_data_direction dir, bool coherent,
-		unsigned long attrs)
-{
-	int prot = coherent ? IOMMU_CACHE : 0;
-
-	if (attrs & DMA_ATTR_PRIVILEGED)
-		prot |= IOMMU_PRIV;
-
-	switch (dir) {
-	case DMA_BIDIRECTIONAL:
-		return prot | IOMMU_READ | IOMMU_WRITE;
-	case DMA_TO_DEVICE:
-		return prot | IOMMU_READ;
-	case DMA_FROM_DEVICE:
-		return prot | IOMMU_WRITE;
-	default:
-		return 0;
-	}
-}
-
-static void queue_iova(struct iommu_dma_cookie *cookie,
-		unsigned long pfn, unsigned long pages,
-		struct list_head *freelist)
-{
-	struct iova_fq *fq;
-	unsigned long flags;
-	unsigned int idx;
-
-	/*
-	 * Order against the IOMMU driver's pagetable update from unmapping
-	 * @pte, to guarantee that fq_flush_iotlb() observes that if called
-	 * from a different CPU before we release the lock below. Full barrier
-	 * so it also pairs with iommu_dma_init_fq() to avoid seeing partially
-	 * written fq state here.
-	 */
-	smp_mb();
-
-	fq = raw_cpu_ptr(cookie->fq);
-	spin_lock_irqsave(&fq->lock, flags);
-
-	/*
-	 * First remove all entries from the flush queue that have already been
-	 * flushed out on another CPU. This makes the fq_full() check below less
-	 * likely to be true.
-	 */
-	fq_ring_free(cookie, fq);
-
-	if (fq_full(fq)) {
-		fq_flush_iotlb(cookie);
-		fq_ring_free(cookie, fq);
-	}
-
-	idx = fq_ring_add(fq);
-
-	fq->entries[idx].iova_pfn = pfn;
-	fq->entries[idx].pages = pages;
-	fq->entries[idx].counter = atomic64_read(&cookie->fq_flush_start_cnt);
-	list_splice(freelist, &fq->entries[idx].freelist);
-
-	spin_unlock_irqrestore(&fq->lock, flags);
-
-	/* Avoid false sharing as much as possible. */
-	if (!atomic_read(&cookie->fq_timer_on) &&
-	    !atomic_xchg(&cookie->fq_timer_on, 1))
-		mod_timer(&cookie->fq_timer,
-			  jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
-}
-
-static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
-		size_t size, u64 dma_limit, struct device *dev)
-{
-	struct iommu_dma_cookie *cookie = domain->iova_cookie;
-	struct iova_domain *iovad = &cookie->iovad;
-	unsigned long shift, iova_len, iova;
-
-	if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
-		cookie->msi_iova += size;
-		return cookie->msi_iova - size;
-	}
-
-	shift = iova_shift(iovad);
-	iova_len = size >> shift;
-
-	dma_limit = min_not_zero(dma_limit, dev->bus_dma_limit);
-
-	if (domain->geometry.force_aperture)
-		dma_limit = min_t(u64, dma_limit, (u64)domain->geometry.aperture_end);
-
-	/*
-	 * Try to use all the 32-bit PCI addresses first. The original SAC vs.
-	 * DAC reasoning loses relevance with PCIe, but enough hardware and
-	 * firmware bugs are still lurking out there that it's safest not to
-	 * venture into the 64-bit space until necessary.
-	 *
-	 * If your device goes wrong after seeing the notice then likely either
-	 * its driver is not setting DMA masks accurately, the hardware has
-	 * some inherent bug in handling >32-bit addresses, or not all the
-	 * expected address bits are wired up between the device and the IOMMU.
-	 */
-	if (dma_limit > DMA_BIT_MASK(32) && dev->iommu->pci_32bit_workaround) {
-		iova = alloc_iova_fast(iovad, iova_len,
-				       DMA_BIT_MASK(32) >> shift, false);
-		if (iova)
-			goto done;
-
-		dev->iommu->pci_32bit_workaround = false;
-		dev_notice(dev, "Using %d-bit DMA addresses\n", bits_per(dma_limit));
-	}
-
-	iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift, true);
-done:
-	return (dma_addr_t)iova << shift;
-}
-
-static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
-		dma_addr_t iova, size_t size, struct iommu_iotlb_gather *gather)
-{
-	struct iova_domain *iovad = &cookie->iovad;
-
-	/* The MSI case is only ever cleaning up its most recent allocation */
-	if (cookie->type == IOMMU_DMA_MSI_COOKIE)
-		cookie->msi_iova -= size;
-	else if (gather && gather->queued)
-		queue_iova(cookie, iova_pfn(iovad, iova),
-				size >> iova_shift(iovad),
-				&gather->freelist);
-	else
-		free_iova_fast(iovad, iova_pfn(iovad, iova),
-				size >> iova_shift(iovad));
-}
-
-static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr,
-		size_t size)
-{
-	struct iommu_domain *domain = iommu_get_dma_domain(dev);
-	struct iommu_dma_cookie *cookie = domain->iova_cookie;
-	struct iova_domain *iovad = &cookie->iovad;
-	size_t iova_off = iova_offset(iovad, dma_addr);
-	struct iommu_iotlb_gather iotlb_gather;
-	size_t unmapped;
-
-	dma_addr -= iova_off;
-	size = iova_align(iovad, size + iova_off);
-	iommu_iotlb_gather_init(&iotlb_gather);
-	iotlb_gather.queued = READ_ONCE(cookie->fq_domain);
-
-	unmapped = iommu_unmap_fast(domain, dma_addr, size, &iotlb_gather);
-	WARN_ON(unmapped != size);
-
-	if (!iotlb_gather.queued)
-		iommu_iotlb_sync(domain, &iotlb_gather);
-	iommu_dma_free_iova(cookie, dma_addr, size, &iotlb_gather);
-}
-
-static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
-		size_t size, int prot, u64 dma_mask)
-{
-	struct iommu_domain *domain = iommu_get_dma_domain(dev);
-	struct iommu_dma_cookie *cookie = domain->iova_cookie;
-	struct iova_domain *iovad = &cookie->iovad;
-	size_t iova_off = iova_offset(iovad, phys);
-	dma_addr_t iova;
-
-	if (static_branch_unlikely(&iommu_deferred_attach_enabled) &&
-	    iommu_deferred_attach(dev, domain))
-		return DMA_MAPPING_ERROR;
-
-	size = iova_align(iovad, size + iova_off);
-
-	iova = iommu_dma_alloc_iova(domain, size, dma_mask, dev);
-	if (!iova)
-		return DMA_MAPPING_ERROR;
-
-	if (iommu_map(domain, iova, phys - iova_off, size, prot, GFP_ATOMIC)) {
-		iommu_dma_free_iova(cookie, iova, size, NULL);
-		return DMA_MAPPING_ERROR;
-	}
-	return iova + iova_off;
-}
-
-void pswiotlb_iommu_dma_sync_single_for_cpu(struct device *dev,
-		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
-{
-	phys_addr_t phys;
-	int nid = dev->numa_node;
-	struct p_io_tlb_pool *pool;
-
-	if (is_pswiotlb_active(dev)) {
-		phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
-		if (!dev_is_dma_coherent(dev))
-			arch_sync_dma_for_cpu(phys, size, dir);
-
-		if (is_pswiotlb_buffer(dev, nid, phys, &pool))
-			pswiotlb_sync_single_for_cpu(dev, nid, phys, size, dir, pool);
-
-		if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev, size, dir))
-			return;
-
-		if (is_swiotlb_buffer(dev, phys))
-			swiotlb_sync_single_for_cpu(dev, phys, size, dir);
-	} else {
-		if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev, size, dir))
-			return;
-		phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
-		if (!dev_is_dma_coherent(dev))
-			arch_sync_dma_for_cpu(phys, size, dir);
-		if (is_swiotlb_buffer(dev, phys))
-			swiotlb_sync_single_for_cpu(dev, phys, size, dir);
-	}
-}
-
-void pswiotlb_iommu_dma_sync_single_for_device(struct device *dev,
-		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
-{
-	phys_addr_t phys;
-	int nid = dev->numa_node;
-	struct p_io_tlb_pool *pool;
-
-	if (is_pswiotlb_active(dev)) {
-		phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
-		if (is_pswiotlb_buffer(dev, nid, phys, &pool))
-			pswiotlb_sync_single_for_device(dev, nid, phys, size, dir, pool);
-
-		if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev, size, dir))
-			return;
-	} else {
-		if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev, size, dir))
-			return;
-
-		phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
-	}
-
-	if (is_swiotlb_buffer(dev, phys))
-		swiotlb_sync_single_for_device(dev, phys, size, dir);
-
-	if (!dev_is_dma_coherent(dev))
-		arch_sync_dma_for_device(phys, size, dir);
-}
-
-void pswiotlb_iommu_dma_sync_sg_for_cpu(struct device *dev,
-		struct scatterlist *sgl, int nelems,
-		enum dma_data_direction dir)
-{
-	struct scatterlist *sg;
-	int i;
-	int nid = dev->numa_node;
-	dma_addr_t start_orig;
-	phys_addr_t phys;
-	struct iommu_domain *domain = iommu_get_dma_domain(dev);
-	struct iommu_dma_cookie *cookie = domain->iova_cookie;
-	struct iova_domain *iovad = &cookie->iovad;
-	struct p_io_tlb_pool *pool;
-
-	if (is_pswiotlb_active(dev)) {
-		start_orig = sg_dma_address(sgl);
-		for_each_sg(sgl, sg, nelems, i) {
-			if (dir != DMA_TO_DEVICE) {
-				unsigned int s_iova_off = iova_offset(iovad, sg->offset);
-
-				if (i > 0)
-					start_orig += s_iova_off;
-				phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), start_orig);
-				if (!dev_is_dma_coherent(dev))
-					arch_sync_dma_for_cpu(phys, sg->length, dir);
-
-				if (is_pswiotlb_buffer(dev, nid, phys, &pool))
-					pswiotlb_sync_single_for_cpu(dev, nid, phys,
-								     sg->length, dir, pool);
-				start_orig -= s_iova_off;
-				start_orig += iova_align(iovad, sg->length + s_iova_off);
-			} else {
-				if (!dev_is_dma_coherent(dev))
-					arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
-			}
-		}
-	} else {
-		if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
-			return;
-
-		for_each_sg(sgl, sg, nelems, i) {
-			if (!dev_is_dma_coherent(dev))
-				arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
-
-			if (is_swiotlb_buffer(dev, sg_phys(sg)))
-				swiotlb_sync_single_for_cpu(dev, sg_phys(sg),
-							    sg->length, dir);
-		}
-	}
-}
-
-void pswiotlb_iommu_dma_sync_sg_for_device(struct device *dev,
-		struct scatterlist *sgl, int nelems,
-		enum dma_data_direction dir)
-{
-	struct scatterlist *sg;
-	int i;
-	int nid = dev->numa_node;
-	struct p_io_tlb_pool *pool;
-
-	if (is_pswiotlb_active(dev)) {
-		for_each_sg(sgl, sg, nelems, i) {
-			if (is_pswiotlb_buffer(dev, nid, sg_phys(sg), &pool))
-				pswiotlb_sync_single_for_device(dev, nid, sg_phys(sg),
-								sg->length, dir, pool);
-			if (dev_is_dma_coherent(dev) && !sg_dma_is_swiotlb(sgl))
-				continue;
-
-			if (!dev_is_dma_coherent(dev))
-				arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
-		}
-	} else {
-		if (dev_is_dma_coherent(dev) && !sg_dma_is_swiotlb(sgl))
-			return;
-
-		for_each_sg(sgl, sg, nelems, i) {
-			if (is_swiotlb_buffer(dev, sg_phys(sg)))
-				swiotlb_sync_single_for_device(dev, sg_phys(sg),
-							       sg->length, dir);
-
-			if (!dev_is_dma_coherent(dev))
-				arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
-		}
-	}
-}
-
-dma_addr_t pswiotlb_iommu_dma_map_page(struct device *dev, struct page *page,
-		unsigned long offset, size_t size, enum dma_data_direction dir,
-		unsigned long attrs)
-{
-	phys_addr_t phys = page_to_phys(page) + offset;
-	bool coherent =
dev_is_dma_coherent(dev); - - int prot = dma_info_to_prot(dir, coherent, attrs); - struct iommu_domain *domain = iommu_get_dma_domain(dev); - struct iommu_dma_cookie *cookie = domain->iova_cookie; - struct iova_domain *iovad = &cookie->iovad; - size_t aligned_size = size; - dma_addr_t iova, dma_mask = dma_get_mask(dev); - int nid = dev->numa_node; - struct p_io_tlb_pool *pool; - - /* - * If both the physical buffer start address and size are - * page aligned, we don't need to use a bounce page. - */ - if (dev_use_swiotlb(dev, size, dir) && - iova_offset(iovad, phys | size)) { - void *padding_start; - size_t padding_size; - - if (!is_swiotlb_active(dev)) { - dev_warn_once(dev, "DMA bounce buffers are inactive, unable to map unaligned transaction.\n"); - return DMA_MAPPING_ERROR; - } - - aligned_size = iova_align(iovad, size); - phys = swiotlb_tbl_map_single(dev, phys, size, - iova_mask(iovad), dir, attrs); - - if (phys == DMA_MAPPING_ERROR) - return DMA_MAPPING_ERROR; - - /* Cleanup the padding area. */ - padding_start = phys_to_virt(phys); - padding_size = aligned_size; - - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && - (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) { - padding_start += size; - padding_size -= size; - } - - memset(padding_start, 0, padding_size); - } - - /* check whether dma addr is in local node */ - if (is_pswiotlb_active(dev)) { - if (dir != DMA_TO_DEVICE) { - if (unlikely(!dma_is_in_local_node(dev, nid, phys, aligned_size))) { - aligned_size = iova_align(iovad, size); - phys = pswiotlb_tbl_map_single(dev, nid, phys, size, - aligned_size, iova_mask(iovad), - dir, attrs); - if (phys == DMA_MAPPING_ERROR) { - phys = page_to_phys(page) + offset; - dev_warn_once(dev, - "Failed to allocate memory from pswiotlb, fall back to non-local dma\n"); - } - } - } - } - - if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - arch_sync_dma_for_device(phys, size, dir); - - iova = __iommu_dma_map(dev, phys, size, prot, dma_mask); - if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(dev, phys)) - swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); - if (iova == DMA_MAPPING_ERROR && is_pswiotlb_buffer(dev, nid, phys, &pool)) - pswiotlb_tbl_unmap_single(dev, nid, phys, 0, size, dir, attrs, pool); - return iova; -} - -void pswiotlb_iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction dir, unsigned long attrs) -{ - struct iommu_domain *domain = iommu_get_dma_domain(dev); - phys_addr_t phys; - int nid = dev->numa_node; - struct p_io_tlb_pool *pool; - - phys = iommu_iova_to_phys(domain, dma_handle); - if (WARN_ON(!phys)) - return; - - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev)) - arch_sync_dma_for_cpu(phys, size, dir); - - __iommu_dma_unmap(dev, dma_handle, size); - - if (unlikely(is_swiotlb_buffer(dev, phys))) - swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); - - if (is_pswiotlb_active(dev) && - is_pswiotlb_buffer(dev, nid, phys, &pool)) - pswiotlb_tbl_unmap_single(dev, nid, phys, 0, size, dir, attrs, pool); -} - -static void iommu_dma_unmap_page_sg(struct device *dev, dma_addr_t dma_handle, - size_t offset, size_t size, enum dma_data_direction dir, unsigned long attrs) -{ - struct iommu_domain *domain = iommu_get_dma_domain(dev); - phys_addr_t phys; - int nid = dev->numa_node; - struct p_io_tlb_pool *pool; - - phys = iommu_iova_to_phys(domain, dma_handle); - - if (WARN_ON(!phys)) - return; - - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev)) - arch_sync_dma_for_cpu(phys, size, dir); - - if 
(is_pswiotlb_buffer(dev, nid, phys, &pool)) - pswiotlb_tbl_unmap_single(dev, nid, phys, offset, size, dir, attrs, pool); -} - -/* - * Prepare a successfully-mapped scatterlist to give back to the caller. - * - * At this point the segments are already laid out by pswiotlb_iommu_dma_map_sg() to - * avoid individually crossing any boundaries, so we merely need to check a - * segment's start address to avoid concatenating across one. - */ -static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents, - dma_addr_t dma_addr) -{ - struct scatterlist *s, *cur = sg; - unsigned long seg_mask = dma_get_seg_boundary(dev); - unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev); - int i, count = 0; - - for_each_sg(sg, s, nents, i) { - /* Restore this segment's original unaligned fields first */ - dma_addr_t s_dma_addr = sg_dma_address(s); - unsigned int s_iova_off = sg_dma_address(s); - unsigned int s_length = sg_dma_len(s); - unsigned int s_iova_len = s->length; - - sg_dma_address(s) = DMA_MAPPING_ERROR; - sg_dma_len(s) = 0; - - if (sg_dma_is_bus_address(s)) { - if (i > 0) - cur = sg_next(cur); - - sg_dma_unmark_bus_address(s); - sg_dma_address(cur) = s_dma_addr; - sg_dma_len(cur) = s_length; - sg_dma_mark_bus_address(cur); - count++; - cur_len = 0; - continue; - } - - s->offset += s_iova_off; - s->length = s_length; - - /* - * Now fill in the real DMA data. If... - * - there is a valid output segment to append to - * - and this segment starts on an IOVA page boundary - * - but doesn't fall at a segment boundary - * - and wouldn't make the resulting output segment too long - */ - if (cur_len && !s_iova_off && (dma_addr & seg_mask) && - (max_len - cur_len >= s_length)) { - /* ...then concatenate it with the previous one */ - cur_len += s_length; - } else { - /* Otherwise start the next output segment */ - if (i > 0) - cur = sg_next(cur); - cur_len = s_length; - count++; - - sg_dma_address(cur) = dma_addr + s_iova_off; - } - - sg_dma_len(cur) = cur_len; - dma_addr += s_iova_len; - - if (s_length + s_iova_off < s_iova_len) - cur_len = 0; - } - return count; -} - -/* - * If mapping failed, then just restore the original list, - * but making sure the DMA fields are invalidated. 
- */ -static void __invalidate_sg(struct scatterlist *sg, int nents) -{ - struct scatterlist *s; - int i; - - for_each_sg(sg, s, nents, i) { - if (sg_dma_is_bus_address(s)) { - sg_dma_unmark_bus_address(s); - } else { - if (sg_dma_address(s) != DMA_MAPPING_ERROR) - s->offset += sg_dma_address(s); - if (sg_dma_len(s)) - s->length = sg_dma_len(s); - } - sg_dma_address(s) = DMA_MAPPING_ERROR; - sg_dma_len(s) = 0; - } -} - -static void iommu_dma_unmap_sg_pswiotlb_pagesize(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - struct scatterlist *s; - int i; - - for_each_sg(sg, s, nents, i) - pswiotlb_iommu_dma_unmap_page(dev, sg_dma_address(s), - sg_dma_len(s), dir, attrs); -} - -void iommu_dma_unmap_sg_pswiotlb(struct device *dev, struct scatterlist *sg, - unsigned long iova_start, size_t mapped, int nents, - enum dma_data_direction dir, unsigned long attrs) -{ - dma_addr_t start, start_orig; - struct scatterlist *s; - struct scatterlist *sg_orig = sg; - int i; - - start = iova_start; - start_orig = start; - for_each_sg(sg_orig, s, nents, i) { - if (!mapped || (start_orig > (start + mapped))) - break; - if (s->length == 0) - break; - iommu_dma_unmap_page_sg(dev, start_orig, 0, - s->length, dir, attrs); - start_orig += s->length; - } -} - -static int iommu_dma_map_sg_pswiotlb_pagesize(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - struct scatterlist *s; - int i; - - sg_dma_mark_swiotlb(sg); - - for_each_sg(sg, s, nents, i) { - sg_dma_address(s) = pswiotlb_iommu_dma_map_page(dev, sg_page(s), - s->offset, s->length, dir, attrs); - if (sg_dma_address(s) == DMA_MAPPING_ERROR) - goto out_unmap; - sg_dma_len(s) = s->length; - } - - return nents; - -out_unmap: - iommu_dma_unmap_sg_pswiotlb_pagesize(dev, sg, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); - return -EIO; -} - -/* - * The DMA API client is passing in a scatterlist which could describe - * any old buffer layout, but the IOMMU API requires everything to be - * aligned to IOMMU pages. Hence the need for this complicated bit of - * impedance-matching, to be able to hand off a suitably-aligned list, - * but still preserve the original offsets and sizes for the caller. - */ -int pswiotlb_iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - struct iommu_domain *domain = iommu_get_dma_domain(dev); - struct iommu_dma_cookie *cookie = domain->iova_cookie; - struct iova_domain *iovad = &cookie->iovad; - struct scatterlist *s, *prev = NULL; - int prot = dma_info_to_prot(dir, dev_is_dma_coherent(dev), attrs); - struct pci_p2pdma_map_state p2pdma_state = {}; - enum pci_p2pdma_map_type map; - dma_addr_t iova; - size_t iova_len = 0; - unsigned long mask = dma_get_seg_boundary(dev); - ssize_t ret; - int i; - - if (static_branch_unlikely(&iommu_deferred_attach_enabled)) { - ret = iommu_deferred_attach(dev, domain); - goto out; - } - - if (dir != DMA_TO_DEVICE && is_pswiotlb_active(dev) - && ((nents == 1) && (sg->length < PAGE_SIZE))) - return iommu_dma_map_sg_pswiotlb_pagesize(dev, sg, nents, dir, attrs); - - if ((dir == DMA_TO_DEVICE) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - pswiotlb_iommu_dma_sync_sg_for_device(dev, sg, nents, dir); - - /* - * Work out how much IOVA space we need, and align the segments to - * IOVA granules for the IOMMU driver to handle. 
With some clever - * trickery we can modify the list in-place, but reversibly, by - * stashing the unaligned parts in the as-yet-unused DMA fields. - */ - for_each_sg(sg, s, nents, i) { - size_t s_iova_off = iova_offset(iovad, s->offset); - size_t s_length = s->length; - size_t pad_len = (mask - iova_len + 1) & mask; - - if (is_pci_p2pdma_page(sg_page(s))) { - map = pci_p2pdma_map_segment(&p2pdma_state, dev, s); - switch (map) { - case PCI_P2PDMA_MAP_BUS_ADDR: - /* - * iommu_map_sg() will skip this segment as - * it is marked as a bus address, - * __finalise_sg() will copy the dma address - * into the output segment. - */ - continue; - case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: - /* - * Mapping through host bridge should be - * mapped with regular IOVAs, thus we - * do nothing here and continue below. - */ - break; - default: - ret = -EREMOTEIO; - goto out_restore_sg; - } - } - - sg_dma_address(s) = s_iova_off; - sg_dma_len(s) = s_length; - s->offset -= s_iova_off; - s_length = iova_align(iovad, s_length + s_iova_off); - s->length = s_length; - - /* - * Due to the alignment of our single IOVA allocation, we can - * depend on these assumptions about the segment boundary mask: - * - If mask size >= IOVA size, then the IOVA range cannot - * possibly fall across a boundary, so we don't care. - * - If mask size < IOVA size, then the IOVA range must start - * exactly on a boundary, therefore we can lay things out - * based purely on segment lengths without needing to know - * the actual addresses beforehand. - * - The mask must be a power of 2, so pad_len == 0 if - * iova_len == 0, thus we cannot dereference prev the first - * time through here (i.e. before it has a meaningful value). - */ - if (pad_len && pad_len < s_length - 1) { - prev->length += pad_len; - iova_len += pad_len; - } - - iova_len += s_length; - prev = s; - } - - if (!iova_len) - return __finalise_sg(dev, sg, nents, 0); - - iova = iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev), dev); - if (!iova) { - ret = -ENOMEM; - goto out_restore_sg; - } - - /* - * We'll leave any physical concatenation to the IOMMU driver's - * implementation - it knows better than we do. - */ - if (dir != DMA_TO_DEVICE && is_pswiotlb_active(dev)) - ret = pswiotlb_iommu_map_sg_atomic_dma(dev, domain, iova, sg, nents, prot, attrs); - else - ret = iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC); - - if (ret < 0 || ret < iova_len) - goto out_free_iova; - - return __finalise_sg(dev, sg, nents, iova); - -out_free_iova: - iommu_dma_free_iova(cookie, iova, iova_len, NULL); -out_restore_sg: - __invalidate_sg(sg, nents); -out: - if (ret != -ENOMEM && ret != -EREMOTEIO) - return -EINVAL; - return ret; -} - -void pswiotlb_iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - dma_addr_t start, end = 0, start_orig; - struct scatterlist *tmp, *s; - struct scatterlist *sg_orig = sg; - int i; - struct iommu_domain *domain = iommu_get_dma_domain(dev); - struct iommu_dma_cookie *cookie = domain->iova_cookie; - struct iova_domain *iovad = &cookie->iovad; - - if ((dir != DMA_TO_DEVICE) && ((nents == 1) && (sg->length < PAGE_SIZE))) { - iommu_dma_unmap_sg_pswiotlb_pagesize(dev, sg, nents, dir, attrs); - return; - } - - if ((dir == DMA_TO_DEVICE) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - pswiotlb_iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir); - - /* - * The scatterlist segments are mapped into a single - * contiguous IOVA allocation, the start and end points - * just have to be determined. 
- */ - for_each_sg(sg, tmp, nents, i) { - if (sg_dma_is_bus_address(tmp)) { - sg_dma_unmark_bus_address(tmp); - continue; - } - - if (sg_dma_len(tmp) == 0) - break; - - start = sg_dma_address(tmp); - break; - } - - if (is_pswiotlb_active(dev)) { - /* check whether dma addr is in local node */ - start_orig = start; - if (dir != DMA_TO_DEVICE) { - for_each_sg(sg_orig, s, nents, i) { - unsigned int s_iova_off = iova_offset(iovad, s->offset); - - if (i > 0) - start_orig += s_iova_off; - iommu_dma_unmap_page_sg(dev, start_orig, - s_iova_off, s->length, - dir, attrs); - start_orig -= s_iova_off; - start_orig += iova_align(iovad, s->length + s_iova_off); - } - } - } - - nents -= i; - for_each_sg(tmp, tmp, nents, i) { - if (sg_dma_is_bus_address(tmp)) { - sg_dma_unmark_bus_address(tmp); - continue; - } - - if (sg_dma_len(tmp) == 0) - break; - - end = sg_dma_address(tmp) + sg_dma_len(tmp); - } - - if (end) - __iommu_dma_unmap(dev, start, end - start); -} diff --git a/kernel/dma/phytium/pswiotlb-mapping.c b/kernel/dma/phytium/pswiotlb-mapping.c deleted file mode 100644 index 65674b7bdeab..000000000000 --- a/kernel/dma/phytium/pswiotlb-mapping.c +++ /dev/null @@ -1,157 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Auxiliary DMA operations used by arch-independent dma-mapping - * routines when Phytium software IO tlb is required. - * - * Copyright (c) 2024, Phytium Technology Co., Ltd. - */ -#include /* for max_pfn */ -#include -#include -#include -#include -#include -#include -#include -#include "../debug.h" -#include "../direct.h" -#include "pswiotlb-dma.h" - -/* - * The following functions are ported from - * ./drivers/dma/mapping.c - * static bool dma_go_direct(struct device *dev, dma_addr_t mask, - * const struct dma_map_ops *ops); - * static inline bool dma_map_direct(struct device *dev, - * const struct dma_map_ops *ops); - */ - -static bool dma_go_direct(struct device *dev, dma_addr_t mask, - const struct dma_map_ops *ops) -{ - if (likely(!ops)) - return true; -#ifdef CONFIG_DMA_OPS_BYPASS - if (dev->dma_ops_bypass) - return min_not_zero(mask, dev->bus_dma_limit) >= - dma_direct_get_required_mask(dev); -#endif - return false; -} - -static inline bool dma_map_direct(struct device *dev, - const struct dma_map_ops *ops) -{ - return dma_go_direct(dev, *dev->dma_mask, ops); -} -dma_addr_t pswiotlb_dma_map_page_distribute(struct device *dev, struct page *page, - size_t offset, size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - dma_addr_t addr; - - if (dma_map_direct(dev, ops) || - arch_dma_map_page_direct(dev, page_to_phys(page) + offset + size)) - addr = pswiotlb_dma_direct_map_page(dev, page, offset, size, dir, attrs); - else - addr = pswiotlb_iommu_dma_map_page(dev, page, offset, size, dir, attrs); - debug_dma_map_page(dev, page, offset, size, dir, addr, attrs); - - return addr; -} - -void pswiotlb_dma_unmap_page_attrs_distribute(struct device *dev, dma_addr_t addr, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - if (dma_map_direct(dev, ops) || - arch_dma_unmap_page_direct(dev, addr + size)) - pswiotlb_dma_direct_unmap_page(dev, addr, size, dir, attrs); - else if (ops->unmap_page) - pswiotlb_iommu_dma_unmap_page(dev, addr, size, dir, attrs); - debug_dma_unmap_page(dev, addr, size, dir); -} - -int pswiotlb_dma_map_sg_attrs_distribute(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - 
const struct dma_map_ops *ops = get_dma_ops(dev); - int ents; - - if (dma_map_direct(dev, ops) || - arch_dma_map_sg_direct(dev, sg, nents)) - ents = pswiotlb_dma_direct_map_sg(dev, sg, nents, dir, attrs); - else - ents = pswiotlb_iommu_dma_map_sg(dev, sg, nents, dir, attrs); - - if (ents > 0) - debug_dma_map_sg(dev, sg, nents, ents, dir, attrs); - else if (WARN_ON_ONCE(ents != -EINVAL && ents != -ENOMEM && - ents != -EIO)) - return -EIO; - - return ents; -} - -void pswiotlb_dma_unmap_sg_attrs_distribute(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, - unsigned long attrs) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - if (dma_map_direct(dev, ops) || - arch_dma_unmap_sg_direct(dev, sg, nents)) - pswiotlb_dma_direct_unmap_sg(dev, sg, nents, dir, attrs); - else if (ops->unmap_sg) - pswiotlb_iommu_dma_unmap_sg(dev, sg, nents, dir, attrs); -} - -void pswiotlb_dma_sync_single_for_cpu_distribute(struct device *dev, dma_addr_t addr, size_t size, - enum dma_data_direction dir) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - if (dma_map_direct(dev, ops)) - pswiotlb_dma_direct_sync_single_for_cpu(dev, addr, size, dir); - else if (ops->sync_single_for_cpu) - pswiotlb_iommu_dma_sync_single_for_cpu(dev, addr, size, dir); - debug_dma_sync_single_for_cpu(dev, addr, size, dir); -} - -void pswiotlb_dma_sync_single_for_device_distribute(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - if (dma_map_direct(dev, ops)) - pswiotlb_dma_direct_sync_single_for_device(dev, addr, size, dir); - else if (ops->sync_single_for_device) - pswiotlb_iommu_dma_sync_single_for_device(dev, addr, size, dir); - debug_dma_sync_single_for_device(dev, addr, size, dir); -} - -void pswiotlb_dma_sync_sg_for_cpu_distribute(struct device *dev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - if (dma_map_direct(dev, ops)) - pswiotlb_dma_direct_sync_sg_for_cpu(dev, sg, nelems, dir); - else if (ops->sync_sg_for_cpu) - pswiotlb_iommu_dma_sync_sg_for_cpu(dev, sg, nelems, dir); - debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir); -} - -void pswiotlb_dma_sync_sg_for_device_distribute(struct device *dev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - if (dma_map_direct(dev, ops)) - pswiotlb_dma_direct_sync_sg_for_device(dev, sg, nelems, dir); - else if (ops->sync_sg_for_device) - pswiotlb_iommu_dma_sync_sg_for_device(dev, sg, nelems, dir); - debug_dma_sync_sg_for_device(dev, sg, nelems, dir); -} diff --git a/kernel/dma/phytium/pswiotlb.c b/kernel/dma/phytium/pswiotlb.c deleted file mode 100644 index 31ee5ed73448..000000000000 --- a/kernel/dma/phytium/pswiotlb.c +++ /dev/null @@ -1,1736 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Phytium software IO tlb to improve DMA performance. - * - * Copyright (c) 2024, Phytium Technology Co., Ltd. 
- */ - -#define pr_fmt(fmt) "Phytium software IO TLB: " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef CONFIG_DEBUG_FS -#include -#endif -#ifdef CONFIG_DMA_RESTRICTED_POOL -#include -#include -#include -#include -#include -#endif - -#include - -#define CREATE_TRACE_POINTS -#include - -#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - P_IO_TLB_SHIFT)) - -/* - * Minimum Phytium IO TLB size to bother booting with. If we can't - * allocate a contiguous 1MB, we're probably in trouble anyway. - */ -#define P_IO_TLB_MIN_SLABS ((1<<20) >> P_IO_TLB_SHIFT) -#define PSWIOTLB_VERSION "1.0.0" -#define INVALID_PHYS_ADDR (~(phys_addr_t)0) - -int pswiotlb_node_num; -bool pswiotlb_mtimer_alive; - -/** - * struct p_io_tlb_slot - Phytium IO TLB slot descriptor - * @orig_addr: The original address corresponding to a mapped entry. - * @alloc_size: Size of the allocated buffer. - * @list: The free list describing the number of free entries available - * from each index. - */ -struct p_io_tlb_slot { - phys_addr_t orig_addr; - size_t alloc_size; - unsigned int list; -}; - -bool pswiotlb_force_disable; - -static struct page *alloc_dma_pages(int nid, gfp_t gfp, size_t bytes); - -struct p_io_tlb_mem p_io_tlb_default_mem[MAX_NUMNODES]; -static struct timer_list service_timer; - -static unsigned long default_npslabs = P_IO_TLB_DEFAULT_SIZE >> P_IO_TLB_SHIFT; -static unsigned long dynamic_inc_thr_npslabs = P_IO_TLB_INC_THR >> P_IO_TLB_SHIFT; -static unsigned long default_npareas; - -LIST_HEAD(passthroughlist); -static spinlock_t passthroughlist_lock; -static struct pswiotlb_passthroughlist passthroughlist_entry[1024]; -static struct dentry *passthroughlist_debugfs; -static struct dentry *pswiotlb_debugfs; -/** - * struct p_io_tlb_area - Phytium IO TLB memory area descriptor - * - * This is a single area with a single lock. - * - * @used: The number of used Phytium IO TLB block. - * @index: The slot index to start searching in this area for next round. - * @lock: The lock to protect the above data structures in the map and - * unmap calls. - */ -struct p_io_tlb_area { - unsigned long used; - unsigned int index; - spinlock_t lock; -}; - -static struct pswiotlb_passthroughlist_entry { - unsigned short vendor; - unsigned short device; -} ps_passthroughlist[] = { - {BL_PCI_VENDOR_ID_NVIDIA, 0xFFFF}, - {BL_PCI_VENDOR_ID_ILUVATAR, 0xFFFF}, - {BL_PCI_VENDOR_ID_METAX, 0xFFFF}, - {} -}; - -/* - * Round up number of slabs to the next power of 2. The last area is going - * be smaller than the rest if default_npslabs is not power of two. - * The number of slot in an area should be a multiple of P_IO_TLB_SEGSIZE, - * otherwise a segment may span two or more areas. It conflicts with free - * contiguous slots tracking: free slots are treated contiguous no matter - * whether they cross an area boundary. - * - * Return true if default_npslabs is rounded up. 
- */ -static bool round_up_default_npslabs(void) -{ - if (!default_npareas) - return false; - - if (default_npslabs < P_IO_TLB_SEGSIZE * default_npareas) - default_npslabs = P_IO_TLB_SEGSIZE * default_npareas; - else if (is_power_of_2(default_npslabs)) - return false; - default_npslabs = roundup_pow_of_two(default_npslabs); - return true; -} - -/** - * pswiotlb_adjust_nareas() - adjust the number of areas and slots - * @nareas: Desired number of areas. Zero is treated as 1. - * - * Adjust the default number of areas in a memory pool. - * The default size of the memory pool may also change to meet minimum area - * size requirements. - */ -static void pswiotlb_adjust_nareas(unsigned int nareas) -{ - if (!nareas) - nareas = 1; - else if (!is_power_of_2(nareas)) - nareas = roundup_pow_of_two(nareas); - - default_npareas = nareas; - - pr_info("area num %d.\n", nareas); - if (round_up_default_npslabs()) - pr_info("PSWIOTLB bounce buffer size roundup to %luMB", - (default_npslabs << P_IO_TLB_SHIFT) >> 20); -} - -/** - * limit_nareas() - get the maximum number of areas for a given memory pool size - * @nareas: Desired number of areas. - * @nslots: Total number of slots in the memory pool. - * - * Limit the number of areas to the maximum possible number of areas in - * a memory pool of the given size. - * - * Return: Maximum possible number of areas. - */ -static unsigned int limit_nareas(unsigned int nareas, unsigned long nslots) -{ - if (nslots < nareas * P_IO_TLB_SEGSIZE) - return nslots / P_IO_TLB_SEGSIZE; - return nareas; -} - -static int __init -setup_p_io_tlb_npages(char *str) -{ - unsigned long nareas; - - if (!strcmp(str, "forceoff")) { - pswiotlb_force_disable = true; - } else if (isdigit(*str) && !kstrtoul(str, 0, &default_npslabs)) { - default_npslabs = ALIGN(default_npslabs, P_IO_TLB_SEGSIZE); - str = strchr(str, ','); - if (str++ && isdigit(*str) && !kstrtoul(str, 0, &nareas)) - pswiotlb_adjust_nareas(nareas); - } - return 0; -} -early_param("pswiotlb", setup_p_io_tlb_npages); - -static int __init -setup_pswiotlb_passthroughlist(char *str) -{ - char tmp_str[5] = {'\0'}; - unsigned long flags; - int i, j, k; - int ret; - - for (i = 0, j = 0, k = 0; i < strlen(str) + 1; i++) { - if (*(str + i) != ',' && *(str + i) != '\0') { - tmp_str[j++] = *(str + i); - } else { - j = 0; - - ret = kstrtou16(tmp_str, 16, &passthroughlist_entry[k].vendor); - if (ret) - return ret; - - passthroughlist_entry[k].from_grub = true; - - spin_lock_irqsave(&passthroughlist_lock, flags); - list_add_rcu(&passthroughlist_entry[k].node, &passthroughlist); - spin_unlock_irqrestore(&passthroughlist_lock, flags); - - k++; - } - } - - return 0; -} -early_param("pswiotlb_passthroughlist", setup_pswiotlb_passthroughlist); - -unsigned long pswiotlb_size_or_default(void) -{ - return default_npslabs << P_IO_TLB_SHIFT; -} - -void __init pswiotlb_adjust_size(unsigned long size) -{ - if (default_npslabs != P_IO_TLB_DEFAULT_SIZE >> P_IO_TLB_SHIFT) - return; - size = ALIGN(size, P_IO_TLB_SIZE); - default_npslabs = ALIGN(size >> P_IO_TLB_SHIFT, P_IO_TLB_SEGSIZE); - if (round_up_default_npslabs()) - size = default_npslabs << P_IO_TLB_SHIFT; - pr_info("PSWIOTLB bounce buffer size adjusted to %luMB", size >> 20); -} - -void pswiotlb_print_info(int nid) -{ - struct p_io_tlb_pool *mem = &p_io_tlb_default_mem[nid].defpool; - - if (!mem->nslabs) { - pr_warn("No local mem of numa node %d\n", nid); - return; - } - - pr_info("numa %d mapped [mem %pa-%pa] (%luMB)\n", nid, &mem->start, &mem->end, - (mem->nslabs << P_IO_TLB_SHIFT) >> 20); -} 
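
The sizing helpers deleted above feed into each other in a fixed order: the area count is forced to a power of two, the slab count is raised so every area holds at least one full P_IO_TLB_SEGSIZE segment and is then rounded to a power of two itself, and limit_nareas() caps the area count by the number of whole segments actually available. A standalone sketch of that arithmetic follows; the constants are stand-ins (the real P_IO_TLB_SEGSIZE and P_IO_TLB_SHIFT values come from the deleted include/linux/pswiotlb.h and may differ), so treat it as an illustration of the rounding, not the kernel code:

#include <stdio.h>

#define SEGSIZE    128	/* stand-in for P_IO_TLB_SEGSIZE */
#define SLAB_SHIFT 11	/* stand-in for P_IO_TLB_SHIFT (2 KiB slabs) */

static unsigned long pow2_roundup(unsigned long v)
{
	unsigned long p = 1;

	while (p < v)
		p <<= 1;
	return p;
}

int main(void)
{
	unsigned long npslabs = 30000;	/* e.g. parsed from "pswiotlb=30000,24" */
	unsigned long nareas = 24;	/* second boot value, or num_possible_cpus() */

	/* pswiotlb_adjust_nareas(): area count becomes a power of two */
	if (!nareas)
		nareas = 1;
	nareas = pow2_roundup(nareas);

	/*
	 * round_up_default_npslabs(): every area needs at least one full
	 * segment, and the total is rounded up to a power of two so the
	 * areas divide it evenly.
	 */
	if (npslabs < SEGSIZE * nareas)
		npslabs = SEGSIZE * nareas;
	npslabs = pow2_roundup(npslabs);

	/* limit_nareas(): no more areas than whole segments available */
	if (npslabs < nareas * SEGSIZE)
		nareas = npslabs / SEGSIZE;

	printf("areas=%lu, slabs=%lu, bounce buffer=%luMB\n",
	       nareas, npslabs, (npslabs << SLAB_SHIFT) >> 20);
	return 0;
}

With these example values the sketch prints "areas=32, slabs=32768, bounce buffer=64MB", which corresponds to the 64MB roundup that pswiotlb_adjust_nareas() would log at boot for a non-power-of-two slab count.
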
- -static inline unsigned long io_tlb_offset(unsigned long val) -{ - return val & (P_IO_TLB_SEGSIZE - 1); -} - -static inline unsigned long nr_slots(u64 val) -{ - return DIV_ROUND_UP(val, P_IO_TLB_SIZE); -} - -static void pswiotlb_record_mem_range(struct p_io_tlb_mem *mem) -{ - unsigned long start_pfn, end_pfn; - unsigned long min_pfn = (~(phys_addr_t)0 >> PAGE_SHIFT), max_pfn = 0; - int i, nid; - unsigned long total_pfn = 0; - - for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { - pr_info(" node %3d: [mem %#018Lx-%#018Lx]\n", nid, - (u64)start_pfn << PAGE_SHIFT, - ((u64)end_pfn << PAGE_SHIFT) - 1); - if (nid == mem->numa_node_id) { - if (min_pfn > start_pfn) - min_pfn = start_pfn; - if (max_pfn < end_pfn) - max_pfn = end_pfn; - total_pfn += end_pfn - start_pfn + 1; - } - } - - mem->node_min_addr = (u64)min_pfn << PAGE_SHIFT; - mem->node_max_addr = ((u64)max_pfn << PAGE_SHIFT) - 1; - mem->node_total_mem = (u64)total_pfn << PAGE_SHIFT; -} - -static void pswiotlb_init_io_tlb_pool(struct p_io_tlb_pool *mem, int nid, phys_addr_t start, - unsigned long npslabs, bool late_alloc, unsigned int nareas) -{ - void *vaddr = phys_to_virt(start); - unsigned long bytes = npslabs << P_IO_TLB_SHIFT, i; - - mem->nslabs = npslabs; - mem->start = start; - mem->end = mem->start + bytes; - mem->late_alloc = late_alloc; - mem->numa_node_id = nid; - mem->nareas = nareas; - mem->area_nslabs = npslabs / mem->nareas; - mem->free_th = PSWIOTLB_FREE_THRESHOLD; - - for (i = 0; i < mem->nareas; i++) { - spin_lock_init(&mem->areas[i].lock); - mem->areas[i].index = 0; - mem->areas[i].used = 0; - } - - for (i = 0; i < mem->nslabs; i++) { - mem->slots[i].list = P_IO_TLB_SEGSIZE - io_tlb_offset(i); - mem->slots[i].orig_addr = INVALID_PHYS_ADDR; - mem->slots[i].alloc_size = 0; - } - memset(vaddr, 0, bytes); - mem->vaddr = vaddr; -} - -/** - * add_mem_pool() - add a memory pool to the allocator - * @mem: Phytium software IO TLB allocator. - * @pool: Memory pool to be added. - */ -static void add_mem_pool(struct p_io_tlb_mem *mem, struct p_io_tlb_pool *pool) -{ - spin_lock(&mem->lock); - if (mem->capacity != mem->whole_size) { - mem->pool_addr[mem->whole_size] = mem->pool_addr[mem->capacity]; - mem->pool_addr[mem->capacity] = pool; - } else { - mem->pool_addr[mem->capacity] = pool; - } - /* prevent any other writes prior to this time */ - smp_wmb(); - mem->capacity++; - mem->whole_size++; - mem->nslabs += pool->nslabs; - spin_unlock(&mem->lock); -} - -static void __init *pswiotlb_memblock_alloc(unsigned long npslabs, - int nid, unsigned int flags, - int (*remap)(void *tlb, unsigned long npslabs)) -{ - size_t bytes = PAGE_ALIGN(npslabs << P_IO_TLB_SHIFT); - void *tlb; - - tlb = memblock_alloc_node(bytes, PAGE_SIZE, nid); - - if (!tlb) { - pr_warn("%s: Failed to allocate %zu bytes tlb structure\n", - __func__, bytes); - return NULL; - } - - if (remap && remap(tlb, npslabs) < 0) { - memblock_free(tlb, PAGE_ALIGN(bytes)); - pr_warn("%s: Failed to remap %zu bytes\n", __func__, bytes); - return NULL; - } - - return tlb; -} - -static void check_if_pswiotlb_in_local_node(struct p_io_tlb_mem *mem, - struct p_io_tlb_pool *pool) -{ - if ((pool->start < mem->node_min_addr) || - pool->end > mem->node_max_addr) { - mem->nslabs = 0; - pool->nslabs = 0; - } -} - -/* - * Statically reserve bounce buffer space and initialize bounce buffer data - * structures for the Phytium software IO TLB used to implement the DMA API. 
- */ -void __init pswiotlb_init_remap(bool addressing_limit, int nid, unsigned int flags, - int (*remap)(void *tlb, unsigned long npslabs)) -{ - struct p_io_tlb_pool *mem = &p_io_tlb_default_mem[nid].defpool; - unsigned long npslabs; - unsigned int nareas; - size_t alloc_size; - void *tlb; - - if (!addressing_limit) - return; - if (pswiotlb_force_disable) - return; - - if (!remap) - p_io_tlb_default_mem[nid].can_grow = true; - p_io_tlb_default_mem[nid].phys_limit = virt_to_phys(high_memory - 1); - - if (!default_npareas) - pswiotlb_adjust_nareas(num_possible_cpus()); - - npslabs = default_npslabs; - nareas = limit_nareas(default_npareas, npslabs); - while ((tlb = pswiotlb_memblock_alloc(npslabs, nid, flags, remap)) == NULL) { - if (npslabs <= P_IO_TLB_MIN_SLABS) - return; - npslabs = ALIGN(npslabs >> 1, P_IO_TLB_SEGSIZE); - nareas = limit_nareas(nareas, npslabs); - } - - if (default_npslabs != npslabs) { - pr_info("PSWIOTLB bounce buffer size adjusted %lu -> %lu slabs", - default_npslabs, npslabs); - default_npslabs = npslabs; - } - - alloc_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), npslabs)); - mem->slots = memblock_alloc(alloc_size, PAGE_SIZE); - if (!mem->slots) { - pr_warn("%s: Failed to allocate %zu bytes align=0x%lx\n", - __func__, alloc_size, PAGE_SIZE); - return; - } - - mem->areas = memblock_alloc(array_size(sizeof(struct p_io_tlb_area), - nareas), SMP_CACHE_BYTES); - if (!mem->areas) { - pr_warn("%s: Failed to allocate mem->areas.\n", __func__); - return; - } - - pswiotlb_init_io_tlb_pool(mem, nid, __pa(tlb), npslabs, false, nareas); - add_mem_pool(&p_io_tlb_default_mem[nid], mem); - check_if_pswiotlb_in_local_node(&p_io_tlb_default_mem[nid], mem); - - if (flags & PSWIOTLB_VERBOSE) - pswiotlb_print_info(nid); -} -/** - * pswiotlb_free_tlb() - free a dynamically allocated Phytium IO TLB buffer - * @vaddr: Virtual address of the buffer. - * @bytes: Size of the buffer. - */ -static void pswiotlb_free_tlb(void *vaddr, size_t bytes) -{ - if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) && - dma_free_from_pool(NULL, vaddr, bytes)) - return; - - /* Intentional leak if pages cannot be encrypted again. */ - if (!set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes))) - __free_pages(virt_to_page(vaddr), get_order(bytes)); -} -/** - * pswiotlb_alloc_tlb() - allocate a dynamic Phytium IO TLB buffer - * @dev: Device for which a memory pool is allocated. - * @bytes: Size of the buffer. - * @phys_limit: Maximum allowed physical address of the buffer. - * @gfp: GFP flags for the allocation. - * - * Return: Allocated pages, or %NULL on allocation failure. - */ -static struct page *pswiotlb_alloc_tlb(struct device *dev, int nid, size_t bytes, - u64 phys_limit, gfp_t gfp) -{ - struct page *page; - - /* - * Allocate from the atomic pools if memory is encrypted and - * the allocation is atomic, because decrypting may block. 
- */ - if (!gfpflags_allow_blocking(gfp) && dev && force_dma_unencrypted(dev)) { - void *vaddr; - - if (!IS_ENABLED(CONFIG_DMA_COHERENT_POOL)) - return NULL; - - return dma_alloc_from_pool(dev, bytes, &vaddr, gfp, - pswiotlb_dma_coherent_ok); - } - - gfp &= ~GFP_ZONEMASK; - if (phys_limit <= DMA_BIT_MASK(zone_dma_bits)) - gfp |= __GFP_DMA; - else if (phys_limit <= DMA_BIT_MASK(32)) - gfp |= __GFP_DMA32; - - while ((page = alloc_dma_pages(nid, gfp, bytes)) && - page_to_phys(page) + bytes - 1 > phys_limit) { - /* allocated, but too high */ - __free_pages(page, get_order(bytes)); - - if (IS_ENABLED(CONFIG_ZONE_DMA32) && - phys_limit < DMA_BIT_MASK(64) && - !(gfp & (__GFP_DMA32 | __GFP_DMA))) - gfp |= __GFP_DMA32; - else if (IS_ENABLED(CONFIG_ZONE_DMA) && - !(gfp & __GFP_DMA)) - gfp = (gfp & ~__GFP_DMA32) | __GFP_DMA; - else - return NULL; - } - - return page; -} -/** - * pswiotlb_alloc_pool() - allocate a new Phytium IO TLB memory pool - * @dev: Device for which a memory pool is allocated. - * @minslabs: Minimum number of slabs. - * @nslabs: Desired (maximum) number of slabs. - * @nareas: Number of areas. - * @phys_limit: Maximum DMA buffer physical address. - * @gfp: GFP flags for the allocations. - * - * Allocate and initialize a new Phytium IO TLB memory pool. The actual number of - * slabs may be reduced if allocation of @nslabs fails. If even - * @minslabs cannot be allocated, this function fails. - * - * Return: New memory pool, or %NULL on allocation failure. - */ -static struct p_io_tlb_pool *pswiotlb_alloc_pool(struct device *dev, - int nid, unsigned long minslabs, unsigned long nslabs, - unsigned int nareas, u64 phys_limit, bool transient, gfp_t gfp) -{ - struct p_io_tlb_pool *pool; - unsigned int slot_order; - struct page *tlb; - size_t pool_size; - size_t tlb_size; - - if (nslabs > SLABS_PER_PAGE << MAX_ORDER) { - nslabs = SLABS_PER_PAGE << MAX_ORDER; - nareas = limit_nareas(nareas, nslabs); - } - - pool_size = sizeof(*pool) + array_size(sizeof(*pool->areas), nareas); - pool = kzalloc(pool_size, gfp); - if (!pool) - goto error; - pool->areas = (void *)pool + sizeof(*pool); - - if (!transient) { - nslabs = ALIGN(nslabs >> 1, P_IO_TLB_SEGSIZE); - nareas = limit_nareas(nareas, nslabs); - } - tlb_size = nslabs << P_IO_TLB_SHIFT; - while (!(tlb = pswiotlb_alloc_tlb(dev, nid, tlb_size, phys_limit, gfp))) { - if (nslabs <= minslabs) - goto error_tlb; - nslabs = ALIGN(nslabs >> 1, P_IO_TLB_SEGSIZE); - nareas = limit_nareas(nareas, nslabs); - tlb_size = nslabs << P_IO_TLB_SHIFT; - } - if (page_to_nid(tlb) != nid) - goto error_slots; - - slot_order = get_order(array_size(sizeof(*pool->slots), nslabs)); - pool->slots = (struct p_io_tlb_slot *) - __get_free_pages(gfp, slot_order); - if (!pool->slots) - goto error_slots; - - pswiotlb_init_io_tlb_pool(pool, nid, page_to_phys(tlb), nslabs, true, nareas); - return pool; - -error_slots: - pswiotlb_free_tlb(page_address(tlb), tlb_size); -error_tlb: - kfree(pool); -error: - return NULL; -} -static void pswiotlb_prepare_release_pool(struct p_io_tlb_mem *mem, - struct p_io_tlb_pool *pool, int pool_idx) -{ - int capacity; - - spin_lock(&mem->lock); - capacity = mem->capacity; - mem->pool_addr[pool_idx] = mem->pool_addr[capacity - 1]; - mem->pool_addr[capacity - 1] = pool; - mem->capacity--; - mem->nslabs -= pool->nslabs; - spin_unlock(&mem->lock); -} -static void pswiotlb_release_pool(struct p_io_tlb_mem *mem, - struct p_io_tlb_pool *pool, int pool_idx) -{ - unsigned int bytes = pool->nslabs * P_IO_TLB_SIZE; - unsigned int order = get_order(bytes); - 
struct page *page_start; - size_t slots_size = array_size(sizeof(*pool->slots), pool->nslabs); - int pool_idx1; - - spin_lock(&mem->lock); - pool_idx1 = mem->whole_size - 1; - mem->pool_addr[pool_idx] = mem->pool_addr[pool_idx1]; - mem->whole_size--; - spin_unlock(&mem->lock); - - bitmap_free(pool->busy_record); - free_pages((unsigned long)pool->slots, get_order(slots_size)); - page_start = pfn_to_page(PFN_DOWN(pool->start)); - __free_pages(page_start, order); - kfree(pool); -} -static void pswiotlb_monitor_service(struct timer_list *timer) -{ - int i, j, pool_idx; - struct p_io_tlb_pool *pool; - struct p_io_tlb_mem *mem; - int capacity, whole_size; - - for (i = 0; i < pswiotlb_node_num; i++) { - mem = &p_io_tlb_default_mem[i]; - whole_size = mem->whole_size; - capacity = mem->capacity; - rcu_read_lock(); - for (pool_idx = 1; pool_idx < whole_size; pool_idx++) { - pool = mem->pool_addr[pool_idx]; - for (j = 0; j < DIV_ROUND_UP(pool->nareas, BITS_PER_LONG); j++) { - if (*(pool->busy_record + j) != 0) { - pool->busy_flag = true; - break; - } - pool->busy_flag = false; - } - if (!pool->busy_flag) - pool->free_cnt++; - else - pool->free_cnt = 0; - if (pool->free_cnt >= pool->free_th && pool_idx < capacity) { - pswiotlb_prepare_release_pool(mem, pool, pool_idx); - capacity--; - } - if (pool->free_cnt >= 2 * pool->free_th && !pool->busy_flag) { - pswiotlb_release_pool(mem, pool, pool_idx); - whole_size--; - } - } - rcu_read_unlock(); - } - - mod_timer(timer, jiffies + 2 * HZ); -} -static struct p_io_tlb_pool *pswiotlb_formal_alloc(struct device *dev, - struct p_io_tlb_mem *mem) -{ - struct p_io_tlb_pool *pool; - - pool = pswiotlb_alloc_pool(dev, mem->numa_node_id, - P_IO_TLB_MIN_SLABS, dynamic_inc_thr_npslabs, - dynamic_inc_thr_npslabs, mem->phys_limit, - 0, GFP_NOWAIT | __GFP_NOWARN); - if (!pool) { - pr_warn_once("Failed to allocate new formal pool"); - return NULL; - } - - pool->busy_record = bitmap_zalloc(pool->nareas, GFP_KERNEL); - if (!pool->busy_record) { - pr_warn_ratelimited("%s: Failed to allocate pool busy record.\n", __func__); - return NULL; - } - - add_mem_pool(mem, pool); - - return pool; -} - -/** - * pswiotlb_dyn_free() - RCU callback to free a memory pool - * @rcu: RCU head in the corresponding struct p_io_tlb_pool. 
- */ -static void pswiotlb_dyn_free(struct rcu_head *rcu) -{ - struct p_io_tlb_pool *pool = container_of(rcu, struct p_io_tlb_pool, rcu); - size_t slots_size = array_size(sizeof(*pool->slots), pool->nslabs); - size_t tlb_size = pool->end - pool->start; - - free_pages((unsigned long)pool->slots, get_order(slots_size)); - pswiotlb_free_tlb(pool->vaddr, tlb_size); - kfree(pool); -} -static void pswiotlb_init_tlb_mem_dynamic(struct p_io_tlb_mem *mem, int nid) -{ - spin_lock_init(&mem->lock); - mem->capacity = 0; - mem->whole_size = 0; - mem->numa_node_id = nid; -} - -bool pswiotlb_is_dev_in_passthroughlist(struct pci_dev *dev) -{ - struct pswiotlb_passthroughlist *bl_entry; - - rcu_read_lock(); - list_for_each_entry_rcu(bl_entry, &passthroughlist, node) { - if (bl_entry->vendor == dev->vendor) { - rcu_read_unlock(); - goto out; - } - } - rcu_read_unlock(); - - return true; -out: - return false; -} - -static void pswiotlb_show_passthroughlist(void) -{ - struct pswiotlb_passthroughlist *bl_entry; - - pr_info("The following vendors devices belong to are incompatible with pswiotlb temporarily:\n"); - rcu_read_lock(); - list_for_each_entry_rcu(bl_entry, &passthroughlist, node) - printk(KERN_INFO "0x%06x", bl_entry->vendor); - rcu_read_unlock(); -} - -static void __init pswiotlb_passthroughlist_init(void) -{ - int dev_num = 0; - int i; - size_t alloc_size; - struct pswiotlb_passthroughlist *passthroughlist_array; - - spin_lock_init(&passthroughlist_lock); - - for (i = 0; ps_passthroughlist[i].vendor != 0; i++) - dev_num++; - - alloc_size = PAGE_ALIGN(array_size(sizeof(struct pswiotlb_passthroughlist), dev_num)); - passthroughlist_array = memblock_alloc(alloc_size, PAGE_SIZE); - if (!passthroughlist_array) { - pr_warn("%s: Failed to allocate memory for passthroughlist\n", - __func__); - return; - } - - for (i = 0; i < dev_num; i++) { - passthroughlist_array[i].vendor = ps_passthroughlist[i].vendor; - passthroughlist_array[i].device = ps_passthroughlist[i].device; - - spin_lock(&passthroughlist_lock); - list_add_rcu(&passthroughlist_array[i].node, &passthroughlist); - spin_unlock(&passthroughlist_lock); - } - - pswiotlb_show_passthroughlist(); -} - -/* - * Statically reserve bounce buffer space and initialize bounce buffer data - * structures for the software IO TLB used to implement the DMA API. - */ -void __init pswiotlb_init(bool addressing_limit, unsigned int flags) -{ - int i; - int nid; - unsigned long start_pfn, end_pfn; - - /* Get number of numa node*/ - for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid); - pswiotlb_node_num = nid + 1; - pr_info("Total number of numa nodes is %d\n", pswiotlb_node_num); - for (i = 0; i < pswiotlb_node_num; i++) { - struct p_io_tlb_mem *mem = &p_io_tlb_default_mem[i]; - - pswiotlb_init_tlb_mem_dynamic(mem, i); - pswiotlb_record_mem_range(mem); - pr_info(" node %3d memory range: [%#018Lx-%#018Lx], total memory: %ldMB\n", - i, mem->node_min_addr, mem->node_max_addr, - mem->node_total_mem >> 20); - } - /* Get P TLB memory according to numa node id */ - for (i = 0; i < pswiotlb_node_num; i++) - pswiotlb_init_remap(addressing_limit, i, flags, NULL); - - pswiotlb_passthroughlist_init(); -} - -/** - * alloc_dma_pages() - allocate pages to be used for DMA - * @gfp: GFP flags for the allocation. - * @bytes: Size of the buffer. - * - * Allocate pages from the buddy allocator. If successful, make the allocated - * pages decrypted that they can be used for DMA. - * - * Return: Decrypted pages, or %NULL on failure. 
- */ -static struct page *alloc_dma_pages(int nid, gfp_t gfp, size_t bytes) -{ - unsigned int order = get_order(bytes); - struct page *page; - void *vaddr; - - page = alloc_pages_node(nid, gfp, order); - if (!page) - return NULL; - - vaddr = page_address(page); - if (set_memory_decrypted((unsigned long)vaddr, PFN_UP(bytes))) - goto error; - return page; - -error: - __free_pages(page, order); - return NULL; -} - -/** - * pswiotlb_find_pool() - find the Phytium IO TLB pool for a physical address - * @dev: Device which has mapped the DMA buffer. - * @paddr: Physical address within the DMA buffer. - * - * Find the Phytium IO TLB memory pool descriptor which contains the given physical - * address, if any. - * - * Return: Memory pool which contains @paddr, or %NULL if none. - */ -struct p_io_tlb_pool *pswiotlb_find_pool(struct device *dev, int nid, phys_addr_t paddr) -{ - struct p_io_tlb_mem *mem = &dev->dma_p_io_tlb_mem[nid]; - struct p_io_tlb_pool *pool; - int i; - int whole_size; - - /* prevent any other reads prior to this time */ - smp_rmb(); - whole_size = mem->whole_size; - rcu_read_lock(); - for (i = 0; i < whole_size; i++) { - pool = mem->pool_addr[i]; - if (paddr >= pool->start && paddr < pool->end) - goto out; - } - - pool = NULL; -out: - rcu_read_unlock(); - return pool; -} - -/** - * pswiotlb_dev_init() - initialize pswiotlb fields in &struct device - * @dev: Device to be initialized. - */ -void pswiotlb_dev_init(struct device *dev) -{ - dev->dma_uses_p_io_tlb = false; -} - -void pswiotlb_store_local_node(struct pci_dev *dev, struct pci_bus *bus) -{ - int nid; - struct p_io_tlb_pool *defpool; - struct p_io_tlb_mem *mem; - - dev->dev.local_node = pcibus_to_node(bus); - /* register pswiotlb resources */ - dev->dev.dma_p_io_tlb_mem = p_io_tlb_default_mem; - nid = dev->dev.local_node; - defpool = &dev->dev.dma_p_io_tlb_mem[nid].defpool; - mem = &dev->dev.dma_p_io_tlb_mem[nid]; - pci_info(dev, "numa node: %d, pswiotlb defpool range: [%#018Lx-%#018Lx]\n" - "local node range: [%#018Lx-%#018Lx]\n", nid, - defpool->start, defpool->end, mem->node_min_addr, mem->node_max_addr); -} -/* - * Return the offset into a pswiotlb slot required to keep the device happy. - */ -static unsigned int pswiotlb_align_offset(struct device *dev, u64 addr) -{ - return addr & dma_get_min_align_mask(dev) & (P_IO_TLB_SIZE - 1); -} -/* - * Bounce: copy the pswiotlb buffer from or back to the original dma location - */ -static void pswiotlb_bounce(struct device *dev, int nid, phys_addr_t tlb_addr, size_t size, - enum dma_data_direction dir, struct p_io_tlb_pool *mem) -{ - int index = (tlb_addr - mem->start) >> P_IO_TLB_SHIFT; - phys_addr_t orig_addr = mem->slots[index].orig_addr; - size_t alloc_size = mem->slots[index].alloc_size; - unsigned long pfn = PFN_DOWN(orig_addr); - unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start; - unsigned int tlb_offset, orig_addr_offset; - - if (orig_addr == INVALID_PHYS_ADDR) - return; - - tlb_offset = tlb_addr & (P_IO_TLB_SIZE - 1); - orig_addr_offset = pswiotlb_align_offset(dev, orig_addr); - if (tlb_offset < orig_addr_offset) { - dev_WARN_ONCE(dev, 1, - "Access before mapping start detected. orig offset %u, requested offset %u.\n", - orig_addr_offset, tlb_offset); - return; - } - - tlb_offset -= orig_addr_offset; - if (tlb_offset > alloc_size) { - dev_WARN_ONCE(dev, 1, - "Buffer overflow detected. Allocation size: %zu. 
Mapping size: %zu+%u.\n", - alloc_size, size, tlb_offset); - return; - } - - orig_addr += tlb_offset; - alloc_size -= tlb_offset; - - if (size > alloc_size) { - dev_WARN_ONCE(dev, 1, - "Buffer overflow detected. Allocation size: %zu. Mapping size: %zu.\n", - alloc_size, size); - size = alloc_size; - } - - if (PageHighMem(pfn_to_page(pfn))) { - unsigned int offset = orig_addr & ~PAGE_MASK; - struct page *page; - unsigned int sz = 0; - unsigned long flags; - - dev_info(dev, "%s line=%d !!!!!!HighMem!!!!!! dir: %d, tlb_addr: %#018Lx, size: %#lx\n", - __func__, __LINE__, dir, tlb_addr, size); - - while (size) { - sz = min_t(size_t, PAGE_SIZE - offset, size); - - local_irq_save(flags); - page = pfn_to_page(pfn); - if (dir == DMA_TO_DEVICE) - memcpy_from_page(vaddr, page, offset, sz); - else - memcpy_to_page(page, offset, vaddr, sz); - local_irq_restore(flags); - - size -= sz; - pfn++; - vaddr += sz; - offset = 0; - } - } else if (dir == DMA_TO_DEVICE) { - memcpy(vaddr, phys_to_virt(orig_addr), size); - } else { - memcpy(phys_to_virt(orig_addr), vaddr, size); - } -} -static inline phys_addr_t slot_addr(phys_addr_t start, phys_addr_t idx) -{ - return start + (idx << P_IO_TLB_SHIFT); -} -/* - * Carefully handle integer overflow which can occur when boundary_mask == ~0UL. - */ -static inline unsigned long get_max_slots(unsigned long boundary_mask) -{ - return (boundary_mask >> P_IO_TLB_SHIFT) + 1; -} - -static unsigned int wrap_area_index(struct p_io_tlb_pool *mem, unsigned int index) -{ - if (index >= mem->area_nslabs) - return 0; - return index; -} - -/** - * pswiotlb_area_find_slots() - search for slots in one Phytium IO TLB memory area - * @dev: Device which maps the buffer. - * @pool: Memory pool to be searched. - * @area_index: Index of the Phytium IO TLB memory area to be searched. - * @orig_addr: Original (non-bounced) Phytium IO buffer address. - * @alloc_size: Total requested size of the bounce buffer, - * including initial alignment padding. - * @alloc_align_mask: Required alignment of the allocated buffer. - * - * Find a suitable sequence of Phytium IO TLB entries for the request and allocate - * a buffer from the given Phytium IO TLB memory area. - * This function takes care of locking. - * - * Return: Index of the first allocated slot, or -1 on error. - */ -static int pswiotlb_area_find_slots(struct device *dev, int nid, struct p_io_tlb_pool *pool, - int area_index, phys_addr_t orig_addr, size_t alloc_size, - unsigned int alloc_align_mask) -{ - struct p_io_tlb_area *area = pool->areas + area_index; - unsigned long boundary_mask = dma_get_seg_boundary(dev); - dma_addr_t tbl_dma_addr = - phys_to_dma_unencrypted(dev, pool->start) & boundary_mask; - unsigned long max_slots = get_max_slots(boundary_mask); - unsigned int iotlb_align_mask = - dma_get_min_align_mask(dev) | alloc_align_mask; - unsigned int nslots = nr_slots(alloc_size), stride; - unsigned int offset = pswiotlb_align_offset(dev, orig_addr); - unsigned int index, slots_checked, count = 0, i; - unsigned long flags; - unsigned int slot_base; - unsigned int slot_index; - - WARN_ON(!nslots); - WARN_ON(area_index >= pool->nareas); - - /* - * For allocations of PAGE_SIZE or larger only look for page aligned - * allocations. - */ - if (alloc_size >= PAGE_SIZE) - iotlb_align_mask |= ~PAGE_MASK; - iotlb_align_mask &= ~(P_IO_TLB_SIZE - 1); - - /* - * For mappings with an alignment requirement don't bother looping to - * unaligned slots once we found an aligned one. 
- */ - stride = (iotlb_align_mask >> P_IO_TLB_SHIFT) + 1; - - if (spin_trylock_irqsave(&area->lock, flags)) { - if (unlikely(nslots > pool->area_nslabs - area->used)) - goto not_found; - - slot_base = area_index * pool->area_nslabs; - index = area->index; - - for (slots_checked = 0; slots_checked < pool->area_nslabs;) { - slot_index = slot_base + index; - - if (orig_addr && - (slot_addr(tbl_dma_addr, slot_index) & - iotlb_align_mask) != (orig_addr & iotlb_align_mask)) { - index = wrap_area_index(pool, index + 1); - slots_checked++; - continue; - } - - if (!iommu_is_span_boundary(slot_index, nslots, - nr_slots(tbl_dma_addr), - max_slots)) { - if (pool->slots[slot_index].list >= nslots) - goto found; - } - index = wrap_area_index(pool, index + stride); - slots_checked += stride; - } - } else { - return -1; - } - -not_found: - spin_unlock_irqrestore(&area->lock, flags); - return -1; - -found: - /* - * If we find a slot that indicates we have 'nslots' number of - * contiguous buffers, we allocate the buffers from that slot onwards - * and set the list of free entries to '0' indicating unavailable. - */ - for (i = slot_index; i < slot_index + nslots; i++) { - pool->slots[i].list = 0; - pool->slots[i].alloc_size = alloc_size - (offset + - ((i - slot_index) << P_IO_TLB_SHIFT)); - } - for (i = slot_index - 1; - io_tlb_offset(i) != P_IO_TLB_SEGSIZE - 1 && - pool->slots[i].list; i--) - pool->slots[i].list = ++count; - - /* - * Update the indices to avoid searching in the next round. - */ - area->index = wrap_area_index(pool, index + nslots); - area->used += nslots; - spin_unlock_irqrestore(&area->lock, flags); - - return slot_index; -} - -/** - * pswiotlb_pool_find_slots() - search for slots in one memory pool - * @dev: Device which maps the buffer. - * @pool: Memory pool to be searched. - * @orig_addr: Original (non-bounced)Phytium IO buffer address. - * @alloc_size: Total requested size of the bounce buffer, - * including initial alignment padding. - * @alloc_align_mask: Required alignment of the allocated buffer. - * - * Search through one memory pool to find a sequence of slots that match the - * allocation constraints. - * - * Return: Index of the first allocated slot, or -1 on error. - */ -static int pswiotlb_pool_find_slots(struct device *dev, int nid, struct p_io_tlb_pool *pool, - phys_addr_t orig_addr, size_t alloc_size, - unsigned int alloc_align_mask) -{ - int start = raw_smp_processor_id() & (pool->nareas - 1); - int i = start, index; - - do { - index = pswiotlb_area_find_slots(dev, nid, pool, i, orig_addr, - alloc_size, alloc_align_mask); - if (index >= 0) { - if ((pool != &p_io_tlb_default_mem[nid].defpool) && - !pool->transient) { - bitmap_set(pool->busy_record, i, 1); - } - return index; - } - if (++i >= pool->nareas) - i = 0; - } while (i != start); - - return -1; -} - -/** - * pswiotlb_find_slots() - search for slots in the whole pswiotlb - * @dev: Device which maps the buffer. - * @orig_addr: Original (non-bounced) Phytium IO buffer address. - * @alloc_size: Total requested size of the bounce buffer, - * including initial alignment padding. - * @alloc_align_mask: Required alignment of the allocated buffer. - * @retpool: Used memory pool, updated on return. - * - * Search through the whole Phytium software IO TLB to find a sequence of slots that - * match the allocation constraints. - * - * Return: Index of the first allocated slot, or -1 on error. 
- */ -static int pswiotlb_find_slots(struct device *dev, int nid, phys_addr_t orig_addr, - size_t alloc_size, unsigned int alloc_align_mask, - struct p_io_tlb_pool **retpool) -{ - struct p_io_tlb_mem *mem = &dev->dma_p_io_tlb_mem[nid]; - struct p_io_tlb_pool *pool; - int index; - int try_pool_idx; - int i; - int cpuid; - int current_ratio; - unsigned long pswiotlb_mem; - unsigned long nslabs_per_pool = dynamic_inc_thr_npslabs; - - cpuid = raw_smp_processor_id(); - - rcu_read_lock(); -#ifndef CONFIG_ARM64_4K_PAGES - for (i = 0; i < 15; i++) { - if (i == 0) { - pool = mem->pool_addr[0]; - index = pswiotlb_pool_find_slots(dev, nid, pool, orig_addr, - alloc_size, alloc_align_mask); - } else if (i == 1 && mem->capacity > (cpuid + 1)) { - pool = mem->pool_addr[cpuid + 1]; - index = pswiotlb_pool_find_slots(dev, nid, pool, orig_addr, - alloc_size, alloc_align_mask); - } else { - try_pool_idx = get_random_u32() % mem->capacity; - pool = mem->pool_addr[try_pool_idx]; - index = pswiotlb_pool_find_slots(dev, nid, pool, orig_addr, - alloc_size, alloc_align_mask); - } - - if (index >= 0) { - rcu_read_unlock(); - goto found; - } - } -#else - for (i = 0; i < 15; i++) { - try_pool_idx = get_random_u32() % mem->capacity; - pool = mem->pool_addr[try_pool_idx]; - index = pswiotlb_pool_find_slots(dev, nid, pool, orig_addr, - alloc_size, alloc_align_mask); - - if (index >= 0) { - rcu_read_unlock(); - goto found; - } - } -#endif - rcu_read_unlock(); - if (nslabs_per_pool > SLABS_PER_PAGE << MAX_ORDER) - nslabs_per_pool = SLABS_PER_PAGE << MAX_ORDER; - - nslabs_per_pool = ALIGN(nslabs_per_pool >> 1, P_IO_TLB_SEGSIZE); - pswiotlb_mem = P_IO_TLB_DEFAULT_SIZE + - (nslabs_per_pool << P_IO_TLB_SHIFT) * (mem->whole_size - 1); - current_ratio = (pswiotlb_mem * 100 + mem->node_total_mem / 2) / mem->node_total_mem; - if (current_ratio >= P_IO_TLB_EXT_WATERMARK) { - dev_warn_once(dev, "Total pswiotlb (%ld MB) exceeds the watermark (%d%%)\n" - "of memory (%ld MB) in node %d, pswiotlb expansion is prohibited.\n", - pswiotlb_mem >> 20, P_IO_TLB_EXT_WATERMARK, - mem->node_total_mem >> 20, nid); - return -1; - } - - if (!mem->can_grow) - return -1; - - pool = pswiotlb_formal_alloc(dev, mem); - if (!pool) - return -1; - - /* retry */ - rcu_read_lock(); - index = pswiotlb_pool_find_slots(dev, nid, pool, orig_addr, - alloc_size, alloc_align_mask); - rcu_read_unlock(); - - if (index < 0) { - pswiotlb_dyn_free(&pool->rcu); - return -1; - } - -found: - WRITE_ONCE(dev->dma_uses_p_io_tlb, true); - - /* - * The general barrier orders reads and writes against a presumed store - * of the PSWIOTLB buffer address by a device driver (to a driver private - * data structure). It serves two purposes. - * - * First, the store to dev->dma_uses_p_io_tlb must be ordered before the - * presumed store. This guarantees that the returned buffer address - * cannot be passed to another CPU before updating dev->dma_uses_p_io_tlb. - * - * Second, the load from mem->pools must be ordered before the same - * presumed store. This guarantees that the returned buffer address - * cannot be observed by another CPU before an update of the RCU list - * that was made by pswiotlb_dyn_alloc() on a third CPU (cf. multicopy - * atomicity). - * - * See also the comment in is_pswiotlb_buffer(). - */ - smp_mb(); - - *retpool = pool; - return index; -} -#ifdef CONFIG_DEBUG_FS - -/** - * mem_used() - get number of used slots in an allocator - * @mem: Phytium software IO TLB allocator. 
- * - * The result is accurate in this version of the function, because an atomic - * counter is available if CONFIG_DEBUG_FS is set. - * - * Return: Number of used slots. - */ -static unsigned long mem_used(struct p_io_tlb_mem *mem) -{ - return atomic_long_read(&mem->total_used); -} - -#else /* !CONFIG_DEBUG_FS */ - -/** - * mem_pool_used() - get number of used slots in a memory pool - * @pool: Phytium software IO TLB memory pool. - * - * The result is not accurate, see mem_used(). - * - * Return: Approximate number of used slots. - */ -static unsigned long mem_pool_used(struct p_io_tlb_pool *pool) -{ - int i; - unsigned long used = 0; - - for (i = 0; i < pool->nareas; i++) - used += pool->areas[i].used; - return used; -} - -/** - * mem_used() - get number of used slots in an allocator - * @mem: Phytium software IO TLB allocator. - * - * The result is not accurate, because there is no locking of individual - * areas. - * - * Return: Approximate number of used slots. - */ -static unsigned long mem_used(struct p_io_tlb_mem *mem) -{ - struct p_io_tlb_pool *pool; - unsigned long used = 0; - - rcu_read_lock(); - list_for_each_entry_rcu(pool, &mem->pools, node) - used += mem_pool_used(pool); - rcu_read_unlock(); - - return used; -} - -#endif /* CONFIG_DEBUG_FS */ - -phys_addr_t pswiotlb_tbl_map_single(struct device *dev, int nid, phys_addr_t orig_addr, - size_t mapping_size, size_t alloc_size, - unsigned int alloc_align_mask, enum dma_data_direction dir, - unsigned long attrs) -{ - struct p_io_tlb_mem *mem = &dev->dma_p_io_tlb_mem[nid]; - unsigned int offset = pswiotlb_align_offset(dev, orig_addr); - struct p_io_tlb_pool *pool; - unsigned int i; - unsigned long index; - phys_addr_t tlb_addr; - struct page *page; - - if (alloc_size > (P_IO_TLB_SEGSIZE << P_IO_TLB_SHIFT)) { - dev_warn_ratelimited(dev, "alloc size 0x%lx is larger than segment(0x%x) of pswiotlb\n", - alloc_size, P_IO_TLB_SEGSIZE << P_IO_TLB_SHIFT); - return (phys_addr_t)DMA_MAPPING_ERROR; - } - - if (!mem || !mem->nslabs) { - dev_warn_ratelimited(dev, - "Can not allocate PSWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); - return (phys_addr_t)DMA_MAPPING_ERROR; - } - - if (mapping_size > alloc_size) { - dev_warn_once(dev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)", - mapping_size, alloc_size); - return (phys_addr_t)DMA_MAPPING_ERROR; - } - - index = pswiotlb_find_slots(dev, nid, orig_addr, - alloc_size + offset, alloc_align_mask, &pool); - if (index == -1) { - if (!(attrs & DMA_ATTR_NO_WARN)) - dev_warn_once(dev, - "pswiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n", - alloc_size, mem->nslabs, mem_used(mem)); - return (phys_addr_t)DMA_MAPPING_ERROR; - } - - /* - * Save away the mapping from the original address to the DMA address. - * This is needed when we sync the memory. Then we sync the buffer if - * needed. - */ - for (i = 0; i < nr_slots(alloc_size + offset); i++) - pool->slots[index + i].orig_addr = slot_addr(orig_addr, i); - tlb_addr = slot_addr(pool->start, index) + offset; - page = pfn_to_page(PFN_DOWN(tlb_addr)); - set_bit(PG_pswiotlb, &page->flags); - - /* - * When dir == DMA_FROM_DEVICE we could omit the copy from the orig - * to the tlb buffer, if we knew for sure the device will - * overwrite the entire current content. But we don't. Thus - * unconditional bounce may prevent leaking pswiotlb content (i.e. - * kernel memory) to user-space. 
- */ - pswiotlb_bounce(dev, nid, tlb_addr, mapping_size, DMA_TO_DEVICE, pool); - return tlb_addr; -} -static void pswiotlb_release_slots(struct device *dev, int nid, phys_addr_t tlb_addr, - struct p_io_tlb_pool *mem) -{ - unsigned long flags; - unsigned int offset = pswiotlb_align_offset(dev, tlb_addr); - int index = (tlb_addr - offset - mem->start) >> P_IO_TLB_SHIFT; - int nslots = nr_slots(mem->slots[index].alloc_size + offset); - int aindex = index / mem->area_nslabs; - struct p_io_tlb_area *area = &mem->areas[aindex]; - int count, i; - struct page *page = pfn_to_page(PFN_DOWN(tlb_addr)); - - /* - * Return the buffer to the free list by setting the corresponding - * entries to indicate the number of contiguous entries available. - * While returning the entries to the free list, we merge the entries - * with slots below and above the pool being returned. - */ - WARN_ON(aindex >= mem->nareas); - - spin_lock_irqsave(&area->lock, flags); - if (index + nslots < ALIGN(index + 1, P_IO_TLB_SEGSIZE)) - count = mem->slots[index + nslots].list; - else - count = 0; - - /* - * Step 1: return the slots to the free list, merging the slots with - * superceeding slots - */ - for (i = index + nslots - 1; i >= index; i--) { - mem->slots[i].list = ++count; - mem->slots[i].orig_addr = INVALID_PHYS_ADDR; - mem->slots[i].alloc_size = 0; - } - - /* - * Step 2: merge the returned slots with the preceding slots, if - * available (non zero) - */ - for (i = index - 1; - io_tlb_offset(i) != P_IO_TLB_SEGSIZE - 1 && mem->slots[i].list; - i--) - mem->slots[i].list = ++count; - area->used -= nslots; - if ((mem != &p_io_tlb_default_mem[nid].defpool) && (area->used == 0)) - bitmap_clear(mem->busy_record, aindex, 1); - clear_bit(PG_pswiotlb, &page->flags); - spin_unlock_irqrestore(&area->lock, flags); -} -/* - * tlb_addr is the physical address of the bounce buffer to unmap. - */ -void pswiotlb_tbl_unmap_single(struct device *dev, int nid, phys_addr_t tlb_addr, - size_t offset, size_t mapping_size, enum dma_data_direction dir, - unsigned long attrs, struct p_io_tlb_pool *pool) -{ - struct page *page = pfn_to_page(PFN_DOWN(tlb_addr)); - /* - * First, sync the memory before unmapping the entry - */ - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && - (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) && - (test_bit(PG_pswiotlbsync, &page->flags) == false)) - pswiotlb_bounce(dev, nid, tlb_addr, mapping_size, DMA_FROM_DEVICE, pool); - - tlb_addr -= offset; - pswiotlb_release_slots(dev, nid, tlb_addr, pool); - - clear_bit(PG_pswiotlbsync, &page->flags); -} -void pswiotlb_sync_single_for_device(struct device *dev, int nid, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir, struct p_io_tlb_pool *pool) -{ - if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) - pswiotlb_bounce(dev, nid, tlb_addr, size, DMA_TO_DEVICE, pool); - else - WARN_ON(dir != DMA_FROM_DEVICE); -} - -void pswiotlb_sync_single_for_cpu(struct device *dev, int nid, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir, struct p_io_tlb_pool *pool) -{ - if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) { - struct page *page = pfn_to_page(PFN_DOWN(tlb_addr)); - - pswiotlb_bounce(dev, nid, tlb_addr, size, DMA_FROM_DEVICE, pool); - set_bit(PG_pswiotlbsync, &page->flags); - } else - WARN_ON(dir != DMA_TO_DEVICE); -} -/* - * Create a pswiotlb mapping for the buffer at @paddr, and in case of DMAing - * to the device copy the data into it as well. 
- */ -dma_addr_t pswiotlb_map(struct device *dev, int nid, phys_addr_t paddr, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - phys_addr_t pswiotlb_addr; - dma_addr_t dma_addr; - - trace_pswiotlb_bounced(dev, phys_to_dma(dev, paddr), size); - - pswiotlb_addr = pswiotlb_tbl_map_single(dev, nid, paddr, size, - PAGE_ALIGN(size), PAGE_SIZE - 1, dir, attrs); - if (pswiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR) - return DMA_MAPPING_ERROR; - - dma_addr = phys_to_dma_unencrypted(dev, pswiotlb_addr); - - if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - arch_sync_dma_for_device(pswiotlb_addr, size, dir); - return dma_addr; -} -size_t pswiotlb_max_mapping_size(struct device *dev) -{ - int min_align_mask = dma_get_min_align_mask(dev); - int min_align = 0; - - /* - * pswiotlb_find_slots() skips slots according to - * min align mask. This affects max mapping size. - * Take it into acount here. - */ - if (min_align_mask) - min_align = roundup(min_align_mask, P_IO_TLB_SIZE); - - return ((size_t)P_IO_TLB_SIZE) * P_IO_TLB_SEGSIZE - min_align; -} - -/** - * is_pswiotlb_allocated() - check if the default Phytium software IO TLB is initialized - */ -bool is_pswiotlb_allocated(struct device *dev) -{ - int nid = dev->local_node; - return p_io_tlb_default_mem[nid].nslabs; -} - -bool is_pswiotlb_active(struct device *dev) -{ - int nid = dev->local_node; - struct p_io_tlb_mem *mem = &dev->dma_p_io_tlb_mem[nid]; - - return mem && mem->nslabs; -} - -/** - * default_pswiotlb_base() - get the base address of the default PSWIOTLB - * - * Get the lowest physical address used by the default Phytium software IO TLB pool. - */ -phys_addr_t default_pswiotlb_base(struct device *dev) -{ - int nid = dev->local_node; - - p_io_tlb_default_mem[nid].can_grow = false; - - return p_io_tlb_default_mem[nid].defpool.start; -} - -/** - * default_pswiotlb_limit() - get the address limit of the default PSWIOTLB - * - * Get the highest physical address used by the default Phytium software IO TLB pool. 
- */ -phys_addr_t default_pswiotlb_limit(struct device *dev) -{ - int nid = dev->local_node; - - return p_io_tlb_default_mem[nid].phys_limit; -} -#ifdef CONFIG_DEBUG_FS - -static int p_io_tlb_used_get(void *data, u64 *val) -{ - struct p_io_tlb_mem *mem = data; - - *val = mem_used(mem); - return 0; -} - -static int p_io_tlb_hiwater_get(void *data, u64 *val) -{ - struct p_io_tlb_mem *mem = data; - - *val = atomic_long_read(&mem->used_hiwater); - return 0; -} - -static int p_io_tlb_hiwater_set(void *data, u64 val) -{ - struct p_io_tlb_mem *mem = data; - - /* Only allow setting to zero */ - if (val != 0) - return -EINVAL; - - atomic_long_set(&mem->used_hiwater, val); - return 0; -} - -DEFINE_DEBUGFS_ATTRIBUTE(fops_p_io_tlb_used, p_io_tlb_used_get, NULL, "%llu\n"); -DEFINE_DEBUGFS_ATTRIBUTE(fops_p_io_tlb_hiwater, p_io_tlb_hiwater_get, - p_io_tlb_hiwater_set, "%llu\n"); - -static void pswiotlb_create_debugfs_files(struct p_io_tlb_mem *mem, - int nid, const char *dirname) -{ - atomic_long_set(&mem->total_used, 0); - atomic_long_set(&mem->used_hiwater, 0); - - mem->debugfs = debugfs_create_dir(dirname, pswiotlb_debugfs); - if (!mem->nslabs) - return; - - debugfs_create_ulong("p_io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs); - debugfs_create_file("p_io_tlb_used", 0400, mem->debugfs, mem, - &fops_p_io_tlb_used); - debugfs_create_file("p_io_tlb_used_hiwater", 0600, mem->debugfs, mem, - &fops_p_io_tlb_hiwater); -} - -static int passthroughlist_display_show(struct seq_file *m, void *v) -{ - struct pswiotlb_passthroughlist *bl_entry; - - rcu_read_lock(); - list_for_each_entry_rcu(bl_entry, &passthroughlist, node) { - seq_printf(m, "0x%04x\n", bl_entry->vendor); - } - rcu_read_unlock(); - - return 0; -} - -static int version_display_show(struct seq_file *m, void *v) -{ - seq_puts(m, "pswiotlb version "); - seq_printf(m, "%s\n", PSWIOTLB_VERSION); - - return 0; -} - -static int passthroughlist_add(void *data, u64 val) -{ - struct pswiotlb_passthroughlist *bl_entry; - unsigned long flags; - - bl_entry = kzalloc(sizeof(*bl_entry), GFP_ATOMIC); - if (!bl_entry) - return -ENOMEM; - - bl_entry->vendor = val; - bl_entry->from_grub = false; - - spin_lock_irqsave(&passthroughlist_lock, flags); - list_add_rcu(&bl_entry->node, &passthroughlist); - spin_unlock_irqrestore(&passthroughlist_lock, flags); - - return 0; -} - -static int passthroughlist_del(void *data, u64 val) -{ - struct pswiotlb_passthroughlist *bl_entry; - unsigned long flags; - - rcu_read_lock(); - list_for_each_entry_rcu(bl_entry, &passthroughlist, node) { - if (bl_entry->vendor == val) - goto found; - } - rcu_read_unlock(); - - return 0; -found: - rcu_read_unlock(); - spin_lock_irqsave(&passthroughlist_lock, flags); - list_del_rcu(&bl_entry->node); - spin_unlock_irqrestore(&passthroughlist_lock, flags); - - if (bl_entry->from_grub == false) - kfree(bl_entry); - - return 0; -} - -DEFINE_SHOW_ATTRIBUTE(passthroughlist_display); -DEFINE_SHOW_ATTRIBUTE(version_display); -DEFINE_DEBUGFS_ATTRIBUTE(fops_passthroughlist_add, NULL, - passthroughlist_add, "%llu\n"); -DEFINE_DEBUGFS_ATTRIBUTE(fops_passthroughlist_del, NULL, - passthroughlist_del, "%llu\n"); - -static void pswiotlb_create_passthroughlist_debugfs_files(const char *dirname) -{ - passthroughlist_debugfs = debugfs_create_dir(dirname, pswiotlb_debugfs); - if (!passthroughlist_debugfs) - return; - - debugfs_create_file("show_devices", 0400, passthroughlist_debugfs, NULL, - &passthroughlist_display_fops); - debugfs_create_file("add_device", 0600, passthroughlist_debugfs, NULL, - 
&fops_passthroughlist_add); - debugfs_create_file("del_device", 0600, passthroughlist_debugfs, NULL, - &fops_passthroughlist_del); -} - -static void pswiotlb_create_pswiotlb_debugfs_files(const char *dirname) -{ - int i; - char name[20] = ""; - char passthroughlist_name[50] = ""; - - pswiotlb_debugfs = debugfs_create_dir(dirname, pswiotlb_debugfs); - if (!pswiotlb_debugfs) - return; - - debugfs_create_file("version", 0400, pswiotlb_debugfs, NULL, - &version_display_fops); - - for (i = 0; i < pswiotlb_node_num; i++) { - sprintf(name, "%s-%d", "pswiotlb", i); - pswiotlb_create_debugfs_files(&p_io_tlb_default_mem[i], i, name); - } - sprintf(passthroughlist_name, "%s", "pswiotlb-passthroughlist"); - pswiotlb_create_passthroughlist_debugfs_files(passthroughlist_name); -} - -static int __init pswiotlb_create_default_debugfs(void) -{ - char name[20] = ""; - - if (!pswiotlb_mtimer_alive && !pswiotlb_force_disable) { - pr_info("setup pswiotlb monitor timer service\n"); - timer_setup(&service_timer, pswiotlb_monitor_service, 0); - pswiotlb_mtimer_alive = true; - - /* check pswiotlb every 2 seconds*/ - mod_timer(&service_timer, jiffies + 2 * HZ); - } - - if (!pswiotlb_force_disable) { - sprintf(name, "%s", "pswiotlb"); - pswiotlb_create_pswiotlb_debugfs_files(name); - } - - return 0; -} - -late_initcall(pswiotlb_create_default_debugfs); - -#else /* !CONFIG_DEBUG_FS */ - -static inline void pswiotlb_create_debugfs_files(struct p_io_tlb_mem *mem, - const char *dirname) -{ -} - -#endif /* CONFIG_DEBUG_FS */ -- Gitee From f3b00cec633132f499c5e575cad5a279fb8da39e Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 8 Jul 2024 12:41:00 -0700 Subject: [PATCH 02/99] swiotlb: reduce swiotlb pool lookups ANBZ: #13617 commit 7296f2301a057493e97b07739213c6e864f76891 upstream. With CONFIG_SWIOTLB_DYNAMIC enabled, each round-trip map/unmap pair in the swiotlb results in 6 calls to swiotlb_find_pool(). In multiple places, the pool is found and used in one function, and then must be found again in the next function that is called because only the tlb_addr is passed as an argument. These are the six call sites: dma_direct_map_page: 1. swiotlb_map -> swiotlb_tbl_map_single -> swiotlb_bounce dma_direct_unmap_page: 2. dma_direct_sync_single_for_cpu -> is_swiotlb_buffer 3. dma_direct_sync_single_for_cpu -> swiotlb_sync_single_for_cpu -> swiotlb_bounce 4. is_swiotlb_buffer 5. swiotlb_tbl_unmap_single -> swiotlb_del_transient 6. swiotlb_tbl_unmap_single -> swiotlb_release_slots Reduce the number of calls by finding the pool at a higher level, and passing it as an argument instead of searching again. A key change is for is_swiotlb_buffer() to return a pool pointer instead of a boolean, and then pass this pool pointer to subsequent swiotlb functions. There are 9 occurrences of is_swiotlb_buffer() used to test if a buffer is a swiotlb buffer before calling a swiotlb function. To reduce code duplication in getting the pool pointer and passing it as an argument, introduce inline wrappers for this pattern. The generated code is essentially unchanged. Since is_swiotlb_buffer() no longer returns a boolean, rename some functions to reflect the change: * swiotlb_find_pool() becomes __swiotlb_find_pool() * is_swiotlb_buffer() becomes swiotlb_find_pool() * is_xen_swiotlb_buffer() becomes xen_swiotlb_find_pool() With these changes, a round-trip map/unmap pair requires only 2 pool lookups (listed using the new names and wrappers): dma_direct_unmap_page: 1. dma_direct_sync_single_for_cpu -> swiotlb_find_pool 2. 
swiotlb_tbl_unmap_single -> swiotlb_find_pool These changes come from noticing the inefficiencies in a code review, not from performance measurements. With CONFIG_SWIOTLB_DYNAMIC, __swiotlb_find_pool() is not trivial, and it uses an RCU read lock, so avoiding the redundant calls helps performance in a hot path. When CONFIG_SWIOTLB_DYNAMIC is *not* set, the code size reduction is minimal and the perf benefits are likely negligible, but no harm is done. No functional change is intended. Signed-off-by: Michael Kelley Reviewed-by: Petr Tesarik Signed-off-by: Christoph Hellwig Signed-off-by: Shuai Xue --- drivers/iommu/dma-iommu.c | 11 ++-- drivers/xen/swiotlb-xen.c | 31 +++++++---- include/linux/scatterlist.h | 2 +- include/linux/swiotlb.h | 105 +++++++++++++++++++++--------------- kernel/dma/direct.c | 10 ++-- kernel/dma/direct.h | 9 ++-- kernel/dma/swiotlb.c | 67 ++++++++++++----------- 7 files changed, 129 insertions(+), 106 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 95918eeb5576..7c29463f013c 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1098,8 +1098,7 @@ static void iommu_dma_sync_single_for_cpu(struct device *dev, if (!dev_is_dma_coherent(dev)) arch_sync_dma_for_cpu(phys, size, dir); - if (is_swiotlb_buffer(dev, phys)) - swiotlb_sync_single_for_cpu(dev, phys, size, dir); + swiotlb_sync_single_for_cpu(dev, phys, size, dir); } static void iommu_dma_sync_single_for_device(struct device *dev, @@ -1111,8 +1110,7 @@ static void iommu_dma_sync_single_for_device(struct device *dev, return; phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle); - if (is_swiotlb_buffer(dev, phys)) - swiotlb_sync_single_for_device(dev, phys, size, dir); + swiotlb_sync_single_for_device(dev, phys, size, dir); if (!dev_is_dma_coherent(dev)) arch_sync_dma_for_device(phys, size, dir); @@ -1206,7 +1204,7 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, arch_sync_dma_for_device(phys, size, dir); iova = __iommu_dma_map(dev, phys, size, prot, dma_mask); - if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(dev, phys)) + if (iova == DMA_MAPPING_ERROR) swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); return iova; } @@ -1226,8 +1224,7 @@ static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, __iommu_dma_unmap(dev, dma_handle, size); - if (unlikely(is_swiotlb_buffer(dev, phys))) - swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); + swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); } /* diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 042253460b98..0521049367c8 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -96,7 +96,8 @@ static inline int range_straddles_page_boundary(phys_addr_t p, size_t size) return 0; } -static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr) +static struct io_tlb_pool *xen_swiotlb_find_pool(struct device *dev, + dma_addr_t dma_addr) { unsigned long bfn = XEN_PFN_DOWN(dma_to_phys(dev, dma_addr)); unsigned long xen_pfn = bfn_to_local_pfn(bfn); @@ -107,8 +108,8 @@ static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr) * in our domain. Therefore _only_ check address within our domain. 
*/ if (pfn_valid(PFN_DOWN(paddr))) - return is_swiotlb_buffer(dev, paddr); - return 0; + return swiotlb_find_pool(dev, paddr); + return NULL; } #ifdef CONFIG_X86 @@ -238,8 +239,9 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, * Ensure that the address returned is DMA'ble */ if (unlikely(!dma_capable(dev, dev_addr, size, true))) { - swiotlb_tbl_unmap_single(dev, map, size, dir, - attrs | DMA_ATTR_SKIP_CPU_SYNC); + __swiotlb_tbl_unmap_single(dev, map, size, dir, + attrs | DMA_ATTR_SKIP_CPU_SYNC, + swiotlb_find_pool(dev, map)); return DMA_MAPPING_ERROR; } @@ -265,6 +267,7 @@ static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { phys_addr_t paddr = xen_dma_to_phys(hwdev, dev_addr); + struct io_tlb_pool *pool; BUG_ON(dir == DMA_NONE); @@ -276,8 +279,10 @@ static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, } /* NOTE: We use dev_addr here, not paddr! */ - if (is_xen_swiotlb_buffer(hwdev, dev_addr)) - swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs); + pool = xen_swiotlb_find_pool(hwdev, dev_addr); + if (pool) + __swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, + attrs, pool); } static void @@ -285,6 +290,7 @@ xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir) { phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr); + struct io_tlb_pool *pool; if (!dev_is_dma_coherent(dev)) { if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr)))) @@ -293,8 +299,9 @@ xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr, xen_dma_sync_for_cpu(dev, dma_addr, size, dir); } - if (is_xen_swiotlb_buffer(dev, dma_addr)) - swiotlb_sync_single_for_cpu(dev, paddr, size, dir); + pool = xen_swiotlb_find_pool(dev, dma_addr); + if (pool) + __swiotlb_sync_single_for_cpu(dev, paddr, size, dir, pool); } static void @@ -302,9 +309,11 @@ xen_swiotlb_sync_single_for_device(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir) { phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr); + struct io_tlb_pool *pool; - if (is_xen_swiotlb_buffer(dev, dma_addr)) - swiotlb_sync_single_for_device(dev, paddr, size, dir); + pool = xen_swiotlb_find_pool(dev, dma_addr); + if (pool) + __swiotlb_sync_single_for_device(dev, paddr, size, dir, pool); if (!dev_is_dma_coherent(dev)) { if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr)))) diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index d45529cbd0bd..1bad36e3e4ef 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -332,7 +332,7 @@ static inline void sg_dma_unmark_bus_address(struct scatterlist *sg) * Description: * Returns true if the scatterlist was marked for SWIOTLB bouncing. Not all * elements may have been bounced, so the caller would have to check - * individual SG entries with is_swiotlb_buffer(). + * individual SG entries with swiotlb_find_pool(). 
*/ static inline bool sg_dma_is_swiotlb(struct scatterlist *sg) { diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 05e6f1b3474e..1752e8ae6bd4 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -42,24 +42,6 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask, int (*remap)(void *tlb, unsigned long nslabs)); extern void __init swiotlb_update_mem_attributes(void); -phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys, - size_t mapping_size, - unsigned int alloc_aligned_mask, enum dma_data_direction dir, - unsigned long attrs); - -extern void swiotlb_tbl_unmap_single(struct device *hwdev, - phys_addr_t tlb_addr, - size_t mapping_size, - enum dma_data_direction dir, - unsigned long attrs); - -void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir); -void swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir); -dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys, - size_t size, enum dma_data_direction dir, unsigned long attrs); - #ifdef CONFIG_SWIOTLB /** @@ -140,37 +122,27 @@ struct io_tlb_mem { #endif }; -#ifdef CONFIG_SWIOTLB_DYNAMIC - -struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr); - -#else - -static inline struct io_tlb_pool *swiotlb_find_pool(struct device *dev, - phys_addr_t paddr) -{ - return &dev->dma_io_tlb_mem->defpool; -} - -#endif +struct io_tlb_pool *__swiotlb_find_pool(struct device *dev, phys_addr_t paddr); /** - * is_swiotlb_buffer() - check if a physical address belongs to a swiotlb + * swiotlb_find_pool() - find swiotlb pool to which a physical address belongs * @dev: Device which has mapped the buffer. * @paddr: Physical address within the DMA buffer. * - * Check if @paddr points into a bounce buffer. + * Find the swiotlb pool that @paddr points into. * * Return: - * * %true if @paddr points into a bounce buffer - * * %false otherwise + * * pool address if @paddr points into a bounce buffer + * * NULL if @paddr does not point into a bounce buffer. As such, this function + * can be used to determine if @paddr denotes a swiotlb bounce buffer. */ -static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) +static inline struct io_tlb_pool *swiotlb_find_pool(struct device *dev, + phys_addr_t paddr) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; if (!mem) - return false; + return NULL; #ifdef CONFIG_SWIOTLB_DYNAMIC /* @@ -179,16 +151,19 @@ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) * If a SWIOTLB address is checked on another CPU, then it was * presumably loaded by the device driver from an unspecified private * data structure. Make sure that this load is ordered before reading - * dev->dma_uses_io_tlb here and mem->pools in swiotlb_find_pool(). + * dev->dma_uses_io_tlb here and mem->pools in __swiotlb_find_pool(). * * This barrier pairs with smp_mb() in swiotlb_find_slots(). 
*/ smp_rmb(); - return READ_ONCE(dev->dma_uses_io_tlb) && - swiotlb_find_pool(dev, paddr); + if (READ_ONCE(dev->dma_uses_io_tlb)) + return __swiotlb_find_pool(dev, paddr); #else - return paddr >= mem->defpool.start && paddr < mem->defpool.end; + if (paddr >= mem->defpool.start && paddr < mem->defpool.end) + return &mem->defpool; #endif + + return NULL; } static inline bool is_swiotlb_force_bounce(struct device *dev) @@ -216,9 +191,10 @@ static inline void swiotlb_dev_init(struct device *dev) { } -static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) +static inline struct io_tlb_pool *swiotlb_find_pool(struct device *dev, + phys_addr_t paddr) { - return false; + return NULL; } static inline bool is_swiotlb_force_bounce(struct device *dev) { @@ -257,6 +233,49 @@ static inline phys_addr_t default_swiotlb_limit(void) } #endif /* CONFIG_SWIOTLB */ +phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys, + size_t mapping_size, unsigned int alloc_aligned_mask, + enum dma_data_direction dir, unsigned long attrs); +dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys, + size_t size, enum dma_data_direction dir, unsigned long attrs); + +void __swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, + size_t mapping_size, enum dma_data_direction dir, + unsigned long attrs, struct io_tlb_pool *pool); +static inline void swiotlb_tbl_unmap_single(struct device *dev, + phys_addr_t addr, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + struct io_tlb_pool *pool = swiotlb_find_pool(dev, addr); + + if (unlikely(pool)) + __swiotlb_tbl_unmap_single(dev, addr, size, dir, attrs, pool); +} + +void __swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir, + struct io_tlb_pool *pool); +static inline void swiotlb_sync_single_for_device(struct device *dev, + phys_addr_t addr, size_t size, enum dma_data_direction dir) +{ + struct io_tlb_pool *pool = swiotlb_find_pool(dev, addr); + + if (unlikely(pool)) + __swiotlb_sync_single_for_device(dev, addr, size, dir, pool); +} + +void __swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir, + struct io_tlb_pool *pool); +static inline void swiotlb_sync_single_for_cpu(struct device *dev, + phys_addr_t addr, size_t size, enum dma_data_direction dir) +{ + struct io_tlb_pool *pool = swiotlb_find_pool(dev, addr); + + if (unlikely(pool)) + __swiotlb_sync_single_for_cpu(dev, addr, size, dir, pool); +} + extern void swiotlb_print_info(void); #ifdef CONFIG_DMA_RESTRICTED_POOL diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index fc2d10b2aca6..682babc5e5cc 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -415,9 +415,7 @@ void dma_direct_sync_sg_for_device(struct device *dev, for_each_sg(sgl, sg, nents, i) { phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg)); - if (unlikely(is_swiotlb_buffer(dev, paddr))) - swiotlb_sync_single_for_device(dev, paddr, sg->length, - dir); + swiotlb_sync_single_for_device(dev, paddr, sg->length, dir); if (!dev_is_dma_coherent(dev)) arch_sync_dma_for_device(paddr, sg->length, @@ -441,9 +439,7 @@ void dma_direct_sync_sg_for_cpu(struct device *dev, if (!dev_is_dma_coherent(dev)) arch_sync_dma_for_cpu(paddr, sg->length, dir); - if (unlikely(is_swiotlb_buffer(dev, paddr))) - swiotlb_sync_single_for_cpu(dev, paddr, sg->length, - dir); + swiotlb_sync_single_for_cpu(dev, paddr, sg->length, dir); if (dir == DMA_FROM_DEVICE) 
arch_dma_mark_clean(paddr, sg->length); @@ -611,7 +607,7 @@ size_t dma_direct_max_mapping_size(struct device *dev) bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr) { return !dev_is_dma_coherent(dev) || - is_swiotlb_buffer(dev, dma_to_phys(dev, dma_addr)); + swiotlb_find_pool(dev, dma_to_phys(dev, dma_addr)); } /** diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h index 97ec892ea0b5..8aa7cb69fcbd 100644 --- a/kernel/dma/direct.h +++ b/kernel/dma/direct.h @@ -57,8 +57,7 @@ static inline void dma_direct_sync_single_for_device(struct device *dev, { phys_addr_t paddr = dma_to_phys(dev, addr); - if (unlikely(is_swiotlb_buffer(dev, paddr))) - swiotlb_sync_single_for_device(dev, paddr, size, dir); + swiotlb_sync_single_for_device(dev, paddr, size, dir); if (!dev_is_dma_coherent(dev)) arch_sync_dma_for_device(paddr, size, dir); @@ -74,8 +73,7 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev, arch_sync_dma_for_cpu_all(); } - if (unlikely(is_swiotlb_buffer(dev, paddr))) - swiotlb_sync_single_for_cpu(dev, paddr, size, dir); + swiotlb_sync_single_for_cpu(dev, paddr, size, dir); if (dir == DMA_FROM_DEVICE) arch_dma_mark_clean(paddr, size); @@ -120,8 +118,7 @@ static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) dma_direct_sync_single_for_cpu(dev, addr, size, dir); - if (unlikely(is_swiotlb_buffer(dev, phys))) - swiotlb_tbl_unmap_single(dev, phys, size, dir, + swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); } #endif /* _KERNEL_DMA_DIRECT_H */ diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 2c6cbeab8021..fa15daac73fc 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -763,16 +763,18 @@ static void swiotlb_dyn_free(struct rcu_head *rcu) } /** - * swiotlb_find_pool() - find the IO TLB pool for a physical address + * __swiotlb_find_pool() - find the IO TLB pool for a physical address * @dev: Device which has mapped the DMA buffer. * @paddr: Physical address within the DMA buffer. * * Find the IO TLB memory pool descriptor which contains the given physical - * address, if any. + * address, if any. This function is for use only when the dev is known to + * be using swiotlb. Use swiotlb_find_pool() for the more general case + * when this condition is not met. * * Return: Memory pool which contains @paddr, or %NULL if none. */ -struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr) +struct io_tlb_pool *__swiotlb_find_pool(struct device *dev, phys_addr_t paddr) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; struct io_tlb_pool *pool; @@ -855,9 +857,8 @@ static unsigned int swiotlb_align_offset(struct device *dev, * Bounce: copy the swiotlb buffer from or back to the original dma location */ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size, - enum dma_data_direction dir) + enum dma_data_direction dir, struct io_tlb_pool *mem) { - struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr); int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT; phys_addr_t orig_addr = mem->slots[index].orig_addr; size_t alloc_size = mem->slots[index].alloc_size; @@ -1213,7 +1214,7 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, * that was made by swiotlb_dyn_alloc() on a third CPU (cf. multicopy * atomicity). * - * See also the comment in is_swiotlb_buffer(). + * See also the comment in swiotlb_find_pool(). 
*/ smp_mb(); @@ -1385,13 +1386,13 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, * unconditional bounce may prevent leaking swiotlb content (i.e. * kernel memory) to user-space. */ - swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE); + swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE, pool); return tlb_addr; } -static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr) +static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr, + struct io_tlb_pool *mem) { - struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr); unsigned long flags; unsigned int offset = swiotlb_align_offset(dev, 0, tlb_addr); int index, nslots, aindex; @@ -1455,11 +1456,9 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr) * * Return: %true if @tlb_addr belonged to a transient pool that was released. */ -static bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr) +static bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr, + struct io_tlb_pool *pool) { - struct io_tlb_pool *pool; - - pool = swiotlb_find_pool(dev, tlb_addr); if (!pool->transient) return false; @@ -1471,7 +1470,7 @@ static bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr) #else /* !CONFIG_SWIOTLB_DYNAMIC */ static inline bool swiotlb_del_transient(struct device *dev, - phys_addr_t tlb_addr) + phys_addr_t tlb_addr, struct io_tlb_pool *pool) { return false; } @@ -1481,36 +1480,39 @@ static inline bool swiotlb_del_transient(struct device *dev, /* * tlb_addr is the physical address of the bounce buffer to unmap. */ -void swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr, - size_t mapping_size, enum dma_data_direction dir, - unsigned long attrs) +void __swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr, + size_t mapping_size, enum dma_data_direction dir, + unsigned long attrs, struct io_tlb_pool *pool) { /* * First, sync the memory before unmapping the entry */ if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_FROM_DEVICE); + swiotlb_bounce(dev, tlb_addr, mapping_size, + DMA_FROM_DEVICE, pool); - if (swiotlb_del_transient(dev, tlb_addr)) + if (swiotlb_del_transient(dev, tlb_addr, pool)) return; - swiotlb_release_slots(dev, tlb_addr); + swiotlb_release_slots(dev, tlb_addr, pool); } -void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir) +void __swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir, + struct io_tlb_pool *pool) { if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) - swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE); + swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE, pool); else BUG_ON(dir != DMA_FROM_DEVICE); } -void swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir) +void __swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir, + struct io_tlb_pool *pool) { if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) - swiotlb_bounce(dev, tlb_addr, size, DMA_FROM_DEVICE); + swiotlb_bounce(dev, tlb_addr, size, DMA_FROM_DEVICE, pool); else BUG_ON(dir != DMA_TO_DEVICE); } @@ -1534,8 +1536,9 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size, /* Ensure that the address returned is DMA'ble 
*/ dma_addr = phys_to_dma_unencrypted(dev, swiotlb_addr); if (unlikely(!dma_capable(dev, dma_addr, size, true))) { - swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, dir, - attrs | DMA_ATTR_SKIP_CPU_SYNC); + __swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, dir, + attrs | DMA_ATTR_SKIP_CPU_SYNC, + swiotlb_find_pool(dev, swiotlb_addr)); dev_WARN_ONCE(dev, 1, "swiotlb addr %pad+%zu overflow (mask %llx, bus limit %llx).\n", &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit); @@ -1695,7 +1698,7 @@ struct page *swiotlb_alloc(struct device *dev, size_t size) if (unlikely(!PAGE_ALIGNED(tlb_addr))) { dev_WARN_ONCE(dev, 1, "Cannot allocate pages from non page-aligned swiotlb addr 0x%pa.\n", &tlb_addr); - swiotlb_release_slots(dev, tlb_addr); + swiotlb_release_slots(dev, tlb_addr, pool); return NULL; } @@ -1705,11 +1708,13 @@ struct page *swiotlb_alloc(struct device *dev, size_t size) bool swiotlb_free(struct device *dev, struct page *page, size_t size) { phys_addr_t tlb_addr = page_to_phys(page); + struct io_tlb_pool *pool; - if (!is_swiotlb_buffer(dev, tlb_addr)) + pool = swiotlb_find_pool(dev, tlb_addr); + if (!pool) return false; - swiotlb_release_slots(dev, tlb_addr); + swiotlb_release_slots(dev, tlb_addr, pool); return true; } -- Gitee From 858260840a303c62827d3c445cb20425b9155342 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 9 Jul 2024 23:26:42 +0800 Subject: [PATCH 03/99] iommu/vt-d: Limit max address mask to MAX_AGAW_PFN_WIDTH ANBZ: #13617 commit c420a2b4e8be06f16f3305472bd25a1dd12059ec upstream. Address mask specifies the number of low order bits of the address field that must be masked for the invalidation operation. Since address bits masked start from bit 12, the max address mask should be MAX_AGAW_PFN_WIDTH, as defined in Table 19 ("Invalidate Descriptor Address Mask Encodings") of the spec. Limit the max address mask returned from calculate_psi_aligned_address() to MAX_AGAW_PFN_WIDTH to prevent potential integer overflow in the following code: qi_flush_dev_iotlb(): ... addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1; ... Fixes: c4d27ffaa8eb ("iommu/vt-d: Add cache tag invalidation helpers") Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240709152643.28109-2-baolu.lu@linux.intel.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/intel/cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c index e8418cdd8331..0a3bb38a5289 100644 --- a/drivers/iommu/intel/cache.c +++ b/drivers/iommu/intel/cache.c @@ -245,7 +245,7 @@ static unsigned long calculate_psi_aligned_address(unsigned long start, * shared_bits are all equal in both pfn and end_pfn. */ shared_bits = ~(pfn ^ end_pfn) & ~bitmask; - mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG; + mask = shared_bits ? __ffs(shared_bits) : MAX_AGAW_PFN_WIDTH; } *_pages = aligned_pages; -- Gitee From ad0b682ba54479a897886b67599dc29655e5d251 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 9 Jul 2024 23:26:43 +0800 Subject: [PATCH 04/99] iommu/vt-d: Fix aligned pages in calculate_psi_aligned_address() ANBZ: #13617 commit 0a3f6b3463014b03f6ad10eacc4d1d9af75d54a1 upstream. The helper calculate_psi_aligned_address() is used to convert an arbitrary range into a size-aligned one. The aligned_pages variable is calculated from input start and end, but is not adjusted when the start pfn is not aligned and the mask is adjusted, which results in an incorrect number of pages returned. 
The number of pages is used by qi_flush_piotlb() to flush caches for the first-stage translation. With the wrong number of pages, the cache is not synchronized, leading to inconsistencies in some cases. Fixes: c4d27ffaa8eb ("iommu/vt-d: Add cache tag invalidation helpers") Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240709152643.28109-3-baolu.lu@linux.intel.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/intel/cache.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c index 0a3bb38a5289..44e92638c0cd 100644 --- a/drivers/iommu/intel/cache.c +++ b/drivers/iommu/intel/cache.c @@ -246,6 +246,7 @@ static unsigned long calculate_psi_aligned_address(unsigned long start, */ shared_bits = ~(pfn ^ end_pfn) & ~bitmask; mask = shared_bits ? __ffs(shared_bits) : MAX_AGAW_PFN_WIDTH; + aligned_pages = 1UL << mask; } *_pages = aligned_pages; -- Gitee From 8726145ec04ccfbd2ebdb61fdf2d243e449743bd Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 28 Jun 2024 13:11:11 -0300 Subject: [PATCH 05/99] iommufd: Require drivers to supply the cache_invalidate_user ops ANBZ: #13617 commit a11dda723c6493bb1853bbc61c093377f96e2d47 upstream. If drivers don't do this then iommufd will oops invalidation ioctls with something like: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 Mem abort info: ESR = 0x0000000086000004 EC = 0x21: IABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x04: level 0 translation fault user pgtable: 4k pages, 48-bit VAs, pgdp=0000000101059000 [0000000000000000] pgd=0000000000000000, p4d=0000000000000000 Internal error: Oops: 0000000086000004 [#1] PREEMPT SMP Modules linked in: CPU: 2 PID: 371 Comm: qemu-system-aar Not tainted 6.8.0-rc7-gde77230ac23a #9 Hardware name: linux,dummy-virt (DT) pstate: 81400809 (Nzcv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=-c) pc : 0x0 lr : iommufd_hwpt_invalidate+0xa4/0x204 sp : ffff800080f3bcc0 x29: ffff800080f3bcf0 x28: ffff0000c369b300 x27: 0000000000000000 x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000 x23: 0000000000000000 x22: 00000000c1e334a0 x21: ffff0000c1e334a0 x20: ffff800080f3bd38 x19: ffff800080f3bd58 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000ffff8240d6d8 x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000 x8 : 0000001000000002 x7 : 0000fffeac1ec950 x6 : 0000000000000000 x5 : ffff800080f3bd78 x4 : 0000000000000003 x3 : 0000000000000002 x2 : 0000000000000000 x1 : ffff800080f3bcc8 x0 : ffff0000c6034d80 Call trace: 0x0 iommufd_fops_ioctl+0x154/0x274 __arm64_sys_ioctl+0xac/0xf0 invoke_syscall+0x48/0x110 el0_svc_common.constprop.0+0x40/0xe0 do_el0_svc+0x1c/0x28 el0_svc+0x34/0xb4 el0t_64_sync_handler+0x120/0x12c el0t_64_sync+0x190/0x194 All existing drivers implement this op for nesting, this is mostly a bisection aid. 
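The oops above decodes the failure directly: pc is 0x0 while lr points into
iommufd_hwpt_invalidate(), which is the classic signature of branching through
a NULL function pointer -- here the absent cache_invalidate_user op.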
Fixes: 8c6eabae3807 ("iommufd: Add IOMMU_HWPT_INVALIDATE") Link: https://lore.kernel.org/r/0-v1-e153859bd707+61-iommufd_check_ops_jgg@nvidia.com Reviewed-by: Nicolin Chen Reviewed-by: Yi Liu Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Signed-off-by: Shuai Xue --- drivers/iommu/iommufd/hw_pagetable.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index 5ea1e6e79dff..aefde4443671 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -247,7 +247,8 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx, } hwpt->domain->owner = ops; - if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) { + if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED || + !hwpt->domain->ops->cache_invalidate_user)) { rc = -EINVAL; goto out_abort; } -- Gitee From 9a25d1a931dc4ca086ac9e873b8d56eb5dff29d9 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 10 Jul 2024 16:33:39 +0800 Subject: [PATCH 06/99] iommufd: Remove IOMMUFD_PAGE_RESP_FAILURE ANBZ: #13617 commit 861f96a785149a0062cce6578e0fa7cb95435a7e upstream. The response code of IOMMUFD_PAGE_RESP_FAILURE was defined to be equivalent to the "Response Failure" in PCI spec, section 10.4.2.1. This response code indicates that one or more pages within the associated request group have encountered or caused an unrecoverable error. Therefore, this response disables the PRI at the function. Modern I/O virtualization technologies, like SR-IOV, share PRI among the assignable device units. Therefore, a response failure on one unit might cause I/O failure on other units. Remove this response code so that user space can only respond with SUCCESS or INVALID. The VMM is recommended to emulate a failure response as a PRI reset, or PRI disable and changing to a non-PRI domain. Fixes: c714f15860fc ("iommufd: Add fault and response message definitions") Link: https://lore.kernel.org/r/20240710083341.44617-2-baolu.lu@linux.intel.com Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Signed-off-by: Shuai Xue --- include/uapi/linux/iommufd.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index ede2b464a761..e31385b75d0b 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -765,14 +765,10 @@ struct iommu_hwpt_pgfault { * @IOMMUFD_PAGE_RESP_INVALID: Could not handle this fault, don't retry the * access. This is the "Invalid Request" in PCI * 10.4.2.1. - * @IOMMUFD_PAGE_RESP_FAILURE: General error. Drop all subsequent faults from - * this device if possible. This is the "Response - * Failure" in PCI 10.4.2.1. */ enum iommufd_page_response_code { IOMMUFD_PAGE_RESP_SUCCESS = 0, IOMMUFD_PAGE_RESP_INVALID, - IOMMUFD_PAGE_RESP_FAILURE, }; /** -- Gitee From 6de67ab692a422a7de91ab4a9499f93ba3dc9873 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 10 Jul 2024 16:33:40 +0800 Subject: [PATCH 07/99] iommufd: Add check on user response code ANBZ: #13617 commit d73cf5ff743b5a8de6fa20651baba5bd56ba98a3 upstream. The response code from user space is only allowed to be SUCCESS or INVALID. All other values are treated by the device as a response code of Response Failure according to PCI spec, section 10.4.2.1. This response disables the Page Request Interface for the Function. Add a check in iommufd_fault_fops_write() to avoid invalid response code. 
Fixes: 07838f7fd529 ("iommufd: Add iommufd fault object") Link: https://lore.kernel.org/r/20240710083341.44617-3-baolu.lu@linux.intel.com Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Signed-off-by: Shuai Xue --- drivers/iommu/iommufd/fault.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c index 54d6cd20a673..9c142cefa2d2 100644 --- a/drivers/iommu/iommufd/fault.c +++ b/drivers/iommu/iommufd/fault.c @@ -305,6 +305,16 @@ static ssize_t iommufd_fault_fops_write(struct file *filep, const char __user *b if (rc) break; + static_assert((int)IOMMUFD_PAGE_RESP_SUCCESS == + (int)IOMMU_PAGE_RESP_SUCCESS); + static_assert((int)IOMMUFD_PAGE_RESP_INVALID == + (int)IOMMU_PAGE_RESP_INVALID); + if (response.code != IOMMUFD_PAGE_RESP_SUCCESS && + response.code != IOMMUFD_PAGE_RESP_INVALID) { + rc = -EINVAL; + break; + } + group = xa_erase(&fault->response, response.cookie); if (!group) { rc = -EINVAL; -- Gitee From 416491a48ff3fdd51ed769b8dfa6e52575fdaffe Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 12 Jul 2024 10:58:19 +0800 Subject: [PATCH 08/99] iommufd: Fix error pointer checking ANBZ: #13617 commit 595572aae3d0c3bf295ea759b74b948e7493a9ff upstream. Smatch static checker reported below warning: drivers/iommu/iommufd/fault.c:131 iommufd_device_get_attach_handle() warn: 'handle' is an error pointer or valid Fix it by checking 'handle' with IS_ERR(). Fixes: b7d8833677ba ("iommufd: Fault-capable hwpt attach/detach/replace") Link: https://lore.kernel.org/r/20240712025819.63147-1-baolu.lu@linux.intel.com Reported-by: Dan Carpenter Closes: https://lore.kernel.org/linux-iommu/8bb4f37a-4514-4dea-aabb-7380be303895@stanley.mountain/ Signed-off-by: Lu Baolu Signed-off-by: Jason Gunthorpe Signed-off-by: Shuai Xue --- drivers/iommu/iommufd/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c index 9c142cefa2d2..a643d5c7c535 100644 --- a/drivers/iommu/iommufd/fault.c +++ b/drivers/iommu/iommufd/fault.c @@ -128,7 +128,7 @@ iommufd_device_get_attach_handle(struct iommufd_device *idev) struct iommu_attach_handle *handle; handle = iommu_attach_handle_get(idev->igroup->group, IOMMU_NO_PASID, 0); - if (!handle) + if (IS_ERR(handle)) return NULL; return to_iommufd_handle(handle); -- Gitee From 1a0d6f3b013b5574198006f3b79497ec3271b956 Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Fri, 12 Jul 2024 12:31:32 +0100 Subject: [PATCH 09/99] iommu: Move IOMMU_DIRTY_NO_CLEAR define ANBZ: #13617 commit 9b2bc6b9a264b863a2273c02db5ee9e214e0a526 upstream. Fixes the compile issue when CONFIG_IOMMU_API is not set. 
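The step left implicit: the define previously sat inside the #ifdef
CONFIG_IOMMU_API region of the header, so any translation unit built with the
option disabled saw an undeclared identifier. A minimal sketch of the move
(illustrative layout, not the verbatim header):

  /* before: the constant vanishes when CONFIG_IOMMU_API=n */
  #ifdef CONFIG_IOMMU_API
  #define IOMMU_DIRTY_NO_CLEAR	(1 << 0)
  #endif

  /* after: the constant is always visible */
  #define IOMMU_DIRTY_NO_CLEAR	(1 << 0)
  #ifdef CONFIG_IOMMU_API
  /* ...dirty-tracking API proper... */
  #endif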
Fixes: 4fe88fd8b4ae ("iommu/io-pgtable-arm: Add read_and_clear_dirty() support") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202407121602.HL9ih1it-lkp@intel.com/ Signed-off-by: Shameer Kolothum Reviewed-by: Joao Martins Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240712113132.45100-1-shameerali.kolothum.thodi@huawei.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- include/linux/iommu.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 32d0d7f7cb08..20c7530e4787 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -326,6 +326,9 @@ enum iommu_dev_features { #define IOMMU_PASID_INVALID (-1U) typedef unsigned int ioasid_t; +/* Read but do not clear any dirty bits */ +#define IOMMU_DIRTY_NO_CLEAR (1 << 0) + #ifdef CONFIG_IOMMU_API /** @@ -365,9 +368,6 @@ struct iommu_dirty_bitmap { struct iommu_iotlb_gather *gather; }; -/* Read but do not clear any dirty bits */ -#define IOMMU_DIRTY_NO_CLEAR (1 << 0) - /** * struct iommu_dirty_ops - domain specific dirty tracking operations * @set_dirty_tracking: Enable or Disable dirty tracking on the iommu domain -- Gitee From dc92662ea4e00b56bb0eacf6fb0f941c3a0330f5 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 12 Jul 2024 16:35:25 +0100 Subject: [PATCH 10/99] iommu/tegra-smmu: Pass correct fwnode to iommu_fwspec_init() ANBZ: #13617 commit eac93f4d4ec63423704657895ce9a4ddac7b023b upstream. iommu_fwspec_init() expects to receive the fwnode corresponding to the IOMMU device, not the fwnode corresponding to the client device being probed. Fix arm_smmu_configure() to pass the correct fwnode to iommu_fwspec_init(). Reported-by: Jon Hunter Suggested-by: Robin Murphy Link: https://lore.kernel.org/r/0eec5f84-6b39-43ba-ab2f-914688a5cf45@nvidia.com Signed-off-by: Will Deacon [ Shuai Xue: Conflicts: drivers/iommu/tegra-smmu.c [will: Fixed conflict in drivers/iommu/tegra-smmu.c between fwspec ops removal and fwspec driver fix as per Robin and Jon] ] Signed-off-by: Shuai Xue --- drivers/iommu/tegra-smmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 4365d9936e68..7f633bb5efef 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -837,7 +837,7 @@ static int tegra_smmu_configure(struct tegra_smmu *smmu, struct device *dev, const struct iommu_ops *ops = smmu->iommu.ops; int err; - err = iommu_fwspec_init(dev, of_fwnode_handle(dev->of_node)); + err = iommu_fwspec_init(dev, dev_fwnode(smmu->dev)); if (err < 0) { dev_err(dev, "failed to initialize fwspec: %d\n", err); return err; -- Gitee From 331b432cb1e945b935cd792976ce493bdb35d405 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 11 Jul 2024 21:11:03 -0300 Subject: [PATCH 11/99] iommufd: Put constants for all the uAPI enums ANBZ: #13617 commit 136a8066676e593cd29627219467fc222c8f3b04 upstream. Relying on position in the enum makes it subtly harder when doing merge resolutions or backporting as it is easy to grab a patch and not notice it is a uAPI change with a differently ordered enum. This may become a bigger problem in next cycles when iommu_hwpt_invalidate_data_type and other per-driver enums have patches flowing through different trees. So lets start including constants for all the uAPI enums to make this safer. No functional change. 
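A standalone illustration of the hazard (not from the patch itself): with
implicit enumerators, one mid-list insertion during a merge or backport
silently renumbers every later entry, which for uAPI constants is an ABI
break.

  /* Illustrative only -- not kernel code. */
  enum cmd_before {
          CMD_DESTROY,    /* 0 */
          CMD_ALLOC,      /* 1 */
          CMD_MAP,        /* 2 */
  };

  enum cmd_after {        /* a backport inserts one entry... */
          CMD2_DESTROY,   /* 0 */
          CMD2_ALLOC,     /* 1 */
          CMD2_COPY,      /* 2 */
          CMD2_MAP,       /* 3 -- yet existing userspace still passes 2 */
  };

Spelling out the values, as this patch does, turns such a reordering into a
visible conflict rather than a silent renumbering.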
Link: https://lore.kernel.org/r/0-v1-2c06ec044924+133-iommufd_uapi_const_jgg@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Yi Liu Tested-by: Yi Liu Signed-off-by: Jason Gunthorpe Signed-off-by: Shuai Xue --- include/uapi/linux/iommufd.h | 40 ++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index e31385b75d0b..4dde745cfb7e 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -37,20 +37,20 @@ enum { IOMMUFD_CMD_BASE = 0x80, IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE, - IOMMUFD_CMD_IOAS_ALLOC, - IOMMUFD_CMD_IOAS_ALLOW_IOVAS, - IOMMUFD_CMD_IOAS_COPY, - IOMMUFD_CMD_IOAS_IOVA_RANGES, - IOMMUFD_CMD_IOAS_MAP, - IOMMUFD_CMD_IOAS_UNMAP, - IOMMUFD_CMD_OPTION, - IOMMUFD_CMD_VFIO_IOAS, - IOMMUFD_CMD_HWPT_ALLOC, - IOMMUFD_CMD_GET_HW_INFO, - IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING, - IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP, - IOMMUFD_CMD_HWPT_INVALIDATE, - IOMMUFD_CMD_FAULT_QUEUE_ALLOC, + IOMMUFD_CMD_IOAS_ALLOC = 0x81, + IOMMUFD_CMD_IOAS_ALLOW_IOVAS = 0x82, + IOMMUFD_CMD_IOAS_COPY = 0x83, + IOMMUFD_CMD_IOAS_IOVA_RANGES = 0x84, + IOMMUFD_CMD_IOAS_MAP = 0x85, + IOMMUFD_CMD_IOAS_UNMAP = 0x86, + IOMMUFD_CMD_OPTION = 0x87, + IOMMUFD_CMD_VFIO_IOAS = 0x88, + IOMMUFD_CMD_HWPT_ALLOC = 0x89, + IOMMUFD_CMD_GET_HW_INFO = 0x8a, + IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING = 0x8b, + IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP = 0x8c, + IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d, + IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e, }; /** @@ -400,8 +400,8 @@ struct iommu_hwpt_vtd_s1 { * @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table */ enum iommu_hwpt_data_type { - IOMMU_HWPT_DATA_NONE, - IOMMU_HWPT_DATA_VTD_S1, + IOMMU_HWPT_DATA_NONE = 0, + IOMMU_HWPT_DATA_VTD_S1 = 1, }; /** @@ -491,8 +491,8 @@ struct iommu_hw_info_vtd { * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type */ enum iommu_hw_info_type { - IOMMU_HW_INFO_TYPE_NONE, - IOMMU_HW_INFO_TYPE_INTEL_VTD, + IOMMU_HW_INFO_TYPE_NONE = 0, + IOMMU_HW_INFO_TYPE_INTEL_VTD = 1, }; /** @@ -629,7 +629,7 @@ struct iommu_hwpt_get_dirty_bitmap { * @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1 */ enum iommu_hwpt_invalidate_data_type { - IOMMU_HWPT_INVALIDATE_DATA_VTD_S1, + IOMMU_HWPT_INVALIDATE_DATA_VTD_S1 = 0, }; /** @@ -768,7 +768,7 @@ struct iommu_hwpt_pgfault { */ enum iommufd_page_response_code { IOMMUFD_PAGE_RESP_SUCCESS = 0, - IOMMUFD_PAGE_RESP_INVALID, + IOMMUFD_PAGE_RESP_INVALID = 1, }; /** -- Gitee From d23c5841ec3e7022aa9d60afea39d85ccc72b541 Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Tue, 16 Jul 2024 15:25:45 +0800 Subject: [PATCH 12/99] iommu/amd: Convert comma to semicolon ANBZ: #13617 commit 86c5eac3c4c4a2ee124d202af9a141bd0457ee68 upstream. Replace a comma between expression statements by a semicolon. 
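The change is purely stylistic: the comma operator evaluates each assignment
in turn, so the old code already behaved correctly. A self-contained sketch
(illustrative only):

  /* Both forms leave a == 1 and b == 2. */
  static void comma_vs_semicolon(void)
  {
          int a, b;

          a = 1, b = 2;   /* one statement: comma operator */
          a = 1; b = 2;   /* two statements: the conventional form */
  }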
Fixes: c9b258c6be09 ("iommu/amd: Prepare for generic IO page table framework") Signed-off-by: Chen Ni Reviewed-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20240716072545.968690-1-nichen@iscas.ac.cn Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/amd/io_pgtable.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 9d9a7fde59e7..1074ee25064d 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -588,9 +588,9 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo { struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg); - cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES, - cfg->ias = IOMMU_IN_ADDR_BIT_SIZE, - cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE, + cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES; + cfg->ias = IOMMU_IN_ADDR_BIT_SIZE; + cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE; cfg->tlb = &v1_flush_ops; pgtable->iop.ops.map_pages = iommu_v1_map_pages; -- Gitee From 25b6de4aae434948045f02dc3e6320501bfeddbf Mon Sep 17 00:00:00 2001 From: Ashish Mhetre Date: Wed, 24 Jul 2024 17:31:32 +0000 Subject: [PATCH 13/99] iommu: arm-smmu: Fix Tegra workaround for PAGE_SIZE mappings ANBZ: #13617 commit 726d4f528dbc98a84d9ce3c749dfdada3dcdd5ca upstream. PAGE_SIZE can be 16KB for Tegra which is not supported by MMU-500 on both Tegra194 and Tegra234. Retain only valid granularities from pgsize_bitmap which would either be 4KB or 64KB. Signed-off-by: Ashish Mhetre Link: https://lore.kernel.org/r/20240724173132.219978-1-amhetre@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c index 4b2994b6126d..2fce4f6d4e1b 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c @@ -277,7 +277,7 @@ static int nvidia_smmu_init_context(struct arm_smmu_domain *smmu_domain, */ if (of_device_is_compatible(np, "nvidia,tegra234-smmu") || of_device_is_compatible(np, "nvidia,tegra194-smmu")) { - smmu->pgsize_bitmap = PAGE_SIZE; + smmu->pgsize_bitmap &= GENMASK(PAGE_SHIFT, 0); pgtbl_cfg->pgsize_bitmap = smmu->pgsize_bitmap; } -- Gitee From 38e0f60bcb682d9263308aa80ab632a4b32ef4d4 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 17 Jul 2024 22:01:30 -0700 Subject: [PATCH 14/99] iommufd/device: Fix hwpt at err_unresv in iommufd_device_do_replace() ANBZ: #13617 commit 950aeefb34923fe3c28ade35fe05f24e2c5b1d55 upstream. The rewind routine should remove the reserved iovas added to the new hwpt. 
Fixes: 89db31635c87 ("iommufd: Derive iommufd_hwpt_paging from iommufd_hw_pagetable") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/r/20240718050130.1956804-1-nicolinc@nvidia.com Signed-off-by: Nicolin Chen Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Signed-off-by: Shuai Xue --- drivers/iommu/iommufd/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 59ae9a4ad017..21021a2cecba 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -542,7 +542,7 @@ iommufd_device_do_replace(struct iommufd_device *idev, err_unresv: if (hwpt_is_paging(hwpt)) iommufd_group_remove_reserved_iova(igroup, - to_hwpt_paging(old_hwpt)); + to_hwpt_paging(hwpt)); err_unlock: mutex_unlock(&idev->igroup->lock); return ERR_PTR(rc); -- Gitee From fb25e8539e63c5df5240b1cf1cd1479aad7b1600 Mon Sep 17 00:00:00 2001 From: Barak Biber Date: Thu, 1 Aug 2024 09:26:04 -0300 Subject: [PATCH 15/99] iommu: Restore lost return in iommu_report_device_fault() ANBZ: #13617 commit fca5b78511e98bdff2cdd55c172b23200a7b3404 upstream. When iommu_report_device_fault gets called with a partial fault it is supposed to collect the fault into the group and then return. Instead the return was accidentally deleted, which results in trying to process the fault and an eventual crash. Deleting the return was a typo; put it back. Fixes: 3dfa64aecbaf ("iommu: Make iommu_report_device_fault() return void") Signed-off-by: Barak Biber Signed-off-by: Jason Gunthorpe Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/0-v1-e7153d9c8cee+1c6-iommu_fault_fix_jgg@nvidia.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/io-pgfault.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index cd679c13752e..81e9cc6e3164 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -170,6 +170,7 @@ void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) report_partial_fault(iopf_param, fault); iopf_put_dev_fault_param(iopf_param); /* A request that is not the last does not need to be ack'd */ + return; } /* -- Gitee From 07359cd20592494b7c0efb2476535abd90b06f20 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 2 Aug 2024 18:15:39 +0200 Subject: [PATCH 16/99] x86/apic: Provide apic_printk() helpers ANBZ: #13617 commit d768e3f3e3fb43df2559d4c053b0f68c9649b2c7 upstream. apic_printk() requires the APIC verbosity level and printk level which is tedious and horrible to read. Provide helpers to simplify all of that.
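A usage sketch (the call site below is illustrative, not taken from this patch): the helpers fold the APIC verbosity level and the printk level into one self-describing name:

	/* before: two levels spelled out at every call site */
	apic_printk(APIC_VERBOSE, KERN_INFO "IOAPIC[%d] configured\n", idx);

	/* after */
	apic_pr_verbose("IOAPIC[%d] configured\n", idx);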
Signed-off-by: Thomas Gleixner Tested-by: Qiuxu Zhuo Tested-by: Breno Leitao Link: https://lore.kernel.org/all/20240802155440.527510045@linutronix.de Signed-off-by: Jay Chen --- arch/x86/include/asm/apic.h | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index a2258c894244..ffe5a04e9de4 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -17,6 +17,11 @@ #define ARCH_APICTIMER_STOPS_ON_C3 1 +/* Macros for apic_extnmi which controls external NMI masking */ +#define APIC_EXTNMI_BSP 0 /* Default */ +#define APIC_EXTNMI_ALL 1 +#define APIC_EXTNMI_NONE 2 + /* * Debugging macros */ @@ -24,22 +29,22 @@ #define APIC_VERBOSE 1 #define APIC_DEBUG 2 -/* Macros for apic_extnmi which controls external NMI masking */ -#define APIC_EXTNMI_BSP 0 /* Default */ -#define APIC_EXTNMI_ALL 1 -#define APIC_EXTNMI_NONE 2 - /* - * Define the default level of output to be very little - * This can be turned up by using apic=verbose for more - * information and apic=debug for _lots_ of information. - * apic_verbosity is defined in apic.c + * Define the default level of output to be very little This can be turned + * up by using apic=verbose for more information and apic=debug for _lots_ + * of information. apic_verbosity is defined in apic.c */ -#define apic_printk(v, s, a...) do { \ - if ((v) <= apic_verbosity) \ - printk(s, ##a); \ - } while (0) - +#define apic_printk(v, s, a...) \ +do { \ + if ((v) <= apic_verbosity) \ + printk(s, ##a); \ +} while (0) + +#define apic_pr_verbose(s, a...) apic_printk(APIC_VERBOSE, KERN_INFO s, ##a) +#define apic_pr_debug(s, a...) apic_printk(APIC_DEBUG, KERN_DEBUG s, ##a) +#define apic_pr_debug_cont(s, a...) apic_printk(APIC_DEBUG, KERN_CONT s, ##a) +/* Unconditional debug prints for code which is guarded by apic_verbosity already */ +#define apic_dbg(s, a...) printk(KERN_DEBUG s, ##a) #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) extern void x86_32_probe_apic(void); -- Gitee From d607a0d290eb0b2154e23f28ed728e49b1bef482 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 2 Aug 2024 18:15:45 +0200 Subject: [PATCH 17/99] iommu/vt-d: Cleanup apic_printk() ANBZ: #13617 commit 48855a2c92203389cb215c86ee3d2f2df5aa4024 upstream. Use the new apic_pr_verbose() helper. 
Signed-off-by: Thomas Gleixner Tested-by: Qiuxu Zhuo Tested-by: Breno Leitao Link: https://lore.kernel.org/all/20240802155440.843266805@linutronix.de Signed-off-by: Shuai Xue --- drivers/iommu/intel/irq_remapping.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index e59403767fae..fcf56e262b07 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -1356,12 +1356,11 @@ static void intel_irq_remapping_prepare_irte(struct intel_ir_data *data, case X86_IRQ_ALLOC_TYPE_IOAPIC: /* Set source-id of interrupt request */ set_ioapic_sid(irte, info->devid); - apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: Set IRTE entry (P:%d FPD:%d Dst_Mode:%d Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X Avail:%X Vector:%02X Dest:%08X SID:%04X SQ:%X SVT:%X)\n", - info->devid, irte->present, irte->fpd, - irte->dst_mode, irte->redir_hint, - irte->trigger_mode, irte->dlvry_mode, - irte->avail, irte->vector, irte->dest_id, - irte->sid, irte->sq, irte->svt); + apic_pr_verbose("IOAPIC[%d]: Set IRTE entry (P:%d FPD:%d Dst_Mode:%d Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X Avail:%X Vector:%02X Dest:%08X SID:%04X SQ:%X SVT:%X)\n", + info->devid, irte->present, irte->fpd, irte->dst_mode, + irte->redir_hint, irte->trigger_mode, irte->dlvry_mode, + irte->avail, irte->vector, irte->dest_id, irte->sid, + irte->sq, irte->svt); sub_handle = info->ioapic.pin; break; case X86_IRQ_ALLOC_TYPE_HPET: -- Gitee From cfbe2ea826da69bbb9bde7e633053891e7c284f3 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Mon, 22 Jul 2024 11:54:52 +0000 Subject: [PATCH 18/99] iommu/amd: Add blocked domain support ANBZ: #13617 commit e5e5cc8f73fa677b5b96404b9595d653a9ee0805 upstream. Create global blocked domain with attach device ops. It will clear the DTE so that all DMA from device will be aborted. 
Signed-off-by: Vasant Hegde Reviewed-by: Suravee Suthikulpanit Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240722115452.5976-1-vasant.hegde@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/iommu.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 79c18b397866..aef3871e937e 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2484,6 +2484,29 @@ void amd_iommu_domain_free(struct iommu_domain *dom) protection_domain_free(domain); } +static int blocked_domain_attach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev); + + if (dev_data->domain) + detach_device(dev); + + /* Clear DTE and flush the entry */ + spin_lock(&dev_data->lock); + amd_iommu_dev_update_dte(dev_data, false); + spin_unlock(&dev_data->lock); + + return 0; +} + +static struct iommu_domain blocked_domain = { + .type = IOMMU_DOMAIN_BLOCKED, + .ops = &(const struct iommu_domain_ops) { + .attach_dev = blocked_domain_attach_device, + } +}; + static int amd_iommu_attach_device(struct iommu_domain *dom, struct device *dev) { @@ -2881,6 +2904,7 @@ static int amd_iommu_dev_disable_feature(struct device *dev, const struct iommu_ops amd_iommu_ops = { .capable = amd_iommu_capable, + .blocked_domain = &blocked_domain, .domain_alloc = amd_iommu_domain_alloc, .domain_alloc_user = amd_iommu_domain_alloc_user, .domain_alloc_sva = amd_iommu_domain_alloc_sva, -- Gitee From e3ede18756c4994daaa213406c4469f0b7cd5181 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Thu, 8 Aug 2024 22:06:19 +0800 Subject: [PATCH 19/99] iommu: Remove unused declaration iommu_sva_unbind_gpasid() ANBZ: #13617 commit 92567a5f92bc947fb7aa4351979db1b7b71a554c upstream. Commit 0c9f17877891 ("iommu: Remove guest pasid related interfaces and definitions") removed the implementation but leave declaration. Signed-off-by: Yue Haibing Reviewed-by: Lu Baolu Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240808140619.2498535-1-yuehaibing@huawei.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- include/linux/iommu.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 20c7530e4787..4b7609b9e067 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -812,8 +812,6 @@ extern int iommu_attach_device(struct iommu_domain *domain, struct device *dev); extern void iommu_detach_device(struct iommu_domain *domain, struct device *dev); -extern int iommu_sva_unbind_gpasid(struct iommu_domain *domain, - struct device *dev, ioasid_t pasid); extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); extern struct iommu_domain *iommu_get_dma_domain(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, -- Gitee From 8b0b49481f81d395129c149013acd22706ccb441 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 9 Aug 2024 10:27:14 -0700 Subject: [PATCH 20/99] iommu/arm-smmu: Un-demote unhandled-fault msg ANBZ: #13617 commit 98db56e4900837e4d5d3892b332dca76c8c9f68a upstream. Previously this was dev_err_ratelimited() but it got changed to a ratelimited dev_dbg(). Change it back to dev_err(). 
Fixes: d525b0af0c3b ("iommu/arm-smmu: Pretty-print context fault related regs") Signed-off-by: Rob Clark Reviewed-by: Pranjal Shrivastava Link: https://lore.kernel.org/r/20240809172716.10275-1-robdclark@gmail.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu/arm-smmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index a7f7c69a5627..17d10685282c 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -422,7 +422,7 @@ void arm_smmu_read_context_fault_info(struct arm_smmu_device *smmu, int idx, void arm_smmu_print_context_fault_info(struct arm_smmu_device *smmu, int idx, const struct arm_smmu_context_fault_info *cfi) { - dev_dbg(smmu->dev, + dev_err(smmu->dev, "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n", cfi->fsr, cfi->iova, cfi->fsynr, cfi->cbfrsynra, idx); -- Gitee From 7b47df20c1cb080d5b39bcd376ec76b113b5462b Mon Sep 17 00:00:00 2001 From: Zhang Zekun Date: Thu, 15 Aug 2024 19:15:04 +0800 Subject: [PATCH 21/99] iommu/arm-smmu-v3: Remove the unused empty definition ANBZ: #13617 commit df49881956bab88298e754c73010196b49af6733 upstream. arm_smmu_sva_remove_dev_pasid() has been removed since commit d38c28dbefee ("iommu/arm-smmu-v3: Put the SVA mmu notifier in the smmu_domain"), remain the empty definition untouched in header file, which is used when CONFIG_ARM_SMMU_V3_SVA is not set. So, let's remove the unused definition. Signed-off-by: Zhang Zekun Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240815111504.48810-1-zhangzekun11@huawei.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index b332536a00d4..ee4e013d5fcc 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -860,10 +860,5 @@ static inline void arm_smmu_sva_notifier_synchronize(void) {} #define arm_smmu_sva_domain_alloc NULL -static inline void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, - struct device *dev, - ioasid_t id) -{ -} #endif /* CONFIG_ARM_SMMU_V3_SVA */ #endif /* _ARM_SMMU_V3_H */ -- Gitee From e06aa163c83a2f971135cb01cbb119a6ca5866dd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 15 Aug 2024 14:25:00 +0300 Subject: [PATCH 22/99] iommu/arm-smmu-v3: Fix a NULL vs IS_ERR() check ANBZ: #13617 commit af048ec9c05178206e845a88bfd3cb2884a43da7 upstream. The arm_smmu_domain_alloc() function returns error pointers on error. It doesn't return NULL. Update the error checking to match. 
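The underlying idiom, for context (standard <linux/err.h> usage; alloc_foo() is a hypothetical ERR_PTR()-returning helper): the errno is encoded in the pointer itself, so a NULL check can never fire, and the real error must be propagated with ERR_CAST() instead of being replaced by -ENOMEM:

	#include <linux/err.h>

	struct foo *p = alloc_foo();
	if (IS_ERR(p))
		return ERR_CAST(p);	/* preserve the encoded errno */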
Fixes: 52acd7d8a413 ("iommu/arm-smmu-v3: Add support for domain_alloc_user fn") Signed-off-by: Dan Carpenter Reviewed-by: Shameer Kolothum Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/9208cd0d-8105-40df-93e9-bdcdf0d55eec@stanley.mountain Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 2478178dfda7..f5672d936de2 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3073,8 +3073,8 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags, return ERR_PTR(-EOPNOTSUPP); smmu_domain = arm_smmu_domain_alloc(); - if (!smmu_domain) - return ERR_PTR(-ENOMEM); + if (IS_ERR(smmu_domain)) + return ERR_CAST(smmu_domain); smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED; smmu_domain->domain.ops = arm_smmu_ops.default_domain_ops; -- Gitee From d8c07efe69073f9e8fad5f2a0ce4c55ed37b471a Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 19 Aug 2024 20:00:07 +0800 Subject: [PATCH 23/99] iommufd/selftest: Make dirty_ops static ANBZ: #13617 commit cf1e515c9a40caa8bddb920970d3257bb01c1421 upstream. The sparse tool complains as follows: drivers/iommu/iommufd/selftest.c:277:30: warning: symbol 'dirty_ops' was not declared. Should it be static? This symbol is not used outside of selftest.c, so marks it static. Fixes: 266ce58989ba ("iommufd/selftest: Test IOMMU_HWPT_ALLOC_DIRTY_TRACKING") Link: https://patch.msgid.link/r/20240819120007.3884868-1-ruanjinjie@huawei.com Signed-off-by: Jinjie Ruan Reviewed-by: Yi Liu Signed-off-by: Jason Gunthorpe Signed-off-by: Shuai Xue --- drivers/iommu/iommufd/selftest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index f95e32e29133..222cfc11ebfd 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -273,7 +273,7 @@ static int mock_domain_read_and_clear_dirty(struct iommu_domain *domain, return 0; } -const struct iommu_dirty_ops dirty_ops = { +static const struct iommu_dirty_ops dirty_ops = { .set_dirty_tracking = mock_domain_set_dirty_tracking, .read_and_clear_dirty = mock_domain_read_and_clear_dirty, }; -- Gitee From c46b633d1aa36a440d956d9eeb1ab0eb4a4260e9 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 21 Mar 2024 09:36:39 -0700 Subject: [PATCH 24/99] change alloc_pages name in dma_map_ops to avoid name conflicts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #13617 commit 8a2f11878771da65b8ac135c73b47dae13afbd62 upstream. After redefining alloc_pages, all uses of that name are being replaced. Change the conflicting names to prevent preprocessor from replacing them when it's not intended. 
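A sketch of the conflict (macro simplified; the real definition comes from the allocation-profiling series): a function-like macro expands wherever its name is followed by an opening parenthesis, including a struct-member call, so a dma_map_ops field named alloc_pages is rewritten as well:

	#define alloc_pages(...) alloc_pages_noprof(__VA_ARGS__)	/* simplified */

	/* The preprocessor also rewrites the indirect call ... */
	page = ops->alloc_pages(dev, size, dma_handle, dir, gfp);
	/* ... into ops->alloc_pages_noprof(...), which is not a member.
	 * Renaming the field to alloc_pages_op sidesteps the expansion.
	 */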
Link: https://lkml.kernel.org/r/20240321163705.3067592-18-surenb@google.com Signed-off-by: Suren Baghdasaryan Tested-by: Kees Cook Cc: Alexander Viro Cc: Alex Gaynor Cc: Alice Ryhl Cc: Andreas Hindborg Cc: Benno Lossin Cc: "Björn Roy Baron" Cc: Boqun Feng Cc: Christoph Lameter Cc: Dennis Zhou Cc: Gary Guo Cc: Kent Overstreet Cc: Miguel Ojeda Cc: Pasha Tatashin Cc: Peter Zijlstra Cc: Tejun Heo Cc: Vlastimil Babka Cc: Wedson Almeida Filho Signed-off-by: Andrew Morton Signed-off-by: Shuai Xue --- arch/alpha/kernel/pci_iommu.c | 2 +- arch/mips/jazz/jazzdma.c | 2 +- arch/powerpc/kernel/dma-iommu.c | 2 +- arch/powerpc/platforms/ps3/system-bus.c | 4 ++-- arch/powerpc/platforms/pseries/vio.c | 2 +- arch/x86/kernel/amd_gart_64.c | 2 +- drivers/iommu/dma-iommu.c | 2 +- drivers/parisc/ccio-dma.c | 2 +- drivers/parisc/sba_iommu.c | 2 +- drivers/xen/grant-dma-ops.c | 2 +- drivers/xen/swiotlb-xen.c | 2 +- include/linux/dma-map-ops.h | 2 +- kernel/dma/mapping.c | 4 ++-- 13 files changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index c81183935e97..7fcf3e9b7103 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -929,7 +929,7 @@ const struct dma_map_ops alpha_pci_ops = { .dma_supported = alpha_pci_supported, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, - .alloc_pages = dma_common_alloc_pages, + .alloc_pages_op = dma_common_alloc_pages, .free_pages = dma_common_free_pages, }; EXPORT_SYMBOL(alpha_pci_ops); diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c index eabddb89d221..c97b089b9902 100644 --- a/arch/mips/jazz/jazzdma.c +++ b/arch/mips/jazz/jazzdma.c @@ -617,7 +617,7 @@ const struct dma_map_ops jazz_dma_ops = { .sync_sg_for_device = jazz_dma_sync_sg_for_device, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, - .alloc_pages = dma_common_alloc_pages, + .alloc_pages_op = dma_common_alloc_pages, .free_pages = dma_common_free_pages, }; EXPORT_SYMBOL(jazz_dma_ops); diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 8920862ffd79..f0ae39e77e37 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -216,6 +216,6 @@ const struct dma_map_ops dma_iommu_ops = { .get_required_mask = dma_iommu_get_required_mask, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, - .alloc_pages = dma_common_alloc_pages, + .alloc_pages_op = dma_common_alloc_pages, .free_pages = dma_common_free_pages, }; diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index d6b5f5ecd515..56dc6b29a3e7 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -695,7 +695,7 @@ static const struct dma_map_ops ps3_sb_dma_ops = { .unmap_page = ps3_unmap_page, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, - .alloc_pages = dma_common_alloc_pages, + .alloc_pages_op = dma_common_alloc_pages, .free_pages = dma_common_free_pages, }; @@ -709,7 +709,7 @@ static const struct dma_map_ops ps3_ioc0_dma_ops = { .unmap_page = ps3_unmap_page, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, - .alloc_pages = dma_common_alloc_pages, + .alloc_pages_op = dma_common_alloc_pages, .free_pages = dma_common_free_pages, }; diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index 2dc9cbc4bcd8..0c90fc4c3796 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ 
-611,7 +611,7 @@ static const struct dma_map_ops vio_dma_mapping_ops = { .get_required_mask = dma_iommu_get_required_mask, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, - .alloc_pages = dma_common_alloc_pages, + .alloc_pages_op = dma_common_alloc_pages, .free_pages = dma_common_free_pages, }; diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 56a917df410d..842a0ec5eaa9 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -676,7 +676,7 @@ static const struct dma_map_ops gart_dma_ops = { .get_sgtable = dma_common_get_sgtable, .dma_supported = dma_direct_supported, .get_required_mask = dma_direct_get_required_mask, - .alloc_pages = dma_direct_alloc_pages, + .alloc_pages_op = dma_direct_alloc_pages, .free_pages = dma_direct_free_pages, }; diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 7c29463f013c..367029089976 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1737,7 +1737,7 @@ static const struct dma_map_ops iommu_dma_ops = { .flags = DMA_F_PCI_P2PDMA_SUPPORTED, .alloc = iommu_dma_alloc, .free = iommu_dma_free, - .alloc_pages = dma_common_alloc_pages, + .alloc_pages_op = dma_common_alloc_pages, .free_pages = dma_common_free_pages, .alloc_noncontiguous = iommu_dma_alloc_noncontiguous, .free_noncontiguous = iommu_dma_free_noncontiguous, diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index 9ce0d20a6c58..feef537257d0 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -1022,7 +1022,7 @@ static const struct dma_map_ops ccio_ops = { .map_sg = ccio_map_sg, .unmap_sg = ccio_unmap_sg, .get_sgtable = dma_common_get_sgtable, - .alloc_pages = dma_common_alloc_pages, + .alloc_pages_op = dma_common_alloc_pages, .free_pages = dma_common_free_pages, }; diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index 05e7103d1d40..6f5b919280ff 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -1090,7 +1090,7 @@ static const struct dma_map_ops sba_ops = { .map_sg = sba_map_sg, .unmap_sg = sba_unmap_sg, .get_sgtable = dma_common_get_sgtable, - .alloc_pages = dma_common_alloc_pages, + .alloc_pages_op = dma_common_alloc_pages, .free_pages = dma_common_free_pages, }; diff --git a/drivers/xen/grant-dma-ops.c b/drivers/xen/grant-dma-ops.c index 76f6f26265a3..29257d2639db 100644 --- a/drivers/xen/grant-dma-ops.c +++ b/drivers/xen/grant-dma-ops.c @@ -282,7 +282,7 @@ static int xen_grant_dma_supported(struct device *dev, u64 mask) static const struct dma_map_ops xen_grant_dma_ops = { .alloc = xen_grant_dma_alloc, .free = xen_grant_dma_free, - .alloc_pages = xen_grant_dma_alloc_pages, + .alloc_pages_op = xen_grant_dma_alloc_pages, .free_pages = xen_grant_dma_free_pages, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 0521049367c8..ef56a2500ed6 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -423,7 +423,7 @@ const struct dma_map_ops xen_swiotlb_dma_ops = { .dma_supported = xen_swiotlb_dma_supported, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, - .alloc_pages = dma_common_alloc_pages, + .alloc_pages_op = dma_common_alloc_pages, .free_pages = dma_common_free_pages, .max_mapping_size = swiotlb_max_mapping_size, }; diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index cc848e7380f7..45ecc9c0c53a 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h 
@@ -29,7 +29,7 @@ struct dma_map_ops { unsigned long attrs); void (*free)(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs); - struct page *(*alloc_pages)(struct device *dev, size_t size, + struct page *(*alloc_pages_op)(struct device *dev, size_t size, dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp); void (*free_pages)(struct device *dev, size_t size, struct page *vaddr, diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index f1d9f01b283d..2923f3b2dd2c 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -570,9 +570,9 @@ static struct page *__dma_alloc_pages(struct device *dev, size_t size, size = PAGE_ALIGN(size); if (dma_alloc_direct(dev, ops)) return dma_direct_alloc_pages(dev, size, dma_handle, dir, gfp); - if (!ops->alloc_pages) + if (!ops->alloc_pages_op) return NULL; - return ops->alloc_pages(dev, size, dma_handle, dir, gfp); + return ops->alloc_pages_op(dev, size, dma_handle, dir, gfp); } struct page *dma_alloc_pages(struct device *dev, size_t size, -- Gitee From 619c9fb78ed660156b6067212f6c3d648251fb81 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 7 May 2024 13:20:20 +0200 Subject: [PATCH 25/99] dma: compile-out DMA sync op calls when not used ANBZ: #13617 commit fe7514b149e0a8a6f3031d286e52d40163b0b11a upstream. Some platforms do have DMA, but DMA there is always direct and coherent. Currently, even on such platforms DMA sync operations are compiled and called. Add a new hidden Kconfig symbol, DMA_NEED_SYNC, and set it only when either sync operations are needed or there is DMA ops or swiotlb or DMA debug is enabled. Compile global dma_sync_*() and dma_need_sync() only when it's set, otherwise provide empty inline stubs. The change allows for future optimizations of DMA sync calls depending on runtime conditions. 
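The effect at a driver call site, sketched with hypothetical buffer and length names: the driver code stays identical, only what the call resolves to changes:

	dma_sync_single_for_cpu(dev, buf_dma, len, DMA_FROM_DEVICE);

	/* CONFIG_DMA_NEED_SYNC=n: binds to the empty static inline stub,
	 * so the compiler emits no call at all.
	 * CONFIG_DMA_NEED_SYNC=y: binds to the out-of-line helper in
	 * kernel/dma/mapping.c, as before.
	 */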
Signed-off-by: Alexander Lobakin Signed-off-by: Christoph Hellwig [ Shuai Xue Conflicts: kernel/dma/mapping.c minor: commit 985517480ea3 removed a blank line before dma_sync_single_for_cpu() ] Signed-off-by: Shuai Xue --- include/linux/dma-mapping.h | 62 ++++++++++++++++++++----------------- kernel/dma/Kconfig | 5 +++ kernel/dma/mapping.c | 22 +++++++------ 3 files changed, 50 insertions(+), 39 deletions(-) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 608e8296ba20..14be06861343 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -117,14 +117,6 @@ dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size, enum dma_data_direction dir, unsigned long attrs); void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs); -void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, - enum dma_data_direction dir); -void dma_sync_single_for_device(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir); -void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir); -void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir); void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs); void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, @@ -146,7 +138,6 @@ int dma_set_coherent_mask(struct device *dev, u64 mask); u64 dma_get_required_mask(struct device *dev); size_t dma_max_mapping_size(struct device *dev); size_t dma_opt_mapping_size(struct device *dev); -bool dma_need_sync(struct device *dev, dma_addr_t dma_addr); unsigned long dma_get_merge_boundary(struct device *dev); struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size, enum dma_data_direction dir, gfp_t gfp, unsigned long attrs); @@ -194,22 +185,6 @@ static inline void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { } -static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir) -{ -} -static inline void dma_sync_single_for_device(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ -} -static inline void dma_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sg, int nelems, enum dma_data_direction dir) -{ -} -static inline void dma_sync_sg_for_device(struct device *dev, - struct scatterlist *sg, int nelems, enum dma_data_direction dir) -{ -} static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return -ENOMEM; @@ -272,10 +247,6 @@ static inline size_t dma_opt_mapping_size(struct device *dev) { return 0; } -static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) -{ - return false; -} static inline unsigned long dma_get_merge_boundary(struct device *dev) { return 0; @@ -305,6 +276,39 @@ static inline int dma_mmap_noncontiguous(struct device *dev, } #endif /* CONFIG_HAS_DMA */ +#if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC) +void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir); +void dma_sync_single_for_device(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir); +void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, + int nelems, enum 
dma_data_direction dir); +void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, + int nelems, enum dma_data_direction dir); +bool dma_need_sync(struct device *dev, dma_addr_t dma_addr); +#else /* !CONFIG_HAS_DMA || !CONFIG_DMA_NEED_SYNC */ +static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ +} +static inline void dma_sync_single_for_device(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ +} +static inline void dma_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sg, int nelems, enum dma_data_direction dir) +{ +} +static inline void dma_sync_sg_for_device(struct device *dev, + struct scatterlist *sg, int nelems, enum dma_data_direction dir) +{ +} +static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) +{ + return false; +} +#endif /* !CONFIG_HAS_DMA || !CONFIG_DMA_NEED_SYNC */ + struct page *dma_alloc_pages(struct device *dev, size_t size, dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp); void dma_free_pages(struct device *dev, size_t size, struct page *page, diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index f488997b0717..76b60f7828f6 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -107,6 +107,11 @@ config DMA_BOUNCE_UNALIGNED_KMALLOC bool depends on SWIOTLB +config DMA_NEED_SYNC + def_bool ARCH_HAS_SYNC_DMA_FOR_DEVICE || ARCH_HAS_SYNC_DMA_FOR_CPU || \ + ARCH_HAS_SYNC_DMA_FOR_CPU_ALL || DMA_API_DEBUG || DMA_OPS || \ + SWIOTLB + config DMA_RESTRICTED_POOL bool "DMA Restricted Pool" depends on OF && OF_RESERVED_MEM && SWIOTLB diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 2923f3b2dd2c..d8620aea0a3b 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -329,6 +329,7 @@ void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size, } EXPORT_SYMBOL(dma_unmap_resource); +#ifdef CONFIG_DMA_NEED_SYNC void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { @@ -385,6 +386,17 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, } EXPORT_SYMBOL(dma_sync_sg_for_device); +bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (dma_map_direct(dev, ops)) + return dma_direct_need_sync(dev, dma_addr); + return ops->sync_single_for_cpu || ops->sync_single_for_device; +} +EXPORT_SYMBOL_GPL(dma_need_sync); +#endif /* CONFIG_DMA_NEED_SYNC */ + /* * The whole dma_get_sgtable() idea is fundamentally unsafe - it seems * that the intention is to allow exporting memory allocated via the @@ -819,16 +831,6 @@ size_t dma_opt_mapping_size(struct device *dev) } EXPORT_SYMBOL_GPL(dma_opt_mapping_size); -bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - if (dma_map_direct(dev, ops)) - return dma_direct_need_sync(dev, dma_addr); - return ops->sync_single_for_cpu || ops->sync_single_for_device; -} -EXPORT_SYMBOL_GPL(dma_need_sync); - unsigned long dma_get_merge_boundary(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); -- Gitee From d5f1bb6f3a9ef3955ca5c93bb3b368da2de175d2 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 7 May 2024 13:20:21 +0200 Subject: [PATCH 26/99] dma: avoid redundant calls for sync operations ANBZ: #13617 commit f406c8e4b770ca3b0df84a17349e13f2b6b07d10 upstream. Quite often, devices do not need dma_sync operations on x86_64 at least. 
Indeed, when dev_is_dma_coherent(dev) is true and dev_use_swiotlb(dev) is false, iommu_dma_sync_single_for_cpu() and friends do nothing. However, indirectly calling them when CONFIG_RETPOLINE=y consumes about 10% of cycles on a cpu receiving packets from softirq at ~100Gbit rate. Even if/when CONFIG_RETPOLINE is not set, there is a cost of about 3%. Add dev->need_dma_sync boolean and turn it off during the device initialization (dma_set_mask()) depending on the setup: dev_is_dma_coherent() for the direct DMA, !(sync_single_for_device || sync_single_for_cpu) or the new dma_map_ops flag, %DMA_F_CAN_SKIP_SYNC, advertised for non-NULL DMA ops. Then later, if/when swiotlb is used for the first time, the flag is reset back to on, from swiotlb_tbl_map_single(). On iavf, the UDP trafficgen with XDP_DROP in skb mode test shows +3-5% increase for direct DMA. Suggested-by: Christoph Hellwig # direct DMA shortcut Co-developed-by: Eric Dumazet Signed-off-by: Eric Dumazet Signed-off-by: Alexander Lobakin Signed-off-by: Christoph Hellwig [ Shuai Xue: Conflicts: include/linux/device.h: this commit adds new field to struct device which conflicts with its CK_KABI_RESERVE field kernel/dma/mapping.c: minor, e87d4e442f introduces code with CONFIG_PSWIOTLB ] Signed-off-by: Shuai Xue --- include/linux/device.h | 4 +++ include/linux/dma-map-ops.h | 12 ++++++++ include/linux/dma-mapping.h | 53 +++++++++++++++++++++++++++++++---- kernel/dma/mapping.c | 55 +++++++++++++++++++++++++++++-------- kernel/dma/swiotlb.c | 6 ++++ 5 files changed, 113 insertions(+), 17 deletions(-) diff --git a/include/linux/device.h b/include/linux/device.h index b0b0b3056d4b..783d0c3c0ef0 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -692,6 +692,7 @@ struct device_physical_location { * and optionall (if the coherent mask is large enough) also * for dma allocations. This flag is managed by the dma ops * instance from ->dma_supported. + * @dma_need_sync: The device needs performing DMA sync operations. * * At the lowest level, every device in a Linux system is represented by an * instance of struct device. The device structure contains the information @@ -805,6 +806,9 @@ struct device { #ifdef CONFIG_DMA_OPS_BYPASS bool dma_ops_bypass : 1; #endif +#ifdef CONFIG_DMA_NEED_SYNC + bool dma_need_sync:1; +#endif CK_KABI_RESERVE(1) CK_KABI_RESERVE(2) diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 45ecc9c0c53a..b585dcc21dd8 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -18,8 +18,11 @@ struct iommu_ops; * * DMA_F_PCI_P2PDMA_SUPPORTED: Indicates the dma_map_ops implementation can * handle PCI P2PDMA pages in the map_sg/unmap_sg operation. + * DMA_F_CAN_SKIP_SYNC: DMA sync operations can be skipped if the device is + * coherent and it's not an SWIOTLB buffer. */ #define DMA_F_PCI_P2PDMA_SUPPORTED (1 << 0) +#define DMA_F_CAN_SKIP_SYNC (1 << 1) struct dma_map_ops { unsigned int flags; @@ -280,6 +283,15 @@ static inline bool dev_is_dma_coherent(struct device *dev) } #endif /* CONFIG_ARCH_HAS_DMA_COHERENCE_H */ +static inline void dma_reset_need_sync(struct device *dev) +{ +#ifdef CONFIG_DMA_NEED_SYNC + /* Reset it only once so that the function can be called on hotpath */ + if (unlikely(!dev->dma_need_sync)) + dev->dma_need_sync = true; +#endif +} + /* * Check whether potential kmalloc() buffers are safe for non-coherent DMA. 
*/ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 14be06861343..cf2f0ec2fadc 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -277,16 +277,59 @@ static inline int dma_mmap_noncontiguous(struct device *dev, #endif /* CONFIG_HAS_DMA */ #if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC) -void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, +void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir); -void dma_sync_single_for_device(struct device *dev, dma_addr_t addr, +void __dma_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir); -void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, +void __dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction dir); -void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, +void __dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction dir); -bool dma_need_sync(struct device *dev, dma_addr_t dma_addr); +bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr); + +static inline bool dma_dev_need_sync(const struct device *dev) +{ + /* Always call DMA sync operations when debugging is enabled */ + return dev->dma_need_sync || IS_ENABLED(CONFIG_DMA_API_DEBUG); +} + +static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ + if (dma_dev_need_sync(dev)) + __dma_sync_single_for_cpu(dev, addr, size, dir); +} + +static inline void dma_sync_single_for_device(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ + if (dma_dev_need_sync(dev)) + __dma_sync_single_for_device(dev, addr, size, dir); +} + +static inline void dma_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sg, int nelems, enum dma_data_direction dir) +{ + if (dma_dev_need_sync(dev)) + __dma_sync_sg_for_cpu(dev, sg, nelems, dir); +} + +static inline void dma_sync_sg_for_device(struct device *dev, + struct scatterlist *sg, int nelems, enum dma_data_direction dir) +{ + if (dma_dev_need_sync(dev)) + __dma_sync_sg_for_device(dev, sg, nelems, dir); +} + +static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) +{ + return dma_dev_need_sync(dev) ? 
__dma_need_sync(dev, dma_addr) : false; +} #else /* !CONFIG_HAS_DMA || !CONFIG_DMA_NEED_SYNC */ +static inline bool dma_dev_need_sync(const struct device *dev) +{ + return false; +} static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index d8620aea0a3b..545c51202c92 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -330,7 +330,7 @@ void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size, EXPORT_SYMBOL(dma_unmap_resource); #ifdef CONFIG_DMA_NEED_SYNC -void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, +void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { const struct dma_map_ops *ops = get_dma_ops(dev); @@ -342,9 +342,9 @@ void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, ops->sync_single_for_cpu(dev, addr, size, dir); debug_dma_sync_single_for_cpu(dev, addr, size, dir); } -EXPORT_SYMBOL(dma_sync_single_for_cpu); +EXPORT_SYMBOL(__dma_sync_single_for_cpu); -void dma_sync_single_for_device(struct device *dev, dma_addr_t addr, +void __dma_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { const struct dma_map_ops *ops = get_dma_ops(dev); @@ -356,9 +356,9 @@ void dma_sync_single_for_device(struct device *dev, dma_addr_t addr, ops->sync_single_for_device(dev, addr, size, dir); debug_dma_sync_single_for_device(dev, addr, size, dir); } -EXPORT_SYMBOL(dma_sync_single_for_device); +EXPORT_SYMBOL(__dma_sync_single_for_device); -void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, +void __dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction dir) { const struct dma_map_ops *ops = get_dma_ops(dev); @@ -370,9 +370,9 @@ void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, ops->sync_sg_for_cpu(dev, sg, nelems, dir); debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir); } -EXPORT_SYMBOL(dma_sync_sg_for_cpu); +EXPORT_SYMBOL(__dma_sync_sg_for_cpu); -void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, +void __dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction dir) { const struct dma_map_ops *ops = get_dma_ops(dev); @@ -384,18 +384,47 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, ops->sync_sg_for_device(dev, sg, nelems, dir); debug_dma_sync_sg_for_device(dev, sg, nelems, dir); } -EXPORT_SYMBOL(dma_sync_sg_for_device); +EXPORT_SYMBOL(__dma_sync_sg_for_device); -bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) +bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr) { const struct dma_map_ops *ops = get_dma_ops(dev); if (dma_map_direct(dev, ops)) + /* + * dma_need_sync could've been reset on first SWIOTLB buffer + * mapping, but @dma_addr is not necessary an SWIOTLB buffer. + * In this case, fall back to more granular check. 
+ */ return dma_direct_need_sync(dev, dma_addr); - return ops->sync_single_for_cpu || ops->sync_single_for_device; + return true; } -EXPORT_SYMBOL_GPL(dma_need_sync); -#endif /* CONFIG_DMA_NEED_SYNC */ +EXPORT_SYMBOL_GPL(__dma_need_sync); + +static void dma_setup_need_sync(struct device *dev) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (dma_map_direct(dev, ops) || (ops->flags & DMA_F_CAN_SKIP_SYNC)) + /* + * dma_need_sync will be reset to %true on first SWIOTLB buffer + * mapping, if any. During the device initialization, it's + * enough to check only for the DMA coherence. + */ + dev->dma_need_sync = !dev_is_dma_coherent(dev); + else if (!ops->sync_single_for_device && !ops->sync_single_for_cpu && + !ops->sync_sg_for_device && !ops->sync_sg_for_cpu) + /* + * Synchronization is not possible when none of DMA sync ops + * is set. + */ + dev->dma_need_sync = false; + else + dev->dma_need_sync = true; +} +#else /* !CONFIG_DMA_NEED_SYNC */ +static inline void dma_setup_need_sync(struct device *dev) { } +#endif /* !CONFIG_DMA_NEED_SYNC */ /* * The whole dma_get_sgtable() idea is fundamentally unsafe - it seems @@ -785,6 +814,8 @@ int dma_set_mask(struct device *dev, u64 mask) arch_dma_set_mask(dev, mask); *dev->dma_mask = mask; + dma_setup_need_sync(dev); + return 0; } EXPORT_SYMBOL(dma_set_mask); diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index fa15daac73fc..6e1897b1af78 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -1367,6 +1367,12 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, return (phys_addr_t)DMA_MAPPING_ERROR; } + /* + * If dma_need_sync wasn't set, reset it on first SWIOTLB buffer + * mapping to always sync SWIOTLB buffers. + */ + dma_reset_need_sync(dev); + /* * Save away the mapping from the original address to the DMA address. * This is needed when we sync the memory. Then we sync the buffer if -- Gitee From e0834ac2cde5e4c6a78ebc4c729edb46dcde707c Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 9 May 2024 16:46:16 +0200 Subject: [PATCH 27/99] dma: fix DMA sync for drivers not calling dma_set_mask*() ANBZ: #13617 commit a6016aac5252da9d22a4dc0b98121b0acdf6d2f5 upstream. There are several reports that the DMA sync shortcut broke non-coherent devices. dev->dma_need_sync is false after the &device allocation and if a driver didn't call dma_set_mask*(), it will still be false even if the device is not DMA-coherent and thus needs synchronizing. Due to historical reasons, there's still a lot of drivers not calling it. Invert the boolean, so that the sync will be performed by default and the shortcut will be enabled only when calling dma_set_mask*(). Reported-by: Steven Price Closes: https://lore.kernel.org/lkml/010686f5-3049-46a1-8230-7752a1b433ff@arm.com Reported-by: Marek Szyprowski Closes: https://lore.kernel.org/lkml/46160534-5003-4809-a408-6b3a3f4921e9@samsung.com Fixes: f406c8e4b770. 
("dma: avoid redundant calls for sync operations") Signed-off-by: Alexander Lobakin Signed-off-by: Christoph Hellwig Tested-by: Steven Price Tested-by: Marek Szyprowski Signed-off-by: Shuai Xue --- include/linux/device.h | 4 ++-- include/linux/dma-map-ops.h | 4 ++-- include/linux/dma-mapping.h | 2 +- kernel/dma/mapping.c | 10 +++++----- kernel/dma/swiotlb.c | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/linux/device.h b/include/linux/device.h index 783d0c3c0ef0..addfab9f399d 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -692,7 +692,7 @@ struct device_physical_location { * and optionall (if the coherent mask is large enough) also * for dma allocations. This flag is managed by the dma ops * instance from ->dma_supported. - * @dma_need_sync: The device needs performing DMA sync operations. + * @dma_skip_sync: DMA sync operations can be skipped for coherent buffers. * * At the lowest level, every device in a Linux system is represented by an * instance of struct device. The device structure contains the information @@ -807,7 +807,7 @@ struct device { bool dma_ops_bypass : 1; #endif #ifdef CONFIG_DMA_NEED_SYNC - bool dma_need_sync:1; + bool dma_skip_sync:1; #endif CK_KABI_RESERVE(1) diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index b585dcc21dd8..4fd4130d9435 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -287,8 +287,8 @@ static inline void dma_reset_need_sync(struct device *dev) { #ifdef CONFIG_DMA_NEED_SYNC /* Reset it only once so that the function can be called on hotpath */ - if (unlikely(!dev->dma_need_sync)) - dev->dma_need_sync = true; + if (unlikely(dev->dma_skip_sync)) + dev->dma_skip_sync = false; #endif } diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index cf2f0ec2fadc..337d9b50e8f1 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -290,7 +290,7 @@ bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr); static inline bool dma_dev_need_sync(const struct device *dev) { /* Always call DMA sync operations when debugging is enabled */ - return dev->dma_need_sync || IS_ENABLED(CONFIG_DMA_API_DEBUG); + return !dev->dma_skip_sync || IS_ENABLED(CONFIG_DMA_API_DEBUG); } static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 545c51202c92..1ab3bc13acb3 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -392,7 +392,7 @@ bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr) if (dma_map_direct(dev, ops)) /* - * dma_need_sync could've been reset on first SWIOTLB buffer + * dma_skip_sync could've been reset on first SWIOTLB buffer * mapping, but @dma_addr is not necessary an SWIOTLB buffer. * In this case, fall back to more granular check. */ @@ -407,20 +407,20 @@ static void dma_setup_need_sync(struct device *dev) if (dma_map_direct(dev, ops) || (ops->flags & DMA_F_CAN_SKIP_SYNC)) /* - * dma_need_sync will be reset to %true on first SWIOTLB buffer + * dma_skip_sync will be reset to %false on first SWIOTLB buffer * mapping, if any. During the device initialization, it's * enough to check only for the DMA coherence. */ - dev->dma_need_sync = !dev_is_dma_coherent(dev); + dev->dma_skip_sync = dev_is_dma_coherent(dev); else if (!ops->sync_single_for_device && !ops->sync_single_for_cpu && !ops->sync_sg_for_device && !ops->sync_sg_for_cpu) /* * Synchronization is not possible when none of DMA sync ops * is set. 
*/ - dev->dma_need_sync = false; + dev->dma_skip_sync = true; else - dev->dma_need_sync = true; + dev->dma_skip_sync = false; } #else /* !CONFIG_DMA_NEED_SYNC */ static inline void dma_setup_need_sync(struct device *dev) { } diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 6e1897b1af78..191b1bc8b945 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -1368,7 +1368,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, } /* - * If dma_need_sync wasn't set, reset it on first SWIOTLB buffer + * If dma_skip_sync was set, reset it on first SWIOTLB buffer * mapping to always sync SWIOTLB buffers. */ dma_reset_need_sync(dev); -- Gitee From d4e41531860baca08632876ad749f741624cb8d8 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 7 May 2024 13:20:22 +0200 Subject: [PATCH 28/99] iommu/dma: avoid expensive indirect calls for sync operations ANBZ: #13617 commit ea01fa703150025806a21c960761c821736f4757 upstream. When IOMMU is on, the actual synchronization happens in the same cases as with the direct DMA. Advertise %DMA_F_CAN_SKIP_SYNC in IOMMU DMA to skip sync ops calls (indirect) for non-SWIOTLB buffers. perf profile before the patch: 18.53% [kernel] [k] gq_rx_skb 14.77% [kernel] [k] napi_reuse_skb 8.95% [kernel] [k] skb_release_data 5.42% [kernel] [k] dev_gro_receive 5.37% [kernel] [k] memcpy <*> 5.26% [kernel] [k] iommu_dma_sync_sg_for_cpu 4.78% [kernel] [k] tcp_gro_receive <*> 4.42% [kernel] [k] iommu_dma_sync_sg_for_device 4.12% [kernel] [k] ipv6_gro_receive 3.65% [kernel] [k] gq_pool_get 3.25% [kernel] [k] skb_gro_receive 2.07% [kernel] [k] napi_gro_frags 1.98% [kernel] [k] tcp6_gro_receive 1.27% [kernel] [k] gq_rx_prep_buffers 1.18% [kernel] [k] gq_rx_napi_handler 0.99% [kernel] [k] csum_partial 0.74% [kernel] [k] csum_ipv6_magic 0.72% [kernel] [k] free_pcp_prepare 0.60% [kernel] [k] __napi_poll 0.58% [kernel] [k] net_rx_action 0.56% [kernel] [k] read_tsc <*> 0.50% [kernel] [k] __x86_indirect_thunk_r11 0.45% [kernel] [k] memset After patch, lines with <*> no longer show up, and overall cpu usage looks much better (~60% instead of ~72%): 25.56% [kernel] [k] gq_rx_skb 9.90% [kernel] [k] napi_reuse_skb 7.39% [kernel] [k] dev_gro_receive 6.78% [kernel] [k] memcpy 6.53% [kernel] [k] skb_release_data 6.39% [kernel] [k] tcp_gro_receive 5.71% [kernel] [k] ipv6_gro_receive 4.35% [kernel] [k] napi_gro_frags 4.34% [kernel] [k] skb_gro_receive 3.50% [kernel] [k] gq_pool_get 3.08% [kernel] [k] gq_rx_napi_handler 2.35% [kernel] [k] tcp6_gro_receive 2.06% [kernel] [k] gq_rx_prep_buffers 1.32% [kernel] [k] csum_partial 0.93% [kernel] [k] csum_ipv6_magic 0.65% [kernel] [k] net_rx_action iavf yields +10% of Mpps on Rx. This also unblocks batched allocations of XSk buffers when IOMMU is active. 
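How the flag takes effect (excerpted from the dma_setup_need_sync() logic added earlier in this series, after the rename to dma_skip_sync): advertising DMA_F_CAN_SKIP_SYNC puts IOMMU DMA on the same fast path as direct DMA, so a cache-coherent device skips the indirect sync calls until swiotlb is first used for it:

	if (dma_map_direct(dev, ops) || (ops->flags & DMA_F_CAN_SKIP_SYNC))
		dev->dma_skip_sync = dev_is_dma_coherent(dev);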
Co-developed-by: Eric Dumazet Signed-off-by: Eric Dumazet Acked-by: Robin Murphy Signed-off-by: Alexander Lobakin Signed-off-by: Christoph Hellwig Signed-off-by: Shuai Xue --- drivers/iommu/dma-iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 367029089976..4b2be665a66d 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1734,7 +1734,8 @@ static size_t iommu_dma_max_mapping_size(struct device *dev) } static const struct dma_map_ops iommu_dma_ops = { - .flags = DMA_F_PCI_P2PDMA_SUPPORTED, + .flags = DMA_F_PCI_P2PDMA_SUPPORTED | + DMA_F_CAN_SKIP_SYNC, .alloc = iommu_dma_alloc, .free = iommu_dma_free, .alloc_pages_op = dma_common_alloc_pages, -- Gitee From 5456967339613ab4f6ae955358eb595989c1e821 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 24 Jul 2024 21:04:48 +0300 Subject: [PATCH 29/99] dma-mapping: call ->unmap_page and ->unmap_sg unconditionally ANBZ: #13617 commit f69e342eec008e1bab772d3963c3dd9979293e13 upstream. Almost all instances of the dma_map_ops ->map_page()/map_sg() methods implement ->unmap_page()/unmap_sg() too. The one instance which doesn't is dma_dummy_ops, which is used to fail the DMA mapping, and thus there won't be any calls to ->unmap_page()/unmap_sg(). Remove the checks for ->unmap_page()/unmap_sg() and call them directly to create an interface that is symmetrical to ->map_page()/map_sg(). Signed-off-by: Leon Romanovsky Signed-off-by: Leon Romanovsky Reviewed-by: Robin Murphy Signed-off-by: Christoph Hellwig Signed-off-by: Shuai Xue --- kernel/dma/dummy.c | 21 +++++++++++++++++++++ kernel/dma/mapping.c | 4 ++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/kernel/dma/dummy.c b/kernel/dma/dummy.c index b492d59ac77e..92de80e5b057 100644 --- a/kernel/dma/dummy.c +++ b/kernel/dma/dummy.c @@ -17,6 +17,15 @@ static dma_addr_t dma_dummy_map_page(struct device *dev, struct page *page, { return DMA_MAPPING_ERROR; } +static void dma_dummy_unmap_page(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + /* + * Dummy ops doesn't support map_page, so unmap_page should never be + * called. + */ + WARN_ON_ONCE(true); +} static int dma_dummy_map_sg(struct device *dev, struct scatterlist *sgl, int nelems, enum dma_data_direction dir, @@ -25,6 +34,16 @@ static int dma_dummy_map_sg(struct device *dev, struct scatterlist *sgl, return -EINVAL; } +static void dma_dummy_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir, + unsigned long attrs) +{ + /* + * Dummy ops doesn't support map_sg, so unmap_sg should never be called.
+ */ + WARN_ON_ONCE(true); +} + static int dma_dummy_supported(struct device *hwdev, u64 mask) { return 0; @@ -33,6 +52,8 @@ static int dma_dummy_supported(struct device *hwdev, u64 mask) const struct dma_map_ops dma_dummy_ops = { .mmap = dma_dummy_mmap, .map_page = dma_dummy_map_page, + .unmap_page = dma_dummy_unmap_page, .map_sg = dma_dummy_map_sg, + .unmap_sg = dma_dummy_unmap_sg, .dma_supported = dma_dummy_supported, }; diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 1ab3bc13acb3..7450f8ffc20e 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -177,7 +177,7 @@ void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size, if (dma_map_direct(dev, ops) || arch_dma_unmap_page_direct(dev, addr + size)) dma_direct_unmap_page(dev, addr, size, dir, attrs); - else if (ops->unmap_page) + else ops->unmap_page(dev, addr, size, dir, attrs); debug_dma_unmap_page(dev, addr, size, dir); } @@ -291,7 +291,7 @@ void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, if (dma_map_direct(dev, ops) || arch_dma_unmap_sg_direct(dev, sg, nents)) dma_direct_unmap_sg(dev, sg, nents, dir, attrs); - else if (ops->unmap_sg) + else ops->unmap_sg(dev, sg, nents, dir, attrs); } EXPORT_SYMBOL(dma_unmap_sg_attrs); -- Gitee From 870ec12022449bac38fa6655c196493895e00825 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 24 Jul 2024 21:04:49 +0300 Subject: [PATCH 30/99] dma-mapping: direct calls for dma-iommu ANBZ: #13617 commit b5c58b2fdc427e7958412ecb2de2804a1f7c1572 upstream. Directly call into dma-iommu just like we have been doing for dma-direct for a while. This avoids the indirect call overhead for IOMMU ops and removes the need to have DMA ops entirely for many common configurations. Signed-off-by: Leon Romanovsky Signed-off-by: Leon Romanovsky Acked-by: Greg Kroah-Hartman Acked-by: Robin Murphy Signed-off-by: Christoph Hellwig [ Shuai Xue: Conflicts: drivers/iommu/Kconfig drivers/iommu/intel/Kconfig include/linux/device.h minor: this commit adds filed to struct device which conflicts with CK_KABI_RESERVE field, also fix conlict in Kconfig ] Signed-off-by: Shuai Xue --- MAINTAINERS | 1 + drivers/iommu/Kconfig | 2 +- drivers/iommu/dma-iommu.c | 104 +++++++++---------------- drivers/iommu/intel/Kconfig | 1 - include/linux/device.h | 5 ++ include/linux/dma-map-ops.h | 13 ---- include/linux/iommu-dma.h | 147 ++++++++++++++++++++++++++++++++++++ kernel/dma/Kconfig | 4 + kernel/dma/Makefile | 2 +- kernel/dma/mapping.c | 83 +++++++++++++++++--- 10 files changed, 269 insertions(+), 93 deletions(-) create mode 100644 include/linux/iommu-dma.h diff --git a/MAINTAINERS b/MAINTAINERS index f329b6ac04cb..e9aa3f7a1b28 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10998,6 +10998,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git F: drivers/iommu/dma-iommu.c F: drivers/iommu/dma-iommu.h F: drivers/iommu/iova.c +F: include/linux/iommu-dma.h F: include/linux/iova.h IOMMU SUBSYSTEM diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 2d92a58c76e2..488757e48dd0 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -151,7 +151,7 @@ config OF_IOMMU # IOMMU-agnostic DMA-mapping layer config IOMMU_DMA def_bool ARM64 || IA64 || X86 || LOONGARCH || S390 - select DMA_OPS + select DMA_OPS_HELPERS select IOMMU_API select IOMMU_IOVA select IRQ_MSI_IOMMU diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 4b2be665a66d..b20b734b90f5 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -17,6 +17,7 @@ 
#include #include #include +#include #include #include #include @@ -1057,9 +1058,8 @@ static void *iommu_dma_alloc_remap(struct device *dev, size_t size, return NULL; } -static struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev, - size_t size, enum dma_data_direction dir, gfp_t gfp, - unsigned long attrs) +struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev, size_t size, + enum dma_data_direction dir, gfp_t gfp, unsigned long attrs) { struct dma_sgt_handle *sh; @@ -1075,7 +1075,7 @@ static struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev, return &sh->sgt; } -static void iommu_dma_free_noncontiguous(struct device *dev, size_t size, +void iommu_dma_free_noncontiguous(struct device *dev, size_t size, struct sg_table *sgt, enum dma_data_direction dir) { struct dma_sgt_handle *sh = sgt_handle(sgt); @@ -1086,8 +1086,8 @@ static void iommu_dma_free_noncontiguous(struct device *dev, size_t size, kfree(sh); } -static void iommu_dma_sync_single_for_cpu(struct device *dev, - dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) +void iommu_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir) { phys_addr_t phys; @@ -1101,8 +1101,8 @@ static void iommu_dma_sync_single_for_cpu(struct device *dev, swiotlb_sync_single_for_cpu(dev, phys, size, dir); } -static void iommu_dma_sync_single_for_device(struct device *dev, - dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) +void iommu_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir) { phys_addr_t phys; @@ -1116,9 +1116,8 @@ static void iommu_dma_sync_single_for_device(struct device *dev, arch_sync_dma_for_device(phys, size, dir); } -static void iommu_dma_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nelems, - enum dma_data_direction dir) +void iommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir) { struct scatterlist *sg; int i; @@ -1132,9 +1131,8 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev, arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir); } -static void iommu_dma_sync_sg_for_device(struct device *dev, - struct scatterlist *sgl, int nelems, - enum dma_data_direction dir) +void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir) { struct scatterlist *sg; int i; @@ -1149,9 +1147,9 @@ static void iommu_dma_sync_sg_for_device(struct device *dev, arch_sync_dma_for_device(sg_phys(sg), sg->length, dir); } -static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir, - unsigned long attrs) +dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + unsigned long attrs) { phys_addr_t phys = page_to_phys(page) + offset; bool coherent = dev_is_dma_coherent(dev); @@ -1209,7 +1207,7 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, return iova; } -static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, +void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction dir, unsigned long attrs) { struct iommu_domain *domain = iommu_get_dma_domain(dev); @@ -1362,8 +1360,8 @@ static int iommu_dma_map_sg_swiotlb(struct device *dev, struct scatterlist *sg, * impedance-matching, to 
be able to hand off a suitably-aligned list, * but still preserve the original offsets and sizes for the caller. */ -static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, unsigned long attrs) +int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs) { struct iommu_domain *domain = iommu_get_dma_domain(dev); struct iommu_dma_cookie *cookie = domain->iova_cookie; @@ -1482,8 +1480,8 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, return ret; } -static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, unsigned long attrs) +void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs) { dma_addr_t end = 0, start; struct scatterlist *tmp; @@ -1532,7 +1530,7 @@ static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, __iommu_dma_unmap(dev, start, end - start); } -static dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, +dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, size_t size, enum dma_data_direction dir, unsigned long attrs) { return __iommu_dma_map(dev, phys, size, @@ -1540,7 +1538,7 @@ static dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, dma_get_mask(dev)); } -static void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, +void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir, unsigned long attrs) { __iommu_dma_unmap(dev, handle, size); @@ -1577,7 +1575,7 @@ static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr) dma_free_contiguous(dev, page, alloc_size); } -static void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr, +void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle, unsigned long attrs) { __iommu_dma_unmap(dev, handle, size); @@ -1621,8 +1619,8 @@ static void *iommu_dma_alloc_pages(struct device *dev, size_t size, return NULL; } -static void *iommu_dma_alloc(struct device *dev, size_t size, - dma_addr_t *handle, gfp_t gfp, unsigned long attrs) +void *iommu_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, + gfp_t gfp, unsigned long attrs) { bool coherent = dev_is_dma_coherent(dev); int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs); @@ -1655,7 +1653,7 @@ static void *iommu_dma_alloc(struct device *dev, size_t size, return cpu_addr; } -static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, +int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) { @@ -1686,7 +1684,7 @@ static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, vma->vm_page_prot); } -static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt, +int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) { @@ -1713,19 +1711,19 @@ static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt, return ret; } -static unsigned long iommu_dma_get_merge_boundary(struct device *dev) +unsigned long iommu_dma_get_merge_boundary(struct device *dev) { struct iommu_domain *domain = iommu_get_dma_domain(dev); return (1UL << __ffs(domain->pgsize_bitmap)) - 1; } -static size_t 
iommu_dma_opt_mapping_size(void) +size_t iommu_dma_opt_mapping_size(void) { return iova_rcache_range(); } -static size_t iommu_dma_max_mapping_size(struct device *dev) +size_t iommu_dma_max_mapping_size(struct device *dev) { if (dev_is_untrusted(dev)) return swiotlb_max_mapping_size(dev); @@ -1733,32 +1731,6 @@ static size_t iommu_dma_max_mapping_size(struct device *dev) return SIZE_MAX; } -static const struct dma_map_ops iommu_dma_ops = { - .flags = DMA_F_PCI_P2PDMA_SUPPORTED | - DMA_F_CAN_SKIP_SYNC, - .alloc = iommu_dma_alloc, - .free = iommu_dma_free, - .alloc_pages_op = dma_common_alloc_pages, - .free_pages = dma_common_free_pages, - .alloc_noncontiguous = iommu_dma_alloc_noncontiguous, - .free_noncontiguous = iommu_dma_free_noncontiguous, - .mmap = iommu_dma_mmap, - .get_sgtable = iommu_dma_get_sgtable, - .map_page = iommu_dma_map_page, - .unmap_page = iommu_dma_unmap_page, - .map_sg = iommu_dma_map_sg, - .unmap_sg = iommu_dma_unmap_sg, - .sync_single_for_cpu = iommu_dma_sync_single_for_cpu, - .sync_single_for_device = iommu_dma_sync_single_for_device, - .sync_sg_for_cpu = iommu_dma_sync_sg_for_cpu, - .sync_sg_for_device = iommu_dma_sync_sg_for_device, - .map_resource = iommu_dma_map_resource, - .unmap_resource = iommu_dma_unmap_resource, - .get_merge_boundary = iommu_dma_get_merge_boundary, - .opt_mapping_size = iommu_dma_opt_mapping_size, - .max_mapping_size = iommu_dma_max_mapping_size, -}; - void iommu_setup_dma_ops(struct device *dev) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); @@ -1766,19 +1738,15 @@ void iommu_setup_dma_ops(struct device *dev) if (dev_is_pci(dev)) dev->iommu->pci_32bit_workaround = !iommu_dma_forcedac; - if (iommu_is_dma_domain(domain)) { - if (iommu_dma_init_domain(domain, dev)) - goto out_err; - dev->dma_ops = &iommu_dma_ops; - } else if (dev->dma_ops == &iommu_dma_ops) { - /* Clean up if we've switched *from* a DMA domain */ - dev->dma_ops = NULL; - } + dev->dma_iommu = iommu_is_dma_domain(domain); + if (dev->dma_iommu && iommu_dma_init_domain(domain, dev)) + goto out_err; return; out_err: - pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n", - dev_name(dev)); + pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n", + dev_name(dev)); + dev->dma_iommu = false; } static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index 0a6b9eda12a3..e329d264e56d 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -12,7 +12,6 @@ config DMAR_DEBUG config INTEL_IOMMU bool "Support for Intel IOMMU using DMA Remapping Devices" depends on PCI_MSI && ACPI && (X86 || IA64) - select DMA_OPS select IOMMU_API select IOMMU_IOVA select IOMMUFD_DRIVER if IOMMUFD diff --git a/include/linux/device.h b/include/linux/device.h index addfab9f399d..8c009a0be579 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -693,6 +693,8 @@ struct device_physical_location { * for dma allocations. This flag is managed by the dma ops * instance from ->dma_supported. * @dma_skip_sync: DMA sync operations can be skipped for coherent buffers. + * @dma_iommu: Device is using default IOMMU implementation for DMA and + * doesn't rely on dma_ops structure. * * At the lowest level, every device in a Linux system is represented by an * instance of struct device. 
The device structure contains the information @@ -809,6 +811,9 @@ struct device { #ifdef CONFIG_DMA_NEED_SYNC bool dma_skip_sync:1; #endif +#ifdef CONFIG_IOMMU_DMA + bool dma_iommu:1; +#endif CK_KABI_RESERVE(1) CK_KABI_RESERVE(2) diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 4fd4130d9435..780d54a089e8 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -13,20 +13,7 @@ struct cma; struct iommu_ops; -/* - * Values for struct dma_map_ops.flags: - * - * DMA_F_PCI_P2PDMA_SUPPORTED: Indicates the dma_map_ops implementation can - * handle PCI P2PDMA pages in the map_sg/unmap_sg operation. - * DMA_F_CAN_SKIP_SYNC: DMA sync operations can be skipped if the device is - * coherent and it's not an SWIOTLB buffer. - */ -#define DMA_F_PCI_P2PDMA_SUPPORTED (1 << 0) -#define DMA_F_CAN_SKIP_SYNC (1 << 1) - struct dma_map_ops { - unsigned int flags; - void *(*alloc)(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); diff --git a/include/linux/iommu-dma.h b/include/linux/iommu-dma.h new file mode 100644 index 000000000000..d30a58bf00fd --- /dev/null +++ b/include/linux/iommu-dma.h @@ -0,0 +1,147 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved + * + * DMA operations that map physical memory through IOMMU. + */ +#ifndef _LINUX_IOMMU_DMA_H +#define _LINUX_IOMMU_DMA_H + +#include + +#ifdef CONFIG_IOMMU_DMA +dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + unsigned long attrs); +void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir, unsigned long attrs); +int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs); +void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs); +void *iommu_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, + gfp_t gfp, unsigned long attrs); +int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); +int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); +unsigned long iommu_dma_get_merge_boundary(struct device *dev); +size_t iommu_dma_opt_mapping_size(void); +size_t iommu_dma_max_mapping_size(struct device *dev); +void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, unsigned long attrs); +dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, + size_t size, enum dma_data_direction dir, unsigned long attrs); +void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, unsigned long attrs); +struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev, size_t size, + enum dma_data_direction dir, gfp_t gfp, unsigned long attrs); +void iommu_dma_free_noncontiguous(struct device *dev, size_t size, + struct sg_table *sgt, enum dma_data_direction dir); +void iommu_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir); +void iommu_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir); +void iommu_dma_sync_sg_for_cpu(struct device 
*dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir); +void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir); +#else +static inline dma_addr_t iommu_dma_map_page(struct device *dev, + struct page *page, unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + return DMA_MAPPING_ERROR; +} +static inline void iommu_dma_unmap_page(struct device *dev, + dma_addr_t dma_handle, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ +} +static inline int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + return -EINVAL; +} +static inline void iommu_dma_unmap_sg(struct device *dev, + struct scatterlist *sg, int nents, enum dma_data_direction dir, + unsigned long attrs) +{ +} +static inline void *iommu_dma_alloc(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp, unsigned long attrs) +{ + return NULL; +} +static inline int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ + return -EINVAL; +} +static inline int iommu_dma_get_sgtable(struct device *dev, + struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, + size_t size, unsigned long attrs) +{ + return -EINVAL; +} +static inline unsigned long iommu_dma_get_merge_boundary(struct device *dev) +{ + return 0; +} +static inline size_t iommu_dma_opt_mapping_size(void) +{ + return 0; +} +static inline size_t iommu_dma_max_mapping_size(struct device *dev) +{ + return 0; +} +static inline void iommu_dma_free(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t handle, unsigned long attrs) +{ +} +static inline dma_addr_t iommu_dma_map_resource(struct device *dev, + phys_addr_t phys, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + return DMA_MAPPING_ERROR; +} +static inline void iommu_dma_unmap_resource(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ +} +static inline struct sg_table * +iommu_dma_alloc_noncontiguous(struct device *dev, size_t size, + enum dma_data_direction dir, gfp_t gfp, unsigned long attrs) +{ + return NULL; +} +static inline void iommu_dma_free_noncontiguous(struct device *dev, size_t size, + struct sg_table *sgt, enum dma_data_direction dir) +{ +} +static inline void iommu_dma_sync_single_for_cpu(struct device *dev, + dma_addr_t dma_handle, size_t size, + enum dma_data_direction dir) +{ +} +static inline void iommu_dma_sync_single_for_device(struct device *dev, + dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) +{ +} +static inline void iommu_dma_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ +} +static inline void iommu_dma_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ +} +#endif /* CONFIG_IOMMU_DMA */ +#endif /* _LINUX_IOMMU_DMA_H */ diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 76b60f7828f6..43872a7588f9 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -8,8 +8,12 @@ config HAS_DMA depends on !NO_DMA default y +config DMA_OPS_HELPERS + bool + config DMA_OPS depends on HAS_DMA + select DMA_OPS_HELPERS bool # diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile index 21926e46ef4f..2e6e933cf7f3 100644 --- a/kernel/dma/Makefile +++ b/kernel/dma/Makefile @@ -1,7 
+1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_HAS_DMA) += mapping.o direct.o -obj-$(CONFIG_DMA_OPS) += ops_helpers.o +obj-$(CONFIG_DMA_OPS_HELPERS) += ops_helpers.o obj-$(CONFIG_DMA_OPS) += dummy.o obj-$(CONFIG_DMA_CMA) += contiguous.o obj-$(CONFIG_DMA_DECLARE_COHERENT) += coherent.o diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 7450f8ffc20e..ca43011b2412 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -113,11 +114,27 @@ void *dmam_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, } EXPORT_SYMBOL(dmam_alloc_attrs); +#ifdef CONFIG_IOMMU_DMA +static bool use_dma_iommu(struct device *dev) +{ + return dev->dma_iommu; +} +#else +static bool use_dma_iommu(struct device *dev) +{ + return false; +} +#endif + static bool dma_go_direct(struct device *dev, dma_addr_t mask, const struct dma_map_ops *ops) { + if (use_dma_iommu(dev)) + return false; + if (likely(!ops)) return true; + #ifdef CONFIG_DMA_OPS_BYPASS if (dev->dma_ops_bypass) return min_not_zero(mask, dev->bus_dma_limit) >= @@ -159,6 +176,8 @@ dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, if (dma_map_direct(dev, ops) || arch_dma_map_page_direct(dev, page_to_phys(page) + offset + size)) addr = dma_direct_map_page(dev, page, offset, size, dir, attrs); + else if (use_dma_iommu(dev)) + addr = iommu_dma_map_page(dev, page, offset, size, dir, attrs); else addr = ops->map_page(dev, page, offset, size, dir, attrs); kmsan_handle_dma(page, offset, size, dir); @@ -177,6 +196,8 @@ void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size, if (dma_map_direct(dev, ops) || arch_dma_unmap_page_direct(dev, addr + size)) dma_direct_unmap_page(dev, addr, size, dir, attrs); + else if (use_dma_iommu(dev)) + iommu_dma_unmap_page(dev, addr, size, dir, attrs); else ops->unmap_page(dev, addr, size, dir, attrs); debug_dma_unmap_page(dev, addr, size, dir); @@ -197,6 +218,8 @@ static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, if (dma_map_direct(dev, ops) || arch_dma_map_sg_direct(dev, sg, nents)) ents = dma_direct_map_sg(dev, sg, nents, dir, attrs); + else if (use_dma_iommu(dev)) + ents = iommu_dma_map_sg(dev, sg, nents, dir, attrs); else ents = ops->map_sg(dev, sg, nents, dir, attrs); @@ -291,7 +314,9 @@ void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, if (dma_map_direct(dev, ops) || arch_dma_unmap_sg_direct(dev, sg, nents)) dma_direct_unmap_sg(dev, sg, nents, dir, attrs); - else + else if (use_dma_iommu(dev)) + iommu_dma_unmap_sg(dev, sg, nents, dir, attrs); + else if (ops->unmap_sg) ops->unmap_sg(dev, sg, nents, dir, attrs); } EXPORT_SYMBOL(dma_unmap_sg_attrs); @@ -309,6 +334,8 @@ dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr, if (dma_map_direct(dev, ops)) addr = dma_direct_map_resource(dev, phys_addr, size, dir, attrs); + else if (use_dma_iommu(dev)) + addr = iommu_dma_map_resource(dev, phys_addr, size, dir, attrs); else if (ops->map_resource) addr = ops->map_resource(dev, phys_addr, size, dir, attrs); @@ -323,7 +350,11 @@ void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); - if (!dma_map_direct(dev, ops) && ops->unmap_resource) + if (dma_map_direct(dev, ops)) + ; /* nothing to do: uncached and no swiotlb */ + else if (use_dma_iommu(dev)) + iommu_dma_unmap_resource(dev, addr, size, dir, attrs); + else if 
(ops->unmap_resource) ops->unmap_resource(dev, addr, size, dir, attrs); debug_dma_unmap_resource(dev, addr, size, dir); } @@ -338,6 +369,8 @@ void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, BUG_ON(!valid_dma_direction(dir)); if (dma_map_direct(dev, ops)) dma_direct_sync_single_for_cpu(dev, addr, size, dir); + else if (use_dma_iommu(dev)) + iommu_dma_sync_single_for_cpu(dev, addr, size, dir); else if (ops->sync_single_for_cpu) ops->sync_single_for_cpu(dev, addr, size, dir); debug_dma_sync_single_for_cpu(dev, addr, size, dir); @@ -352,6 +385,8 @@ void __dma_sync_single_for_device(struct device *dev, dma_addr_t addr, BUG_ON(!valid_dma_direction(dir)); if (dma_map_direct(dev, ops)) dma_direct_sync_single_for_device(dev, addr, size, dir); + else if (use_dma_iommu(dev)) + iommu_dma_sync_single_for_device(dev, addr, size, dir); else if (ops->sync_single_for_device) ops->sync_single_for_device(dev, addr, size, dir); debug_dma_sync_single_for_device(dev, addr, size, dir); @@ -366,6 +401,8 @@ void __dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, BUG_ON(!valid_dma_direction(dir)); if (dma_map_direct(dev, ops)) dma_direct_sync_sg_for_cpu(dev, sg, nelems, dir); + else if (use_dma_iommu(dev)) + iommu_dma_sync_sg_for_cpu(dev, sg, nelems, dir); else if (ops->sync_sg_for_cpu) ops->sync_sg_for_cpu(dev, sg, nelems, dir); debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir); @@ -380,6 +417,8 @@ void __dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, BUG_ON(!valid_dma_direction(dir)); if (dma_map_direct(dev, ops)) dma_direct_sync_sg_for_device(dev, sg, nelems, dir); + else if (use_dma_iommu(dev)) + iommu_dma_sync_sg_for_device(dev, sg, nelems, dir); else if (ops->sync_sg_for_device) ops->sync_sg_for_device(dev, sg, nelems, dir); debug_dma_sync_sg_for_device(dev, sg, nelems, dir); @@ -405,7 +444,7 @@ static void dma_setup_need_sync(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); - if (dma_map_direct(dev, ops) || (ops->flags & DMA_F_CAN_SKIP_SYNC)) + if (dma_map_direct(dev, ops) || use_dma_iommu(dev)) /* * dma_skip_sync will be reset to %false on first SWIOTLB buffer * mapping, if any. 
During the device initialization, it's @@ -446,6 +485,9 @@ int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, if (dma_alloc_direct(dev, ops)) return dma_direct_get_sgtable(dev, sgt, cpu_addr, dma_addr, size, attrs); + if (use_dma_iommu(dev)) + return iommu_dma_get_sgtable(dev, sgt, cpu_addr, dma_addr, + size, attrs); if (!ops->get_sgtable) return -ENXIO; return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size, attrs); @@ -482,6 +524,8 @@ bool dma_can_mmap(struct device *dev) if (dma_alloc_direct(dev, ops)) return dma_direct_can_mmap(dev); + if (use_dma_iommu(dev)) + return true; return ops->mmap != NULL; } EXPORT_SYMBOL_GPL(dma_can_mmap); @@ -508,6 +552,9 @@ int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, if (dma_alloc_direct(dev, ops)) return dma_direct_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); + if (use_dma_iommu(dev)) + return iommu_dma_mmap(dev, vma, cpu_addr, dma_addr, size, + attrs); if (!ops->mmap) return -ENXIO; return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); @@ -559,6 +606,8 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, if (dma_alloc_direct(dev, ops)) cpu_addr = dma_direct_alloc(dev, size, dma_handle, flag, attrs); + else if (use_dma_iommu(dev)) + cpu_addr = iommu_dma_alloc(dev, size, dma_handle, flag, attrs); else if (ops->alloc) cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs); else @@ -591,6 +640,8 @@ void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); if (dma_alloc_direct(dev, ops)) dma_direct_free(dev, size, cpu_addr, dma_handle, attrs); + else if (use_dma_iommu(dev)) + iommu_dma_free(dev, size, cpu_addr, dma_handle, attrs); else if (ops->free) ops->free(dev, size, cpu_addr, dma_handle, attrs); } @@ -611,6 +662,8 @@ static struct page *__dma_alloc_pages(struct device *dev, size_t size, size = PAGE_ALIGN(size); if (dma_alloc_direct(dev, ops)) return dma_direct_alloc_pages(dev, size, dma_handle, dir, gfp); + if (use_dma_iommu(dev)) + return dma_common_alloc_pages(dev, size, dma_handle, dir, gfp); if (!ops->alloc_pages_op) return NULL; return ops->alloc_pages_op(dev, size, dma_handle, dir, gfp); @@ -635,6 +688,8 @@ static void __dma_free_pages(struct device *dev, size_t size, struct page *page, size = PAGE_ALIGN(size); if (dma_alloc_direct(dev, ops)) dma_direct_free_pages(dev, size, page, dma_handle, dir); + else if (use_dma_iommu(dev)) + dma_common_free_pages(dev, size, page, dma_handle, dir); else if (ops->free_pages) ops->free_pages(dev, size, page, dma_handle, dir); } @@ -697,6 +752,8 @@ struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size, if (ops && ops->alloc_noncontiguous) sgt = ops->alloc_noncontiguous(dev, size, dir, gfp, attrs); + else if (use_dma_iommu(dev)) + sgt = iommu_dma_alloc_noncontiguous(dev, size, dir, gfp, attrs); else sgt = alloc_single_sgt(dev, size, dir, gfp); @@ -725,6 +782,8 @@ void dma_free_noncontiguous(struct device *dev, size_t size, debug_dma_unmap_sg(dev, sgt->sgl, sgt->orig_nents, dir); if (ops && ops->free_noncontiguous) ops->free_noncontiguous(dev, size, sgt, dir); + else if (use_dma_iommu(dev)) + iommu_dma_free_noncontiguous(dev, size, sgt, dir); else free_single_sgt(dev, size, sgt, dir); } @@ -772,6 +831,8 @@ static int dma_supported(struct device *dev, u64 mask) { const struct dma_map_ops *ops = get_dma_ops(dev); + if (WARN_ON(ops && use_dma_iommu(dev))) + return false; /* * ->dma_supported sets the bypass flag, so we must always call * into the 
method here unless the device is truly direct mapped. @@ -787,17 +848,14 @@ bool dma_pci_p2pdma_supported(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); - /* if ops is not set, dma direct will be used which supports P2PDMA */ - if (!ops) - return true; - /* * Note: dma_ops_bypass is not checked here because P2PDMA should * not be used with dma mapping ops that do not have support even * if the specific device is bypassing them. */ - return ops->flags & DMA_F_PCI_P2PDMA_SUPPORTED; + /* if ops is not set, dma direct and default IOMMU support P2PDMA */ + return !ops; } EXPORT_SYMBOL_GPL(dma_pci_p2pdma_supported); @@ -843,6 +901,8 @@ size_t dma_max_mapping_size(struct device *dev) if (dma_map_direct(dev, ops)) size = dma_direct_max_mapping_size(dev); + else if (use_dma_iommu(dev)) + size = iommu_dma_max_mapping_size(dev); else if (ops && ops->max_mapping_size) size = ops->max_mapping_size(dev); @@ -855,7 +915,9 @@ size_t dma_opt_mapping_size(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); size_t size = SIZE_MAX; - if (ops && ops->opt_mapping_size) + if (use_dma_iommu(dev)) + size = iommu_dma_opt_mapping_size(); + else if (ops && ops->opt_mapping_size) size = ops->opt_mapping_size(); return min(dma_max_mapping_size(dev), size); @@ -866,6 +928,9 @@ unsigned long dma_get_merge_boundary(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); + if (use_dma_iommu(dev)) + return iommu_dma_get_merge_boundary(dev); + if (!ops || !ops->get_merge_boundary) return 0; /* can't merge */ -- Gitee From a3a67282b58b0ed626a4434d0a0a80b832f530e5 Mon Sep 17 00:00:00 2001 From: Pranjal Shrivastava Date: Fri, 16 Aug 2024 10:49:06 +0000 Subject: [PATCH 31/99] iommu: Handle iommu faults for a bad iopf setup ANBZ: #13617 commit b58b133e680b20d219940e0fdb6f6132c2b60f38 upstream. The iommu_report_device_fault function was updated to return void while assuming that drivers only need to call iommu_report_device_fault() for reporting an iopf. This implementation causes the following problems: 1. Drivers rely on the core code to call their page_response op; however, when a fault is received and no fault-capable domain is attached / iopf_param is NULL, ops->page_response is NOT called, causing the device to stall in case the fault type was PAGE_REQ. 2. The arm_smmu_v3 driver relies on the returned value to log errors; returning void from iommu_report_device_fault causes these events to be missed while logging. Modify the iommu_report_device_fault function to return -EINVAL for cases where no fault-capable domain is attached or iopf_param was NULL, and call back into the driver (ops->page_response) in case the fault type was IOMMU_FAULT_PAGE_REQ. The returned value can be used by the drivers to log the fault/event as needed.
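For illustration, a fault-reporting driver can now act on failures; a minimal caller-side sketch (handle_fault_event is a hypothetical wrapper, mirroring the arm-smmu-v3 hunk below):

	/* Sketch: forward a fault event and log a bad/failed iopf setup. */
	static int handle_fault_event(struct device *dev, struct iopf_fault *evt)
	{
		int ret = iommu_report_device_fault(dev, evt);

		if (ret)
			dev_err_ratelimited(dev, "iopf report failed: %d\n", ret);
		return ret;
	}

A PAGE_REQ fault that takes the bad-setup path is still completed through ops->page_response() with IOMMU_PAGE_RESP_INVALID, so the device is not left stalled waiting for a response.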
Reported-by: Kunkun Jiang Closes: https://lore.kernel.org/all/6147caf0-b9a0-30ca-795e-a1aa502a5c51@huawei.com/ Fixes: 3dfa64aecbaf ("iommu: Make iommu_report_device_fault() return void") Signed-off-by: Jason Gunthorpe Signed-off-by: Pranjal Shrivastava Reviewed-by: Jason Gunthorpe Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20240816104906.1010626-1-praan@google.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 2 +- drivers/iommu/io-pgfault.c | 121 ++++++++++++++------ include/linux/iommu.h | 5 +- 3 files changed, 87 insertions(+), 41 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index f5672d936de2..ee8866be799a 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1788,7 +1788,7 @@ static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt) goto out_unlock; } - iommu_report_device_fault(master->dev, &fault_evt); + ret = iommu_report_device_fault(master->dev, &fault_evt); out_unlock: mutex_unlock(&smmu->streams_mutex); return ret; diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index 81e9cc6e3164..4674e618797c 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -115,6 +115,59 @@ static struct iopf_group *iopf_group_alloc(struct iommu_fault_param *iopf_param, return group; } +static struct iommu_attach_handle *find_fault_handler(struct device *dev, + struct iopf_fault *evt) +{ + struct iommu_fault *fault = &evt->fault; + struct iommu_attach_handle *attach_handle; + + if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) { + attach_handle = iommu_attach_handle_get(dev->iommu_group, + fault->prm.pasid, 0); + if (IS_ERR(attach_handle)) { + const struct iommu_ops *ops = dev_iommu_ops(dev); + + if (!ops->user_pasid_table) + return NULL; + /* + * The iommu driver for this device supports user- + * managed PASID table. Therefore page faults for + * any PASID should go through the NESTING domain + * attached to the device RID. + */ + attach_handle = iommu_attach_handle_get( + dev->iommu_group, IOMMU_NO_PASID, + IOMMU_DOMAIN_NESTED); + if (IS_ERR(attach_handle)) + return NULL; + } + } else { + attach_handle = iommu_attach_handle_get(dev->iommu_group, + IOMMU_NO_PASID, 0); + + if (IS_ERR(attach_handle)) + return NULL; + } + + if (!attach_handle->domain->iopf_handler) + return NULL; + + return attach_handle; +} + +static void iopf_error_response(struct device *dev, struct iopf_fault *evt) +{ + const struct iommu_ops *ops = dev_iommu_ops(dev); + struct iommu_fault *fault = &evt->fault; + struct iommu_page_response resp = { + .pasid = fault->prm.pasid, + .grpid = fault->prm.grpid, + .code = IOMMU_PAGE_RESP_INVALID + }; + + ops->page_response(dev, evt, &resp); +} + /** * iommu_report_device_fault() - Report fault event to device driver * @dev: the device @@ -153,24 +206,39 @@ static struct iopf_group *iopf_group_alloc(struct iommu_fault_param *iopf_param, * handling framework should guarantee that the iommu domain could only be * freed after the device has stopped generating page faults (or the iommu * hardware has been set to block the page faults) and the pending page faults - * have been flushed. + * have been flushed. In case no page fault handler is attached or no iopf params + * are setup, then the ops->page_response() is called to complete the evt. + * + * Returns 0 on success, or an error in case of a bad/failed iopf setup. 
*/ -void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) +int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) { + struct iommu_attach_handle *attach_handle; struct iommu_fault *fault = &evt->fault; struct iommu_fault_param *iopf_param; struct iopf_group abort_group = {}; struct iopf_group *group; + attach_handle = find_fault_handler(dev, evt); + if (!attach_handle) + goto err_bad_iopf; + + /* + * Something has gone wrong if a fault capable domain is attached but no + * iopf_param is setup + */ iopf_param = iopf_get_dev_fault_param(dev); if (WARN_ON(!iopf_param)) - return; + goto err_bad_iopf; if (!(fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) { - report_partial_fault(iopf_param, fault); + int ret; + + ret = report_partial_fault(iopf_param, fault); iopf_put_dev_fault_param(iopf_param); /* A request that is not the last does not need to be ack'd */ - return; + + return ret; } /* @@ -185,38 +253,7 @@ void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) if (group == &abort_group) goto err_abort; - if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) { - group->attach_handle = iommu_attach_handle_get(dev->iommu_group, - fault->prm.pasid, - 0); - if (IS_ERR(group->attach_handle)) { - const struct iommu_ops *ops = dev_iommu_ops(dev); - - if (!ops->user_pasid_table) - goto err_abort; - - /* - * The iommu driver for this device supports user- - * managed PASID table. Therefore page faults for - * any PASID should go through the NESTING domain - * attached to the device RID. - */ - group->attach_handle = - iommu_attach_handle_get(dev->iommu_group, - IOMMU_NO_PASID, - IOMMU_DOMAIN_NESTED); - if (IS_ERR(group->attach_handle)) - goto err_abort; - } - } else { - group->attach_handle = - iommu_attach_handle_get(dev->iommu_group, IOMMU_NO_PASID, 0); - if (IS_ERR(group->attach_handle)) - goto err_abort; - } - - if (!group->attach_handle->domain->iopf_handler) - goto err_abort; + group->attach_handle = attach_handle; /* * On success iopf_handler must call iopf_group_response() and @@ -225,7 +262,7 @@ void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) if (group->attach_handle->domain->iopf_handler(group)) goto err_abort; - return; + return 0; err_abort: dev_warn_ratelimited(dev, "iopf with pasid %d aborted\n", @@ -235,6 +272,14 @@ void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) __iopf_free_group(group); else iopf_free_group(group); + + return 0; + +err_bad_iopf: + if (fault->type == IOMMU_FAULT_PAGE_REQ) + iopf_error_response(dev, evt); + + return -EINVAL; } EXPORT_SYMBOL_GPL(iommu_report_device_fault); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 4b7609b9e067..1a7fa47dfab0 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1595,7 +1595,7 @@ struct iopf_queue *iopf_queue_alloc(const char *name); void iopf_queue_free(struct iopf_queue *queue); int iopf_queue_discard_partial(struct iopf_queue *queue); void iopf_free_group(struct iopf_group *group); -void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt); +int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt); void iopf_group_response(struct iopf_group *group, enum iommu_page_response_code status); #else @@ -1633,9 +1633,10 @@ static inline void iopf_free_group(struct iopf_group *group) { } -static inline void +static inline int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) { + return -ENODEV; } static inline void 
iopf_group_response(struct iopf_group *group, -- Gitee From 6550217df2e159ea81cdf88e0565e39a884c2022 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Fri, 16 Aug 2024 22:16:50 +0000 Subject: [PATCH 32/99] iommu/amd: Update PASID, GATS, GLX, SNPAVICSUP feature related macros ANBZ: #13617 commit 014e756247e847cde8a06fc27ee3a72a5140b972 upstream. Clean up and reorder them according to the bit index. There is no functional change. Suggested-by: Jason Gunthorpe Signed-off-by: Suravee Suthikulpanit Reviewed-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240816221650.62295-1-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/amd_iommu.h | 5 ----- drivers/iommu/amd/amd_iommu_types.h | 16 ++++++---------- drivers/iommu/amd/init.c | 8 +++----- 3 files changed, 9 insertions(+), 20 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 4abfab24585d..807caef76d79 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -121,11 +121,6 @@ static inline bool check_feature2(u64 mask) return (amd_iommu_efr2 & mask); } -static inline int check_feature_gpt_level(void) -{ - return ((amd_iommu_efr >> FEATURE_GATS_SHIFT) & FEATURE_GATS_MASK); -} - static inline bool amd_iommu_gt_ppr_supported(void) { return (check_feature(FEATURE_GT) && diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 2b76b5dedc1d..c9f9a598eb82 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -8,6 +8,7 @@ #ifndef _ASM_X86_AMD_IOMMU_TYPES_H #define _ASM_X86_AMD_IOMMU_TYPES_H +#include #include #include #include @@ -95,26 +96,21 @@ #define FEATURE_GA BIT_ULL(7) #define FEATURE_HE BIT_ULL(8) #define FEATURE_PC BIT_ULL(9) -#define FEATURE_GATS_SHIFT (12) -#define FEATURE_GATS_MASK (3ULL) +#define FEATURE_GATS GENMASK_ULL(13, 12) +#define FEATURE_GLX GENMASK_ULL(15, 14) #define FEATURE_GAM_VAPIC BIT_ULL(21) +#define FEATURE_PASMAX GENMASK_ULL(36, 32) #define FEATURE_GIOSUP BIT_ULL(48) #define FEATURE_HASUP BIT_ULL(49) #define FEATURE_EPHSUP BIT_ULL(50) #define FEATURE_HDSUP BIT_ULL(52) #define FEATURE_SNP BIT_ULL(63) -#define FEATURE_PASID_SHIFT 32 -#define FEATURE_PASID_MASK (0x1fULL << FEATURE_PASID_SHIFT) - -#define FEATURE_GLXVAL_SHIFT 14 -#define FEATURE_GLXVAL_MASK (0x03ULL << FEATURE_GLXVAL_SHIFT) /* Extended Feature 2 Bits */ -#define FEATURE_SNPAVICSUP_SHIFT 5 -#define FEATURE_SNPAVICSUP_MASK (0x07ULL << FEATURE_SNPAVICSUP_SHIFT) +#define FEATURE_SNPAVICSUP GENMASK_ULL(7, 5) #define FEATURE_SNPAVICSUP_GAM(x) \ - ((x & FEATURE_SNPAVICSUP_MASK) >> FEATURE_SNPAVICSUP_SHIFT == 0x1) + (FIELD_GET(FEATURE_SNPAVICSUP, x) == 0x1) /* Note: * The current driver only support 16-bit PASID. 
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index d7347e19aabb..d161de2dd740 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -2041,14 +2041,12 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) int glxval; u64 pasmax; - pasmax = amd_iommu_efr & FEATURE_PASID_MASK; - pasmax >>= FEATURE_PASID_SHIFT; + pasmax = FIELD_GET(FEATURE_PASMAX, amd_iommu_efr); iommu->iommu.max_pasids = (1 << (pasmax + 1)) - 1; BUG_ON(iommu->iommu.max_pasids & ~PASID_MASK); - glxval = amd_iommu_efr & FEATURE_GLXVAL_MASK; - glxval >>= FEATURE_GLXVAL_SHIFT; + glxval = FIELD_GET(FEATURE_GLX, amd_iommu_efr); if (amd_iommu_max_glx_val == -1) amd_iommu_max_glx_val = glxval; @@ -3095,7 +3093,7 @@ static int __init early_amd_iommu_init(void) /* 5 level guest page table */ if (cpu_feature_enabled(X86_FEATURE_LA57) && - check_feature_gpt_level() == GUEST_PGTABLE_5_LEVEL) + FIELD_GET(FEATURE_GATS, amd_iommu_efr) == GUEST_PGTABLE_5_LEVEL) amd_iommu_gpt_level = PAGE_MODE_5_LEVEL; /* Disable any previously enabled IOMMUs */ -- Gitee From 4399f90730c98f72749b82df47253d5b656a1249 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 15 Aug 2024 20:48:57 +0800 Subject: [PATCH 33/99] iommu/vt-d: Fix incorrect domain ID in context flush helper ANBZ: #13617 commit 7af6c720417f21f015f46baa33e182f349ddc93b upstream. The helper intel_context_flush_present() is designed to flush all related caches when a context entry with the present bit set is modified. It currently retrieves the domain ID from the context entry and uses it to flush the IOTLB and context caches. This is incorrect when the context entry transitions from present to non-present, as the domain ID field is cleared before calling the helper. Fix it by passing the domain ID programmed in the context entry before the change to intel_context_flush_present(). This ensures that the correct domain ID is used for cache invalidation. 
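The fix comes down to one ordering rule in each path that tears down or updates a context entry: read the domain ID while the entry is still present, then clear, flush, and pass the saved ID to the cache-flush helper. A condensed sketch of the pattern applied by the hunks below:

	u16 did;

	spin_lock(&iommu->lock);
	/* ... look up the context entry ... */
	did = context_domain_id(context);	/* read while still present */
	context_clear_entry(context);
	__iommu_flush_cache(iommu, context, sizeof(*context));
	spin_unlock(&iommu->lock);
	intel_context_flush_present(info, context, did, true);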
Fixes: f90584f4beb8 ("iommu/vt-d: Add helper to flush caches for context change") Reported-by: Alex Williamson Closes: https://lore.kernel.org/linux-iommu/20240814162726.5efe1a6e.alex.williamson@redhat.com/ Signed-off-by: Lu Baolu Tested-by: Alex Williamson Reviewed-by: Alex Williamson Reviewed-by: Jerry Snitselaar Reviewed-by: Jacob Pan Link: https://lore.kernel.org/r/20240815124857.70038-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/intel/iommu.c | 8 ++++++-- drivers/iommu/intel/iommu.h | 2 +- drivers/iommu/intel/pasid.c | 7 ++++--- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 2e2bd8c1bcf5..abcdf318e75a 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1956,6 +1956,7 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 { struct intel_iommu *iommu = info->iommu; struct context_entry *context; + u16 did; spin_lock(&iommu->lock); context = iommu_context_addr(iommu, bus, devfn, 0); @@ -1964,10 +1965,11 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 return; } + did = context_domain_id(context); context_clear_entry(context); __iommu_flush_cache(iommu, context, sizeof(*context)); spin_unlock(&iommu->lock); - intel_context_flush_present(info, context, true); + intel_context_flush_present(info, context, did, true); } static int domain_setup_first_level(struct intel_iommu *iommu, @@ -4322,6 +4324,7 @@ static int context_flip_pri(struct device_domain_info *info, bool enable) struct intel_iommu *iommu = info->iommu; u8 bus = info->bus, devfn = info->devfn; struct context_entry *context; + u16 did; spin_lock(&iommu->lock); if (context_copied(iommu, bus, devfn)) { @@ -4334,6 +4337,7 @@ static int context_flip_pri(struct device_domain_info *info, bool enable) spin_unlock(&iommu->lock); return -ENODEV; } + did = context_domain_id(context); if (enable) context_set_sm_pre(context); @@ -4342,7 +4346,7 @@ static int context_flip_pri(struct device_domain_info *info, bool enable) if (!ecap_coherent(iommu->ecap)) clflush_cache_range(context, sizeof(*context)); - intel_context_flush_present(info, context, true); + intel_context_flush_present(info, context, did, true); spin_unlock(&iommu->lock); return 0; diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 2086688743f0..5dd150eaf60f 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -1154,7 +1154,7 @@ void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start, void intel_context_flush_present(struct device_domain_info *info, struct context_entry *context, - bool affect_domains); + u16 did, bool affect_domains); #ifdef CONFIG_INTEL_IOMMU_SVM void intel_svm_check(struct intel_iommu *iommu); diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 5792c817cefa..b51fc268dc84 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -683,6 +683,7 @@ static void device_pasid_table_teardown(struct device *dev, u8 bus, u8 devfn) struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; struct context_entry *context; + u16 did; spin_lock(&iommu->lock); context = iommu_context_addr(iommu, bus, devfn, false); @@ -691,10 +692,11 @@ static void device_pasid_table_teardown(struct device *dev, u8 bus, u8 devfn) return; } + did = context_domain_id(context); context_clear_entry(context); 
__iommu_flush_cache(iommu, context, sizeof(*context)); spin_unlock(&iommu->lock); - intel_context_flush_present(info, context, false); + intel_context_flush_present(info, context, did, false); } static int pci_pasid_table_teardown(struct pci_dev *pdev, u16 alias, void *data) @@ -885,10 +887,9 @@ static void __context_flush_dev_iotlb(struct device_domain_info *info) */ void intel_context_flush_present(struct device_domain_info *info, struct context_entry *context, - bool flush_domains) + u16 did, bool flush_domains) { struct intel_iommu *iommu = info->iommu; - u16 did = context_domain_id(context); struct pasid_entry *pte; int i; -- Gitee From 102f86c103914d9b2aed0612c0ba45e0828d6a02 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 2 Aug 2024 17:32:02 -0700 Subject: [PATCH 34/99] iommufd: Reorder include files ANBZ: #13617 commit 1d4684fbe88dc28e2bf79f5e94a432f0469d2dac upstream. Reorder include files to alphabetic order to simplify maintenance, and separate local headers and global headers with a blank line. No functional change intended. Link: https://patch.msgid.link/r/7524b037cc05afe19db3c18f863253e1d1554fa2.1722644866.git.nicolinc@nvidia.com Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe Signed-off-by: Shuai Xue --- drivers/iommu/iommufd/device.c | 4 ++-- drivers/iommu/iommufd/fault.c | 4 ++-- drivers/iommu/iommufd/io_pagetable.c | 8 ++++---- drivers/iommu/iommufd/io_pagetable.h | 2 +- drivers/iommu/iommufd/ioas.c | 2 +- drivers/iommu/iommufd/iommufd_private.h | 9 +++++---- drivers/iommu/iommufd/iommufd_test.h | 2 +- drivers/iommu/iommufd/iova_bitmap.c | 2 +- drivers/iommu/iommufd/main.c | 8 ++++---- drivers/iommu/iommufd/pages.c | 10 +++++----- drivers/iommu/iommufd/selftest.c | 9 +++++---- include/linux/iommufd.h | 4 ++-- include/uapi/linux/iommufd.h | 2 +- 13 files changed, 34 insertions(+), 32 deletions(-) diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 21021a2cecba..3682ae03e99a 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -1,12 +1,12 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES */ +#include #include #include -#include #include -#include "../iommu-priv.h" +#include "../iommu-priv.h" #include "io_pagetable.h" #include "iommufd_private.h" diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c index a643d5c7c535..df03411c8728 100644 --- a/drivers/iommu/iommufd/fault.c +++ b/drivers/iommu/iommufd/fault.c @@ -3,14 +3,14 @@ */ #define pr_fmt(fmt) "iommufd: " fmt +#include #include #include +#include #include #include -#include #include #include -#include #include #include "../iommu-priv.h" diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c index 9f193c933de6..4bf7ccd39d46 100644 --- a/drivers/iommu/iommufd/io_pagetable.c +++ b/drivers/iommu/iommufd/io_pagetable.c @@ -8,17 +8,17 @@ * The datastructure uses the iopt_pages to optimize the storage of the PFNs * between the domains and xarray. 
*/ +#include +#include +#include #include #include -#include #include -#include #include -#include #include -#include "io_pagetable.h" #include "double_span.h" +#include "io_pagetable.h" struct iopt_pages_list { struct iopt_pages *pages; diff --git a/drivers/iommu/iommufd/io_pagetable.h b/drivers/iommu/iommufd/io_pagetable.h index 0ec3509b7e33..c61d74471684 100644 --- a/drivers/iommu/iommufd/io_pagetable.h +++ b/drivers/iommu/iommufd/io_pagetable.h @@ -6,8 +6,8 @@ #define __IO_PAGETABLE_H #include -#include #include +#include #include #include "iommufd_private.h" diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c index 157a89b993e4..2c4b2bb11e78 100644 --- a/drivers/iommu/iommufd/ioas.c +++ b/drivers/iommu/iommufd/ioas.c @@ -3,8 +3,8 @@ * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES */ #include -#include #include +#include #include #include "io_pagetable.h" diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 92efe30a8f0d..017e50574f3b 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -4,13 +4,14 @@ #ifndef __IOMMUFD_PRIVATE_H #define __IOMMUFD_PRIVATE_H -#include -#include -#include -#include #include #include +#include +#include +#include +#include #include + #include "../iommu-priv.h" struct iommu_domain; diff --git a/drivers/iommu/iommufd/iommufd_test.h b/drivers/iommu/iommufd/iommufd_test.h index acbbba1c6671..f4bc23a92f9a 100644 --- a/drivers/iommu/iommufd/iommufd_test.h +++ b/drivers/iommu/iommufd/iommufd_test.h @@ -4,8 +4,8 @@ #ifndef _UAPI_IOMMUFD_TEST_H #define _UAPI_IOMMUFD_TEST_H -#include #include +#include enum { IOMMU_TEST_OP_ADD_RESERVED = 1, diff --git a/drivers/iommu/iommufd/iova_bitmap.c b/drivers/iommu/iommufd/iova_bitmap.c index 6b09b7856789..2cdc4f542df4 100644 --- a/drivers/iommu/iommufd/iova_bitmap.c +++ b/drivers/iommu/iommufd/iova_bitmap.c @@ -3,10 +3,10 @@ * Copyright (c) 2022, Oracle and/or its affiliates. * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved */ +#include #include #include #include -#include #define BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE) diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 83bbd7c5d160..b5f5d27ee963 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -8,15 +8,15 @@ */ #define pr_fmt(fmt) "iommufd: " fmt +#include #include #include -#include -#include +#include #include +#include #include -#include +#include #include -#include #include "io_pagetable.h" #include "iommufd_private.h" diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index 117f644a0c5b..93d806c9c073 100644 --- a/drivers/iommu/iommufd/pages.c +++ b/drivers/iommu/iommufd/pages.c @@ -45,16 +45,16 @@ * last_iova + 1 can overflow. An iopt_pages index will always be much less than * ULONG_MAX so last_index + 1 cannot overflow. 
*/ +#include +#include +#include +#include #include #include -#include #include -#include -#include -#include -#include "io_pagetable.h" #include "double_span.h" +#include "io_pagetable.h" #ifndef CONFIG_IOMMUFD_TEST #define TEMP_MEMORY_LIMIT 65536 diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index 222cfc11ebfd..7464c24764cd 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -3,13 +3,14 @@ * * Kernel side components to support tools/testing/selftests/iommu */ -#include -#include -#include -#include #include +#include #include +#include +#include #include +#include +#include #include #include "../iommu-priv.h" diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index ffc3a949f837..c2f2f6b9148e 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -6,9 +6,9 @@ #ifndef __LINUX_IOMMUFD_H #define __LINUX_IOMMUFD_H -#include -#include #include +#include +#include struct device; struct iommufd_device; diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 4dde745cfb7e..72010f71c5e4 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -4,8 +4,8 @@ #ifndef _UAPI_IOMMUFD_H #define _UAPI_IOMMUFD_H -#include #include +#include #define IOMMUFD_TYPE (';') -- Gitee From 212f8d54d222a0cbd5d0358d4b6b56802230c668 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 22 Aug 2024 11:47:09 -0300 Subject: [PATCH 35/99] iommufd/selftest: Fix buffer read overrun in the dirty test ANBZ: #13617 commit 79ea4a496ab5c970a3a793d863ed8893b1af107c upstream. test_bit() is used to read the memory storing the bitmap; however, test_bit() always uses an unsigned long 8-byte access. If the bitmap is not an aligned size of 64 bits, this will now trigger a KASAN warning reading past the end of the buffer. Properly round the buffer allocation to an unsigned long size. Continue to copy_from_user() using a byte granularity.
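The sizing rule is easiest to see in isolation: test_bit() dereferences whole unsigned longs, so the backing allocation must be rounded up to a multiple of BITS_PER_LONG even though only DIV_ROUND_UP(max, BITS_PER_BYTE) bytes are copied from userspace. A hypothetical stand-alone demo of the arithmetic (plain C, not kernel code):

	#include <stdio.h>

	#define BITS_PER_BYTE	8
	#define BITS_PER_LONG	(8 * sizeof(unsigned long))
	#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

	int main(void)
	{
		unsigned long max = 100;	/* number of bits tracked */
		size_t copy_bytes = DIV_ROUND_UP(max, BITS_PER_BYTE);	   /* 13 */
		size_t alloc_bytes = DIV_ROUND_UP(max, BITS_PER_LONG) *
				     sizeof(unsigned long);		   /* 16 */

		/*
		 * test_bit(99, buf) would read the 8-byte word at offsets
		 * 8..15: a 13-byte buffer is over-read, a 16-byte one is safe.
		 */
		printf("copy %zu bytes into a %zu-byte buffer\n",
		       copy_bytes, alloc_bytes);
		return 0;
	}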
Fixes: 9560393b830b ("iommufd/selftest: Fix iommufd_test_dirty() to handle <u8 bitmaps") Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Signed-off-by: Shuai Xue --- drivers/iommu/iommufd/selftest.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index 7464c24764cd..540437be168a 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -1343,7 +1343,7 @@ static int iommufd_test_dirty(struct iommufd_ucmd *ucmd, unsigned int mockpt_id, unsigned long page_size, void __user *uptr, u32 flags) { - unsigned long bitmap_size, i, max; + unsigned long i, max; struct iommu_test_cmd *cmd = ucmd->cmd; struct iommufd_hw_pagetable *hwpt; struct mock_iommu_domain *mock; @@ -1364,15 +1364,14 @@ static int iommufd_test_dirty(struct iommufd_ucmd *ucmd, unsigned int mockpt_id, } max = length / page_size; - bitmap_size = DIV_ROUND_UP(max, BITS_PER_BYTE); - - tmp = kvzalloc(bitmap_size, GFP_KERNEL_ACCOUNT); + tmp = kvzalloc(DIV_ROUND_UP(max, BITS_PER_LONG) * sizeof(unsigned long), + GFP_KERNEL_ACCOUNT); if (!tmp) { rc = -ENOMEM; goto out_put; } - if (copy_from_user(tmp, uptr, bitmap_size)) { + if (copy_from_user(tmp, uptr, DIV_ROUND_UP(max, BITS_PER_BYTE))) { rc = -EFAULT; goto out_free; } -- Gitee From 77bda7c6ad49cb48933220b34b567e389e083a69 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 6 Aug 2024 17:34:46 -0700 Subject: [PATCH 36/99] iommufd/device: Enforce reserved IOVA also when attached to hwpt_nested ANBZ: #13617 commit b2f44814680b569be98e58111bd582fd3a689d4d upstream. Currently, device reserved regions are only enforced when the device is attached to an hwpt_paging. In other words, if the device gets attached to an hwpt_nested directly, the parent hwpt_paging of the hwpt_nested would not enforce those reserved IOVAs. This works for most reserved region types, but not for IOMMU_RESV_SW_MSI, which is a unique software-defined window, required in the nesting case too to set up an MSI doorbell on the parent stage-2 hwpt/domain. Kevin pointed out in [1] that: 1) there is no usage using up close to the entire IOVA space yet, and 2) a guest may change the vIOMMU mode to switch between nested and paging, so the VMM has to take all devices' reserved regions into consideration anyway when composing the GPA space. So it would actually be convenient for us to also enforce reserved IOVA onto the parent hwpt_paging when attaching a device to an hwpt_nested. Repurpose the existing attach/replace_paging helpers to attach the device's reserved IOVAs exclusively. Add a new find_hwpt_paging helper, which is only used by these reserved IOVA functions, to allow an IOMMUFD_OBJ_HWPT_NESTED hwpt to redirect to its parent hwpt_paging. Return NULL in these two helpers for any new HWPT type in the future.
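With the helper in place, each reserved-IOVA path asks a single question (which hwpt_paging enforces reserved regions for this hwpt) instead of special-casing nested domains; a minimal sketch of a caller, matching the device.c hunks below:

	struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);

	if (hwpt_paging)	/* paging directly, or nested via its S2 parent */
		rc = iopt_table_enforce_dev_resv_regions(
				&hwpt_paging->ioas->iopt, idev->dev, NULL);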
Link: https://patch.msgid.link/r/20240807003446.3740368-1-nicolinc@nvidia.com Link: https://lore.kernel.org/all/BN9PR11MB5276497781C96415272E6FED8CB12@BN9PR11MB5276.namprd11.prod.outlook.com/ #1 Suggested-by: Kevin Tian Signed-off-by: Nicolin Chen Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Signed-off-by: Shuai Xue --- drivers/iommu/iommufd/device.c | 52 ++++++++++++------------- drivers/iommu/iommufd/iommufd_private.h | 19 +++++++++ 2 files changed, 45 insertions(+), 26 deletions(-) diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 3682ae03e99a..ba2aed1b7014 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -327,8 +327,9 @@ static int iommufd_group_setup_msi(struct iommufd_group *igroup, return 0; } -static int iommufd_hwpt_paging_attach(struct iommufd_hwpt_paging *hwpt_paging, - struct iommufd_device *idev) +static int +iommufd_device_attach_reserved_iova(struct iommufd_device *idev, + struct iommufd_hwpt_paging *hwpt_paging) { int rc; @@ -354,6 +355,7 @@ static int iommufd_hwpt_paging_attach(struct iommufd_hwpt_paging *hwpt_paging, int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, struct iommufd_device *idev) { + struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt); int rc; mutex_lock(&idev->igroup->lock); @@ -363,8 +365,8 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, goto err_unlock; } - if (hwpt_is_paging(hwpt)) { - rc = iommufd_hwpt_paging_attach(to_hwpt_paging(hwpt), idev); + if (hwpt_paging) { + rc = iommufd_device_attach_reserved_iova(idev, hwpt_paging); if (rc) goto err_unlock; } @@ -387,9 +389,8 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, mutex_unlock(&idev->igroup->lock); return 0; err_unresv: - if (hwpt_is_paging(hwpt)) - iopt_remove_reserved_iova(&to_hwpt_paging(hwpt)->ioas->iopt, - idev->dev); + if (hwpt_paging) + iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev); err_unlock: mutex_unlock(&idev->igroup->lock); return rc; @@ -399,6 +400,7 @@ struct iommufd_hw_pagetable * iommufd_hw_pagetable_detach(struct iommufd_device *idev) { struct iommufd_hw_pagetable *hwpt = idev->igroup->hwpt; + struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt); mutex_lock(&idev->igroup->lock); list_del(&idev->group_item); @@ -406,9 +408,8 @@ iommufd_hw_pagetable_detach(struct iommufd_device *idev) iommufd_hwpt_detach_device(hwpt, idev); idev->igroup->hwpt = NULL; } - if (hwpt_is_paging(hwpt)) - iopt_remove_reserved_iova(&to_hwpt_paging(hwpt)->ioas->iopt, - idev->dev); + if (hwpt_paging) + iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev); mutex_unlock(&idev->igroup->lock); /* Caller must destroy hwpt */ @@ -451,17 +452,17 @@ iommufd_group_remove_reserved_iova(struct iommufd_group *igroup, } static int -iommufd_group_do_replace_paging(struct iommufd_group *igroup, - struct iommufd_hwpt_paging *hwpt_paging) +iommufd_group_do_replace_reserved_iova(struct iommufd_group *igroup, + struct iommufd_hwpt_paging *hwpt_paging) { - struct iommufd_hw_pagetable *old_hwpt = igroup->hwpt; + struct iommufd_hwpt_paging *old_hwpt_paging; struct iommufd_device *cur; int rc; lockdep_assert_held(&igroup->lock); - if (!hwpt_is_paging(old_hwpt) || - hwpt_paging->ioas != to_hwpt_paging(old_hwpt)->ioas) { + old_hwpt_paging = find_hwpt_paging(igroup->hwpt); + if (!old_hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas) { list_for_each_entry(cur, &igroup->device_list, group_item) { rc = iopt_table_enforce_dev_resv_regions( 
&hwpt_paging->ioas->iopt, cur->dev, NULL); @@ -484,6 +485,8 @@ static struct iommufd_hw_pagetable * iommufd_device_do_replace(struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt) { + struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt); + struct iommufd_hwpt_paging *old_hwpt_paging; struct iommufd_group *igroup = idev->igroup; struct iommufd_hw_pagetable *old_hwpt; unsigned int num_devices; @@ -507,9 +510,8 @@ iommufd_device_do_replace(struct iommufd_device *idev, } old_hwpt = igroup->hwpt; - if (hwpt_is_paging(hwpt)) { - rc = iommufd_group_do_replace_paging(igroup, - to_hwpt_paging(hwpt)); + if (hwpt_paging) { + rc = iommufd_group_do_replace_reserved_iova(igroup, hwpt_paging); if (rc) goto err_unlock; } @@ -518,11 +520,10 @@ iommufd_device_do_replace(struct iommufd_device *idev, if (rc) goto err_unresv; - if (hwpt_is_paging(old_hwpt) && - (!hwpt_is_paging(hwpt) || - to_hwpt_paging(hwpt)->ioas != to_hwpt_paging(old_hwpt)->ioas)) - iommufd_group_remove_reserved_iova(igroup, - to_hwpt_paging(old_hwpt)); + old_hwpt_paging = find_hwpt_paging(old_hwpt); + if (old_hwpt_paging && + (!hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas)) + iommufd_group_remove_reserved_iova(igroup, old_hwpt_paging); igroup->hwpt = hwpt; @@ -540,9 +541,8 @@ iommufd_device_do_replace(struct iommufd_device *idev, /* Caller must destroy old_hwpt */ return old_hwpt; err_unresv: - if (hwpt_is_paging(hwpt)) - iommufd_group_remove_reserved_iova(igroup, - to_hwpt_paging(hwpt)); + if (hwpt_paging) + iommufd_group_remove_reserved_iova(igroup, hwpt_paging); err_unlock: mutex_unlock(&idev->igroup->lock); return ERR_PTR(rc); diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 017e50574f3b..5d3768d77099 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -325,6 +325,25 @@ to_hwpt_paging(struct iommufd_hw_pagetable *hwpt) return container_of(hwpt, struct iommufd_hwpt_paging, common); } +static inline struct iommufd_hwpt_nested * +to_hwpt_nested(struct iommufd_hw_pagetable *hwpt) +{ + return container_of(hwpt, struct iommufd_hwpt_nested, common); +} + +static inline struct iommufd_hwpt_paging * +find_hwpt_paging(struct iommufd_hw_pagetable *hwpt) +{ + switch (hwpt->obj.type) { + case IOMMUFD_OBJ_HWPT_PAGING: + return to_hwpt_paging(hwpt); + case IOMMUFD_OBJ_HWPT_NESTED: + return to_hwpt_nested(hwpt)->parent; + default: + return NULL; + } +} + static inline struct iommufd_hwpt_paging * iommufd_get_hwpt_paging(struct iommufd_ucmd *ucmd, u32 id) { -- Gitee From 876e0407a9161dd987b72c58db1caaa304599cb7 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 7 Aug 2024 15:19:20 -0300 Subject: [PATCH 37/99] iommu: Allow ATS to work on VFs when the PF uses IDENTITY ANBZ: #13617 commit 6c17c7d5936e6af6a5bda9f9de98a5e2ee6e8a6f upstream. PCI ATS has a global Smallest Translation Unit field that is located in the PF but shared by all of the VFs. The expectation is that the STU will be set to the root port's global STU capability which is driven by the IO page table configuration of the iommu HW. Today it becomes set when the iommu driver first enables ATS. Thus, to enable ATS on the VF, the PF must have already had the correct STU programmed, even if ATS is off on the PF. Unfortunately the PF only programs the STU when the PF enables ATS. The iommu drivers tend to leave ATS disabled when IDENTITY translation is being used. 
Thus we can get into a state where the PF is setup to use IDENTITY with the DMA API while the VF would like to use VFIO with a PAGING domain and have ATS turned on. This fails because the PF never loaded a PAGING domain and so it never setup the STU, and the VF can't do it. The simplest solution is to have the iommu driver set the ATS STU when it probes the device. This way the ATS STU is loaded immediately at boot time to all PFs and there is no issue when a VF comes to use it. Add a new call pci_prepare_ats() which should be called by iommu drivers in their probe_device() op for every PCI device if the iommu driver supports ATS. This will setup the STU based on whatever page size capability the iommu HW has. Signed-off-by: Jason Gunthorpe Acked-by: Bjorn Helgaas Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/0-v1-0fb4d2ab6770+7e706-ats_vf_jgg@nvidia.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/iommu.c | 3 ++ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 6 ++++ drivers/iommu/intel/iommu.c | 1 + drivers/pci/ats.c | 33 +++++++++++++++++++++ include/linux/pci-ats.h | 3 ++ 5 files changed, 46 insertions(+) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index aef3871e937e..87c6d253079d 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2212,6 +2212,9 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev) iommu_completion_wait(iommu); + if (dev_is_pci(dev)) + pci_prepare_ats(to_pci_dev(dev), PAGE_SHIFT); + return iommu_dev; } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index ee8866be799a..a3fd78f694b2 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3300,6 +3300,12 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev) smmu->features & ARM_SMMU_FEAT_STALL_FORCE) master->stall_enabled = true; + if (dev_is_pci(dev)) { + unsigned int stu = __ffs(smmu->pgsize_bitmap); + + pci_prepare_ats(to_pci_dev(dev), stu); + } + return &smmu->iommu; err_free_master: diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index abcdf318e75a..b7f127729c0e 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4166,6 +4166,7 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) dev_iommu_priv_set(dev, info); if (pdev && pci_ats_supported(pdev)) { + pci_prepare_ats(pdev, VTD_PAGE_SHIFT); ret = device_rbtree_insert(iommu, info); if (ret) goto free; diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c index f9cc2e10b676..0fe7c589e760 100644 --- a/drivers/pci/ats.c +++ b/drivers/pci/ats.c @@ -46,6 +46,39 @@ bool pci_ats_supported(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(pci_ats_supported); +/** + * pci_prepare_ats - Setup the PS for ATS + * @dev: the PCI device + * @ps: the IOMMU page shift + * + * This must be done by the IOMMU driver on the PF before any VFs are created to + * ensure that the VF can have ATS enabled. + * + * Returns 0 on success, or negative on failure. 
+ */ +int pci_prepare_ats(struct pci_dev *dev, int ps) +{ + u16 ctrl; + + if (!pci_ats_supported(dev)) + return -EINVAL; + + if (WARN_ON(dev->ats_enabled)) + return -EBUSY; + + if (ps < PCI_ATS_MIN_STU) + return -EINVAL; + + if (dev->is_virtfn) + return 0; + + dev->ats_stu = ps; + ctrl = PCI_ATS_CTRL_STU(dev->ats_stu - PCI_ATS_MIN_STU); + pci_write_config_word(dev, dev->ats_cap + PCI_ATS_CTRL, ctrl); + return 0; +} +EXPORT_SYMBOL_GPL(pci_prepare_ats); + /** * pci_enable_ats - enable the ATS capability * @dev: the PCI device diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h index df54cd5b15db..0e8b74e63767 100644 --- a/include/linux/pci-ats.h +++ b/include/linux/pci-ats.h @@ -8,6 +8,7 @@ /* Address Translation Service */ bool pci_ats_supported(struct pci_dev *dev); int pci_enable_ats(struct pci_dev *dev, int ps); +int pci_prepare_ats(struct pci_dev *dev, int ps); void pci_disable_ats(struct pci_dev *dev); int pci_ats_queue_depth(struct pci_dev *dev); int pci_ats_page_aligned(struct pci_dev *dev); @@ -16,6 +17,8 @@ static inline bool pci_ats_supported(struct pci_dev *d) { return false; } static inline int pci_enable_ats(struct pci_dev *d, int ps) { return -ENODEV; } +static inline int pci_prepare_ats(struct pci_dev *dev, int ps) +{ return -ENODEV; } static inline void pci_disable_ats(struct pci_dev *d) { } static inline int pci_ats_queue_depth(struct pci_dev *d) { return -ENODEV; } -- Gitee From 16ba3cf205fc17d17aad5d34363ae3d7d6187f14 Mon Sep 17 00:00:00 2001 From: Ashish Mhetre Date: Tue, 6 Aug 2024 10:51:35 +0000 Subject: [PATCH 38/99] iommu/io-pgtable-arm: Optimise non-coherent unmap ANBZ: #13617 commit 84b2baf427968c1b2e3ae3b7afcb0118cdee0915 upstream. The current __arm_lpae_unmap() function calls dma_sync() on individual PTEs after clearing them. Overall unmap performance can be improved by around 25% for large buffer sizes by combining the syncs for adjacent leaf entries. Optimize the unmap time by clearing all the leaf entries and issuing a single dma_sync() for them. Below is a detailed analysis of average unmap latency (in us) with and without this optimization, obtained by running dma_map_benchmark for different buffer sizes.
                UnMap Latency(us)

Size    Without         With            % gain with
        optimization    optimization    optimization
4KB     3               3               0
8KB     4               3.8             5
16KB    6.1             5.4             11.48
32KB    10.2            8.5             16.67
64KB    18.5            14.9            19.46
128KB   35              27.5            21.43
256KB   67.5            52.2            22.67
512KB   127.9           97.2            24.00
1MB     248.6           187.4           24.62
2MB     65.5            65.5            0
4MB     119.2           119             0.17

Reviewed-by: Robin Murphy Signed-off-by: Ashish Mhetre Acked-by: Will Deacon Link: https://lore.kernel.org/r/20240806105135.218089-1-amhetre@nvidia.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/io-pgtable-arm.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index fd9a5c136b5d..8967b0e38851 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -286,13 +286,13 @@ static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, int num_entries, sizeof(*ptep) * num_entries, DMA_TO_DEVICE); } -static void __arm_lpae_clear_pte(arm_lpae_iopte *ptep, struct io_pgtable_cfg *cfg) +static void __arm_lpae_clear_pte(arm_lpae_iopte *ptep, struct io_pgtable_cfg *cfg, int num_entries) { + for (int i = 0; i < num_entries; i++) + ptep[i] = 0; - *ptep = 0; - - if (!cfg->coherent_walk) - __arm_lpae_sync_pte(ptep, 1, cfg); + if (!cfg->coherent_walk && num_entries) + __arm_lpae_sync_pte(ptep, num_entries, cfg); } static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, @@ -665,26 +665,29 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, max_entries = arm_lpae_max_entries(unmap_idx_start, data); num_entries = min_t(int, pgcount, max_entries); - while (i < num_entries) { - pte = READ_ONCE(*ptep); + /* Find and handle non-leaf entries */ + for (i = 0; i < num_entries; i++) { + pte = READ_ONCE(ptep[i]); if (WARN_ON(!pte)) break; - __arm_lpae_clear_pte(ptep, &iop->cfg); - if (!iopte_leaf(pte, lvl, iop->fmt)) { + __arm_lpae_clear_pte(&ptep[i], &iop->cfg, 1); + /* Also flush any partial walks */ io_pgtable_tlb_flush_walk(iop, iova + i * size, size, ARM_LPAE_GRANULE(data)); __arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data)); - } else if (!iommu_iotlb_gather_queued(gather)) { - io_pgtable_tlb_add_page(iop, gather, iova + i * size, size); } - - ptep++; - i++; } + /* Clear the remaining entries */ + __arm_lpae_clear_pte(ptep, &iop->cfg, i); + + if (gather && !iommu_iotlb_gather_queued(gather)) + for (int j = 0; j < i; j++) + io_pgtable_tlb_add_page(iop, gather, iova + j * size, size); + return i * size; } else if (iopte_leaf(pte, lvl, iop->fmt)) { /* -- Gitee From c3b59dc9c3eb7e0493eb44aca3ffc994d000d946 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 29 Aug 2024 15:34:30 -0700 Subject: [PATCH 39/99] iommu/arm-smmu-v3: Issue a batch of commands to the same cmdq ANBZ: #13617 commit 56ae8866f3b408836c5f6cafbe6102f6e97911ba upstream. The driver calls the arm_smmu_get_cmdq() helper in different places, and it's fine to do so since the helper always returns the single SMMU CMDQ. However, with the NVIDIA CMDQV extension or SMMU ECMDQ, there can be multiple cmdqs in the system to select from, and either case requires a batch of commands to be issued to the same cmdq. Thus, a cmdq has to be decided in the higher-level callers. Add a cmdq pointer to the arm_smmu_cmdq_batch structure, and decide the cmdq when initializing the batch. Pass its pointer down to the bottom function. Update __arm_smmu_cmdq_issue_cmd() accordingly for single-command issuers.
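Seen from a caller, the batch is now pinned to one cmdq for its whole lifetime. A minimal usage sketch, mirroring the arm_smmu_sync_cd() hunk in the diff below (surrounding declarations elided):

	struct arm_smmu_cmdq_batch cmds;
	int i;

	/* The cmdq is chosen exactly once, when the batch is initialized */
	arm_smmu_cmdq_batch_init(smmu, &cmds);	/* cmds.cmdq = arm_smmu_get_cmdq(smmu) */

	for (i = 0; i < master->num_streams; i++) {
		cmd.cfgi.sid = master->streams[i].id;
		/* any mid-batch flush inside batch_add() reuses cmds.cmdq */
		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
	}

	/* issues the remaining commands to cmds.cmdq, with a trailing CMD_SYNC */
	arm_smmu_cmdq_batch_submit(smmu, &cmds);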
Suggested-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/2cbf5ddefb6ea611e48d67c642271bd24421eb21.1724970714.git.nicolinc@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 48 +++++++++++++-------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 1 + 2 files changed, 31 insertions(+), 18 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index a3fd78f694b2..f6b250df2467 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -592,11 +592,11 @@ static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq, /* Wait for the command queue to become non-full */ static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq, struct arm_smmu_ll_queue *llq) { unsigned long flags; struct arm_smmu_queue_poll qp; - struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu); int ret = 0; /* @@ -627,11 +627,11 @@ static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu, * Must be called with the cmdq lock held in some capacity. */ static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq, struct arm_smmu_ll_queue *llq) { int ret = 0; struct arm_smmu_queue_poll qp; - struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu); u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod)); queue_poll_init(smmu, &qp); @@ -651,10 +651,10 @@ static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu, * Must be called with the cmdq lock held in some capacity. */ static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq, struct arm_smmu_ll_queue *llq) { struct arm_smmu_queue_poll qp; - struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu); u32 prod = llq->prod; int ret = 0; @@ -701,12 +701,13 @@ static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu, } static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq, struct arm_smmu_ll_queue *llq) { if (smmu->options & ARM_SMMU_OPT_MSIPOLL) - return __arm_smmu_cmdq_poll_until_msi(smmu, llq); + return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq); - return __arm_smmu_cmdq_poll_until_consumed(smmu, llq); + return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq); } static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds, @@ -743,13 +744,13 @@ static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds, * CPU will appear before any of the commands from the other CPU. */ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq, u64 *cmds, int n, bool sync) { u64 cmd_sync[CMDQ_ENT_DWORDS]; u32 prod; unsigned long flags; bool owner; - struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu); struct arm_smmu_ll_queue llq, head; int ret = 0; @@ -763,7 +764,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, while (!queue_has_space(&llq, n + sync)) { local_irq_restore(flags); - if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq)) + if (arm_smmu_cmdq_poll_until_not_full(smmu, cmdq, &llq)) dev_err_ratelimited(smmu->dev, "CMDQ timeout\n"); local_irq_save(flags); } @@ -839,7 +840,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, /* 5. 
If we are inserting a CMD_SYNC, we must wait for it to complete */ if (sync) { llq.prod = queue_inc_prod_n(&llq, n); - ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq); + ret = arm_smmu_cmdq_poll_until_sync(smmu, cmdq, &llq); if (ret) { dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n", @@ -874,7 +875,8 @@ static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, return -EINVAL; } - return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync); + return arm_smmu_cmdq_issue_cmdlist( + smmu, arm_smmu_get_cmdq(smmu), cmd, 1, sync); } static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, @@ -889,6 +891,13 @@ static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu, return __arm_smmu_cmdq_issue_cmd(smmu, ent, true); } +static void arm_smmu_cmdq_batch_init(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq_batch *cmds) +{ + cmds->num = 0; + cmds->cmdq = arm_smmu_get_cmdq(smmu); +} + static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu, struct arm_smmu_cmdq_batch *cmds, struct arm_smmu_cmdq_ent *cmd) @@ -897,13 +906,15 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu, if (cmds->num == CMDQ_BATCH_ENTRIES - 1 && (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) { - arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true); - cmds->num = 0; + arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds, + cmds->num, true); + arm_smmu_cmdq_batch_init(smmu, cmds); } if (cmds->num == CMDQ_BATCH_ENTRIES) { - arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false); - cmds->num = 0; + arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds, + cmds->num, false); + arm_smmu_cmdq_batch_init(smmu, cmds); } index = cmds->num * CMDQ_ENT_DWORDS; @@ -919,7 +930,8 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu, static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu, struct arm_smmu_cmdq_batch *cmds) { - return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true); + return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds, + cmds->num, true); } static void arm_smmu_page_response(struct device *dev, struct iopf_fault *unused, @@ -1170,7 +1182,7 @@ static void arm_smmu_sync_cd(struct arm_smmu_master *master, }, }; - cmds.num = 0; + arm_smmu_cmdq_batch_init(smmu, &cmds); for (i = 0; i < master->num_streams; i++) { cmd.cfgi.sid = master->streams[i].id; arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); @@ -2032,7 +2044,7 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master, arm_smmu_atc_inv_to_cmd(ssid, 0, 0, &cmd); - cmds.num = 0; + arm_smmu_cmdq_batch_init(master->smmu, &cmds); for (i = 0; i < master->num_streams; i++) { cmd.atc.sid = master->streams[i].id; arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd); @@ -2070,7 +2082,7 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, if (!atomic_read(&smmu_domain->nr_ats_masters)) return 0; - cmds.num = 0; + arm_smmu_cmdq_batch_init(smmu_domain->smmu, &cmds); spin_lock_irqsave(&smmu_domain->devices_lock, flags); list_for_each_entry(master_domain, &smmu_domain->devices, @@ -2152,7 +2164,7 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd, num_pages++; } - cmds.num = 0; + arm_smmu_cmdq_batch_init(smmu, &cmds); while (iova < end) { if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) { diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index ee4e013d5fcc..d21411c2f628 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -570,6 +570,7 @@ struct arm_smmu_cmdq { struct arm_smmu_cmdq_batch { u64 cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS]; + struct arm_smmu_cmdq *cmdq; int num; }; -- Gitee From 8c6096af39062722c9a450ffdbc7d3ceb4a57954 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 29 Aug 2024 15:34:31 -0700 Subject: [PATCH 40/99] iommu/arm-smmu-v3: Pass in cmdq pointer to arm_smmu_cmdq_build_sync_cmd ANBZ: #13617 commit 2ea1f0120f900b2643afc71cc6bf5bab52df27d8 upstream. The CMDQV extension on NVIDIA Tegra241 SoC only supports CS_NONE in the CS field of CMD_SYNC, v.s. standard SMMU CMDQ. Pass in the cmdq pointer directly, so the function can identify a different cmdq implementation. Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/723288287997b6dfbcd2a904d2c11e9b23f82250.1724970714.git.nicolinc@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index f6b250df2467..c9614fe7f9a7 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -352,8 +352,9 @@ static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu) } static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, - struct arm_smmu_queue *q, u32 prod) + struct arm_smmu_cmdq *cmdq, u32 prod) { + struct arm_smmu_queue *q = &cmdq->q; struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC, }; @@ -371,7 +372,7 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, } static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu, - struct arm_smmu_queue *q) + struct arm_smmu_cmdq *cmdq) { static const char * const cerror_str[] = { [CMDQ_ERR_CERROR_NONE_IDX] = "No error", @@ -379,6 +380,7 @@ static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu, [CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch", [CMDQ_ERR_CERROR_ATC_INV_IDX] = "ATC invalidate timeout", }; + struct arm_smmu_queue *q = &cmdq->q; int i; u64 cmd[CMDQ_ENT_DWORDS]; @@ -427,7 +429,7 @@ static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu, static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu) { - __arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q); + __arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq); } /* @@ -790,7 +792,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n); if (sync) { prod = queue_inc_prod_n(&llq, n); - arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod); + arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, cmdq, prod); queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS); /* -- Gitee From b3c52436866d2972268921b8b193e724745b6999 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 29 Aug 2024 15:34:32 -0700 Subject: [PATCH 41/99] iommu/arm-smmu-v3: Pass in cmdq pointer to arm_smmu_cmdq_init ANBZ: #13617 commit e736c895c45bfcf9a9c675022e51fcabbb33e748 upstream. So that this function can be used by other cmdqs than &smmu->cmdq only. 
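As a usage sketch, after this change the same initializer serves both the main queue and any implementation-specific queue; "my_vcmdq" below is a placeholder, not a real driver symbol:

	/* The main CMDQ is initialized exactly as before ... */
	ret = arm_smmu_cmdq_init(smmu, &smmu->cmdq);
	if (ret)
		return ret;

	/* ... and a hypothetical secondary queue can now reuse the same
	 * queue bookkeeping setup instead of duplicating it. */
	ret = arm_smmu_cmdq_init(smmu, &my_vcmdq->cmdq);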
Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/e11a3c0bde172c9652c2946f12bc2ceed4c3a355.1724970714.git.nicolinc@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index c9614fe7f9a7..88e1bf97116a 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3575,9 +3575,9 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, return 0; } -static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu) +static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq) { - struct arm_smmu_cmdq *cmdq = &smmu->cmdq; unsigned int nents = 1 << cmdq->q.llq.max_n_shift; atomic_set(&cmdq->owner_prod, 0); @@ -3602,7 +3602,7 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu) if (ret) return ret; - ret = arm_smmu_cmdq_init(smmu); + ret = arm_smmu_cmdq_init(smmu, &smmu->cmdq); if (ret) return ret; -- Gitee From 6ac354f9cdba107153820657d782662fb2738aa8 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 29 Aug 2024 15:34:33 -0700 Subject: [PATCH 42/99] iommu/arm-smmu-v3: Make symbols public for CONFIG_TEGRA241_CMDQV ANBZ: #13617 commit a7a08b857a32d2f17fb9aba42e2c30d816ce5f1c upstream. The symbols __arm_smmu_cmdq_skip_err(), arm_smmu_init_one_queue(), and arm_smmu_cmdq_init() need to be used by the tegra241-cmdqv compilation unit in a following patch. Remove the static and put prototypes in the header. Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/c4f2aa5f5f40a2e7c68b132c6d3171d6403de57a.1724970714.git.nicolinc@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 18 ++++++++---------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 9 +++++++++ 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 88e1bf97116a..24ffd86a8cd6 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -371,8 +371,8 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, arm_smmu_cmdq_build_cmd(cmd, &ent); } -static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu, - struct arm_smmu_cmdq *cmdq) +void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq) { static const char * const cerror_str[] = { [CMDQ_ERR_CERROR_NONE_IDX] = "No error", @@ -3532,12 +3532,10 @@ static struct iommu_dirty_ops arm_smmu_dirty_ops = { }; /* Probing and initialisation functions */ -static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, - struct arm_smmu_queue *q, - void __iomem *page, - unsigned long prod_off, - unsigned long cons_off, - size_t dwords, const char *name) +int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, + struct arm_smmu_queue *q, void __iomem *page, + unsigned long prod_off, unsigned long cons_off, + size_t dwords, const char *name) { size_t qsz; @@ -3575,8 +3573,8 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, return 0; } -static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu, - struct arm_smmu_cmdq *cmdq) +int arm_smmu_cmdq_init(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq) { unsigned int nents 
= 1 << cmdq->q.llq.max_n_shift; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index d21411c2f628..ad295f4928b4 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -816,6 +816,15 @@ void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, unsigned long iova, size_t size); +void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq); +int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, + struct arm_smmu_queue *q, void __iomem *page, + unsigned long prod_off, unsigned long cons_off, + size_t dwords, const char *name); +int arm_smmu_cmdq_init(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq); + #ifdef CONFIG_ARM_SMMU_V3_SVA bool arm_smmu_sva_supported(struct arm_smmu_device *smmu); bool arm_smmu_master_sva_supported(struct arm_smmu_master *master); -- Gitee From 6ecf4d69bb11e1cd2067ce0e60684c09a23414c2 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 29 Aug 2024 15:34:34 -0700 Subject: [PATCH 43/99] iommu/arm-smmu-v3: Add ARM_SMMU_OPT_TEGRA241_CMDQV ANBZ: #13617 commit b935a5b1c670c0a167f1263df5647b1b5b06e806 upstream. The CMDQV extension in NVIDIA Tegra241 SoC only supports CS_NONE in the CS field of CMD_SYNC. Add a new SMMU option to accommodate that. Suggested-by: Will Deacon Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/a3cb9bb2429fbae4a59f7ef517614d226763d717.1724970714.git.nicolinc@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 16 +++++++++++++++- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 1 + 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 24ffd86a8cd6..94dbb4cec9c2 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -351,6 +351,15 @@ static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu) return &smmu->cmdq; } +static bool arm_smmu_cmdq_needs_busy_polling(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq) +{ + if (cmdq == &smmu->cmdq) + return false; + + return smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV; +} + static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, struct arm_smmu_cmdq *cmdq, u32 prod) { @@ -369,6 +378,8 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, } arm_smmu_cmdq_build_cmd(cmd, &ent); + if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq)) + u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS); } void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu, @@ -423,6 +434,8 @@ void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu, /* Convert the erroneous command into a CMD_SYNC */ arm_smmu_cmdq_build_cmd(cmd, &cmd_sync); + if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq)) + u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS); queue_write(Q_ENT(q, cons), cmd, q->ent_dwords); } @@ -706,7 +719,8 @@ static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu, struct arm_smmu_cmdq *cmdq, struct arm_smmu_ll_queue *llq) { - if (smmu->options & ARM_SMMU_OPT_MSIPOLL) + if (smmu->options & ARM_SMMU_OPT_MSIPOLL && + !arm_smmu_cmdq_needs_busy_polling(smmu, cmdq)) return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq); return 
__arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index ad295f4928b4..9950b6ef6e1e 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -665,6 +665,7 @@ struct arm_smmu_device { #define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1) #define ARM_SMMU_OPT_MSIPOLL (1 << 2) #define ARM_SMMU_OPT_CMDQ_FORCE_SYNC (1 << 3) +#define ARM_SMMU_OPT_TEGRA241_CMDQV (1 << 4) u32 options; struct arm_smmu_cmdq cmdq; -- Gitee From ec7e6f71020155831f71984ffaeb6c7ca5207061 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 29 Aug 2024 15:34:35 -0700 Subject: [PATCH 44/99] iommu/arm-smmu-v3: Add acpi_smmu_iort_probe_model for impl ANBZ: #13617 commit 6f3f9ff43d005571a8d70d4a562ed7c4150e324c upstream. For model-specific implementations, repurpose acpi_smmu_get_options() into a wider acpi_smmu_iort_probe_model(). A new model can be added to the list in this new function. Suggested-by: Will Deacon Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/79716299829aeab2e55b8c7932f2634b209bb4d5.1724970714.git.nicolinc@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 94dbb4cec9c2..de36c695fe8e 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -4352,18 +4352,25 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) } #ifdef CONFIG_ACPI -static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu) +static int acpi_smmu_iort_probe_model(struct acpi_iort_node *node, + struct arm_smmu_device *smmu) { - switch (model) { + struct acpi_iort_smmu_v3 *iort_smmu = + (struct acpi_iort_smmu_v3 *)node->node_data; + + switch (iort_smmu->model) { case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX: smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY; break; case ACPI_IORT_SMMU_V3_HISILICON_HI161X: smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH; break; + case ACPI_IORT_SMMU_V3_GENERIC: + break; } dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options); + return 0; } static int arm_smmu_device_acpi_probe(struct platform_device *pdev, @@ -4378,8 +4385,6 @@ static int arm_smmu_device_acpi_probe(struct platform_device *pdev, /* Retrieve SMMUv3 specific data */ iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data; - acpi_smmu_get_options(iort_smmu->model, smmu); - if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE) smmu->features |= ARM_SMMU_FEAT_COHERENCY; @@ -4391,7 +4396,7 @@ static int arm_smmu_device_acpi_probe(struct platform_device *pdev, smmu->features |= ARM_SMMU_FEAT_HA; } - return 0; + return acpi_smmu_iort_probe_model(node, smmu); } #else static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev, -- Gitee From 98f13ef9a38cfdd61c6d76ffc5eed42e359b1135 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 29 Aug 2024 15:34:36 -0700 Subject: [PATCH 45/99] iommu/arm-smmu-v3: Add struct arm_smmu_impl_ops ANBZ: #13617 commit 6de80d619203c672e5c011e8715bd965d27b69cf upstream. Mimicking the arm-smmu (v2) driver, introduce a struct arm_smmu_impl_ops to accommodate impl routines.
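A hypothetical vendor implementation would hook in roughly as below (sketch only; every "my_impl" name is a placeholder, and the Tegra241 CMDQV patch that follows is the first real user of struct arm_smmu_impl_ops):

/* Extra implementation-specific HW reset, if any (optional hook) */
static int my_impl_device_reset(struct arm_smmu_device *smmu)
{
	return 0;
}

/* Teardown when the SMMU driver goes away (optional hook) */
static void my_impl_device_remove(struct arm_smmu_device *smmu)
{
}

static struct arm_smmu_cmdq *
my_impl_get_secondary_cmdq(struct arm_smmu_device *smmu)
{
	return NULL;	/* NULL makes arm_smmu_get_cmdq() use &smmu->cmdq */
}

static const struct arm_smmu_impl_ops my_impl_ops = {
	.device_reset		= my_impl_device_reset,
	.device_remove		= my_impl_device_remove,
	.get_secondary_cmdq	= my_impl_get_secondary_cmdq,
};

/* set during the implementation's probe, cf. arm_smmu_impl_probe() below */
smmu->impl_ops = &my_impl_ops;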
Suggested-by: Will Deacon Signed-off-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/8fe9f3805568aabf771fc6706c116459016bf62d.1724970714.git.nicolinc@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 51 ++++++++++++++++++++- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 10 ++++ 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index de36c695fe8e..f476ff96c71f 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -348,7 +348,12 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu) { - return &smmu->cmdq; + struct arm_smmu_cmdq *cmdq = NULL; + + if (smmu->impl_ops && smmu->impl_ops->get_secondary_cmdq) + cmdq = smmu->impl_ops->get_secondary_cmdq(smmu); + + return cmdq ?: &smmu->cmdq; } static bool arm_smmu_cmdq_needs_busy_polling(struct arm_smmu_device *smmu, @@ -4063,6 +4068,14 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu) return ret; } + if (smmu->impl_ops && smmu->impl_ops->device_reset) { + ret = smmu->impl_ops->device_reset(smmu); + if (ret) { + dev_err(smmu->dev, "failed to reset impl\n"); + return ret; + } + } + return 0; } @@ -4477,6 +4490,38 @@ static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu) iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list); } +static void arm_smmu_impl_remove(void *data) +{ + struct arm_smmu_device *smmu = data; + + if (smmu->impl_ops && smmu->impl_ops->device_remove) + smmu->impl_ops->device_remove(smmu); +} + +/* + * Probe all the compiled in implementations. Each one checks to see if it + * matches this HW and if so returns a devm_krealloc'd arm_smmu_device which + * replaces the callers. Otherwise the original is returned or ERR_PTR. 
+ */ +static struct arm_smmu_device *arm_smmu_impl_probe(struct arm_smmu_device *smmu) +{ + struct arm_smmu_device *new_smmu = ERR_PTR(-ENODEV); + int ret; + + /* Add impl probe */ + + if (new_smmu == ERR_PTR(-ENODEV)) + return smmu; + if (IS_ERR(new_smmu)) + return new_smmu; + + ret = devm_add_action_or_reset(new_smmu->dev, arm_smmu_impl_remove, + new_smmu); + if (ret) + return ERR_PTR(ret); + return new_smmu; +} + static int arm_smmu_device_probe(struct platform_device *pdev) { int irq, ret; @@ -4498,6 +4543,10 @@ static int arm_smmu_device_probe(struct platform_device *pdev) if (ret) return ret; + smmu = arm_smmu_impl_probe(smmu); + if (IS_ERR(smmu)) + return PTR_ERR(smmu); + /* Base address */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 9950b6ef6e1e..9d616040c54b 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -14,6 +14,8 @@ #include #include +struct arm_smmu_device; + /* MMIO registers */ #define ARM_SMMU_IDR0 0x0 #define IDR0_ST_LVL GENMASK(28, 27) @@ -630,9 +632,17 @@ struct arm_smmu_strtab_cfg { u32 strtab_base_cfg; }; +struct arm_smmu_impl_ops { + int (*device_reset)(struct arm_smmu_device *smmu); + void (*device_remove)(struct arm_smmu_device *smmu); + struct arm_smmu_cmdq *(*get_secondary_cmdq)(struct arm_smmu_device *smmu); +}; + /* An SMMUv3 instance */ struct arm_smmu_device { struct device *dev; + const struct arm_smmu_impl_ops *impl_ops; + void __iomem *base; void __iomem *page1; -- Gitee From 2eb3f1cdb86ba2081f8ed311eb558b7b7ad0d10f Mon Sep 17 00:00:00 2001 From: Nate Watterson Date: Thu, 29 Aug 2024 15:34:37 -0700 Subject: [PATCH 46/99] iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV ANBZ: #13617 commit 918eb5c856f6ce4cf93b4b38e4b5e156905c5943 upstream. NVIDIA's Tegra241 SoC has a CMDQ-Virtualization (CMDQV) hardware, extending the standard ARM SMMU v3 IP to support multiple VCMDQs with virtualization capabilities. In terms of command queue, they are very much like a standard SMMU CMDQ (or ECMDQs), but only support CS_NONE in the CS field of CMD_SYNC. Add a new tegra241-cmdqv driver, insert its structure pointer into the existing arm_smmu_device, and then add related function calls in the SMMUv3 driver to interact with the CMDQV driver. In the CMDQV driver, add a minimal part for the in-kernel support: reserve VINTF0 for in-kernel use, assign some of the VCMDQs to VINTF0, and select one VCMDQ based on the current CPU ID to execute supported commands. This multi-queue design for in-kernel use gives some limited improvements: up to 20% reduction of invalidation time was measured by a multi-threaded DMA unmap benchmark, compared to a single queue. The other part of the CMDQV driver will be user-space support that lets a hypervisor running on the host OS talk to the driver for virtualization use cases, allowing VMs to use VCMDQs without trapping, i.e. no VM Exits. This is designed based on IOMMUFD, and its RFC series is also under review. It will provide a guest OS with a bigger improvement: 70% to 90% reductions of TLB invalidation time were measured by DMA unmap tests running in a guest, compared to a nested SMMU CMDQ (with trapping). As the initial version, the CMDQV driver only supports ACPI configurations.
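To make the queue topology concrete, here is a worked example of the CMDQV_PARAM decode and the per-CPU queue selection used by the driver below; the register field values are hypothetical:

	/* Hypothetical PARAM readout: NUM_VINTF_LOG2 = 6, NUM_VCMDQ_LOG2 = 7 */
	u16 num_vintfs            = 1 << 6;			/*  64 virtual interfaces */
	u16 num_vcmdqs            = 1 << 7;			/* 128 VCMDQs in total    */
	u16 num_lvcmdqs_per_vintf = num_vcmdqs / num_vintfs;	/*   2 per VINTF          */

	/*
	 * The in-kernel path reserves VINTF0 and spreads concurrent issuers
	 * across its logical VCMDQs by CPU ID; each queue has its own lock,
	 * so two CPUs usually do not contend on the same queue.
	 */
	u16 lidx = smp_processor_id() % num_lvcmdqs_per_vintf;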
Signed-off-by: Nate Watterson Reviewed-by: Jason Gunthorpe Co-developed-by: Nicolin Chen Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/dce50490b2c10b7254fb36aa73ed7ffd812b283a.1724970714.git.nicolinc@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- MAINTAINERS | 1 + drivers/iommu/Kconfig | 11 + drivers/iommu/arm/arm-smmu-v3/Makefile | 1 + drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 33 +- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 11 + .../iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 858 ++++++++++++++++++ 6 files changed, 914 insertions(+), 1 deletion(-) create mode 100644 drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c diff --git a/MAINTAINERS b/MAINTAINERS index e9aa3f7a1b28..6046cb477fff 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21264,6 +21264,7 @@ M: Thierry Reding R: Krishna Reddy L: linux-tegra@vger.kernel.org S: Supported +F: drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c F: drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c F: drivers/iommu/tegra* diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 488757e48dd0..f7432745d9bb 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -425,6 +425,17 @@ config ARM_SMMU_V3_KUNIT_TEST Enable this option to unit-test arm-smmu-v3 driver functions. If unsure, say N. + +config TEGRA241_CMDQV + bool "NVIDIA Tegra241 CMDQ-V extension support for ARM SMMUv3" + depends on ACPI + help + Support for NVIDIA CMDQ-Virtualization extension for ARM SMMUv3. The + CMDQ-V extension is similar to v3.3 ECMDQ for multi command queues + support, except with virtualization capabilities. + + Say Y here if your system is NVIDIA Tegra241 (Grace) or it has the same + CMDQ-V extension. endif config S390_IOMMU diff --git a/drivers/iommu/arm/arm-smmu-v3/Makefile b/drivers/iommu/arm/arm-smmu-v3/Makefile index 355173d1441d..dc98c88b48c8 100644 --- a/drivers/iommu/arm/arm-smmu-v3/Makefile +++ b/drivers/iommu/arm/arm-smmu-v3/Makefile @@ -2,5 +2,6 @@ obj-$(CONFIG_ARM_SMMU_V3) += arm_smmu_v3.o arm_smmu_v3-y := arm-smmu-v3.o arm_smmu_v3-$(CONFIG_ARM_SMMU_V3_SVA) += arm-smmu-v3-sva.o +arm_smmu_v3-$(CONFIG_TEGRA241_CMDQV) += tegra241-cmdqv.o obj-$(CONFIG_ARM_SMMU_V3_KUNIT_TEST) += arm-smmu-v3-test.o diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index f476ff96c71f..e90baaf073ce 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -4365,6 +4365,31 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) } #ifdef CONFIG_ACPI +#ifdef CONFIG_TEGRA241_CMDQV +static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node, + struct arm_smmu_device *smmu) +{ + const char *uid = kasprintf(GFP_KERNEL, "%u", node->identifier); + struct acpi_device *adev; + + /* Look for an NVDA200C node whose _UID matches the SMMU node ID */ + adev = acpi_dev_get_first_match_dev("NVDA200C", uid, -1); + if (adev) { + /* Tegra241 CMDQV driver is responsible for put_device() */ + smmu->impl_dev = &adev->dev; + smmu->options |= ARM_SMMU_OPT_TEGRA241_CMDQV; + dev_info(smmu->dev, "found companion CMDQV device: %s\n", + dev_name(smmu->impl_dev)); + } + kfree(uid); +} +#else +static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node, + struct arm_smmu_device *smmu) +{ +} +#endif + static int acpi_smmu_iort_probe_model(struct acpi_iort_node *node, struct arm_smmu_device *smmu) { @@ -4379,6 +4404,11 @@ static int acpi_smmu_iort_probe_model(struct acpi_iort_node *node, smmu->options |= 
ARM_SMMU_OPT_SKIP_PREFETCH; break; case ACPI_IORT_SMMU_V3_GENERIC: + /* + * Tegra241 implementation stores its SMMU options and impl_dev + * in DSDT. Thus, go through the ACPI tables unconditionally. + */ + acpi_smmu_dsdt_probe_tegra241_cmdqv(node, smmu); break; } @@ -4508,7 +4538,8 @@ static struct arm_smmu_device *arm_smmu_impl_probe(struct arm_smmu_device *smmu) struct arm_smmu_device *new_smmu = ERR_PTR(-ENODEV); int ret; - /* Add impl probe */ + if (smmu->impl_dev && (smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV)) + new_smmu = tegra241_cmdqv_probe(smmu); if (new_smmu == ERR_PTR(-ENODEV)) return smmu; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 9d616040c54b..e22a1441d50c 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -641,6 +641,7 @@ struct arm_smmu_impl_ops { /* An SMMUv3 instance */ struct arm_smmu_device { struct device *dev; + struct device *impl_dev; const struct arm_smmu_impl_ops *impl_ops; void __iomem *base; @@ -882,4 +883,14 @@ static inline void arm_smmu_sva_notifier_synchronize(void) {} #define arm_smmu_sva_domain_alloc NULL #endif /* CONFIG_ARM_SMMU_V3_SVA */ + +#ifdef CONFIG_TEGRA241_CMDQV +struct arm_smmu_device *tegra241_cmdqv_probe(struct arm_smmu_device *smmu); +#else /* CONFIG_TEGRA241_CMDQV */ +static inline struct arm_smmu_device * +tegra241_cmdqv_probe(struct arm_smmu_device *smmu) +{ + return ERR_PTR(-ENODEV); +} +#endif /* CONFIG_TEGRA241_CMDQV */ #endif /* _ARM_SMMU_V3_H */ diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c new file mode 100644 index 000000000000..5ac3032ee6dd --- /dev/null +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -0,0 +1,858 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2021-2024 NVIDIA CORPORATION & AFFILIATES. 
*/ + +#define dev_fmt(fmt) "tegra241_cmdqv: " fmt + +#include +#include +#include +#include +#include +#include + +#include + +#include "arm-smmu-v3.h" + +/* CMDQV register page base and size defines */ +#define TEGRA241_CMDQV_CONFIG_BASE (0) +#define TEGRA241_CMDQV_CONFIG_SIZE (SZ_64K) +#define TEGRA241_VCMDQ_PAGE0_BASE (TEGRA241_CMDQV_CONFIG_BASE + SZ_64K) +#define TEGRA241_VCMDQ_PAGE1_BASE (TEGRA241_VCMDQ_PAGE0_BASE + SZ_64K) +#define TEGRA241_VINTF_PAGE_BASE (TEGRA241_VCMDQ_PAGE1_BASE + SZ_64K) + +/* CMDQV global base regs */ +#define TEGRA241_CMDQV_CONFIG 0x0000 +#define CMDQV_EN BIT(0) + +#define TEGRA241_CMDQV_PARAM 0x0004 +#define CMDQV_NUM_VINTF_LOG2 GENMASK(11, 8) +#define CMDQV_NUM_VCMDQ_LOG2 GENMASK(7, 4) + +#define TEGRA241_CMDQV_STATUS 0x0008 +#define CMDQV_ENABLED BIT(0) + +#define TEGRA241_CMDQV_VINTF_ERR_MAP 0x0014 +#define TEGRA241_CMDQV_VINTF_INT_MASK 0x001C +#define TEGRA241_CMDQV_CMDQ_ERR_MAP(m) (0x0024 + 0x4*(m)) + +#define TEGRA241_CMDQV_CMDQ_ALLOC(q) (0x0200 + 0x4*(q)) +#define CMDQV_CMDQ_ALLOC_VINTF GENMASK(20, 15) +#define CMDQV_CMDQ_ALLOC_LVCMDQ GENMASK(7, 1) +#define CMDQV_CMDQ_ALLOCATED BIT(0) + +/* VINTF base regs */ +#define TEGRA241_VINTF(v) (0x1000 + 0x100*(v)) + +#define TEGRA241_VINTF_CONFIG 0x0000 +#define VINTF_HYP_OWN BIT(17) +#define VINTF_VMID GENMASK(16, 1) +#define VINTF_EN BIT(0) + +#define TEGRA241_VINTF_STATUS 0x0004 +#define VINTF_STATUS GENMASK(3, 1) +#define VINTF_ENABLED BIT(0) + +#define TEGRA241_VINTF_LVCMDQ_ERR_MAP_64(m) \ + (0x00C0 + 0x8*(m)) +#define LVCMDQ_ERR_MAP_NUM_64 2 + +/* VCMDQ base regs */ +/* -- PAGE0 -- */ +#define TEGRA241_VCMDQ_PAGE0(q) (TEGRA241_VCMDQ_PAGE0_BASE + 0x80*(q)) + +#define TEGRA241_VCMDQ_CONS 0x00000 +#define VCMDQ_CONS_ERR GENMASK(30, 24) + +#define TEGRA241_VCMDQ_PROD 0x00004 + +#define TEGRA241_VCMDQ_CONFIG 0x00008 +#define VCMDQ_EN BIT(0) + +#define TEGRA241_VCMDQ_STATUS 0x0000C +#define VCMDQ_ENABLED BIT(0) + +#define TEGRA241_VCMDQ_GERROR 0x00010 +#define TEGRA241_VCMDQ_GERRORN 0x00014 + +/* -- PAGE1 -- */ +#define TEGRA241_VCMDQ_PAGE1(q) (TEGRA241_VCMDQ_PAGE1_BASE + 0x80*(q)) +#define VCMDQ_ADDR GENMASK(47, 5) +#define VCMDQ_LOG2SIZE GENMASK(4, 0) +#define VCMDQ_LOG2SIZE_MAX 19 + +#define TEGRA241_VCMDQ_BASE 0x00000 +#define TEGRA241_VCMDQ_CONS_INDX_BASE 0x00008 + +/* VINTF logical-VCMDQ pages */ +#define TEGRA241_VINTFi_PAGE0(i) (TEGRA241_VINTF_PAGE_BASE + SZ_128K*(i)) +#define TEGRA241_VINTFi_PAGE1(i) (TEGRA241_VINTFi_PAGE0(i) + SZ_64K) +#define TEGRA241_VINTFi_LVCMDQ_PAGE0(i, q) \ + (TEGRA241_VINTFi_PAGE0(i) + 0x80*(q)) +#define TEGRA241_VINTFi_LVCMDQ_PAGE1(i, q) \ + (TEGRA241_VINTFi_PAGE1(i) + 0x80*(q)) + +/* MMIO helpers */ +#define REG_CMDQV(_cmdqv, _regname) \ + ((_cmdqv)->base + TEGRA241_CMDQV_##_regname) +#define REG_VINTF(_vintf, _regname) \ + ((_vintf)->base + TEGRA241_VINTF_##_regname) +#define REG_VCMDQ_PAGE0(_vcmdq, _regname) \ + ((_vcmdq)->page0 + TEGRA241_VCMDQ_##_regname) +#define REG_VCMDQ_PAGE1(_vcmdq, _regname) \ + ((_vcmdq)->page1 + TEGRA241_VCMDQ_##_regname) + + +static bool disable_cmdqv; +module_param(disable_cmdqv, bool, 0444); +MODULE_PARM_DESC(disable_cmdqv, + "This allows to disable CMDQV HW and use default SMMU internal CMDQ."); + +static bool bypass_vcmdq; +module_param(bypass_vcmdq, bool, 0444); +MODULE_PARM_DESC(bypass_vcmdq, + "This allows to bypass VCMDQ for debugging use or perf comparison."); + +/** + * struct tegra241_vcmdq - Virtual Command Queue + * @idx: Global index in the CMDQV + * @lidx: Local index in the VINTF + * @enabled: Enable status + * @cmdqv: Parent 
CMDQV pointer + * @vintf: Parent VINTF pointer + * @cmdq: Command Queue struct + * @page0: MMIO Page0 base address + * @page1: MMIO Page1 base address + */ +struct tegra241_vcmdq { + u16 idx; + u16 lidx; + + bool enabled; + + struct tegra241_cmdqv *cmdqv; + struct tegra241_vintf *vintf; + struct arm_smmu_cmdq cmdq; + + void __iomem *page0; + void __iomem *page1; +}; + +/** + * struct tegra241_vintf - Virtual Interface + * @idx: Global index in the CMDQV + * @enabled: Enable status + * @cmdqv: Parent CMDQV pointer + * @lvcmdqs: List of logical VCMDQ pointers + * @base: MMIO base address + */ +struct tegra241_vintf { + u16 idx; + + bool enabled; + + struct tegra241_cmdqv *cmdqv; + struct tegra241_vcmdq **lvcmdqs; + + void __iomem *base; +}; + +/** + * struct tegra241_cmdqv - CMDQ-V for SMMUv3 + * @smmu: SMMUv3 device + * @dev: CMDQV device + * @base: MMIO base address + * @irq: IRQ number + * @num_vintfs: Total number of VINTFs + * @num_vcmdqs: Total number of VCMDQs + * @num_lvcmdqs_per_vintf: Number of logical VCMDQs per VINTF + * @vintf_ids: VINTF id allocator + * @vintfs: List of VINTFs + */ +struct tegra241_cmdqv { + struct arm_smmu_device smmu; + struct device *dev; + + void __iomem *base; + int irq; + + /* CMDQV Hardware Params */ + u16 num_vintfs; + u16 num_vcmdqs; + u16 num_lvcmdqs_per_vintf; + + struct ida vintf_ids; + + struct tegra241_vintf **vintfs; +}; + +/* Config and Polling Helpers */ + +static inline int tegra241_cmdqv_write_config(struct tegra241_cmdqv *cmdqv, + void __iomem *addr_config, + void __iomem *addr_status, + u32 regval, const char *header, + bool *out_enabled) +{ + bool en = regval & BIT(0); + int ret; + + writel(regval, addr_config); + ret = readl_poll_timeout(addr_status, regval, + en ? regval & BIT(0) : !(regval & BIT(0)), + 1, ARM_SMMU_POLL_TIMEOUT_US); + if (ret) + dev_err(cmdqv->dev, "%sfailed to %sable, STATUS=0x%08X\n", + header, en ? 
"en" : "dis", regval); + if (out_enabled) + WRITE_ONCE(*out_enabled, regval & BIT(0)); + return ret; +} + +static inline int cmdqv_write_config(struct tegra241_cmdqv *cmdqv, u32 regval) +{ + return tegra241_cmdqv_write_config(cmdqv, + REG_CMDQV(cmdqv, CONFIG), + REG_CMDQV(cmdqv, STATUS), + regval, "CMDQV: ", NULL); +} + +static inline int vintf_write_config(struct tegra241_vintf *vintf, u32 regval) +{ + char header[16]; + + snprintf(header, 16, "VINTF%u: ", vintf->idx); + return tegra241_cmdqv_write_config(vintf->cmdqv, + REG_VINTF(vintf, CONFIG), + REG_VINTF(vintf, STATUS), + regval, header, &vintf->enabled); +} + +static inline char *lvcmdq_error_header(struct tegra241_vcmdq *vcmdq, + char *header, int hlen) +{ + WARN_ON(hlen < 32); + if (WARN_ON(!vcmdq->vintf)) + return ""; + snprintf(header, hlen, "VINTF%u: VCMDQ%u/LVCMDQ%u: ", + vcmdq->vintf->idx, vcmdq->idx, vcmdq->lidx); + return header; +} + +static inline int vcmdq_write_config(struct tegra241_vcmdq *vcmdq, u32 regval) +{ + char header[32], *h = lvcmdq_error_header(vcmdq, header, 32); + + return tegra241_cmdqv_write_config(vcmdq->cmdqv, + REG_VCMDQ_PAGE0(vcmdq, CONFIG), + REG_VCMDQ_PAGE0(vcmdq, STATUS), + regval, h, &vcmdq->enabled); +} + +/* ISR Functions */ + +static void tegra241_vintf0_handle_error(struct tegra241_vintf *vintf) +{ + int i; + + for (i = 0; i < LVCMDQ_ERR_MAP_NUM_64; i++) { + u64 map = readq_relaxed(REG_VINTF(vintf, LVCMDQ_ERR_MAP_64(i))); + + while (map) { + unsigned long lidx = __ffs64(map); + struct tegra241_vcmdq *vcmdq = vintf->lvcmdqs[lidx]; + u32 gerror = readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR)); + + __arm_smmu_cmdq_skip_err(&vintf->cmdqv->smmu, &vcmdq->cmdq); + writel(gerror, REG_VCMDQ_PAGE0(vcmdq, GERRORN)); + map &= ~BIT_ULL(lidx); + } + } +} + +static irqreturn_t tegra241_cmdqv_isr(int irq, void *devid) +{ + struct tegra241_cmdqv *cmdqv = (struct tegra241_cmdqv *)devid; + void __iomem *reg_vintf_map = REG_CMDQV(cmdqv, VINTF_ERR_MAP); + char err_str[256]; + u64 vintf_map; + + /* Use readl_relaxed() as register addresses are not 64-bit aligned */ + vintf_map = (u64)readl_relaxed(reg_vintf_map + 0x4) << 32 | + (u64)readl_relaxed(reg_vintf_map); + + snprintf(err_str, sizeof(err_str), + "vintf_map: %016llx, vcmdq_map %08x:%08x:%08x:%08x", vintf_map, + readl_relaxed(REG_CMDQV(cmdqv, CMDQ_ERR_MAP(3))), + readl_relaxed(REG_CMDQV(cmdqv, CMDQ_ERR_MAP(2))), + readl_relaxed(REG_CMDQV(cmdqv, CMDQ_ERR_MAP(1))), + readl_relaxed(REG_CMDQV(cmdqv, CMDQ_ERR_MAP(0)))); + + dev_warn(cmdqv->dev, "unexpected error reported. %s\n", err_str); + + /* Handle VINTF0 and its LVCMDQs */ + if (vintf_map & BIT_ULL(0)) { + tegra241_vintf0_handle_error(cmdqv->vintfs[0]); + vintf_map &= ~BIT_ULL(0); + } + + return IRQ_HANDLED; +} + +/* Command Queue Function */ + +static struct arm_smmu_cmdq * +tegra241_cmdqv_get_cmdq(struct arm_smmu_device *smmu) +{ + struct tegra241_cmdqv *cmdqv = + container_of(smmu, struct tegra241_cmdqv, smmu); + struct tegra241_vintf *vintf = cmdqv->vintfs[0]; + struct tegra241_vcmdq *vcmdq; + u16 lidx; + + if (READ_ONCE(bypass_vcmdq)) + return NULL; + + /* Use SMMU CMDQ if VINTF0 is uninitialized */ + if (!READ_ONCE(vintf->enabled)) + return NULL; + + /* + * Select a LVCMDQ to use. Here we use a temporal solution to + * balance out traffic on cmdq issuing: each cmdq has its own + * lock, if all cpus issue cmdlist using the same cmdq, only + * one CPU at a time can enter the process, while the others + * will be spinning at the same lock. 
+ */ + lidx = smp_processor_id() % cmdqv->num_lvcmdqs_per_vintf; + vcmdq = vintf->lvcmdqs[lidx]; + if (!vcmdq || !READ_ONCE(vcmdq->enabled)) + return NULL; + return &vcmdq->cmdq; +} + +/* HW Reset Functions */ + +static void tegra241_vcmdq_hw_deinit(struct tegra241_vcmdq *vcmdq) +{ + char header[32], *h = lvcmdq_error_header(vcmdq, header, 32); + u32 gerrorn, gerror; + + if (vcmdq_write_config(vcmdq, 0)) { + dev_err(vcmdq->cmdqv->dev, + "%sGERRORN=0x%X, GERROR=0x%X, CONS=0x%X\n", h, + readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERRORN)), + readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR)), + readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, CONS))); + } + writel_relaxed(0, REG_VCMDQ_PAGE0(vcmdq, PROD)); + writel_relaxed(0, REG_VCMDQ_PAGE0(vcmdq, CONS)); + writeq_relaxed(0, REG_VCMDQ_PAGE1(vcmdq, BASE)); + writeq_relaxed(0, REG_VCMDQ_PAGE1(vcmdq, CONS_INDX_BASE)); + + gerrorn = readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERRORN)); + gerror = readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR)); + if (gerror != gerrorn) { + dev_warn(vcmdq->cmdqv->dev, + "%suncleared error detected, resetting\n", h); + writel(gerror, REG_VCMDQ_PAGE0(vcmdq, GERRORN)); + } + + dev_dbg(vcmdq->cmdqv->dev, "%sdeinited\n", h); +} + +static int tegra241_vcmdq_hw_init(struct tegra241_vcmdq *vcmdq) +{ + char header[32], *h = lvcmdq_error_header(vcmdq, header, 32); + int ret; + + /* Reset VCMDQ */ + tegra241_vcmdq_hw_deinit(vcmdq); + + /* Configure and enable VCMDQ */ + writeq_relaxed(vcmdq->cmdq.q.q_base, REG_VCMDQ_PAGE1(vcmdq, BASE)); + + ret = vcmdq_write_config(vcmdq, VCMDQ_EN); + if (ret) { + dev_err(vcmdq->cmdqv->dev, + "%sGERRORN=0x%X, GERROR=0x%X, CONS=0x%X\n", h, + readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERRORN)), + readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR)), + readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, CONS))); + return ret; + } + + dev_dbg(vcmdq->cmdqv->dev, "%sinited\n", h); + return 0; +} + +static void tegra241_vintf_hw_deinit(struct tegra241_vintf *vintf) +{ + u16 lidx; + + for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++) + if (vintf->lvcmdqs && vintf->lvcmdqs[lidx]) + tegra241_vcmdq_hw_deinit(vintf->lvcmdqs[lidx]); + vintf_write_config(vintf, 0); +} + +static int tegra241_vintf_hw_init(struct tegra241_vintf *vintf, bool hyp_own) +{ + u32 regval; + u16 lidx; + int ret; + + /* Reset VINTF */ + tegra241_vintf_hw_deinit(vintf); + + /* Configure and enable VINTF */ + regval = FIELD_PREP(VINTF_HYP_OWN, hyp_own); + writel(regval, REG_VINTF(vintf, CONFIG)); + + ret = vintf_write_config(vintf, regval | VINTF_EN); + if (ret) + return ret; + + for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++) { + if (vintf->lvcmdqs && vintf->lvcmdqs[lidx]) { + ret = tegra241_vcmdq_hw_init(vintf->lvcmdqs[lidx]); + if (ret) { + tegra241_vintf_hw_deinit(vintf); + return ret; + } + } + } + + return 0; +} + +static int tegra241_cmdqv_hw_reset(struct arm_smmu_device *smmu) +{ + struct tegra241_cmdqv *cmdqv = + container_of(smmu, struct tegra241_cmdqv, smmu); + u16 qidx, lidx, idx; + u32 regval; + int ret; + + /* Reset CMDQV */ + regval = readl_relaxed(REG_CMDQV(cmdqv, CONFIG)); + ret = cmdqv_write_config(cmdqv, regval & ~CMDQV_EN); + if (ret) + return ret; + ret = cmdqv_write_config(cmdqv, regval | CMDQV_EN); + if (ret) + return ret; + + /* Assign preallocated global VCMDQs to each VINTF as LVCMDQs */ + for (idx = 0, qidx = 0; idx < cmdqv->num_vintfs; idx++) { + for (lidx = 0; lidx < cmdqv->num_lvcmdqs_per_vintf; lidx++) { + regval = FIELD_PREP(CMDQV_CMDQ_ALLOC_VINTF, idx); + regval |= FIELD_PREP(CMDQV_CMDQ_ALLOC_LVCMDQ, lidx); + regval |= 
CMDQV_CMDQ_ALLOCATED; + writel_relaxed(regval, + REG_CMDQV(cmdqv, CMDQ_ALLOC(qidx++))); + } + } + + return tegra241_vintf_hw_init(cmdqv->vintfs[0], true); +} + +/* VCMDQ Resource Helpers */ + +static void tegra241_vcmdq_free_smmu_cmdq(struct tegra241_vcmdq *vcmdq) +{ + struct arm_smmu_queue *q = &vcmdq->cmdq.q; + size_t nents = 1 << q->llq.max_n_shift; + size_t qsz = nents << CMDQ_ENT_SZ_SHIFT; + + if (!q->base) + return; + dmam_free_coherent(vcmdq->cmdqv->smmu.dev, qsz, q->base, q->base_dma); +} + +static int tegra241_vcmdq_alloc_smmu_cmdq(struct tegra241_vcmdq *vcmdq) +{ + struct arm_smmu_device *smmu = &vcmdq->cmdqv->smmu; + struct arm_smmu_cmdq *cmdq = &vcmdq->cmdq; + struct arm_smmu_queue *q = &cmdq->q; + char name[16]; + int ret; + + snprintf(name, 16, "vcmdq%u", vcmdq->idx); + + q->llq.max_n_shift = VCMDQ_LOG2SIZE_MAX; + + /* Use the common helper to init the VCMDQ, and then... */ + ret = arm_smmu_init_one_queue(smmu, q, vcmdq->page0, + TEGRA241_VCMDQ_PROD, TEGRA241_VCMDQ_CONS, + CMDQ_ENT_DWORDS, name); + if (ret) + return ret; + + /* ...override q_base to write VCMDQ_BASE registers */ + q->q_base = q->base_dma & VCMDQ_ADDR; + q->q_base |= FIELD_PREP(VCMDQ_LOG2SIZE, q->llq.max_n_shift); + + return arm_smmu_cmdq_init(smmu, cmdq); +} + +/* VINTF Logical VCMDQ Resource Helpers */ + +static void tegra241_vintf_deinit_lvcmdq(struct tegra241_vintf *vintf, u16 lidx) +{ + vintf->lvcmdqs[lidx] = NULL; +} + +static int tegra241_vintf_init_lvcmdq(struct tegra241_vintf *vintf, u16 lidx, + struct tegra241_vcmdq *vcmdq) +{ + struct tegra241_cmdqv *cmdqv = vintf->cmdqv; + u16 idx = vintf->idx; + + vcmdq->idx = idx * cmdqv->num_lvcmdqs_per_vintf + lidx; + vcmdq->lidx = lidx; + vcmdq->cmdqv = cmdqv; + vcmdq->vintf = vintf; + vcmdq->page0 = cmdqv->base + TEGRA241_VINTFi_LVCMDQ_PAGE0(idx, lidx); + vcmdq->page1 = cmdqv->base + TEGRA241_VINTFi_LVCMDQ_PAGE1(idx, lidx); + + vintf->lvcmdqs[lidx] = vcmdq; + return 0; +} + +static void tegra241_vintf_free_lvcmdq(struct tegra241_vintf *vintf, u16 lidx) +{ + struct tegra241_vcmdq *vcmdq = vintf->lvcmdqs[lidx]; + char header[32]; + + tegra241_vcmdq_free_smmu_cmdq(vcmdq); + tegra241_vintf_deinit_lvcmdq(vintf, lidx); + + dev_dbg(vintf->cmdqv->dev, + "%sdeallocated\n", lvcmdq_error_header(vcmdq, header, 32)); + kfree(vcmdq); +} + +static struct tegra241_vcmdq * +tegra241_vintf_alloc_lvcmdq(struct tegra241_vintf *vintf, u16 lidx) +{ + struct tegra241_cmdqv *cmdqv = vintf->cmdqv; + struct tegra241_vcmdq *vcmdq; + char header[32]; + int ret; + + vcmdq = kzalloc(sizeof(*vcmdq), GFP_KERNEL); + if (!vcmdq) + return ERR_PTR(-ENOMEM); + + ret = tegra241_vintf_init_lvcmdq(vintf, lidx, vcmdq); + if (ret) + goto free_vcmdq; + + /* Build an arm_smmu_cmdq for each LVCMDQ */ + ret = tegra241_vcmdq_alloc_smmu_cmdq(vcmdq); + if (ret) + goto deinit_lvcmdq; + + dev_dbg(cmdqv->dev, + "%sallocated\n", lvcmdq_error_header(vcmdq, header, 32)); + return vcmdq; + +deinit_lvcmdq: + tegra241_vintf_deinit_lvcmdq(vintf, lidx); +free_vcmdq: + kfree(vcmdq); + return ERR_PTR(ret); +} + +/* VINTF Resource Helpers */ + +static void tegra241_cmdqv_deinit_vintf(struct tegra241_cmdqv *cmdqv, u16 idx) +{ + kfree(cmdqv->vintfs[idx]->lvcmdqs); + ida_free(&cmdqv->vintf_ids, idx); + cmdqv->vintfs[idx] = NULL; +} + +static int tegra241_cmdqv_init_vintf(struct tegra241_cmdqv *cmdqv, u16 max_idx, + struct tegra241_vintf *vintf) +{ + + u16 idx; + int ret; + + ret = ida_alloc_max(&cmdqv->vintf_ids, max_idx, GFP_KERNEL); + if (ret < 0) + return ret; + idx = ret; + + vintf->idx = idx; + vintf->cmdqv = cmdqv; + 
vintf->base = cmdqv->base + TEGRA241_VINTF(idx); + + vintf->lvcmdqs = kcalloc(cmdqv->num_lvcmdqs_per_vintf, + sizeof(*vintf->lvcmdqs), GFP_KERNEL); + if (!vintf->lvcmdqs) { + ida_free(&cmdqv->vintf_ids, idx); + return -ENOMEM; + } + + cmdqv->vintfs[idx] = vintf; + return ret; +} + +/* Remove Helpers */ + +static void tegra241_vintf_remove_lvcmdq(struct tegra241_vintf *vintf, u16 lidx) +{ + tegra241_vcmdq_hw_deinit(vintf->lvcmdqs[lidx]); + tegra241_vintf_free_lvcmdq(vintf, lidx); +} + +static void tegra241_cmdqv_remove_vintf(struct tegra241_cmdqv *cmdqv, u16 idx) +{ + struct tegra241_vintf *vintf = cmdqv->vintfs[idx]; + u16 lidx; + + /* Remove LVCMDQ resources */ + for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++) + if (vintf->lvcmdqs[lidx]) + tegra241_vintf_remove_lvcmdq(vintf, lidx); + + /* Remove VINTF resources */ + tegra241_vintf_hw_deinit(vintf); + + dev_dbg(cmdqv->dev, "VINTF%u: deallocated\n", vintf->idx); + tegra241_cmdqv_deinit_vintf(cmdqv, idx); + kfree(vintf); +} + +static void tegra241_cmdqv_remove(struct arm_smmu_device *smmu) +{ + struct tegra241_cmdqv *cmdqv = + container_of(smmu, struct tegra241_cmdqv, smmu); + u16 idx; + + /* Remove VINTF resources */ + for (idx = 0; idx < cmdqv->num_vintfs; idx++) { + if (cmdqv->vintfs[idx]) { + /* Only vintf0 should remain at this stage */ + WARN_ON(idx > 0); + tegra241_cmdqv_remove_vintf(cmdqv, idx); + } + } + + /* Remove cmdqv resources */ + ida_destroy(&cmdqv->vintf_ids); + + if (cmdqv->irq > 0) + free_irq(cmdqv->irq, cmdqv); + iounmap(cmdqv->base); + kfree(cmdqv->vintfs); + put_device(cmdqv->dev); /* smmu->impl_dev */ +} + +static struct arm_smmu_impl_ops tegra241_cmdqv_impl_ops = { + .get_secondary_cmdq = tegra241_cmdqv_get_cmdq, + .device_reset = tegra241_cmdqv_hw_reset, + .device_remove = tegra241_cmdqv_remove, +}; + +/* Probe Functions */ + +static int tegra241_cmdqv_acpi_is_memory(struct acpi_resource *res, void *data) +{ + struct resource_win win; + + return !acpi_dev_resource_address_space(res, &win); +} + +static int tegra241_cmdqv_acpi_get_irqs(struct acpi_resource *ares, void *data) +{ + struct resource r; + int *irq = data; + + if (*irq <= 0 && acpi_dev_resource_interrupt(ares, 0, &r)) + *irq = r.start; + return 1; /* No need to add resource to the list */ +} + +static struct resource * +tegra241_cmdqv_find_acpi_resource(struct device *dev, int *irq) +{ + struct acpi_device *adev = to_acpi_device(dev); + struct list_head resource_list; + struct resource_entry *rentry; + struct resource *res = NULL; + int ret; + + INIT_LIST_HEAD(&resource_list); + ret = acpi_dev_get_resources(adev, &resource_list, + tegra241_cmdqv_acpi_is_memory, NULL); + if (ret < 0) { + dev_err(dev, "failed to get memory resource: %d\n", ret); + return NULL; + } + + rentry = list_first_entry_or_null(&resource_list, + struct resource_entry, node); + if (!rentry) { + dev_err(dev, "failed to get memory resource entry\n"); + goto free_list; + } + + /* Caller must free the res */ + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) + goto free_list; + + *res = *rentry->res; + + acpi_dev_free_resource_list(&resource_list); + + INIT_LIST_HEAD(&resource_list); + + if (irq) + ret = acpi_dev_get_resources(adev, &resource_list, + tegra241_cmdqv_acpi_get_irqs, irq); + if (ret < 0 || !irq || *irq <= 0) + dev_warn(dev, "no interrupt. 
errors will not be reported\n"); + +free_list: + acpi_dev_free_resource_list(&resource_list); + return res; +} + +struct dentry *cmdqv_debugfs_dir; + +static struct arm_smmu_device * +__tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res, + int irq) +{ + static struct arm_smmu_device *new_smmu; + struct tegra241_cmdqv *cmdqv = NULL; + struct tegra241_vintf *vintf; + void __iomem *base; + u32 regval; + int lidx; + int ret; + + static_assert(offsetof(struct tegra241_cmdqv, smmu) == 0); + + base = ioremap(res->start, resource_size(res)); + if (IS_ERR(base)) { + dev_err(smmu->dev, "failed to ioremap: %ld\n", PTR_ERR(base)); + goto iounmap; + } + + regval = readl(base + TEGRA241_CMDQV_CONFIG); + if (disable_cmdqv) { + dev_info(smmu->dev, "Detected disable_cmdqv=true\n"); + writel(regval & ~CMDQV_EN, base + TEGRA241_CMDQV_CONFIG); + goto iounmap; + } + + cmdqv = devm_krealloc(smmu->dev, smmu, sizeof(*cmdqv), GFP_KERNEL); + if (!cmdqv) + goto iounmap; + new_smmu = &cmdqv->smmu; + + cmdqv->irq = irq; + cmdqv->base = base; + cmdqv->dev = smmu->impl_dev; + + if (cmdqv->irq > 0) { + ret = request_irq(irq, tegra241_cmdqv_isr, 0, "tegra241-cmdqv", + cmdqv); + if (ret) { + dev_err(cmdqv->dev, "failed to request irq (%d): %d\n", + cmdqv->irq, ret); + goto iounmap; + } + } + + regval = readl_relaxed(REG_CMDQV(cmdqv, PARAM)); + cmdqv->num_vintfs = 1 << FIELD_GET(CMDQV_NUM_VINTF_LOG2, regval); + cmdqv->num_vcmdqs = 1 << FIELD_GET(CMDQV_NUM_VCMDQ_LOG2, regval); + cmdqv->num_lvcmdqs_per_vintf = cmdqv->num_vcmdqs / cmdqv->num_vintfs; + + cmdqv->vintfs = + kcalloc(cmdqv->num_vintfs, sizeof(*cmdqv->vintfs), GFP_KERNEL); + if (!cmdqv->vintfs) + goto free_irq; + + ida_init(&cmdqv->vintf_ids); + + vintf = kzalloc(sizeof(*vintf), GFP_KERNEL); + if (!vintf) + goto destroy_ids; + + /* Init VINTF0 for in-kernel use */ + ret = tegra241_cmdqv_init_vintf(cmdqv, 0, vintf); + if (ret) { + dev_err(cmdqv->dev, "failed to init vintf0: %d\n", ret); + goto free_vintf; + } + + /* Preallocate logical VCMDQs to VINTF0 */ + for (lidx = 0; lidx < cmdqv->num_lvcmdqs_per_vintf; lidx++) { + struct tegra241_vcmdq *vcmdq; + + vcmdq = tegra241_vintf_alloc_lvcmdq(vintf, lidx); + if (IS_ERR(vcmdq)) + goto free_lvcmdq; + } + +#ifdef CONFIG_IOMMU_DEBUGFS + if (!cmdqv_debugfs_dir) { + cmdqv_debugfs_dir = + debugfs_create_dir("tegra241_cmdqv", iommu_debugfs_dir); + debugfs_create_bool("bypass_vcmdq", 0644, cmdqv_debugfs_dir, + &bypass_vcmdq); + } +#endif + + new_smmu->impl_ops = &tegra241_cmdqv_impl_ops; + + return new_smmu; + +free_lvcmdq: + for (lidx--; lidx >= 0; lidx--) + tegra241_vintf_free_lvcmdq(vintf, lidx); + tegra241_cmdqv_deinit_vintf(cmdqv, vintf->idx); +free_vintf: + kfree(vintf); +destroy_ids: + ida_destroy(&cmdqv->vintf_ids); + kfree(cmdqv->vintfs); +free_irq: + if (cmdqv->irq > 0) + free_irq(cmdqv->irq, cmdqv); +iounmap: + iounmap(base); + return NULL; +} + +struct arm_smmu_device *tegra241_cmdqv_probe(struct arm_smmu_device *smmu) +{ + struct arm_smmu_device *new_smmu; + struct resource *res = NULL; + int irq; + + if (!smmu->dev->of_node) + res = tegra241_cmdqv_find_acpi_resource(smmu->impl_dev, &irq); + if (!res) + goto out_fallback; + + new_smmu = __tegra241_cmdqv_probe(smmu, res, irq); + kfree(res); + + if (new_smmu) + return new_smmu; + +out_fallback: + dev_info(smmu->impl_dev, "Falling back to standard SMMU CMDQ\n"); + smmu->options &= ~ARM_SMMU_OPT_TEGRA241_CMDQV; + put_device(smmu->impl_dev); + return ERR_PTR(-ENODEV); +} -- Gitee From acbfd44ca1d8b94546905deaae69bd243b99bb12 Mon Sep 17 00:00:00 2001 
From: Nicolin Chen Date: Thu, 29 Aug 2024 15:34:38 -0700 Subject: [PATCH 47/99] iommu/arm-smmu-v3: Start a new batch if new command is not supported ANBZ: #13617 commit f59e854907128ec3d4a82b7fc4efe9be8da2e78e upstream. The VCMDQ in the tegra241-cmdqv driver has a guest mode that supports only a few invalidation commands. A batch is initialized with a cmdq, so it has to confirm whether a new command is supported or not. Add a supports_cmd function pointer to the cmdq structure, where the vcmdq driver should hook a command scan function. Add an inline helper too so it can be used by both sides. If a new command is not supported, simply issue the existing batch and re-init it as a new batch. Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/aafb24b881504f18c5d0c7c15f2134e40ad2c486.1724970714.git.nicolinc@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 6 ++++-- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 7 +++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index e90baaf073ce..eaf7d4530b68 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -923,10 +923,12 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu, struct arm_smmu_cmdq_batch *cmds, struct arm_smmu_cmdq_ent *cmd) { + bool unsupported_cmd = !arm_smmu_cmdq_supports_cmd(cmds->cmdq, cmd); + bool force_sync = (cmds->num == CMDQ_BATCH_ENTRIES - 1) && + (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC); int index; - if (cmds->num == CMDQ_BATCH_ENTRIES - 1 && - (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) { + if (force_sync || unsupported_cmd) { arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds, cmds->num, true); arm_smmu_cmdq_batch_init(smmu, cmds); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index e22a1441d50c..e6325191d0fe 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -568,8 +568,15 @@ struct arm_smmu_cmdq { atomic_long_t *valid_map; atomic_t owner_prod; atomic_t lock; + bool (*supports_cmd)(struct arm_smmu_cmdq_ent *ent); }; +static inline bool arm_smmu_cmdq_supports_cmd(struct arm_smmu_cmdq *cmdq, + struct arm_smmu_cmdq_ent *ent) +{ + return cmdq->supports_cmd ? cmdq->supports_cmd(ent) : true; +} + struct arm_smmu_cmdq_batch { u64 cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS]; struct arm_smmu_cmdq *cmdq; -- Gitee From 45fa76eedbc250f72cee75c234e81e9475861d0f Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 29 Aug 2024 15:34:39 -0700 Subject: [PATCH 48/99] iommu/tegra241-cmdqv: Limit CMDs for VCMDQs of a guest owned VINTF ANBZ: #13617 commit a9d40285bdefef700ebc7551ef79d2f3e4559e73 upstream. When VCMDQs are assigned to a VINTF owned by a guest (HYP_OWN bit unset), only TLB and ATC invalidation commands are supported by the VCMDQ HW. So, implement the new cmdq->supports_cmd op to scan the input cmd and make sure it is supported by the selected queue. Note that the guest VM shouldn't have the HYP_OWN bit set regardless of whether the guest kernel driver writes it, i.e. the hypervisor running in the host OS should wire this bit to zero when trapping a write access to this VINTF_CONFIG register from a guest kernel.
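The trap-side masking described above belongs in the VMM rather than in this driver. A minimal sketch of the idea, assuming a hypothetical trap handler and register mapping (only the VINTF_HYP_OWN field itself comes from tegra241-cmdqv.c):

/*
 * Sketch: hypervisor-side emulation of a guest write to VINTF_CONFIG.
 * The handler name and the vintf_cfg mapping are hypothetical; the
 * point is that HYP_OWN is cleared before the value reaches hardware,
 * so a guest-owned VINTF only ever gets the restricted command set.
 */
static void emulate_vintf_config_write(void __iomem *vintf_cfg, u32 guest_val)
{
	guest_val &= ~VINTF_HYP_OWN;	/* guests never own the VINTF */
	writel(guest_val, vintf_cfg);
}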
Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/8160292337059b91271045800e5c62f7295e2c24.1724970714.git.nicolinc@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 28 ++++++++------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 +- .../iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 34 ++++++++++++++++++- 3 files changed, 51 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index eaf7d4530b68..e94608d7f0dd 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -346,12 +346,13 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) return 0; } -static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu) +static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq_ent *ent) { struct arm_smmu_cmdq *cmdq = NULL; if (smmu->impl_ops && smmu->impl_ops->get_secondary_cmdq) - cmdq = smmu->impl_ops->get_secondary_cmdq(smmu); + cmdq = smmu->impl_ops->get_secondary_cmdq(smmu, ent); return cmdq ?: &smmu->cmdq; } @@ -897,7 +898,7 @@ static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, } return arm_smmu_cmdq_issue_cmdlist( - smmu, arm_smmu_get_cmdq(smmu), cmd, 1, sync); + smmu, arm_smmu_get_cmdq(smmu, ent), cmd, 1, sync); } static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, @@ -913,10 +914,11 @@ static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu, } static void arm_smmu_cmdq_batch_init(struct arm_smmu_device *smmu, - struct arm_smmu_cmdq_batch *cmds) + struct arm_smmu_cmdq_batch *cmds, + struct arm_smmu_cmdq_ent *ent) { cmds->num = 0; - cmds->cmdq = arm_smmu_get_cmdq(smmu); + cmds->cmdq = arm_smmu_get_cmdq(smmu, ent); } static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu, @@ -931,13 +933,13 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu, if (force_sync || unsupported_cmd) { arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds, cmds->num, true); - arm_smmu_cmdq_batch_init(smmu, cmds); + arm_smmu_cmdq_batch_init(smmu, cmds, cmd); } if (cmds->num == CMDQ_BATCH_ENTRIES) { arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds, cmds->num, false); - arm_smmu_cmdq_batch_init(smmu, cmds); + arm_smmu_cmdq_batch_init(smmu, cmds, cmd); } index = cmds->num * CMDQ_ENT_DWORDS; @@ -1205,7 +1207,7 @@ static void arm_smmu_sync_cd(struct arm_smmu_master *master, }, }; - arm_smmu_cmdq_batch_init(smmu, &cmds); + arm_smmu_cmdq_batch_init(smmu, &cmds, &cmd); for (i = 0; i < master->num_streams; i++) { cmd.cfgi.sid = master->streams[i].id; arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); @@ -2067,7 +2069,7 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master, arm_smmu_atc_inv_to_cmd(ssid, 0, 0, &cmd); - arm_smmu_cmdq_batch_init(master->smmu, &cmds); + arm_smmu_cmdq_batch_init(master->smmu, &cmds, &cmd); for (i = 0; i < master->num_streams; i++) { cmd.atc.sid = master->streams[i].id; arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd); @@ -2082,7 +2084,9 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, struct arm_smmu_master_domain *master_domain; int i; unsigned long flags; - struct arm_smmu_cmdq_ent cmd; + struct arm_smmu_cmdq_ent cmd = { + .opcode = CMDQ_OP_ATC_INV, + }; struct arm_smmu_cmdq_batch cmds; if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS)) @@ -2105,7 +2109,7 @@ int 
arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, if (!atomic_read(&smmu_domain->nr_ats_masters)) return 0; - arm_smmu_cmdq_batch_init(smmu_domain->smmu, &cmds); + arm_smmu_cmdq_batch_init(smmu_domain->smmu, &cmds, &cmd); spin_lock_irqsave(&smmu_domain->devices_lock, flags); list_for_each_entry(master_domain, &smmu_domain->devices, @@ -2187,7 +2191,7 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd, num_pages++; } - arm_smmu_cmdq_batch_init(smmu, &cmds); + arm_smmu_cmdq_batch_init(smmu, &cmds, cmd); while (iova < end) { if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) { diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index e6325191d0fe..b35583e07c14 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -642,7 +642,8 @@ struct arm_smmu_strtab_cfg { struct arm_smmu_impl_ops { int (*device_reset)(struct arm_smmu_device *smmu); void (*device_remove)(struct arm_smmu_device *smmu); - struct arm_smmu_cmdq *(*get_secondary_cmdq)(struct arm_smmu_device *smmu); + struct arm_smmu_cmdq *(*get_secondary_cmdq)( + struct arm_smmu_device *smmu, struct arm_smmu_cmdq_ent *ent); }; /* An SMMUv3 instance */ diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index 5ac3032ee6dd..9eb9d959f3e5 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -142,6 +142,7 @@ struct tegra241_vcmdq { * struct tegra241_vintf - Virtual Interface * @idx: Global index in the CMDQV * @enabled: Enable status + * @hyp_own: Owned by hypervisor (in-kernel) * @cmdqv: Parent CMDQV pointer * @lvcmdqs: List of logical VCMDQ pointers * @base: MMIO base address @@ -150,6 +151,7 @@ struct tegra241_vintf { u16 idx; bool enabled; + bool hyp_own; struct tegra241_cmdqv *cmdqv; struct tegra241_vcmdq **lvcmdqs; @@ -301,8 +303,21 @@ static irqreturn_t tegra241_cmdqv_isr(int irq, void *devid) /* Command Queue Function */ +static bool tegra241_guest_vcmdq_supports_cmd(struct arm_smmu_cmdq_ent *ent) +{ + switch (ent->opcode) { + case CMDQ_OP_TLBI_NH_ASID: + case CMDQ_OP_TLBI_NH_VA: + case CMDQ_OP_ATC_INV: + return true; + default: + return false; + } +} + static struct arm_smmu_cmdq * -tegra241_cmdqv_get_cmdq(struct arm_smmu_device *smmu) +tegra241_cmdqv_get_cmdq(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq_ent *ent) { struct tegra241_cmdqv *cmdqv = container_of(smmu, struct tegra241_cmdqv, smmu); @@ -328,6 +343,10 @@ tegra241_cmdqv_get_cmdq(struct arm_smmu_device *smmu) vcmdq = vintf->lvcmdqs[lidx]; if (!vcmdq || !READ_ONCE(vcmdq->enabled)) return NULL; + + /* Unsupported CMD goes for smmu->cmdq pathway */ + if (!arm_smmu_cmdq_supports_cmd(&vcmdq->cmdq, ent)) + return NULL; return &vcmdq->cmdq; } @@ -406,12 +425,22 @@ static int tegra241_vintf_hw_init(struct tegra241_vintf *vintf, bool hyp_own) tegra241_vintf_hw_deinit(vintf); /* Configure and enable VINTF */ + /* + * Note that HYP_OWN bit is wired to zero when running in guest kernel, + * whether enabling it here or not, as !HYP_OWN cmdq HWs only support a + * restricted set of supported commands. 
+ */ + regval = FIELD_PREP(VINTF_HYP_OWN, hyp_own); + writel(regval, REG_VINTF(vintf, CONFIG)); ret = vintf_write_config(vintf, regval | VINTF_EN); if (ret) return ret; + /* + * As being mentioned above, HYP_OWN bit is wired to zero for a guest + * kernel, so read it back from HW to ensure that reflects in hyp_own + */ + vintf->hyp_own = !!(VINTF_HYP_OWN & readl(REG_VINTF(vintf, CONFIG))); for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++) { if (vintf->lvcmdqs && vintf->lvcmdqs[lidx]) { @@ -493,6 +522,9 @@ static int tegra241_vcmdq_alloc_smmu_cmdq(struct tegra241_vcmdq *vcmdq) q->q_base = q->base_dma & VCMDQ_ADDR; q->q_base |= FIELD_PREP(VCMDQ_LOG2SIZE, q->llq.max_n_shift); + if (!vcmdq->vintf->hyp_own) + cmdq->supports_cmd = tegra241_guest_vcmdq_supports_cmd; + return arm_smmu_cmdq_init(smmu, cmdq); } -- Gitee From cf1c63870c97b5db60b8e3bb0871cd2b338330ce Mon Sep 17 00:00:00 2001 From: Mostafa Saleh Date: Fri, 30 Aug 2024 11:03:47 +0000 Subject: [PATCH 49/99] iommu/arm-smmu-v3: Match Stall behaviour for S2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #13617 commit ce7cb08e22e09f43649b025c849a3ae3b80833c4 upstream. According to the spec (ARM IHI 0070 F.b), in "5.5 Fault configuration (A, R, S bits)": A STE with stage 2 translation enabled and STE.S2S == 0 is considered ILLEGAL if SMMU_IDR0.STALL_MODEL == 0b10. Also described in the pseudocode “SteIllegal()” if STE.Config == '11x' then [..] if eff_idr0_stall_model == '10' && STE.S2S == '0' then // stall_model forcing stall, but S2S == 0 return TRUE; This means S2S must be set when the stall model is "ARM_SMMU_FEAT_STALL_FORCE", but currently the driver ignores that. Although the driver could do the minimum and set S2S only for “ARM_SMMU_FEAT_STALL_FORCE”, it is more consistent to match the S1 behaviour, which also sets it for “ARM_SMMU_FEAT_STALL” if the master has requested stalls. Also, since S2 stalls are now enabled, report them to the IOMMU layer; for VFIO devices this will fail anyway, as VFIO doesn’t register an iopf handler. Signed-off-by: Mostafa Saleh Link: https://lore.kernel.org/r/20240830110349.797399-2-smostafa@google.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 8 +++----- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 1 + 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index e94608d7f0dd..62d5c19031b1 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1049,7 +1049,8 @@ void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits) used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR | STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI | - STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2R); + STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2S | + STRTAB_STE_2_S2R); used_bits[3] |= cpu_to_le64(STRTAB_STE_3_S2TTB_MASK); } @@ -1683,6 +1684,7 @@ void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, STRTAB_STE_2_S2ENDI | #endif STRTAB_STE_2_S2PTW | + (master->stall_enabled ? STRTAB_STE_2_S2S : 0) | STRTAB_STE_2_S2R); target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s2_cfg.vttbr & @@ -1787,10 +1789,6 @@ static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt) return -EOPNOTSUPP; } - /* Stage-2 is always pinned at the moment */ - if (evt[1] & EVTQ_1_S2) - return -EFAULT; - if (!(evt[1] & EVTQ_1_STALL)) return -EOPNOTSUPP; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index b35583e07c14..e746470cc767 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -269,6 +269,7 @@ struct arm_smmu_ste { #define STRTAB_STE_2_S2AA64 (1UL << 51) #define STRTAB_STE_2_S2ENDI (1UL << 52) #define STRTAB_STE_2_S2PTW (1UL << 54) +#define STRTAB_STE_2_S2S (1UL << 57) #define STRTAB_STE_2_S2R (1UL << 58) #define STRTAB_STE_3_S2TTB_MASK GENMASK_ULL(51, 4) -- Gitee From f4c5c2df66d0bba7a711e8da705eae66cfd2028e Mon Sep 17 00:00:00 2001 From: Mostafa Saleh Date: Fri, 30 Aug 2024 11:03:48 +0000 Subject: [PATCH 50/99] iommu/arm-smmu-v3-test: Test masters with stall enabled ANBZ: #13617 commit 070e326f327a8d32669b5bf9b50a12c2cd8277ff upstream. At the moment, the SMMUv3 unit tests assume ATS is always enabled; this is sufficient to test hitless/non-hitless transitions, but exercising other features is useful to check the ste/cd population logic (for example the .get_used logic). Add an enum whose bits define features per-master; at the moment there are only ATS and STALL, which are mutually exclusive, but this makes it easier to extend with other features in the future. Also, add two more tests for s1 <-> s2 transitions with stalls enabled. Signed-off-by: Mostafa Saleh Link: https://lore.kernel.org/r/20240830110349.797399-3-smostafa@google.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c | 83 ++++++++++++++----- 1 file changed, 62 insertions(+), 21 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c index cceb737a7001..84baa021370a 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c @@ -30,6 +30,11 @@ static struct mm_struct sva_mm = { .pgd = (void *)0xdaedbeefdeadbeefULL, }; +enum arm_smmu_test_master_feat { + ARM_SMMU_MASTER_TEST_ATS = BIT(0), + ARM_SMMU_MASTER_TEST_STALL = BIT(1), +}; + static bool arm_smmu_entry_differs_in_used_bits(const __le64 *entry, const __le64 *used_bits, const __le64 *target, @@ -164,16 +169,22 @@ static const dma_addr_t fake_cdtab_dma_addr = 0xF0F0F0F0F0F0; static void arm_smmu_test_make_cdtable_ste(struct arm_smmu_ste *ste, unsigned int s1dss, - const dma_addr_t dma_addr) + const dma_addr_t dma_addr, + enum arm_smmu_test_master_feat feat) { + bool ats_enabled = feat & ARM_SMMU_MASTER_TEST_ATS; + bool stall_enabled = feat & ARM_SMMU_MASTER_TEST_STALL; + struct arm_smmu_master master = { + .ats_enabled = ats_enabled, .cd_table.cdtab_dma = dma_addr, .cd_table.s1cdmax = 0xFF, .cd_table.s1fmt = STRTAB_STE_0_S1FMT_64K_L2, .smmu = &smmu, + .stall_enabled = stall_enabled, }; - arm_smmu_make_cdtable_ste(ste, &master, true, s1dss); + arm_smmu_make_cdtable_ste(ste, &master, ats_enabled, s1dss); } static void arm_smmu_v3_write_ste_test_bypass_to_abort(struct kunit *test) @@ -204,7 +215,7 @@ static void arm_smmu_v3_write_ste_test_cdtable_to_abort(struct kunit *test) struct arm_smmu_ste ste; arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0,
- fake_cdtab_dma_addr); + fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS); arm_smmu_v3_test_ste_expect_hitless_transition(test, &ste, &abort_ste, NUM_EXPECTED_SYNCS(2)); } @@ -214,7 +225,7 @@ static void arm_smmu_v3_write_ste_test_abort_to_cdtable(struct kunit *test) struct arm_smmu_ste ste; arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0, - fake_cdtab_dma_addr); + fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS); arm_smmu_v3_test_ste_expect_hitless_transition(test, &abort_ste, &ste, NUM_EXPECTED_SYNCS(2)); } @@ -224,7 +235,7 @@ static void arm_smmu_v3_write_ste_test_cdtable_to_bypass(struct kunit *test) struct arm_smmu_ste ste; arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0, - fake_cdtab_dma_addr); + fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS); arm_smmu_v3_test_ste_expect_hitless_transition(test, &ste, &bypass_ste, NUM_EXPECTED_SYNCS(3)); } @@ -234,7 +245,7 @@ static void arm_smmu_v3_write_ste_test_bypass_to_cdtable(struct kunit *test) struct arm_smmu_ste ste; arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0, - fake_cdtab_dma_addr); + fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS); arm_smmu_v3_test_ste_expect_hitless_transition(test, &bypass_ste, &ste, NUM_EXPECTED_SYNCS(3)); } @@ -245,9 +256,9 @@ static void arm_smmu_v3_write_ste_test_cdtable_s1dss_change(struct kunit *test) struct arm_smmu_ste s1dss_bypass; arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0, - fake_cdtab_dma_addr); + fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS); arm_smmu_test_make_cdtable_ste(&s1dss_bypass, STRTAB_STE_1_S1DSS_BYPASS, - fake_cdtab_dma_addr); + fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS); /* * Flipping s1dss on a CD table STE only involves changes to the second @@ -265,7 +276,7 @@ arm_smmu_v3_write_ste_test_s1dssbypass_to_stebypass(struct kunit *test) struct arm_smmu_ste s1dss_bypass; arm_smmu_test_make_cdtable_ste(&s1dss_bypass, STRTAB_STE_1_S1DSS_BYPASS, - fake_cdtab_dma_addr); + fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS); arm_smmu_v3_test_ste_expect_hitless_transition( test, &s1dss_bypass, &bypass_ste, NUM_EXPECTED_SYNCS(2)); } @@ -276,16 +287,20 @@ arm_smmu_v3_write_ste_test_stebypass_to_s1dssbypass(struct kunit *test) struct arm_smmu_ste s1dss_bypass; arm_smmu_test_make_cdtable_ste(&s1dss_bypass, STRTAB_STE_1_S1DSS_BYPASS, - fake_cdtab_dma_addr); + fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS); arm_smmu_v3_test_ste_expect_hitless_transition( test, &bypass_ste, &s1dss_bypass, NUM_EXPECTED_SYNCS(2)); } static void arm_smmu_test_make_s2_ste(struct arm_smmu_ste *ste, - bool ats_enabled) + enum arm_smmu_test_master_feat feat) { + bool ats_enabled = feat & ARM_SMMU_MASTER_TEST_ATS; + bool stall_enabled = feat & ARM_SMMU_MASTER_TEST_STALL; struct arm_smmu_master master = { + .ats_enabled = ats_enabled, .smmu = &smmu, + .stall_enabled = stall_enabled, }; struct io_pgtable io_pgtable = {}; struct arm_smmu_domain smmu_domain = { @@ -308,7 +323,7 @@ static void arm_smmu_v3_write_ste_test_s2_to_abort(struct kunit *test) { struct arm_smmu_ste ste; - arm_smmu_test_make_s2_ste(&ste, true); + arm_smmu_test_make_s2_ste(&ste, ARM_SMMU_MASTER_TEST_ATS); arm_smmu_v3_test_ste_expect_hitless_transition(test, &ste, &abort_ste, NUM_EXPECTED_SYNCS(2)); } @@ -317,7 +332,7 @@ static void arm_smmu_v3_write_ste_test_abort_to_s2(struct kunit *test) { struct arm_smmu_ste ste; - arm_smmu_test_make_s2_ste(&ste, true); + arm_smmu_test_make_s2_ste(&ste, ARM_SMMU_MASTER_TEST_ATS); arm_smmu_v3_test_ste_expect_hitless_transition(test, &abort_ste, &ste, NUM_EXPECTED_SYNCS(2)); 
} @@ -326,7 +341,7 @@ static void arm_smmu_v3_write_ste_test_s2_to_bypass(struct kunit *test) { struct arm_smmu_ste ste; - arm_smmu_test_make_s2_ste(&ste, true); + arm_smmu_test_make_s2_ste(&ste, ARM_SMMU_MASTER_TEST_ATS); arm_smmu_v3_test_ste_expect_hitless_transition(test, &ste, &bypass_ste, NUM_EXPECTED_SYNCS(2)); } @@ -335,7 +350,7 @@ static void arm_smmu_v3_write_ste_test_bypass_to_s2(struct kunit *test) { struct arm_smmu_ste ste; - arm_smmu_test_make_s2_ste(&ste, true); + arm_smmu_test_make_s2_ste(&ste, ARM_SMMU_MASTER_TEST_ATS); arm_smmu_v3_test_ste_expect_hitless_transition(test, &bypass_ste, &ste, NUM_EXPECTED_SYNCS(2)); } @@ -346,8 +361,8 @@ static void arm_smmu_v3_write_ste_test_s1_to_s2(struct kunit *test) struct arm_smmu_ste s2_ste; arm_smmu_test_make_cdtable_ste(&s1_ste, STRTAB_STE_1_S1DSS_SSID0, - fake_cdtab_dma_addr); - arm_smmu_test_make_s2_ste(&s2_ste, true); + fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS); + arm_smmu_test_make_s2_ste(&s2_ste, ARM_SMMU_MASTER_TEST_ATS); arm_smmu_v3_test_ste_expect_hitless_transition(test, &s1_ste, &s2_ste, NUM_EXPECTED_SYNCS(3)); } @@ -358,8 +373,8 @@ static void arm_smmu_v3_write_ste_test_s2_to_s1(struct kunit *test) struct arm_smmu_ste s2_ste; arm_smmu_test_make_cdtable_ste(&s1_ste, STRTAB_STE_1_S1DSS_SSID0, - fake_cdtab_dma_addr); - arm_smmu_test_make_s2_ste(&s2_ste, true); + fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS); + arm_smmu_test_make_s2_ste(&s2_ste, ARM_SMMU_MASTER_TEST_ATS); arm_smmu_v3_test_ste_expect_hitless_transition(test, &s2_ste, &s1_ste, NUM_EXPECTED_SYNCS(3)); } @@ -375,9 +390,9 @@ static void arm_smmu_v3_write_ste_test_non_hitless(struct kunit *test) * s1 dss field in the same update. */ arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0, - fake_cdtab_dma_addr); + fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS); arm_smmu_test_make_cdtable_ste(&ste_2, STRTAB_STE_1_S1DSS_BYPASS, - 0x4B4B4b4B4B); + 0x4B4B4b4B4B, ARM_SMMU_MASTER_TEST_ATS); arm_smmu_v3_test_ste_expect_non_hitless_transition( test, &ste, &ste_2, NUM_EXPECTED_SYNCS(3)); } @@ -503,6 +518,30 @@ static void arm_smmu_test_make_sva_release_cd(struct arm_smmu_cd *cd, arm_smmu_make_sva_cd(cd, &master, NULL, asid); } +static void arm_smmu_v3_write_ste_test_s1_to_s2_stall(struct kunit *test) +{ + struct arm_smmu_ste s1_ste; + struct arm_smmu_ste s2_ste; + + arm_smmu_test_make_cdtable_ste(&s1_ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_STALL); + arm_smmu_test_make_s2_ste(&s2_ste, ARM_SMMU_MASTER_TEST_STALL); + arm_smmu_v3_test_ste_expect_hitless_transition(test, &s1_ste, &s2_ste, + NUM_EXPECTED_SYNCS(3)); +} + +static void arm_smmu_v3_write_ste_test_s2_to_s1_stall(struct kunit *test) +{ + struct arm_smmu_ste s1_ste; + struct arm_smmu_ste s2_ste; + + arm_smmu_test_make_cdtable_ste(&s1_ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_STALL); + arm_smmu_test_make_s2_ste(&s2_ste, ARM_SMMU_MASTER_TEST_STALL); + arm_smmu_v3_test_ste_expect_hitless_transition(test, &s2_ste, &s1_ste, + NUM_EXPECTED_SYNCS(3)); +} + static void arm_smmu_v3_write_cd_test_sva_clear(struct kunit *test) { struct arm_smmu_cd cd = {}; @@ -547,6 +586,8 @@ static struct kunit_case arm_smmu_v3_test_cases[] = { KUNIT_CASE(arm_smmu_v3_write_ste_test_non_hitless), KUNIT_CASE(arm_smmu_v3_write_cd_test_s1_clear), KUNIT_CASE(arm_smmu_v3_write_cd_test_s1_change_asid), + KUNIT_CASE(arm_smmu_v3_write_ste_test_s1_to_s2_stall), + KUNIT_CASE(arm_smmu_v3_write_ste_test_s2_to_s1_stall), KUNIT_CASE(arm_smmu_v3_write_cd_test_sva_clear), 
KUNIT_CASE(arm_smmu_v3_write_cd_test_sva_release), {}, -- Gitee From a1f9a25b15627e72349eb54a70647ccab8556ddc Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Sun, 1 Sep 2024 22:57:45 -0700 Subject: [PATCH 51/99] iommu/tegra241-cmdqv: Fix -Wformat-truncation warnings in lvcmdq_error_header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #13617 commit db184a1ced56dde6bbf8cc4d9b936c9f6a510e28 upstream. Kernel test robot reported a few truncation warnings at the snprintf: drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c: In function ‘tegra241_vintf_free_lvcmdq’: drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c:239:56: warning: ‘%u’ directive output may be truncated writing between 1 and 5 bytes into a region of size between 3 and 11 [-Wformat-truncation=] 239 | snprintf(header, hlen, "VINTF%u: VCMDQ%u/LVCMDQ%u: ", | ^~ drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c:239:32: note: directive argument in the range [0, 65535] 239 | snprintf(header, hlen, "VINTF%u: VCMDQ%u/LVCMDQ%u: ", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c:239:9: note: ‘snprintf’ output between 25 and 37 bytes into a destination of size 32 239 | snprintf(header, hlen, "VINTF%u: VCMDQ%u/LVCMDQ%u: ", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 240 | vcmdq->vintf->idx, vcmdq->idx, vcmdq->lidx); Fix by bumping up the size of the header to hold more characters: with three u16 indexes of up to five digits each, the header can need up to 37 bytes including the terminating NUL, which does not fit in the previous 32-byte buffers. Fixes: 918eb5c856f6 ("iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202409020406.7ed5uojF-lkp@intel.com/ Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/20240902055745.629456-1-nicolinc@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index 9eb9d959f3e5..03fd13c21dcc 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -233,7 +233,7 @@ static inline int vintf_write_config(struct tegra241_vintf *vintf, u32 regval) static inline char *lvcmdq_error_header(struct tegra241_vcmdq *vcmdq, char *header, int hlen) { - WARN_ON(hlen < 32); + WARN_ON(hlen < 64); if (WARN_ON(!vcmdq->vintf)) return ""; snprintf(header, hlen, "VINTF%u: VCMDQ%u/LVCMDQ%u: ", @@ -243,7 +243,7 @@ static inline char *lvcmdq_error_header(struct tegra241_vcmdq *vcmdq, static inline int vcmdq_write_config(struct tegra241_vcmdq *vcmdq, u32 regval) { - char header[32], *h = lvcmdq_error_header(vcmdq, header, 32); + char header[64], *h = lvcmdq_error_header(vcmdq, header, 64); return tegra241_cmdqv_write_config(vcmdq->cmdqv, REG_VCMDQ_PAGE0(vcmdq, CONFIG), @@ -354,7 +354,7 @@ tegra241_cmdqv_get_cmdq(struct arm_smmu_device *smmu, static void tegra241_vcmdq_hw_deinit(struct tegra241_vcmdq *vcmdq) { - char header[32], *h = lvcmdq_error_header(vcmdq, header, 32); + char header[64], *h = lvcmdq_error_header(vcmdq, header, 64); u32 gerrorn, gerror; if (vcmdq_write_config(vcmdq, 0)) { @@ -382,7 +382,7 @@ static void tegra241_vcmdq_hw_deinit(struct tegra241_vcmdq *vcmdq) static int tegra241_vcmdq_hw_init(struct tegra241_vcmdq *vcmdq) { - char header[32], *h = lvcmdq_error_header(vcmdq, header, 32); + char header[64], *h = lvcmdq_error_header(vcmdq, header, 64); int ret; /* Reset VCMDQ */ @@ -555,13 +555,13 @@ static
int tegra241_vintf_init_lvcmdq(struct tegra241_vintf *vintf, u16 lidx, static void tegra241_vintf_free_lvcmdq(struct tegra241_vintf *vintf, u16 lidx) { struct tegra241_vcmdq *vcmdq = vintf->lvcmdqs[lidx]; - char header[32]; + char header[64]; tegra241_vcmdq_free_smmu_cmdq(vcmdq); tegra241_vintf_deinit_lvcmdq(vintf, lidx); dev_dbg(vintf->cmdqv->dev, - "%sdeallocated\n", lvcmdq_error_header(vcmdq, header, 32)); + "%sdeallocated\n", lvcmdq_error_header(vcmdq, header, 64)); kfree(vcmdq); } @@ -570,7 +570,7 @@ tegra241_vintf_alloc_lvcmdq(struct tegra241_vintf *vintf, u16 lidx) { struct tegra241_cmdqv *cmdqv = vintf->cmdqv; struct tegra241_vcmdq *vcmdq; - char header[32]; + char header[64]; int ret; vcmdq = kzalloc(sizeof(*vcmdq), GFP_KERNEL); @@ -587,7 +587,7 @@ tegra241_vintf_alloc_lvcmdq(struct tegra241_vintf *vintf, u16 lidx) goto deinit_lvcmdq; dev_dbg(cmdqv->dev, - "%sallocated\n", lvcmdq_error_header(vcmdq, header, 32)); + "%sallocated\n", lvcmdq_error_header(vcmdq, header, 64)); return vcmdq; deinit_lvcmdq: -- Gitee From 54b71400dd4ee1dfbe9756867366753eecc16575 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 2 Sep 2024 10:27:11 +0800 Subject: [PATCH 52/99] iommu/vt-d: Require DMA domain if hardware not support passthrough ANBZ: #13617 commit 184bee388d4661c3fea633f135a5c45ff03c7ec6 upstream. The iommu core defines the def_domain_type callback to query the iommu driver about hardware capability and quirks. The iommu driver should declare IOMMU_DOMAIN_DMA requirement for hardware lacking pass-through capability. Earlier VT-d hardware implementations did not support pass-through translation mode. Before def_domain_type was introduced, the iommu driver simulated pass-through translation with a paging domain in which all physical system memory is identity-mapped (each address mapped to an IOVA equal to itself), and this approach has been kept until now. It's time to adjust it so the Intel iommu driver follows the def_domain_type semantics. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Jerry Snitselaar Link: https://lore.kernel.org/r/20240809055431.36513-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/intel/iommu.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index b7f127729c0e..0302ad5eecb5 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2163,6 +2163,16 @@ static bool device_rmrr_is_relaxable(struct device *dev) static int device_def_domain_type(struct device *dev) { + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + + /* + * Hardware does not support the passthrough translation mode. + * Always use a dynamaic mapping domain. + */ + if (!ecap_pass_through(iommu->ecap)) + return IOMMU_DOMAIN_DMA; + if (dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(dev); -- Gitee From 5b55ee8cf49f3cea80086605abeb92bbb2ad31dc Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 2 Sep 2024 10:27:12 +0800 Subject: [PATCH 53/99] iommu/vt-d: Remove identity mappings from si_domain ANBZ: #13617 commit 9e74e1b8198fd07fcbb4266771ca0f5195c71d9c upstream. As the driver now enforces DMA domains for devices managed by IOMMU hardware that doesn't support passthrough translation mode, there is no need for static identity mappings in the si_domain. Remove the identity mapping code to avoid dead code.
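Condensed from the deleted hunks below, the removed logic pre-populated si_domain with 1:1 translations at init time; a sketch of what used to run (and no longer needs to, since such hardware now always gets a DMA domain):

/*
 * Condensed form of the deleted si_domain_init() body: walk every
 * usable memory range and identity-map it (IOVA == PA) into
 * si_domain, so non-passthrough hardware could fake a passthrough
 * domain. The RMRR regions got the same treatment.
 */
for_each_online_node(nid) {
	for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
		ret = iommu_domain_identity_map(si_domain,
				mm_to_dma_pfn_start(start_pfn),
				mm_to_dma_pfn_end(end_pfn - 1));
		if (ret)
			return ret;
	}
}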
Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240809055431.36513-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/intel/iommu.c | 122 ++---------------------------------- 1 file changed, 4 insertions(+), 118 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 0302ad5eecb5..7a7f737eb8be 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -167,14 +167,7 @@ static void device_rbtree_remove(struct device_domain_info *info) spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags); } -/* - * This domain is a statically identity mapping domain. - * 1. This domain creats a static 1:1 mapping to all usable memory. - * 2. It maps to each iommu if successful. - * 3. Each iommu mapps to this domain if successful. - */ static struct dmar_domain *si_domain; -static int hw_pass_through = 1; struct dmar_rmrr_unit { struct list_head list; /* list of rmrr units */ @@ -1659,7 +1652,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, struct context_entry *context; int agaw, ret; - if (hw_pass_through && domain_type_is_si(domain)) + if (domain_type_is_si(domain)) translation = CONTEXT_TT_PASS_THROUGH; pr_debug("Set context mapping for %02x:%02x.%d\n", @@ -2012,29 +2005,10 @@ static bool dev_is_real_dma_subdevice(struct device *dev) pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev); } -static int iommu_domain_identity_map(struct dmar_domain *domain, - unsigned long first_vpfn, - unsigned long last_vpfn) -{ - /* - * RMRR range might have overlap with physical memory range, - * clear it first - */ - dma_pte_clear_range(domain, first_vpfn, last_vpfn); - - return __domain_mapping(domain, first_vpfn, - first_vpfn, last_vpfn - first_vpfn + 1, - DMA_PTE_READ|DMA_PTE_WRITE, GFP_KERNEL); -} - static int md_domain_init(struct dmar_domain *domain, int guest_width); -static int __init si_domain_init(int hw) +static int __init si_domain_init(void) { - struct dmar_rmrr_unit *rmrr; - struct device *dev; - int i, nid, ret; - si_domain = alloc_domain(IOMMU_DOMAIN_IDENTITY); if (!si_domain) return -EFAULT; @@ -2045,44 +2019,6 @@ static int __init si_domain_init(int hw) return -EFAULT; } - if (hw) - return 0; - - for_each_online_node(nid) { - unsigned long start_pfn, end_pfn; - int i; - - for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) { - ret = iommu_domain_identity_map(si_domain, - mm_to_dma_pfn_start(start_pfn), - mm_to_dma_pfn_end(end_pfn-1)); - if (ret) - return ret; - } - } - - /* - * Identity map the RMRRs so that devices with RMRRs could also use - * the si_domain. 
- */ - for_each_rmrr_units(rmrr) { - for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, - i, dev) { - unsigned long long start = rmrr->base_address; - unsigned long long end = rmrr->end_address; - - if (WARN_ON(end < start || - end >> agaw_to_width(si_domain->agaw))) - continue; - - ret = iommu_domain_identity_map(si_domain, - mm_to_dma_pfn_start(start >> PAGE_SHIFT), - mm_to_dma_pfn_end(end >> PAGE_SHIFT)); - if (ret) - return ret; - } - } - return 0; } @@ -2108,7 +2044,7 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, if (!sm_supported(iommu)) ret = domain_context_mapping(domain, dev); - else if (hw_pass_through && domain_type_is_si(domain)) + else if (domain_type_is_si(domain)) ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID); else if (domain->use_first_level) ret = domain_setup_first_level(iommu, domain, dev, IOMMU_NO_PASID); @@ -2463,8 +2399,6 @@ static int __init init_dmars(void) } } - if (!ecap_pass_through(iommu->ecap)) - hw_pass_through = 0; intel_svm_check(iommu); } @@ -2480,7 +2414,7 @@ static int __init init_dmars(void) check_tylersburg_isoch(); - ret = si_domain_init(hw_pass_through); + ret = si_domain_init(); if (ret) goto free_iommu; @@ -2907,12 +2841,6 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) if (ret) goto out; - if (hw_pass_through && !ecap_pass_through(iommu->ecap)) { - pr_warn("%s: Doesn't support hardware pass through.\n", - iommu->name); - return -ENXIO; - } - sp = domain_update_iommu_superpage(NULL, iommu) - 1; if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) { pr_warn("%s: Doesn't support large page.\n", @@ -3181,43 +3109,6 @@ int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) return 0; } -static int intel_iommu_memory_notifier(struct notifier_block *nb, - unsigned long val, void *v) -{ - struct memory_notify *mhp = v; - unsigned long start_vpfn = mm_to_dma_pfn_start(mhp->start_pfn); - unsigned long last_vpfn = mm_to_dma_pfn_end(mhp->start_pfn + - mhp->nr_pages - 1); - - switch (val) { - case MEM_GOING_ONLINE: - if (iommu_domain_identity_map(si_domain, - start_vpfn, last_vpfn)) { - pr_warn("Failed to build identity map for [%lx-%lx]\n", - start_vpfn, last_vpfn); - return NOTIFY_BAD; - } - break; - - case MEM_OFFLINE: - case MEM_CANCEL_ONLINE: - { - LIST_HEAD(freelist); - - domain_unmap(si_domain, start_vpfn, last_vpfn, &freelist); - iommu_put_pages_list(&freelist); - } - break; - } - - return NOTIFY_OK; -} - -static struct notifier_block intel_iommu_memory_nb = { - .notifier_call = intel_iommu_memory_notifier, - .priority = 0 -}; - static void intel_disable_iommus(void) { struct intel_iommu *iommu = NULL; @@ -3555,12 +3446,7 @@ int __init intel_iommu_init(void) iommu_pmu_register(iommu); } - up_read(&dmar_global_lock); - - if (si_domain && !hw_pass_through) - register_memory_notifier(&intel_iommu_memory_nb); - down_read(&dmar_global_lock); if (probe_acpi_namespace_devices()) pr_warn("ACPI name space devices didn't probe correctly\n"); -- Gitee From 3d675321a7997478281d153d19b192435f3dfea3 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 2 Sep 2024 10:27:14 +0800 Subject: [PATCH 54/99] iommu/vt-d: Remove has_iotlb_device flag ANBZ: #13617 commit 487df6836606dc67cd8e2c26616f581c8800a17a upstream. The has_iotlb_device flag was used to indicate if a domain had attached devices with ATS enabled. Domains without this flag didn't require device TLB invalidation during unmap operations, optimizing performance by avoiding unnecessary device iteration. 
With the introduction of cache tags, this flag is no longer needed. The code to iterate over attached devices was removed by commit 06792d067989 ("iommu/vt-d: Cleanup use of iommu_flush_iotlb_psi()"). Remove has_iotlb_device to avoid unnecessary code. Suggested-by: Jason Gunthorpe Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240809055431.36513-5-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/intel/iommu.c | 34 +--------------------------------- drivers/iommu/intel/iommu.h | 2 -- drivers/iommu/intel/nested.c | 2 -- 3 files changed, 1 insertion(+), 37 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 7a7f737eb8be..2396de866c6a 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -485,7 +485,6 @@ void domain_update_iommu_cap(struct dmar_domain *domain) domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw); domain->domain.pgsize_bitmap |= domain_super_pgsize_bitmap(domain); - domain_update_iotlb(domain); } struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, @@ -1275,32 +1274,6 @@ domain_lookup_dev_info(struct dmar_domain *domain, return NULL; } -void domain_update_iotlb(struct dmar_domain *domain) -{ - struct dev_pasid_info *dev_pasid; - struct device_domain_info *info; - bool has_iotlb_device = false; - unsigned long flags; - - spin_lock_irqsave(&domain->lock, flags); - list_for_each_entry(info, &domain->devices, link) { - if (info->ats_enabled) { - has_iotlb_device = true; - break; - } - } - - list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) { - info = dev_iommu_priv_get(dev_pasid->dev); - if (info->ats_enabled) { - has_iotlb_device = true; - break; - } - } - domain->has_iotlb_device = has_iotlb_device; - spin_unlock_irqrestore(&domain->lock, flags); -} - /* * The extra devTLB flush quirk impacts those QAT devices with PCI device * IDs ranging from 0x4940 to 0x4943. 
It is exempted from risky_device() @@ -1337,10 +1310,8 @@ static void iommu_enable_pci_caps(struct device_domain_info *info) info->pasid_enabled = 1; if (info->ats_supported && pci_ats_page_aligned(pdev) && - !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) { + !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) info->ats_enabled = 1; - domain_update_iotlb(info->domain); - } } static void iommu_disable_pci_caps(struct device_domain_info *info) @@ -1355,7 +1326,6 @@ static void iommu_disable_pci_caps(struct device_domain_info *info) if (info->ats_enabled) { pci_disable_ats(pdev); info->ats_enabled = 0; - domain_update_iotlb(info->domain); } if (info->pasid_enabled) { @@ -1529,7 +1499,6 @@ static struct dmar_domain *alloc_domain(unsigned int type) domain->nid = NUMA_NO_NODE; if (first_level_by_default(type)) domain->use_first_level = true; - domain->has_iotlb_device = false; INIT_LIST_HEAD(&domain->devices); INIT_LIST_HEAD(&domain->dev_pasids); INIT_LIST_HEAD(&domain->cache_tags); @@ -3593,7 +3562,6 @@ static struct dmar_domain *paging_domain_alloc(struct device *dev, bool first_st xa_init(&domain->iommu_array); domain->nid = dev_to_node(dev); - domain->has_iotlb_device = info->ats_enabled; domain->use_first_level = first_stage; /* calculate the address width */ diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 5dd150eaf60f..65e3cef88010 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -588,7 +588,6 @@ struct dmar_domain { int nid; /* node id */ struct xarray iommu_array; /* Attached IOMMU array */ - u8 has_iotlb_device: 1; u8 iommu_coherency: 1; /* indicate coherency of iommu access */ u8 force_snooping : 1; /* Create IOPTEs with snoop control */ u8 set_pte_snp:1; @@ -1104,7 +1103,6 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc, */ #define QI_OPT_WAIT_DRAIN BIT(0) -void domain_update_iotlb(struct dmar_domain *domain); int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu); void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu); void device_block_translation(struct device *dev); diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c index 16a2bcf5cfeb..36a91b1b52be 100644 --- a/drivers/iommu/intel/nested.c +++ b/drivers/iommu/intel/nested.c @@ -66,8 +66,6 @@ static int intel_nested_attach_dev(struct iommu_domain *domain, list_add(&info->link, &dmar_domain->devices); spin_unlock_irqrestore(&dmar_domain->lock, flags); - domain_update_iotlb(dmar_domain); - return 0; unassign_tag: cache_tag_unassign_domain(dmar_domain, dev, IOMMU_NO_PASID); -- Gitee From 81986d1a877a79ede5270c92635a5b50acd060e8 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 2 Sep 2024 10:27:15 +0800 Subject: [PATCH 55/99] iommu/vt-d: Factor out helpers from domain_context_mapping_one() ANBZ: #13617 commit c7191984e5aade540f1a3845a116537c89572655 upstream. Extract common code from domain_context_mapping_one() into new helpers, making it reusable by other functions such as the upcoming identity domain implementation. No intentional functional changes. 
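Condensed from the hunks below, the shared sequence that domain_context_mapping_one() follows after this refactor, which the identity domain code added later in this series can reuse:

/*
 * Shape of the context programming path after the refactor; the two
 * new helpers encapsulate the kdump copied-context teardown and the
 * caching-mode-aware flush of the freshly present entry.
 */
copied_context_tear_down(iommu, context, bus, devfn);
context_clear_entry(context);
context_set_domain_id(context, did);
/* ... program translation type and address root/width ... */
context_set_present(context);
if (!ecap_coherent(iommu->ecap))
	clflush_cache_range(context, sizeof(*context));
context_present_cache_flush(iommu, did, bus, devfn);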
Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Jerry Snitselaar Link: https://lore.kernel.org/r/20240809055431.36513-6-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/intel/iommu.c | 99 ++++++++++++++++++++++--------------- 1 file changed, 58 insertions(+), 41 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 2396de866c6a..8897bfdadc4f 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1609,6 +1609,61 @@ static void domain_exit(struct dmar_domain *domain) kfree(domain); } +/* + * For kdump cases, old valid entries may be cached due to the + * in-flight DMA and copied pgtable, but there is no unmapping + * behaviour for them, thus we need an explicit cache flush for + * the newly-mapped device. For kdump, at this point, the device + * is supposed to finish reset at its driver probe stage, so no + * in-flight DMA will exist, and we don't need to worry anymore + * hereafter. + */ +static void copied_context_tear_down(struct intel_iommu *iommu, + struct context_entry *context, + u8 bus, u8 devfn) +{ + u16 did_old; + + if (!context_copied(iommu, bus, devfn)) + return; + + assert_spin_locked(&iommu->lock); + + did_old = context_domain_id(context); + context_clear_entry(context); + + if (did_old < cap_ndoms(iommu->cap)) { + iommu->flush.flush_context(iommu, did_old, + (((u16)bus) << 8) | devfn, + DMA_CCMD_MASK_NOBIT, + DMA_CCMD_DEVICE_INVL); + iommu->flush.flush_iotlb(iommu, did_old, 0, 0, + DMA_TLB_DSI_FLUSH); + } + + clear_context_copied(iommu, bus, devfn); +} + +/* + * It's a non-present to present mapping. If hardware doesn't cache + * non-present entry we only need to flush the write-buffer. If the + * _does_ cache non-present entries, then it does so in the special + * domain #0, which we have to flush: + */ +static void context_present_cache_flush(struct intel_iommu *iommu, u16 did, + u8 bus, u8 devfn) +{ + if (cap_caching_mode(iommu->cap)) { + iommu->flush.flush_context(iommu, 0, + (((u16)bus) << 8) | devfn, + DMA_CCMD_MASK_NOBIT, + DMA_CCMD_DEVICE_INVL); + iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); + } else { + iommu_flush_write_buffer(iommu); + } +} + static int domain_context_mapping_one(struct dmar_domain *domain, struct intel_iommu *iommu, u8 bus, u8 devfn) @@ -1637,31 +1692,9 @@ static int domain_context_mapping_one(struct dmar_domain *domain, if (context_present(context) && !context_copied(iommu, bus, devfn)) goto out_unlock; - /* - * For kdump cases, old valid entries may be cached due to the - * in-flight DMA and copied pgtable, but there is no unmapping - * behaviour for them, thus we need an explicit cache flush for - * the newly-mapped device. For kdump, at this point, the device - * is supposed to finish reset at its driver probe stage, so no - * in-flight DMA will exist, and we don't need to worry anymore - * hereafter. 
- */ - if (context_copied(iommu, bus, devfn)) { - u16 did_old = context_domain_id(context); - - if (did_old < cap_ndoms(iommu->cap)) { - iommu->flush.flush_context(iommu, did_old, - (((u16)bus) << 8) | devfn, - DMA_CCMD_MASK_NOBIT, - DMA_CCMD_DEVICE_INVL); - iommu->flush.flush_iotlb(iommu, did_old, 0, 0, - DMA_TLB_DSI_FLUSH); - } - - clear_context_copied(iommu, bus, devfn); - } - + copied_context_tear_down(iommu, context, bus, devfn); context_clear_entry(context); + context_set_domain_id(context, did); if (translation != CONTEXT_TT_PASS_THROUGH) { @@ -1697,23 +1730,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, context_set_present(context); if (!ecap_coherent(iommu->ecap)) clflush_cache_range(context, sizeof(*context)); - - /* - * It's a non-present to present mapping. If hardware doesn't cache - * non-present entry we only need to flush the write-buffer. If the - * _does_ cache non-present entries, then it does so in the special - * domain #0, which we have to flush: - */ - if (cap_caching_mode(iommu->cap)) { - iommu->flush.flush_context(iommu, 0, - (((u16)bus) << 8) | devfn, - DMA_CCMD_MASK_NOBIT, - DMA_CCMD_DEVICE_INVL); - iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); - } else { - iommu_flush_write_buffer(iommu); - } - + context_present_cache_flush(iommu, did, bus, devfn); ret = 0; out_unlock: -- Gitee From fdf9d3f31421c52efcedb9595feb58d629ef463a Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 2 Sep 2024 10:27:16 +0800 Subject: [PATCH 56/99] iommu/vt-d: Add support for static identity domain ANBZ: #13617 commit 2031c469f8161abe74189cb74f50da224f340b71 upstream. Software determines VT-d hardware support for passthrough translation by inspecting the capability register. If passthrough translation is not supported, the device is instructed to use a DMA domain for its default domain. Add a global static identity domain with guaranteed attach semantics for IOMMUs that support passthrough translation mode. The global static identity domain is a dummy domain without a corresponding dmar_domain structure. Consequently, the device's info->domain will be NULL when the identity domain is attached. Refactor the code accordingly.
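One consequence worth spelling out: any path that derives a domain ID from info->domain must now tolerate a NULL pointer, as the svm.c hunk below does:

/*
 * From the svm.c hunk below: with the static identity domain
 * attached, info->domain is NULL, so fall back to the global
 * first-level passthrough DID instead of dereferencing it.
 */
did = domain ? domain_id_iommu(domain, iommu) : FLPT_DEFAULT_DID;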
Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240809055431.36513-7-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/intel/iommu.c | 114 ++++++++++++++++++++++++++++++++++-- drivers/iommu/intel/svm.c | 2 +- 2 files changed, 111 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 8897bfdadc4f..835e04c20919 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3766,11 +3766,9 @@ int prepare_domain_attach_device(struct iommu_domain *domain, static int intel_iommu_attach_device(struct iommu_domain *domain, struct device *dev) { - struct device_domain_info *info = dev_iommu_priv_get(dev); int ret; - if (info->domain) - device_block_translation(dev); + device_block_translation(dev); ret = prepare_domain_attach_device(domain, dev); if (ret) @@ -4379,11 +4377,17 @@ static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid, struct iommu_domain *domain) { struct device_domain_info *info = dev_iommu_priv_get(dev); - struct dmar_domain *dmar_domain = to_dmar_domain(domain); struct dev_pasid_info *curr, *dev_pasid = NULL; struct intel_iommu *iommu = info->iommu; + struct dmar_domain *dmar_domain; unsigned long flags; + if (domain->type == IOMMU_DOMAIN_IDENTITY) { + intel_pasid_tear_down_entry(iommu, dev, pasid, false); + return; + } + + dmar_domain = to_dmar_domain(domain); spin_lock_irqsave(&dmar_domain->lock, flags); list_for_each_entry(curr, &dmar_domain->dev_pasids, link_domain) { if (curr->dev == dev && curr->pasid == pasid) { @@ -4610,9 +4614,111 @@ static const struct iommu_dirty_ops intel_dirty_ops = { .read_and_clear_dirty = intel_iommu_read_and_clear_dirty, }; +static int context_setup_pass_through(struct device *dev, u8 bus, u8 devfn) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + struct context_entry *context; + + spin_lock(&iommu->lock); + context = iommu_context_addr(iommu, bus, devfn, 1); + if (!context) { + spin_unlock(&iommu->lock); + return -ENOMEM; + } + + if (context_present(context) && !context_copied(iommu, bus, devfn)) { + spin_unlock(&iommu->lock); + return 0; + } + + copied_context_tear_down(iommu, context, bus, devfn); + context_clear_entry(context); + context_set_domain_id(context, FLPT_DEFAULT_DID); + + /* + * In pass through mode, AW must be programmed to indicate the largest + * AGAW value supported by hardware. And ASR is ignored by hardware. 
+ */ + context_set_address_width(context, iommu->msagaw); + context_set_translation_type(context, CONTEXT_TT_PASS_THROUGH); + context_set_fault_enable(context); + context_set_present(context); + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(context, sizeof(*context)); + context_present_cache_flush(iommu, FLPT_DEFAULT_DID, bus, devfn); + spin_unlock(&iommu->lock); + + return 0; +} + +static int context_setup_pass_through_cb(struct pci_dev *pdev, u16 alias, void *data) +{ + struct device *dev = data; + + if (dev != &pdev->dev) + return 0; + + return context_setup_pass_through(dev, PCI_BUS_NUM(alias), alias & 0xff); +} + +static int device_setup_pass_through(struct device *dev) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + + if (!dev_is_pci(dev)) + return context_setup_pass_through(dev, info->bus, info->devfn); + + return pci_for_each_dma_alias(to_pci_dev(dev), + context_setup_pass_through_cb, dev); +} + +static int identity_domain_attach_dev(struct iommu_domain *domain, struct device *dev) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + int ret; + + device_block_translation(dev); + + if (dev_is_real_dma_subdevice(dev)) + return 0; + + if (sm_supported(iommu)) { + ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID); + if (!ret) + iommu_enable_pci_caps(info); + } else { + ret = device_setup_pass_through(dev); + } + + return ret; +} + +static int identity_domain_set_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + + if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev)) + return -EOPNOTSUPP; + + return intel_pasid_setup_pass_through(iommu, dev, pasid); +} + +static struct iommu_domain identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &(const struct iommu_domain_ops) { + .attach_dev = identity_domain_attach_dev, + .set_dev_pasid = identity_domain_set_dev_pasid, + }, +}; + const struct iommu_ops intel_iommu_ops = { .blocked_domain = &blocking_domain, .release_domain = &blocking_domain, + .identity_domain = &identity_domain, .capable = intel_iommu_capable, .hw_info = intel_iommu_hw_info, .domain_alloc = intel_iommu_domain_alloc, diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 0e3a9b38bef2..ef12e95e400a 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -311,7 +311,7 @@ void intel_drain_pasid_prq(struct device *dev, u32 pasid) domain = info->domain; pdev = to_pci_dev(dev); sid = PCI_DEVID(info->bus, info->devfn); - did = domain_id_iommu(domain, iommu); + did = domain ? domain_id_iommu(domain, iommu) : FLPT_DEFAULT_DID; qdep = pci_ats_queue_depth(pdev); /* -- Gitee From bb12167c76bac9babb1896da2fd96b3b6a62190b Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 2 Sep 2024 10:27:17 +0800 Subject: [PATCH 57/99] iommu/vt-d: Cleanup si_domain ANBZ: #13617 commit 50a7e2c6c3b6ea2439aa2e2e392c0ca2ef567fcf upstream. The static identity domain has been introduced, rendering the si_domain obsolete. Remove si_domain and cleanup the code accordingly. 
Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240809055431.36513-8-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/intel/iommu.c | 91 ++++++++----------------------------- 1 file changed, 19 insertions(+), 72 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 835e04c20919..b97eacca25c9 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -167,8 +167,6 @@ static void device_rbtree_remove(struct device_domain_info *info) spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags); } -static struct dmar_domain *si_domain; - struct dmar_rmrr_unit { struct list_head list; /* list of rmrr units */ struct acpi_dmar_header *hdr; /* ACPI header */ @@ -286,11 +284,6 @@ static int __init intel_iommu_setup(char *str) } __setup("intel_iommu=", intel_iommu_setup); -static int domain_type_is_si(struct dmar_domain *domain) -{ - return domain->domain.type == IOMMU_DOMAIN_IDENTITY; -} - static int domain_pfn_supported(struct dmar_domain *domain, unsigned long pfn) { int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; @@ -1676,9 +1669,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain, struct context_entry *context; int agaw, ret; - if (domain_type_is_si(domain)) - translation = CONTEXT_TT_PASS_THROUGH; - pr_debug("Set context mapping for %02x:%02x.%d\n", bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); @@ -1697,34 +1687,24 @@ static int domain_context_mapping_one(struct dmar_domain *domain, context_set_domain_id(context, did); - if (translation != CONTEXT_TT_PASS_THROUGH) { - /* - * Skip top levels of page tables for iommu which has - * less agaw than default. Unnecessary for PT mode. - */ - for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { - ret = -ENOMEM; - pgd = phys_to_virt(dma_pte_addr(pgd)); - if (!dma_pte_present(pgd)) - goto out_unlock; - } - - if (info && info->ats_supported) - translation = CONTEXT_TT_DEV_IOTLB; - else - translation = CONTEXT_TT_MULTI_LEVEL; - - context_set_address_root(context, virt_to_phys(pgd)); - context_set_address_width(context, agaw); - } else { - /* - * In pass through mode, AW must be programmed to - * indicate the largest AGAW value supported by - * hardware. And ASR is ignored by hardware. - */ - context_set_address_width(context, iommu->msagaw); + /* + * Skip top levels of page tables for iommu which has + * less agaw than default. Unnecessary for PT mode. 
+ */ + for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { + ret = -ENOMEM; + pgd = phys_to_virt(dma_pte_addr(pgd)); + if (!dma_pte_present(pgd)) + goto out_unlock; } + if (info && info->ats_supported) + translation = CONTEXT_TT_DEV_IOTLB; + else + translation = CONTEXT_TT_MULTI_LEVEL; + + context_set_address_root(context, virt_to_phys(pgd)); + context_set_address_width(context, agaw); context_set_translation_type(context, translation); context_set_fault_enable(context); context_set_present(context); @@ -1991,23 +1971,6 @@ static bool dev_is_real_dma_subdevice(struct device *dev) pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev); } -static int md_domain_init(struct dmar_domain *domain, int guest_width); - -static int __init si_domain_init(void) -{ - si_domain = alloc_domain(IOMMU_DOMAIN_IDENTITY); - if (!si_domain) - return -EFAULT; - - if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { - domain_exit(si_domain); - si_domain = NULL; - return -EFAULT; - } - - return 0; -} - static int dmar_domain_attach_device(struct dmar_domain *domain, struct device *dev) { @@ -2030,8 +1993,6 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, if (!sm_supported(iommu)) ret = domain_context_mapping(domain, dev); - else if (domain_type_is_si(domain)) - ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID); else if (domain->use_first_level) ret = domain_setup_first_level(iommu, domain, dev, IOMMU_NO_PASID); else @@ -2040,8 +2001,7 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, if (ret) goto out_block_translation; - if (sm_supported(info->iommu) || !domain_type_is_si(info->domain)) - iommu_enable_pci_caps(info); + iommu_enable_pci_caps(info); ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID); if (ret) @@ -2400,10 +2360,6 @@ static int __init init_dmars(void) check_tylersburg_isoch(); - ret = si_domain_init(); - if (ret) - goto free_iommu; - /* * for each drhd * enable fault log @@ -2449,10 +2405,6 @@ static int __init init_dmars(void) disable_dmar_iommu(iommu); free_dmar_iommu(iommu); } - if (si_domain) { - domain_exit(si_domain); - si_domain = NULL; - } return ret; } @@ -3647,8 +3599,6 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) domain->geometry.force_aperture = true; return domain; - case IOMMU_DOMAIN_IDENTITY: - return &si_domain->domain; default: return NULL; } @@ -3715,8 +3665,7 @@ static void intel_iommu_domain_free(struct iommu_domain *domain) WARN_ON(dmar_domain->nested_parent && !list_empty(&dmar_domain->s1_domains)); - if (domain != &si_domain->domain) - domain_exit(dmar_domain); + domain_exit(dmar_domain); } int prepare_domain_attach_device(struct iommu_domain *domain, @@ -4442,9 +4391,7 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain, if (ret) goto out_detach_iommu; - if (domain_type_is_si(dmar_domain)) - ret = intel_pasid_setup_pass_through(iommu, dev, pasid); - else if (dmar_domain->use_first_level) + if (dmar_domain->use_first_level) ret = domain_setup_first_level(iommu, dmar_domain, dev, pasid); else -- Gitee From 0554fcf6c5c7a851f364a031a2c36d1491954cb0 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 2 Sep 2024 10:27:19 +0800 Subject: [PATCH 58/99] iommu/vt-d: Move PCI PASID enablement to probe path ANBZ: #13617 commit ccb02b27bb50c0f5a8f6fd745aecf4ac4beda73f upstream. Currently, PCI PASID is enabled alongside PCI ATS when an iommu domain is attached to the device and disabled when the device transitions to block translation mode. 
This approach is inappropriate as PCI PASID is a device feature independent of the type of the attached domain. Enable PCI PASID during IOMMU device probe and disable it during the release path. Suggested-by: Yi Liu Signed-off-by: Lu Baolu Reviewed-by: Yi Liu Tested-by: Yi Liu Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240819051805.116936-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/intel/iommu.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index b97eacca25c9..acf59b7aeb9c 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1293,15 +1293,6 @@ static void iommu_enable_pci_caps(struct device_domain_info *info) return; pdev = to_pci_dev(info->dev); - - /* The PCIe spec, in its wisdom, declares that the behaviour of - the device if you enable PASID support after ATS support is - undefined. So always enable PASID support on devices which - have it, even if we can't yet know if we're ever going to - use it. */ - if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1)) - info->pasid_enabled = 1; - if (info->ats_supported && pci_ats_page_aligned(pdev) && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) info->ats_enabled = 1; @@ -1320,11 +1311,6 @@ static void iommu_disable_pci_caps(struct device_domain_info *info) pci_disable_ats(pdev); info->ats_enabled = 0; } - - if (info->pasid_enabled) { - pci_disable_pasid(pdev); - info->pasid_enabled = 0; - } } static void intel_flush_iotlb_all(struct iommu_domain *domain) @@ -4016,6 +4002,16 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) intel_iommu_debugfs_create_dev(info); + /* + * The PCIe spec, in its wisdom, declares that the behaviour of the + * device is undefined if you enable PASID support after ATS support. + * So always enable PASID support on devices which have it, even if + * we can't yet know if we're ever going to use it. + */ + if (info->pasid_supported && + !pci_enable_pasid(pdev, info->pasid_supported & ~1)) + info->pasid_enabled = 1; + return &iommu->iommu; free_table: intel_pasid_free_table(dev); @@ -4032,6 +4028,11 @@ static void intel_iommu_release_device(struct device *dev) struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; + if (info->pasid_enabled) { + pci_disable_pasid(to_pci_dev(dev)); + info->pasid_enabled = 0; + } + mutex_lock(&iommu->iopf_lock); if (dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev))) device_rbtree_remove(info); -- Gitee From 4b17cfde8784f7acdb7c9aa8ea18fa531af81329 Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Mon, 2 Sep 2024 10:27:21 +0800 Subject: [PATCH 59/99] iommu/vt-d: Factor out invalidation descriptor composition ANBZ: #13617 commit f701c9f36bcb7940f9c53413b508de8c9cb0321c upstream. Separate the logic for constructing IOTLB and device TLB invalidation descriptors from the qi_flush interfaces. New qi_desc_*() helpers are introduced to encapsulate this common functionality. Moving descriptor composition code to new helpers enables its reuse in the upcoming qi_batch interfaces. No functional changes are intended.
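Each converted qi_flush interface now reduces to the same two-step shape: compose the descriptor with the shared helper, then submit it synchronously. For example, qi_flush_iotlb() becomes (as in the hunk below):

void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
                    unsigned int size_order, u64 type)
{
        struct qi_desc desc;

        /* Descriptor composition now lives in the shared helper. */
        qi_desc_iotlb(iommu, did, addr, size_order, type, &desc);
        /* Submission is unchanged: one descriptor, synchronous. */
        qi_submit_sync(iommu, &desc, 1, 0);
}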
Signed-off-by: Tina Zhang Link: https://lore.kernel.org/r/20240815065221.50328-2-tina.zhang@intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/intel/dmar.c | 93 ++---------------------------- drivers/iommu/intel/iommu.h | 109 ++++++++++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+), 87 deletions(-) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 36c98d87677b..26554a3a2fa1 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1583,24 +1583,9 @@ void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, unsigned int size_order, u64 type) { - u8 dw = 0, dr = 0; - struct qi_desc desc; - int ih = 0; - - if (cap_write_drain(iommu->cap)) - dw = 1; - - if (cap_read_drain(iommu->cap)) - dr = 1; - - desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw) - | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE; - desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih) - | QI_IOTLB_AM(size_order); - desc.qw2 = 0; - desc.qw3 = 0; + qi_desc_iotlb(iommu, did, addr, size_order, type, &desc); qi_submit_sync(iommu, &desc, 1, 0); } @@ -1618,20 +1603,7 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, if (!(iommu->gcmd & DMA_GCMD_TE)) return; - if (mask) { - addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1; - desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE; - } else - desc.qw1 = QI_DEV_IOTLB_ADDR(addr); - - if (qdep >= QI_DEV_IOTLB_MAX_INVS) - qdep = 0; - - desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) | - QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid); - desc.qw2 = 0; - desc.qw3 = 0; - + qi_desc_dev_iotlb(sid, pfsid, qdep, addr, mask, &desc); qi_submit_sync(iommu, &desc, 1, 0); } @@ -1651,28 +1623,7 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr, return; } - if (npages == -1) { - desc.qw0 = QI_EIOTLB_PASID(pasid) | - QI_EIOTLB_DID(did) | - QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | - QI_EIOTLB_TYPE; - desc.qw1 = 0; - } else { - int mask = ilog2(__roundup_pow_of_two(npages)); - unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask)); - - if (WARN_ON_ONCE(!IS_ALIGNED(addr, align))) - addr = ALIGN_DOWN(addr, align); - - desc.qw0 = QI_EIOTLB_PASID(pasid) | - QI_EIOTLB_DID(did) | - QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | - QI_EIOTLB_TYPE; - desc.qw1 = QI_EIOTLB_ADDR(addr) | - QI_EIOTLB_IH(ih) | - QI_EIOTLB_AM(mask); - } - + qi_desc_piotlb(did, pasid, addr, npages, ih, &desc); qi_submit_sync(iommu, &desc, 1, 0); } @@ -1680,7 +1631,6 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr, void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid, u32 pasid, u16 qdep, u64 addr, unsigned int size_order) { - unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1); struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0}; /* @@ -1692,40 +1642,9 @@ void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid, if (!(iommu->gcmd & DMA_GCMD_TE)) return; - desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) | - QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE | - QI_DEV_IOTLB_PFSID(pfsid); - - /* - * If S bit is 0, we only flush a single page. If S bit is set, - * The least significant zero bit indicates the invalidation address - * range. VT-d spec 6.5.2.6. - * e.g. address bit 12[0] indicates 8KB, 13[0] indicates 16KB. 
- * size order = 0 is PAGE_SIZE 4KB - * Max Invs Pending (MIP) is set to 0 for now until we have DIT in - * ECAP. - */ - if (!IS_ALIGNED(addr, VTD_PAGE_SIZE << size_order)) - pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n", - addr, size_order); - - /* Take page address */ - desc.qw1 = QI_DEV_EIOTLB_ADDR(addr); - - if (size_order) { - /* - * Existing 0s in address below size_order may be the least - * significant bit, we must set them to 1s to avoid having - * smaller size than desired. - */ - desc.qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1, - VTD_PAGE_SHIFT); - /* Clear size_order bit to indicate size */ - desc.qw1 &= ~mask; - /* Set the S bit to indicate flushing more than 1 page */ - desc.qw1 |= QI_DEV_EIOTLB_SIZE; - } - + qi_desc_dev_iotlb_pasid(sid, pfsid, pasid, + qdep, addr, size_order, + &desc); qi_submit_sync(iommu, &desc, 1, 0); } diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 65e3cef88010..8a48d8c170df 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -1066,6 +1066,115 @@ static inline unsigned long nrpages_to_size(unsigned long npages) return npages << VTD_PAGE_SHIFT; } +static inline void qi_desc_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type, + struct qi_desc *desc) +{ + u8 dw = 0, dr = 0; + int ih = 0; + + if (cap_write_drain(iommu->cap)) + dw = 1; + + if (cap_read_drain(iommu->cap)) + dr = 1; + + desc->qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw) + | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE; + desc->qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih) + | QI_IOTLB_AM(size_order); + desc->qw2 = 0; + desc->qw3 = 0; +} + +static inline void qi_desc_dev_iotlb(u16 sid, u16 pfsid, u16 qdep, u64 addr, + unsigned int mask, struct qi_desc *desc) +{ + if (mask) { + addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1; + desc->qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE; + } else { + desc->qw1 = QI_DEV_IOTLB_ADDR(addr); + } + + if (qdep >= QI_DEV_IOTLB_MAX_INVS) + qdep = 0; + + desc->qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) | + QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid); + desc->qw2 = 0; + desc->qw3 = 0; +} + +static inline void qi_desc_piotlb(u16 did, u32 pasid, u64 addr, + unsigned long npages, bool ih, + struct qi_desc *desc) +{ + if (npages == -1) { + desc->qw0 = QI_EIOTLB_PASID(pasid) | + QI_EIOTLB_DID(did) | + QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | + QI_EIOTLB_TYPE; + desc->qw1 = 0; + } else { + int mask = ilog2(__roundup_pow_of_two(npages)); + unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask)); + + if (WARN_ON_ONCE(!IS_ALIGNED(addr, align))) + addr = ALIGN_DOWN(addr, align); + + desc->qw0 = QI_EIOTLB_PASID(pasid) | + QI_EIOTLB_DID(did) | + QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | + QI_EIOTLB_TYPE; + desc->qw1 = QI_EIOTLB_ADDR(addr) | + QI_EIOTLB_IH(ih) | + QI_EIOTLB_AM(mask); + } +} + +static inline void qi_desc_dev_iotlb_pasid(u16 sid, u16 pfsid, u32 pasid, + u16 qdep, u64 addr, + unsigned int size_order, + struct qi_desc *desc) +{ + unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1); + + desc->qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) | + QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE | + QI_DEV_IOTLB_PFSID(pfsid); + + /* + * If S bit is 0, we only flush a single page. If S bit is set, + * The least significant zero bit indicates the invalidation address + * range. VT-d spec 6.5.2.6. + * e.g. address bit 12[0] indicates 8KB, 13[0] indicates 16KB. 
+ * size order = 0 is PAGE_SIZE 4KB + * Max Invs Pending (MIP) is set to 0 for now until we have DIT in + * ECAP. + */ + if (!IS_ALIGNED(addr, VTD_PAGE_SIZE << size_order)) + pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n", + addr, size_order); + + /* Take page address */ + desc->qw1 = QI_DEV_EIOTLB_ADDR(addr); + + if (size_order) { + /* + * Existing 0s in address below size_order may be the least + * significant bit, we must set them to 1s to avoid having + * smaller size than desired. + */ + desc->qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1, + VTD_PAGE_SHIFT); + /* Clear size_order bit to indicate size */ + desc->qw1 &= ~mask; + /* Set the S bit to indicate flushing more than 1 page */ + desc->qw1 |= QI_DEV_EIOTLB_SIZE; + } +} + /* Convert value to context PASID directory size field coding. */ #define context_pdts(pds) (((pds) & 0x7) << 9) -- Gitee From d6423019714a24227f04833905974c9218ee9ef8 Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Mon, 2 Sep 2024 10:27:22 +0800 Subject: [PATCH 60/99] iommu/vt-d: Refactor IOTLB and Dev-IOTLB flush for batching ANBZ: #13617 commit 3297d047cd7f502ea7bd949fe070bf01c02aec3e upstream. Extracts IOTLB and Dev-IOTLB invalidation logic from cache tag flush interfaces into dedicated helper functions. It prepares the codebase for upcoming changes to support batched cache invalidations. To enable direct use of qi_flush helpers in the new functions, iommu->flush.flush_iotlb and quirk_extra_dev_tlb_flush() are opened up. No functional changes are intended. Co-developed-by: Lu Baolu Signed-off-by: Lu Baolu Signed-off-by: Tina Zhang Link: https://lore.kernel.org/r/20240815065221.50328-3-tina.zhang@intel.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/intel/cache.c | 142 ++++++++++++++++++++---------------- drivers/iommu/intel/iommu.c | 5 +- drivers/iommu/intel/iommu.h | 3 + 3 files changed, 83 insertions(+), 67 deletions(-) diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c index 44e92638c0cd..08f7ce2c16c3 100644 --- a/drivers/iommu/intel/cache.c +++ b/drivers/iommu/intel/cache.c @@ -255,6 +255,78 @@ static unsigned long calculate_psi_aligned_address(unsigned long start, return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask); } +static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *tag, + unsigned long addr, unsigned long pages, + unsigned long mask, int ih) +{ + struct intel_iommu *iommu = tag->iommu; + u64 type = DMA_TLB_PSI_FLUSH; + + if (domain->use_first_level) { + qi_flush_piotlb(iommu, tag->domain_id, tag->pasid, addr, pages, ih); + return; + } + + /* + * Fallback to domain selective flush if no PSI support or the size + * is too big. 
+ */ + if (!cap_pgsel_inv(iommu->cap) || + mask > cap_max_amask_val(iommu->cap) || pages == -1) { + addr = 0; + mask = 0; + ih = 0; + type = DMA_TLB_DSI_FLUSH; + } + + if (ecap_qis(iommu->ecap)) + qi_flush_iotlb(iommu, tag->domain_id, addr | ih, mask, type); + else + __iommu_flush_iotlb(iommu, tag->domain_id, addr | ih, mask, type); +} + +static void cache_tag_flush_devtlb_psi(struct dmar_domain *domain, struct cache_tag *tag, + unsigned long addr, unsigned long mask) +{ + struct intel_iommu *iommu = tag->iommu; + struct device_domain_info *info; + u16 sid; + + info = dev_iommu_priv_get(tag->dev); + sid = PCI_DEVID(info->bus, info->devfn); + + if (tag->pasid == IOMMU_NO_PASID) { + qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, + addr, mask); + if (info->dtlb_extra_inval) + qi_flush_dev_iotlb(iommu, sid, info->pfsid, + info->ats_qdep, addr, mask); + return; + } + + qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid, tag->pasid, + info->ats_qdep, addr, mask); + if (info->dtlb_extra_inval) + qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid, tag->pasid, + info->ats_qdep, addr, mask); +} + +static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_tag *tag) +{ + struct intel_iommu *iommu = tag->iommu; + struct device_domain_info *info; + u16 sid; + + info = dev_iommu_priv_get(tag->dev); + sid = PCI_DEVID(info->bus, info->devfn); + + qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0, + MAX_AGAW_PFN_WIDTH); + if (info->dtlb_extra_inval) + qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0, + MAX_AGAW_PFN_WIDTH); +} + /* * Invalidates a range of IOVA from @start (inclusive) to @end (inclusive) * when the memory mappings in the target domain have been modified. @@ -270,30 +342,10 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start, spin_lock_irqsave(&domain->cache_lock, flags); list_for_each_entry(tag, &domain->cache_tags, node) { - struct intel_iommu *iommu = tag->iommu; - struct device_domain_info *info; - u16 sid; - switch (tag->type) { case CACHE_TAG_IOTLB: case CACHE_TAG_NESTING_IOTLB: - if (domain->use_first_level) { - qi_flush_piotlb(iommu, tag->domain_id, - tag->pasid, addr, pages, ih); - } else { - /* - * Fallback to domain selective flush if no - * PSI support or the size is too big. 
- */ - if (!cap_pgsel_inv(iommu->cap) || - mask > cap_max_amask_val(iommu->cap)) - iommu->flush.flush_iotlb(iommu, tag->domain_id, - 0, 0, DMA_TLB_DSI_FLUSH); - else - iommu->flush.flush_iotlb(iommu, tag->domain_id, - addr | ih, mask, - DMA_TLB_PSI_FLUSH); - } + cache_tag_flush_iotlb(domain, tag, addr, pages, mask, ih); break; case CACHE_TAG_NESTING_DEVTLB: /* @@ -307,18 +359,7 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start, mask = MAX_AGAW_PFN_WIDTH; fallthrough; case CACHE_TAG_DEVTLB: - info = dev_iommu_priv_get(tag->dev); - sid = PCI_DEVID(info->bus, info->devfn); - - if (tag->pasid == IOMMU_NO_PASID) - qi_flush_dev_iotlb(iommu, sid, info->pfsid, - info->ats_qdep, addr, mask); - else - qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid, - tag->pasid, info->ats_qdep, - addr, mask); - - quirk_extra_dev_tlb_flush(info, addr, mask, tag->pasid, info->ats_qdep); + cache_tag_flush_devtlb_psi(domain, tag, addr, mask); break; } @@ -338,29 +379,14 @@ void cache_tag_flush_all(struct dmar_domain *domain) spin_lock_irqsave(&domain->cache_lock, flags); list_for_each_entry(tag, &domain->cache_tags, node) { - struct intel_iommu *iommu = tag->iommu; - struct device_domain_info *info; - u16 sid; - switch (tag->type) { case CACHE_TAG_IOTLB: case CACHE_TAG_NESTING_IOTLB: - if (domain->use_first_level) - qi_flush_piotlb(iommu, tag->domain_id, - tag->pasid, 0, -1, 0); - else - iommu->flush.flush_iotlb(iommu, tag->domain_id, - 0, 0, DMA_TLB_DSI_FLUSH); + cache_tag_flush_iotlb(domain, tag, 0, -1, 0, 0); break; case CACHE_TAG_DEVTLB: case CACHE_TAG_NESTING_DEVTLB: - info = dev_iommu_priv_get(tag->dev); - sid = PCI_DEVID(info->bus, info->devfn); - - qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, - 0, MAX_AGAW_PFN_WIDTH); - quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH, - IOMMU_NO_PASID, info->ats_qdep); + cache_tag_flush_devtlb_all(domain, tag); break; } @@ -399,20 +425,8 @@ void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start, } if (tag->type == CACHE_TAG_IOTLB || - tag->type == CACHE_TAG_NESTING_IOTLB) { - /* - * Fallback to domain selective flush if no - * PSI support or the size is too big. 
- */ - if (!cap_pgsel_inv(iommu->cap) || - mask > cap_max_amask_val(iommu->cap)) - iommu->flush.flush_iotlb(iommu, tag->domain_id, - 0, 0, DMA_TLB_DSI_FLUSH); - else - iommu->flush.flush_iotlb(iommu, tag->domain_id, - addr, mask, - DMA_TLB_PSI_FLUSH); - } + tag->type == CACHE_TAG_NESTING_IOTLB) + cache_tag_flush_iotlb(domain, tag, addr, pages, mask, 0); trace_cache_tag_flush_range_np(tag, start, end, addr, pages, mask); } diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index acf59b7aeb9c..a28e4b411fa0 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1196,9 +1196,8 @@ static void __iommu_flush_context(struct intel_iommu *iommu, raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } -/* return value determine if we need a write buffer flush */ -static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, - u64 addr, unsigned int size_order, u64 type) +void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type) { int tlb_offset = ecap_iotlb_offset(iommu->ecap); u64 val = 0, val_iva = 0; diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 8a48d8c170df..bf9dbef731d2 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -1206,6 +1206,9 @@ void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did, u64 granu, int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc, unsigned int count, unsigned long options); + +void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type); /* * Options used in qi_submit_sync: * QI_OPT_WAIT_DRAIN - Wait for PRQ drain completion, spec 6.5.2.8. -- Gitee From 1ccb576117580b688f44e3d91b70eb0376c0a72e Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 2 Sep 2024 10:27:23 +0800 Subject: [PATCH 61/99] iommu/vt-d: Add qi_batch for dmar_domain ANBZ: #13617 commit 777cdd853434849cc98ef94787538b1eb9f492d9 upstream. Introduces a qi_batch structure to hold batched cache invalidation descriptors on a per-dmar_domain basis. A fixed-size descriptor array is used for simplicity. The qi_batch is allocated when the first cache tag is added to the domain and freed during iommu_free_domain(). Signed-off-by: Lu Baolu Signed-off-by: Tina Zhang Link: https://lore.kernel.org/r/20240815065221.50328-4-tina.zhang@intel.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/intel/cache.c | 7 +++++++ drivers/iommu/intel/iommu.c | 1 + drivers/iommu/intel/iommu.h | 14 ++++++++++++++ drivers/iommu/intel/nested.c | 1 + drivers/iommu/intel/svm.c | 5 ++++- 5 files changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c index 08f7ce2c16c3..2e997d782beb 100644 --- a/drivers/iommu/intel/cache.c +++ b/drivers/iommu/intel/cache.c @@ -190,6 +190,13 @@ int cache_tag_assign_domain(struct dmar_domain *domain, u16 did = domain_get_id_for_dev(domain, dev); int ret; + /* domain->qi_bach will be freed in iommu_free_domain() path. 
*/ + if (!domain->qi_batch) { + domain->qi_batch = kzalloc(sizeof(*domain->qi_batch), GFP_KERNEL); + if (!domain->qi_batch) + return -ENOMEM; + } + ret = __cache_tag_assign_domain(domain, did, dev, pasid); if (ret || domain->domain.type != IOMMU_DOMAIN_NESTED) return ret; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index a28e4b411fa0..9c07050e8e26 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1584,6 +1584,7 @@ static void domain_exit(struct dmar_domain *domain) if (WARN_ON(!list_empty(&domain->devices))) return; + kfree(domain->qi_batch); kfree(domain); } diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index bf9dbef731d2..fe9cbfbe38f7 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -584,6 +584,19 @@ struct iommu_domain_info { * to VT-d spec, section 9.3 */ }; +/* + * We start simply by using a fixed size for the batched descriptors. This + * size is currently sufficient for our needs. Future improvements could + * involve dynamically allocating the batch buffer based on actual demand, + * allowing us to adjust the batch size for optimal performance in different + * scenarios. + */ +#define QI_MAX_BATCHED_DESC_COUNT 16 +struct qi_batch { + struct qi_desc descs[QI_MAX_BATCHED_DESC_COUNT]; + unsigned int index; +}; + struct dmar_domain { int nid; /* node id */ struct xarray iommu_array; /* Attached IOMMU array */ @@ -608,6 +621,7 @@ struct dmar_domain { spinlock_t cache_lock; /* Protect the cache tag list */ struct list_head cache_tags; /* Cache tag list */ + struct qi_batch *qi_batch; /* Batched QI descriptors */ int iommu_superpage;/* Level of superpages supported: 0 == 4KiB (no superpages), 1 == 2MiB, diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c index 36a91b1b52be..433c58944401 100644 --- a/drivers/iommu/intel/nested.c +++ b/drivers/iommu/intel/nested.c @@ -83,6 +83,7 @@ static void intel_nested_domain_free(struct iommu_domain *domain) spin_lock(&s2_domain->s1_lock); list_del(&dmar_domain->s2_link); spin_unlock(&s2_domain->s1_lock); + kfree(dmar_domain->qi_batch); kfree(dmar_domain); } diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index ef12e95e400a..078d1e32a24e 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -184,7 +184,10 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) static void intel_mm_free_notifier(struct mmu_notifier *mn) { - kfree(container_of(mn, struct dmar_domain, notifier)); + struct dmar_domain *domain = container_of(mn, struct dmar_domain, notifier); + + kfree(domain->qi_batch); + kfree(domain); } static const struct mmu_notifier_ops intel_mmuops = { -- Gitee From 12ff48c1464dff0da6c9835e00f2a5ed704eedfa Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Mon, 2 Sep 2024 10:27:24 +0800 Subject: [PATCH 62/99] iommu/vt-d: Introduce batched cache invalidation ANBZ: #13617 commit 705c1cdf1e73c4c727bbfc8775434e6dd36e8baf upstream. Converts IOTLB and Dev-IOTLB invalidation to a batched model. Cache tag invalidation requests for a domain are now accumulated in a qi_batch structure before being flushed in bulk. It replaces the previous per- request qi_flush approach with a more efficient batching mechanism. 
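The converted flush loops follow a fill-and-drain pattern: descriptors are appended to the per-domain batch, and the batch is drained whenever it fills up, whenever the tag list moves on to a different IOMMU, and once more before the cache lock is dropped. A condensed sketch of the loop in cache_tag_flush_range() (the per-tag-type switch and the locals taken from the surrounding function are omitted):

        struct intel_iommu *iommu = NULL;
        struct cache_tag *tag;

        list_for_each_entry(tag, &domain->cache_tags, node) {
                /* Descriptors are queued per IOMMU; drain before switching. */
                if (iommu && iommu != tag->iommu)
                        qi_batch_flush_descs(iommu, domain->qi_batch);
                iommu = tag->iommu;

                qi_batch_add_iotlb(iommu, tag->domain_id, addr | ih, mask,
                                   type, domain->qi_batch);
        }
        /* Drain whatever is still queued for the last IOMMU. */
        qi_batch_flush_descs(iommu, domain->qi_batch);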
Co-developed-by: Lu Baolu Signed-off-by: Lu Baolu Signed-off-by: Tina Zhang Link: https://lore.kernel.org/r/20240815065221.50328-5-tina.zhang@intel.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/intel/cache.c | 122 +++++++++++++++++++++++++++++++----- 1 file changed, 107 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c index 2e997d782beb..e5b89f728ad3 100644 --- a/drivers/iommu/intel/cache.c +++ b/drivers/iommu/intel/cache.c @@ -262,6 +262,79 @@ static unsigned long calculate_psi_aligned_address(unsigned long start, return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask); } +static void qi_batch_flush_descs(struct intel_iommu *iommu, struct qi_batch *batch) +{ + if (!iommu || !batch->index) + return; + + qi_submit_sync(iommu, batch->descs, batch->index, 0); + + /* Reset the index value and clean the whole batch buffer. */ + memset(batch, 0, sizeof(*batch)); +} + +static void qi_batch_increment_index(struct intel_iommu *iommu, struct qi_batch *batch) +{ + if (++batch->index == QI_MAX_BATCHED_DESC_COUNT) + qi_batch_flush_descs(iommu, batch); +} + +static void qi_batch_add_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type, + struct qi_batch *batch) +{ + qi_desc_iotlb(iommu, did, addr, size_order, type, &batch->descs[batch->index]); + qi_batch_increment_index(iommu, batch); +} + +static void qi_batch_add_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, + u16 qdep, u64 addr, unsigned int mask, + struct qi_batch *batch) +{ + /* + * According to VT-d spec, software is recommended to not submit any Device-TLB + * invalidation requests while address remapping hardware is disabled. + */ + if (!(iommu->gcmd & DMA_GCMD_TE)) + return; + + qi_desc_dev_iotlb(sid, pfsid, qdep, addr, mask, &batch->descs[batch->index]); + qi_batch_increment_index(iommu, batch); +} + +static void qi_batch_add_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, + u64 addr, unsigned long npages, bool ih, + struct qi_batch *batch) +{ + /* + * npages == -1 means a PASID-selective invalidation, otherwise, + * a positive value for Page-selective-within-PASID invalidation. + * 0 is not a valid input. + */ + if (!npages) + return; + + qi_desc_piotlb(did, pasid, addr, npages, ih, &batch->descs[batch->index]); + qi_batch_increment_index(iommu, batch); +} + +static void qi_batch_add_pasid_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, + u32 pasid, u16 qdep, u64 addr, + unsigned int size_order, struct qi_batch *batch) +{ + /* + * According to VT-d spec, software is recommended to not submit any + * Device-TLB invalidation requests while address remapping hardware + * is disabled. 
+ */ + if (!(iommu->gcmd & DMA_GCMD_TE)) + return; + + qi_desc_dev_iotlb_pasid(sid, pfsid, pasid, qdep, addr, size_order, + &batch->descs[batch->index]); + qi_batch_increment_index(iommu, batch); +} + static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *tag, unsigned long addr, unsigned long pages, unsigned long mask, int ih) @@ -270,7 +343,8 @@ static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag * u64 type = DMA_TLB_PSI_FLUSH; if (domain->use_first_level) { - qi_flush_piotlb(iommu, tag->domain_id, tag->pasid, addr, pages, ih); + qi_batch_add_piotlb(iommu, tag->domain_id, tag->pasid, addr, + pages, ih, domain->qi_batch); return; } @@ -287,7 +361,8 @@ static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag * } if (ecap_qis(iommu->ecap)) - qi_flush_iotlb(iommu, tag->domain_id, addr | ih, mask, type); + qi_batch_add_iotlb(iommu, tag->domain_id, addr | ih, mask, type, + domain->qi_batch); else __iommu_flush_iotlb(iommu, tag->domain_id, addr | ih, mask, type); } @@ -303,19 +378,20 @@ static void cache_tag_flush_devtlb_psi(struct dmar_domain *domain, struct cache_ sid = PCI_DEVID(info->bus, info->devfn); if (tag->pasid == IOMMU_NO_PASID) { - qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, - addr, mask); + qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, + addr, mask, domain->qi_batch); if (info->dtlb_extra_inval) - qi_flush_dev_iotlb(iommu, sid, info->pfsid, - info->ats_qdep, addr, mask); + qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, + addr, mask, domain->qi_batch); return; } - qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid, tag->pasid, - info->ats_qdep, addr, mask); + qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid, + info->ats_qdep, addr, mask, domain->qi_batch); if (info->dtlb_extra_inval) - qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid, tag->pasid, - info->ats_qdep, addr, mask); + qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid, + info->ats_qdep, addr, mask, + domain->qi_batch); } static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_tag *tag) @@ -327,11 +403,11 @@ static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_ info = dev_iommu_priv_get(tag->dev); sid = PCI_DEVID(info->bus, info->devfn); - qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0, - MAX_AGAW_PFN_WIDTH); + qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0, + MAX_AGAW_PFN_WIDTH, domain->qi_batch); if (info->dtlb_extra_inval) - qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0, - MAX_AGAW_PFN_WIDTH); + qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0, + MAX_AGAW_PFN_WIDTH, domain->qi_batch); } /* @@ -341,6 +417,7 @@ static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start, unsigned long end, int ih) { + struct intel_iommu *iommu = NULL; unsigned long pages, mask, addr; struct cache_tag *tag; unsigned long flags; @@ -349,6 +426,10 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start, spin_lock_irqsave(&domain->cache_lock, flags); list_for_each_entry(tag, &domain->cache_tags, node) { + if (iommu && iommu != tag->iommu) + qi_batch_flush_descs(iommu, domain->qi_batch); + iommu = tag->iommu; + switch (tag->type) { case CACHE_TAG_IOTLB: case CACHE_TAG_NESTING_IOTLB: @@ -372,6 +453,7 @@ void cache_tag_flush_range(struct dmar_domain *domain, 
unsigned long start, trace_cache_tag_flush_range(tag, start, end, addr, pages, mask); } + qi_batch_flush_descs(iommu, domain->qi_batch); spin_unlock_irqrestore(&domain->cache_lock, flags); } @@ -381,11 +463,16 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start, */ void cache_tag_flush_all(struct dmar_domain *domain) { + struct intel_iommu *iommu = NULL; struct cache_tag *tag; unsigned long flags; spin_lock_irqsave(&domain->cache_lock, flags); list_for_each_entry(tag, &domain->cache_tags, node) { + if (iommu && iommu != tag->iommu) + qi_batch_flush_descs(iommu, domain->qi_batch); + iommu = tag->iommu; + switch (tag->type) { case CACHE_TAG_IOTLB: case CACHE_TAG_NESTING_IOTLB: @@ -399,6 +486,7 @@ void cache_tag_flush_all(struct dmar_domain *domain) trace_cache_tag_flush_all(tag); } + qi_batch_flush_descs(iommu, domain->qi_batch); spin_unlock_irqrestore(&domain->cache_lock, flags); } @@ -416,6 +504,7 @@ void cache_tag_flush_all(struct dmar_domain *domain) void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start, unsigned long end) { + struct intel_iommu *iommu = NULL; unsigned long pages, mask, addr; struct cache_tag *tag; unsigned long flags; @@ -424,7 +513,9 @@ void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start, spin_lock_irqsave(&domain->cache_lock, flags); list_for_each_entry(tag, &domain->cache_tags, node) { - struct intel_iommu *iommu = tag->iommu; + if (iommu && iommu != tag->iommu) + qi_batch_flush_descs(iommu, domain->qi_batch); + iommu = tag->iommu; if (!cap_caching_mode(iommu->cap) || domain->use_first_level) { iommu_flush_write_buffer(iommu); @@ -437,5 +528,6 @@ void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start, trace_cache_tag_flush_range_np(tag, start, end, addr, pages, mask); } + qi_batch_flush_descs(iommu, domain->qi_batch); spin_unlock_irqrestore(&domain->cache_lock, flags); } -- Gitee From cd321fd767e583376593dec2289a3d0effdd7295 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 2 Sep 2024 10:27:20 +0800 Subject: [PATCH 63/99] iommu/vt-d: Unconditionally flush device TLB for pasid table updates ANBZ: #13617 commit 1f5e307ca16c0c19186cbd56ac460a687e6daba0 upstream. The caching mode of an IOMMU is irrelevant to the behavior of the device TLB. Previously, commit <304b3bde24b5> ("iommu/vt-d: Remove caching mode check before device TLB flush") removed this redundant check in the domain unmap path. Checking the caching mode before flushing the device TLB after a pasid table entry is updated is unnecessary and can lead to inconsistent behavior. Extends this consistency by removing the caching mode check in the pasid table update path. Suggested-by: Yi Liu Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20240820030208.20020-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/intel/pasid.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index b51fc268dc84..2e5fa0a23299 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -264,9 +264,7 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev, else iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); - /* Device IOTLB doesn't need to be flushed in caching mode. 
- */ - if (!cap_caching_mode(iommu->cap)) - devtlb_invalidation_with_pasid(iommu, dev, pasid); + devtlb_invalidation_with_pasid(iommu, dev, pasid); } /* @@ -493,9 +491,7 @@ int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu, iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); - /* Device IOTLB doesn't need to be flushed in caching mode. */ - if (!cap_caching_mode(iommu->cap)) - devtlb_invalidation_with_pasid(iommu, dev, pasid); + devtlb_invalidation_with_pasid(iommu, dev, pasid); return 0; } @@ -572,9 +568,7 @@ void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu, pasid_cache_invalidation_with_pasid(iommu, did, pasid); qi_flush_piotlb(iommu, did, pasid, 0, -1, 0); - /* Device IOTLB doesn't need to be flushed in caching mode. */ - if (!cap_caching_mode(iommu->cap)) - devtlb_invalidation_with_pasid(iommu, dev, pasid); + devtlb_invalidation_with_pasid(iommu, dev, pasid); } /** -- Gitee From 0b629aabea68b533e8e44e53dae76d592dbd8174 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 31 Jul 2024 13:12:44 -0600 Subject: [PATCH 64/99] iommu: Use of_property_present() ANBZ: #13617 commit 04f4f33c941c221645d2a58b46f4d698b0f5aa39 upstream. Use of_property_present() to test for property presence rather than of_(find|get)_property(). This is part of a larger effort to remove callers of of_find_property() and similar functions. of_find_property() leaks the DT struct property and data pointers, which is a problem for dynamically allocated nodes that may be freed. Signed-off-by: Rob Herring (Arm) Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20240731191312.1710417-6-robh@kernel.org Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/fsl_pamu_domain.c | 4 +--- drivers/iommu/of_iommu.c | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index e9d2bff4659b..30be786bff11 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -416,14 +416,12 @@ static struct iommu_group *fsl_pamu_device_group(struct device *dev) static struct iommu_device *fsl_pamu_probe_device(struct device *dev) { - int len; - /* * uboot must fill the fsl,liodn for platform devices to be supported by * the iommu. */ if (!dev_is_pci(dev) && - !of_get_property(dev->of_node, "fsl,liodn", &len)) + !of_property_present(dev->of_node, "fsl,liodn")) return ERR_PTR(-ENODEV); return &pamu_iommu; diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 78d61da75257..e7a6a1611d19 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -214,7 +214,7 @@ void of_iommu_get_resv_regions(struct device *dev, struct list_head *list) * that represent reservations in the IOVA space, which are regions that should * not be mapped. */ - if (of_find_property(it.node, "reg", NULL)) { + if (of_property_present(it.node, "reg")) { err = of_address_to_resource(it.node, 0, &phys); if (err < 0) { dev_err(dev, "failed to parse memory region %pOF: %d\n", -- Gitee From 5a50d4abc6072e7c4e5db26b0c51137d566e09c1 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Wed, 28 Aug 2024 11:10:22 +0000 Subject: [PATCH 65/99] iommu/amd: Update event log pointer as soon as processing is complete ANBZ: #13617 commit fdc39b77db95e36f6b4d3c006a2642b9f47510c5 upstream. Update the event buffer head pointer as soon as the driver finishes processing an entry, so that the IOMMU can write new log entries without waiting for the driver to process the whole event log.
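In sketch form, the fix moves the head-pointer write-back into the consumer loop, so the hardware regains a ring-buffer slot after every consumed entry instead of only after the whole drain:

        while (head != tail) {
                iommu_print_event(iommu, iommu->evt_buf + head);
                head = (head + EVENT_ENTRY_SIZE) % EVT_BUFFER_SIZE;
                /* Hand the consumed slot back to the hardware producer. */
                writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
        }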
Signed-off-by: Vasant Hegde Reviewed-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20240828111029.5429-2-vasant.hegde@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/iommu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 87c6d253079d..3cdc3fa5b57e 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -825,10 +825,12 @@ static void iommu_poll_events(struct amd_iommu *iommu) while (head != tail) { iommu_print_event(iommu, iommu->evt_buf + head); + + /* Update head pointer of hardware ring-buffer */ head = (head + EVENT_ENTRY_SIZE) % EVT_BUFFER_SIZE; + writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); } - writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); } #ifdef CONFIG_IRQ_REMAP -- Gitee From 378ae195656da6054ca2986aa895442f9bcb714b Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Wed, 28 Aug 2024 11:10:23 +0000 Subject: [PATCH 66/99] iommu/amd: Make amd_iommu_is_attach_deferred() static ANBZ: #13617 commit 53f1fb0c46f0e3ebf13b76697b67782f65fc3f69 upstream. amd_iommu_is_attach_deferred() is a callback function called through iommu_ops. Make it static. No functional changes intended. Signed-off-by: Vasant Hegde Reviewed-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20240828111029.5429-3-vasant.hegde@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/amd_iommu.h | 1 - drivers/iommu/amd/iommu.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 807caef76d79..771e8565ed72 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -180,7 +180,6 @@ static inline struct protection_domain *to_pdomain(struct iommu_domain *dom) } bool translation_pre_enabled(struct amd_iommu *iommu); -bool amd_iommu_is_attach_deferred(struct device *dev); int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line); #ifdef CONFIG_DMI diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 3cdc3fa5b57e..1ac49f3ee17f 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2807,7 +2807,7 @@ static void amd_iommu_get_resv_regions(struct device *dev, list_add_tail(&region->list, head); } -bool amd_iommu_is_attach_deferred(struct device *dev) +static bool amd_iommu_is_attach_deferred(struct device *dev) { struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev); -- Gitee From 1b76b7f4de0ddaea641111a8b0369c8fb4fdfcfc Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Wed, 28 Aug 2024 11:10:24 +0000 Subject: [PATCH 67/99] iommu/amd: Remove unused DTE_GCR3_INDEX_* macros ANBZ: #13617 commit 95eb6a05124f9a206f009a21c9a0a9f320859b6c upstream. These macros were added in commit 52815b75682e ("iommu/amd: Add support for IOMMUv2 domain mode") but never used. Hence remove them.
Signed-off-by: Vasant Hegde Reviewed-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20240828111029.5429-4-vasant.hegde@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/amd_iommu_types.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index c9f9a598eb82..c7432296bb90 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -415,10 +415,6 @@ #define DTE_GCR3_VAL_B(x) (((x) >> 15) & 0x0ffffULL) #define DTE_GCR3_VAL_C(x) (((x) >> 31) & 0x1fffffULL) -#define DTE_GCR3_INDEX_A 0 -#define DTE_GCR3_INDEX_B 1 -#define DTE_GCR3_INDEX_C 1 - #define DTE_GCR3_SHIFT_A 58 #define DTE_GCR3_SHIFT_B 16 #define DTE_GCR3_SHIFT_C 43 -- Gitee From 1aed21dfb7de2bdd730d937f2f9444c3e9e205e0 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Wed, 28 Aug 2024 11:10:25 +0000 Subject: [PATCH 68/99] iommu/amd: Handle error path in amd_iommu_probe_device() ANBZ: #13617 commit 293aa9ec694e633bff83ab93715a2684e15fe214 upstream. Do not try to set max_pasids in the error path, as dev_data is not allocated. Fixes: a0c47f233e68 ("iommu/amd: Introduce iommu_dev_data.max_pasids") Signed-off-by: Vasant Hegde Reviewed-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20240828111029.5429-5-vasant.hegde@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/iommu.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 1ac49f3ee17f..f2eb58ea5ec0 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2196,11 +2196,12 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev) dev_err(dev, "Failed to initialize - trying to proceed anyway\n"); iommu_dev = ERR_PTR(ret); iommu_ignore_device(iommu, dev); - } else { - amd_iommu_set_pci_msi_domain(dev, iommu); - iommu_dev = &iommu->iommu; + goto out_err; } + amd_iommu_set_pci_msi_domain(dev, iommu); + iommu_dev = &iommu->iommu; + /* * If IOMMU and device supports PASID then it will contain max * supported PASIDs, else it will be zero. @@ -2212,6 +2213,7 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev) pci_max_pasids(to_pci_dev(dev))); } +out_err: iommu_completion_wait(iommu); if (dev_is_pci(dev)) -- Gitee From af0cb68a036a0668113b72e983bcf363645a19ad Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Wed, 28 Aug 2024 11:10:26 +0000 Subject: [PATCH 69/99] iommu/amd: Make amd_iommu_dev_flush_pasid_all() static ANBZ: #13617 commit 845bd6ac43422671778120ec082d7ddcab035a1f upstream. It is not used outside iommu.c. Also rename it to dev_flush_pasid_all(). No functional change intended.
Signed-off-by: Vasant Hegde Reviewed-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20240828111029.5429-6-vasant.hegde@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/amd_iommu.h | 2 -- drivers/iommu/amd/iommu.c | 6 +++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 771e8565ed72..c9a3b3ca9d09 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -93,8 +93,6 @@ void amd_iommu_domain_flush_pages(struct protection_domain *domain, u64 address, size_t size); void amd_iommu_dev_flush_pasid_pages(struct iommu_dev_data *dev_data, ioasid_t pasid, u64 address, size_t size); -void amd_iommu_dev_flush_pasid_all(struct iommu_dev_data *dev_data, - ioasid_t pasid); #ifdef CONFIG_IRQ_REMAP int amd_iommu_create_irq_domain(struct amd_iommu *iommu); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index f2eb58ea5ec0..91372f107bea 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1560,8 +1560,8 @@ void amd_iommu_dev_flush_pasid_pages(struct iommu_dev_data *dev_data, iommu_completion_wait(iommu); } -void amd_iommu_dev_flush_pasid_all(struct iommu_dev_data *dev_data, - ioasid_t pasid) +static void dev_flush_pasid_all(struct iommu_dev_data *dev_data, + ioasid_t pasid) { amd_iommu_dev_flush_pasid_pages(dev_data, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, pasid); @@ -1827,7 +1827,7 @@ static int update_gcr3(struct iommu_dev_data *dev_data, else *pte = 0; - amd_iommu_dev_flush_pasid_all(dev_data, pasid); + dev_flush_pasid_all(dev_data, pasid); return 0; } -- Gitee From e735a7016d84e39d9853ff957027464b00827920 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Wed, 28 Aug 2024 11:10:27 +0000 Subject: [PATCH 70/99] iommu/amd: Make amd_iommu_domain_flush_complete() static ANBZ: #13617 commit 964877dc26232835d4465d9565399fe8ca4525e8 upstream. The AMD driver uses the amd_iommu_domain_flush_complete() function to make sure the IOMMU has processed invalidation commands before proceeding. Ideally this should be called from the functions that update the DTE or invalidate caches. There is no need to call this function explicitly. This patch makes the following changes: - Rename amd_iommu_domain_flush_complete() -> domain_flush_complete() and make it a static function. - Rearrange domain_flush_complete() to avoid a forward declaration. - Update amd_iommu_update_and_flush_device_table() to call domain_flush_complete().
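After this change, amd_iommu_update_and_flush_device_table() performs the full update-flush-wait sequence on its own, so callers no longer need an explicit completion wait; in condensed form (matching the hunks below):

void amd_iommu_update_and_flush_device_table(struct protection_domain *domain)
{
        update_device_table(domain);    /* rewrite the DTEs */
        domain_flush_devices(domain);   /* queue the DTE invalidations */
        domain_flush_complete(domain);  /* wait on each IOMMU of the domain */
}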
Signed-off-by: Vasant Hegde Reviewed-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20240828111029.5429-7-vasant.hegde@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/amd_iommu.h | 1 - drivers/iommu/amd/io_pgtable.c | 1 - drivers/iommu/amd/iommu.c | 37 +++++++++++++++++----------------- 3 files changed, 19 insertions(+), 20 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index c9a3b3ca9d09..062a60faa64e 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -88,7 +88,6 @@ void amd_iommu_flush_all_caches(struct amd_iommu *iommu); void amd_iommu_update_and_flush_device_table(struct protection_domain *domain); void amd_iommu_domain_update(struct protection_domain *domain); void amd_iommu_dev_update_dte(struct iommu_dev_data *dev_data, bool set); -void amd_iommu_domain_flush_complete(struct protection_domain *domain); void amd_iommu_domain_flush_pages(struct protection_domain *domain, u64 address, size_t size); void amd_iommu_dev_flush_pasid_pages(struct iommu_dev_data *dev_data, diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 1074ee25064d..bfbcec68efb9 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -175,7 +175,6 @@ static bool increase_address_space(struct protection_domain *domain, domain->iop.root = pte; domain->iop.mode += 1; amd_iommu_update_and_flush_device_table(domain); - amd_iommu_domain_flush_complete(domain); /* * Device Table needs to be updated and flushed before the new root can diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 91372f107bea..e9054e33d512 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1257,6 +1257,22 @@ static int iommu_completion_wait(struct amd_iommu *iommu) return ret; } +static void domain_flush_complete(struct protection_domain *domain) +{ + int i; + + for (i = 0; i < amd_iommu_get_num_iommus(); ++i) { + if (domain && !domain->dev_iommu[i]) + continue; + + /* + * Devices of this domain are behind this IOMMU + * We need to wait for completion of all commands. + */ + iommu_completion_wait(amd_iommus[i]); + } +} + static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid) { struct iommu_cmd cmd; @@ -1494,7 +1510,7 @@ void amd_iommu_domain_flush_pages(struct protection_domain *domain, __domain_flush_pages(domain, address, size); /* Wait until IOMMU TLB and all device IOTLB flushes are complete */ - amd_iommu_domain_flush_complete(domain); + domain_flush_complete(domain); return; } @@ -1534,7 +1550,7 @@ void amd_iommu_domain_flush_pages(struct protection_domain *domain, } /* Wait until IOMMU TLB and all device IOTLB flushes are complete */ - amd_iommu_domain_flush_complete(domain); + domain_flush_complete(domain); } /* Flush the whole IO/TLB for a given protection domain - including PDE */ @@ -1567,22 +1583,6 @@ static void dev_flush_pasid_all(struct iommu_dev_data *dev_data, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, pasid); } -void amd_iommu_domain_flush_complete(struct protection_domain *domain) -{ - int i; - - for (i = 0; i < amd_iommu_get_num_iommus(); ++i) { - if (domain && !domain->dev_iommu[i]) - continue; - - /* - * Devices of this domain are behind this IOMMU - * We need to wait for completion of all commands. 
- */ - iommu_completion_wait(amd_iommus[i]); - } -} - /* Flush the not present cache if it exists */ static void domain_flush_np_cache(struct protection_domain *domain, dma_addr_t iova, size_t size) @@ -1624,6 +1624,7 @@ void amd_iommu_update_and_flush_device_table(struct protection_domain *domain) { update_device_table(domain); domain_flush_devices(domain); + domain_flush_complete(domain); } void amd_iommu_domain_update(struct protection_domain *domain) -- Gitee From 9e5318691ad237ece7481a5d9607799dbc88040d Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Wed, 28 Aug 2024 11:10:28 +0000 Subject: [PATCH 71/99] iommu/amd: Rework amd_iommu_update_and_flush_device_table() ANBZ: #13617 commit a3303762eb80fe03479962470a81b9176fc24f8e upstream. Remove the separate functions to update and flush the device table, as only amd_iommu_update_and_flush_device_table() calls them. No functional changes intended. Signed-off-by: Vasant Hegde Reviewed-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20240828111029.5429-8-vasant.hegde@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/iommu.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index e9054e33d512..bcb2d8666224 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1600,15 +1600,7 @@ static void domain_flush_np_cache(struct protection_domain *domain, /* * This function flushes the DTEs for all devices in domain */ -static void domain_flush_devices(struct protection_domain *domain) -{ - struct iommu_dev_data *dev_data; - - list_for_each_entry(dev_data, &domain->dev_list, list) - device_flush_dte(dev_data); -} - -static void update_device_table(struct protection_domain *domain) +void amd_iommu_update_and_flush_device_table(struct protection_domain *domain) { struct iommu_dev_data *dev_data; @@ -1618,12 +1610,10 @@ static void update_device_table(struct protection_domain *domain) set_dte_entry(iommu, dev_data); clone_aliases(iommu, dev_data->dev); } -} -void amd_iommu_update_and_flush_device_table(struct protection_domain *domain) -{ - update_device_table(domain); - domain_flush_devices(domain); + list_for_each_entry(dev_data, &domain->dev_list, list) + device_flush_dte(dev_data); + domain_flush_complete(domain); } -- Gitee From 518ec4a76f7cfe230f17ba22207b181d44a05fef Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Wed, 28 Aug 2024 11:10:29 +0000 Subject: [PATCH 72/99] iommu/amd: Make amd_iommu_dev_update_dte() static ANBZ: #13617 commit 89ffb2c3c2a1d0bff5515fc53f93de86fb6753c0 upstream. As it's used inside iommu.c only. Also rename the function to dev_update_dte(), as it's a static function. No functional changes intended.
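Stepping back to the reworked amd_iommu_update_and_flush_device_table() above, it follows a common batching shape: update every entry, then queue every flush, then wait once. A rough standalone sketch of that shape (toy printf calls in place of the real DTE writes and invalidations):

#include <stdio.h>

#define NDEV 3

int main(void)
{
        int i;

        /* Pass 1: rewrite every device table entry. */
        for (i = 0; i < NDEV; i++)
                printf("set DTE for device %d\n", i);

        /* Pass 2: flush each (possibly cached) entry. */
        for (i = 0; i < NDEV; i++)
                printf("flush DTE for device %d\n", i);

        /* One completion wait covers all the flushes queued above. */
        printf("wait for completion\n");
        return 0;
}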
Signed-off-by: Vasant Hegde Reviewed-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20240828111029.5429-9-vasant.hegde@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/amd_iommu.h | 1 - drivers/iommu/amd/iommu.c | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 062a60faa64e..b59505b30b18 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -87,7 +87,6 @@ int amd_iommu_complete_ppr(struct device *dev, u32 pasid, int status, int tag); void amd_iommu_flush_all_caches(struct amd_iommu *iommu); void amd_iommu_update_and_flush_device_table(struct protection_domain *domain); void amd_iommu_domain_update(struct protection_domain *domain); -void amd_iommu_dev_update_dte(struct iommu_dev_data *dev_data, bool set); void amd_iommu_domain_flush_pages(struct protection_domain *domain, u64 address, size_t size); void amd_iommu_dev_flush_pasid_pages(struct iommu_dev_data *dev_data, diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index bcb2d8666224..535e0ab04984 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1964,7 +1964,7 @@ static void clear_dte_entry(struct amd_iommu *iommu, u16 devid) } /* Update and flush DTE for the given device */ -void amd_iommu_dev_update_dte(struct iommu_dev_data *dev_data, bool set) +static void dev_update_dte(struct iommu_dev_data *dev_data, bool set) { struct amd_iommu *iommu = get_amd_iommu_from_dev(dev_data->dev); @@ -2064,7 +2064,7 @@ static void do_detach(struct iommu_dev_data *dev_data) struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); /* Clear DTE and flush the entry */ - amd_iommu_dev_update_dte(dev_data, false); + dev_update_dte(dev_data, false); /* Flush IOTLB and wait for the flushes to finish */ amd_iommu_domain_flush_all(domain); @@ -2492,7 +2492,7 @@ static int blocked_domain_attach_device(struct iommu_domain *domain, /* Clear DTE and flush the entry */ spin_lock(&dev_data->lock); - amd_iommu_dev_update_dte(dev_data, false); + dev_update_dte(dev_data, false); spin_unlock(&dev_data->lock); return 0; @@ -2560,7 +2560,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, } /* Update device table */ - amd_iommu_dev_update_dte(dev_data, true); + dev_update_dte(dev_data, true); return ret; } -- Gitee From dd948d110c1d05a8a38bd2bdd297b7195f89f392 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 29 Aug 2024 21:06:10 -0300 Subject: [PATCH 73/99] iommu/amd: Move allocation of the top table into v1_alloc_pgtable ANBZ: #13617 commit 8d00b77a52ef4b2091696ca25753d0ab95e4d839 upstream. All the page table memory should be allocated/freed within the io_pgtable struct. The v2 path is already doing this; make it consistent. It is hard to see, but the free of the root in protection_domain_free() is a NOP on the success path because v1_free_pgtable() does amd_iommu_domain_clr_pt_root(). The root memory is already freed because free_sub_pt() put it on the freelist. The free path in protection_domain_free() is only used during error unwind of protection_domain_alloc().
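The ownership rule being restored — whoever allocates the top table also frees it, inside the io_pgtable object — in miniature, with simplified stand-in types rather than the driver's:

#include <stdlib.h>

struct pgtable {
        unsigned long *root;            /* owned by this object */
};

static struct pgtable *pgtable_alloc(void)
{
        struct pgtable *p = calloc(1, sizeof(*p));

        if (!p)
                return NULL;
        p->root = calloc(512, sizeof(*p->root));   /* top-level table */
        if (!p->root) {
                free(p);
                return NULL;
        }
        return p;
}

static void pgtable_free(struct pgtable *p)
{
        free(p->root);                  /* the allocator is the freer */
        free(p);
}

int main(void)
{
        struct pgtable *p = pgtable_alloc();

        if (p)
                pgtable_free(p);
        return 0;
}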
Reviewed-by: Vasant Hegde Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/1-v2-831cdc4d00f3+1a315-amd_iopgtbl_jgg@nvidia.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/io_pgtable.c | 8 ++++++-- drivers/iommu/amd/iommu.c | 21 ++------------------- 2 files changed, 8 insertions(+), 21 deletions(-) diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index bfbcec68efb9..03a3b09f0512 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -573,20 +573,24 @@ static void v1_free_pgtable(struct io_pgtable *iop) pgtable->mode > PAGE_MODE_6_LEVEL); free_sub_pt(pgtable->root, pgtable->mode, &freelist); + iommu_put_pages_list(&freelist); /* Update data structure */ amd_iommu_domain_clr_pt_root(dom); /* Make changes visible to IOMMUs */ amd_iommu_domain_update(dom); - - iommu_put_pages_list(&freelist); } static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) { struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg); + pgtable->root = iommu_alloc_page(GFP_KERNEL); + if (!pgtable->root) + return NULL; + pgtable->mode = PAGE_MODE_3_LEVEL; + cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES; cfg->ias = IOMMU_IN_ADDR_BIT_SIZE; cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 535e0ab04984..92fd469b3240 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -52,8 +52,6 @@ #define HT_RANGE_START (0xfd00000000ULL) #define HT_RANGE_END (0xffffffffffULL) -#define DEFAULT_PGTABLE_LEVEL PAGE_MODE_3_LEVEL - static DEFINE_SPINLOCK(pd_bitmap_lock); LIST_HEAD(ioapic_map); @@ -2272,30 +2270,15 @@ void protection_domain_free(struct protection_domain *domain) if (domain->iop.pgtbl_cfg.tlb) free_io_pgtable_ops(&domain->iop.iop.ops); - if (domain->iop.root) - iommu_free_page(domain->iop.root); - if (domain->id) domain_id_free(domain->id); kfree(domain); } -static int protection_domain_init_v1(struct protection_domain *domain, int mode) +static int protection_domain_init_v1(struct protection_domain *domain) { - u64 *pt_root = NULL; - - BUG_ON(mode < PAGE_MODE_NONE || mode > PAGE_MODE_6_LEVEL); - - if (mode != PAGE_MODE_NONE) { - pt_root = iommu_alloc_page(GFP_KERNEL); - if (!pt_root) - return -ENOMEM; - } - domain->pd_mode = PD_MODE_V1; - amd_iommu_domain_set_pgtable(domain, pt_root, mode); - return 0; } @@ -2348,7 +2331,7 @@ struct protection_domain *protection_domain_alloc(unsigned int type) switch (pgtable) { case AMD_IOMMU_V1: - ret = protection_domain_init_v1(domain, DEFAULT_PGTABLE_LEVEL); + ret = protection_domain_init_v1(domain); break; case AMD_IOMMU_V2: ret = protection_domain_init_v2(domain); -- Gitee From 35d17d5b9815b46050276f2f7cb72ee95b7facdc Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 29 Aug 2024 21:06:11 -0300 Subject: [PATCH 74/99] iommu/amd: Allocate the page table root using GFP_KERNEL ANBZ: #13617 commit b0a6c883bcd42eeb0850135e529b34b64d57673c upstream. Domain allocation is always done under a sleepable context, the v1 path and other drivers use GFP_KERNEL already. Fix the v2 path to also use GFP_KERNEL. 
Fixes: 0d571dcbe7c6 ("iommu/amd: Allocate page table using numa locality info") Reviewed-by: Vasant Hegde Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/2-v2-831cdc4d00f3+1a315-amd_iopgtbl_jgg@nvidia.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/io_pgtable_v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c index 3965da0376ce..1929a5e1477a 100644 --- a/drivers/iommu/amd/io_pgtable_v2.c +++ b/drivers/iommu/amd/io_pgtable_v2.c @@ -362,7 +362,7 @@ static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo struct protection_domain *pdom = (struct protection_domain *)cookie; int ias = IOMMU_IN_ADDR_BIT_SIZE; - pgtable->pgd = iommu_alloc_page_node(pdom->nid, GFP_ATOMIC); + pgtable->pgd = iommu_alloc_page_node(pdom->nid, GFP_KERNEL); if (!pgtable->pgd) return NULL; -- Gitee From 735f58a2b589abf57d511d37e15a05e6b509d01a Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 29 Aug 2024 21:06:12 -0300 Subject: [PATCH 75/99] iommu/amd: Set the pgsize_bitmap correctly ANBZ: #13617 commit 7a41dcb52f9de6079621fc31c3b84c7fc290934b upstream. When using io_pgtable the correct pgsize_bitmap is stored in the cfg, both v1_alloc_pgtable() and v2_alloc_pgtable() set it correctly. This fixes a bug where the v2 pgtable had the wrong pgsize as protection_domain_init_v2() would set it and then do_iommu_domain_alloc() immediately resets it. Remove the confusing ops.pgsize_bitmap since that is not used if the driver sets domain.pgsize_bitmap. Fixes: 134288158a41 ("iommu/amd: Add domain_alloc_user based domain allocation") Reviewed-by: Vasant Hegde Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/3-v2-831cdc4d00f3+1a315-amd_iopgtbl_jgg@nvidia.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/iommu.c | 28 ++++------------------------ 1 file changed, 4 insertions(+), 24 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 92fd469b3240..75998f6d7653 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2276,26 +2276,11 @@ void protection_domain_free(struct protection_domain *domain) kfree(domain); } -static int protection_domain_init_v1(struct protection_domain *domain) -{ - domain->pd_mode = PD_MODE_V1; - return 0; -} - -static int protection_domain_init_v2(struct protection_domain *pdom) -{ - pdom->pd_mode = PD_MODE_V2; - pdom->domain.pgsize_bitmap = AMD_IOMMU_PGSIZES_V2; - - return 0; -} - struct protection_domain *protection_domain_alloc(unsigned int type) { struct io_pgtable_ops *pgtbl_ops; struct protection_domain *domain; int pgtable; - int ret; domain = kzalloc(sizeof(*domain), GFP_KERNEL); if (!domain) @@ -2331,18 +2316,14 @@ struct protection_domain *protection_domain_alloc(unsigned int type) switch (pgtable) { case AMD_IOMMU_V1: - ret = protection_domain_init_v1(domain); + domain->pd_mode = PD_MODE_V1; break; case AMD_IOMMU_V2: - ret = protection_domain_init_v2(domain); + domain->pd_mode = PD_MODE_V2; break; default: - ret = -EINVAL; - break; - } - - if (ret) goto out_err; + } pgtbl_ops = alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl_cfg, domain); if (!pgtbl_ops) @@ -2408,10 +2389,10 @@ static struct iommu_domain *do_iommu_domain_alloc(unsigned int type, domain->domain.geometry.aperture_start = 0; domain->domain.geometry.aperture_end = dma_max_address(); domain->domain.geometry.force_aperture = true; + domain->domain.pgsize_bitmap = 
domain->iop.iop.cfg.pgsize_bitmap; if (iommu) { domain->domain.type = type; - domain->domain.pgsize_bitmap = iommu->iommu.ops->pgsize_bitmap; domain->domain.ops = iommu->iommu.ops->default_domain_ops; if (dirty_tracking) @@ -2894,7 +2875,6 @@ const struct iommu_ops amd_iommu_ops = { .device_group = amd_iommu_device_group, .get_resv_regions = amd_iommu_get_resv_regions, .is_attach_deferred = amd_iommu_is_attach_deferred, - .pgsize_bitmap = AMD_IOMMU_PGSIZES, .def_domain_type = amd_iommu_def_domain_type, .dev_enable_feat = amd_iommu_dev_enable_feature, .dev_disable_feat = amd_iommu_dev_disable_feature, -- Gitee From 2c180ce09e7fc5048672a2dff004c51f3603963b Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 29 Aug 2024 21:06:13 -0300 Subject: [PATCH 76/99] iommu/amd: Remove amd_iommu_domain_update() from page table freeing ANBZ: #13617 commit 322d889ae7d39f8538a6deac35869aa3be1855bd upstream. It is a serious bug if the domain is still mapped to any DTEs when it is freed as we immediately start freeing page table memory, so any remaining HW touch will UAF. If it is not mapped then dev_list is empty and amd_iommu_domain_update() does nothing. Remove it and add a WARN_ON() to catch this class of bug. Reviewed-by: Vasant Hegde Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/4-v2-831cdc4d00f3+1a315-amd_iopgtbl_jgg@nvidia.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/io_pgtable.c | 3 --- drivers/iommu/amd/iommu.c | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 03a3b09f0512..0743b698128d 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -577,9 +577,6 @@ static void v1_free_pgtable(struct io_pgtable *iop) /* Update data structure */ amd_iommu_domain_clr_pt_root(dom); - - /* Make changes visible to IOMMUs */ - amd_iommu_domain_update(dom); } static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 75998f6d7653..4d71a0d4e6b2 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2267,6 +2267,8 @@ void protection_domain_free(struct protection_domain *domain) if (!domain) return; + WARN_ON(!list_empty(&domain->dev_list)); + if (domain->iop.pgtbl_cfg.tlb) free_io_pgtable_ops(&domain->iop.iop.ops); -- Gitee From 09e2de250cfc608e55fd3a6d0798575bd1f84c0f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 29 Aug 2024 21:06:14 -0300 Subject: [PATCH 77/99] iommu/amd: Remove the amd_iommu_domain_set_pt_root() and related ANBZ: #13617 commit 1ed2d21d471caf2e4351c2e8bb14143bc8062092 upstream. Looks like many refactorings here have left this confused. There is only one storage of the root/mode, it is in the iop struct. increase_address_space() calls amd_iommu_domain_set_pgtable() with values that it already stored in iop a few lines above. amd_iommu_domain_clr_pt_root() is zero'ing memory we are about to free. It used to protect against a double free of root, but that is gone now. Remove amd_iommu_domain_set_pgtable(), amd_iommu_domain_set_pt_root(), amd_iommu_domain_clr_pt_root() as they are all pointless. 
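The packing these helpers implemented — a page-aligned root pointer with the mode folded into its low three bits — can be verified standalone; this mirrors the removed set_pt_root() logic with made-up values:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t root = 0xffff888012345000ULL;  /* page-aligned root table */
        unsigned int mode = 3;                  /* e.g. PAGE_MODE_3_LEVEL */

        /* Pack: page alignment leaves the low 12 bits free. */
        uint64_t pt_root = root | (mode & 7);

        /* Unpack, as the removed helper did with PAGE_MASK and "& 7". */
        uint64_t got_root = pt_root & ~0xfffULL;
        unsigned int got_mode = pt_root & 7;

        assert(got_root == root && got_mode == mode);
        printf("root=%#llx mode=%u\n", (unsigned long long)got_root, got_mode);
        return 0;
}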
Reviewed-by: Vasant Hegde Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/5-v2-831cdc4d00f3+1a315-amd_iopgtbl_jgg@nvidia.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/amd_iommu.h | 13 ------------- drivers/iommu/amd/io_pgtable.c | 24 ------------------------ 2 files changed, 37 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index b59505b30b18..79656aae32c0 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -134,19 +134,6 @@ static inline void *iommu_phys_to_virt(unsigned long paddr) return phys_to_virt(__sme_clr(paddr)); } -static inline -void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root) -{ - domain->iop.root = (u64 *)(root & PAGE_MASK); - domain->iop.mode = root & 7; /* lowest 3 bits encode pgtable mode */ -} - -static inline -void amd_iommu_domain_clr_pt_root(struct protection_domain *domain) -{ - amd_iommu_domain_set_pt_root(domain, 0); -} - static inline int get_pci_sbdf_id(struct pci_dev *pdev) { int seg = pci_domain_nr(pdev->bus); diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 0743b698128d..e0abcf38c314 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -132,18 +132,6 @@ static void free_sub_pt(u64 *root, int mode, struct list_head *freelist) } } -void amd_iommu_domain_set_pgtable(struct protection_domain *domain, - u64 *root, int mode) -{ - u64 pt_root; - - /* lowest 3 bits encode pgtable mode */ - pt_root = mode & 7; - pt_root |= (u64)root; - - amd_iommu_domain_set_pt_root(domain, pt_root); -} - /* * This function is used to add another level to an IO page table. Adding * another level increases the size of the address space by 9 bits to a size up @@ -176,12 +164,6 @@ static bool increase_address_space(struct protection_domain *domain, domain->iop.mode += 1; amd_iommu_update_and_flush_device_table(domain); - /* - * Device Table needs to be updated and flushed before the new root can - * be published. - */ - amd_iommu_domain_set_pgtable(domain, pte, domain->iop.mode); - pte = NULL; ret = true; @@ -560,23 +542,17 @@ static int iommu_v1_read_and_clear_dirty(struct io_pgtable_ops *ops, static void v1_free_pgtable(struct io_pgtable *iop) { struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop); - struct protection_domain *dom; LIST_HEAD(freelist); if (pgtable->mode == PAGE_MODE_NONE) return; - dom = container_of(pgtable, struct protection_domain, iop); - /* Page-table is not visible to IOMMU anymore, so free it */ BUG_ON(pgtable->mode < PAGE_MODE_NONE || pgtable->mode > PAGE_MODE_6_LEVEL); free_sub_pt(pgtable->root, pgtable->mode, &freelist); iommu_put_pages_list(&freelist); - - /* Update data structure */ - amd_iommu_domain_clr_pt_root(dom); } static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) -- Gitee From 1a07e7a5014443de77db63c0c8ab9d7b03a081d6 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 29 Aug 2024 21:06:15 -0300 Subject: [PATCH 78/99] iommu/amd: Rename struct amd_io_pgtable iopt to pgtbl ANBZ: #13617 commit 670b57796c5dc1ca58912132cad914cf4b3c0cdd upstream. There is struct protection_domain iopt and struct amd_io_pgtable iopt. Next patches are going to want to write domain.iopt.iopt.xx which is quite unnatural to read. Give one of them a different name, amd_io_pgtable has fewer references so call it pgtbl, to match pgtbl_cfg, instead. 
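The container_of() idiom that the renamed macros wrap can be seen in isolation; toy types below, but the driver's io_pgtable_to_data() expands to exactly this pattern:

#include <assert.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct io_pgtable { int unused; };

struct amd_io_pgtable {
        int mode;
        struct io_pgtable pgtbl;        /* embedded member, formerly "iop" */
};

int main(void)
{
        struct amd_io_pgtable data = { .mode = 3 };
        struct io_pgtable *iop = &data.pgtbl;
        struct amd_io_pgtable *back =
                container_of(iop, struct amd_io_pgtable, pgtbl);

        /* Recovers the containing object from the embedded member. */
        assert(back == &data && back->mode == 3);
        return 0;
}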
Suggested-by: Alejandro Jimenez Reviewed-by: Vasant Hegde Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/6-v2-831cdc4d00f3+1a315-amd_iopgtbl_jgg@nvidia.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/amd_iommu_types.h | 4 ++-- drivers/iommu/amd/io_pgtable.c | 12 ++++++------ drivers/iommu/amd/io_pgtable_v2.c | 14 +++++++------- drivers/iommu/amd/iommu.c | 14 +++++++------- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index c7432296bb90..ce7825b4d631 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -519,7 +519,7 @@ struct amd_irte_ops; #define AMD_IOMMU_FLAG_TRANS_PRE_ENABLED (1 << 0) #define io_pgtable_to_data(x) \ - container_of((x), struct amd_io_pgtable, iop) + container_of((x), struct amd_io_pgtable, pgtbl) #define io_pgtable_ops_to_data(x) \ io_pgtable_to_data(io_pgtable_ops_to_pgtable(x)) @@ -540,7 +540,7 @@ struct gcr3_tbl_info { struct amd_io_pgtable { struct io_pgtable_cfg pgtbl_cfg; - struct io_pgtable iop; + struct io_pgtable pgtbl; int mode; u64 *root; u64 *pgd; /* v2 pgtable pgd pointer */ diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index e0abcf38c314..53de1146928e 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -541,7 +541,7 @@ static int iommu_v1_read_and_clear_dirty(struct io_pgtable_ops *ops, */ static void v1_free_pgtable(struct io_pgtable *iop) { - struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop); + struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, pgtbl); LIST_HEAD(freelist); if (pgtable->mode == PAGE_MODE_NONE) @@ -569,12 +569,12 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE; cfg->tlb = &v1_flush_ops; - pgtable->iop.ops.map_pages = iommu_v1_map_pages; - pgtable->iop.ops.unmap_pages = iommu_v1_unmap_pages; - pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys; - pgtable->iop.ops.read_and_clear_dirty = iommu_v1_read_and_clear_dirty; + pgtable->pgtbl.ops.map_pages = iommu_v1_map_pages; + pgtable->pgtbl.ops.unmap_pages = iommu_v1_unmap_pages; + pgtable->pgtbl.ops.iova_to_phys = iommu_v1_iova_to_phys; + pgtable->pgtbl.ops.read_and_clear_dirty = iommu_v1_read_and_clear_dirty; - return &pgtable->iop; + return &pgtable->pgtbl; } struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns = { diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c index 1929a5e1477a..c2c3d748baa6 100644 --- a/drivers/iommu/amd/io_pgtable_v2.c +++ b/drivers/iommu/amd/io_pgtable_v2.c @@ -234,7 +234,7 @@ static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova, int prot, gfp_t gfp, size_t *mapped) { struct protection_domain *pdom = io_pgtable_ops_to_domain(ops); - struct io_pgtable_cfg *cfg = &pdom->iop.iop.cfg; + struct io_pgtable_cfg *cfg = &pdom->iop.pgtbl.cfg; u64 *pte; unsigned long map_size; unsigned long mapped_size = 0; @@ -281,7 +281,7 @@ static unsigned long iommu_v2_unmap_pages(struct io_pgtable_ops *ops, struct iommu_iotlb_gather *gather) { struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); - struct io_pgtable_cfg *cfg = &pgtable->iop.cfg; + struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg; unsigned long unmap_size; unsigned long unmapped = 0; size_t size = pgcount << __ffs(pgsize); @@ -346,7 +346,7 @@ static const struct iommu_flush_ops v2_flush_ops = 
{ static void v2_free_pgtable(struct io_pgtable *iop) { - struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop); + struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, pgtbl); if (!pgtable || !pgtable->pgd) return; @@ -369,16 +369,16 @@ static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo if (get_pgtable_level() == PAGE_MODE_5_LEVEL) ias = 57; - pgtable->iop.ops.map_pages = iommu_v2_map_pages; - pgtable->iop.ops.unmap_pages = iommu_v2_unmap_pages; - pgtable->iop.ops.iova_to_phys = iommu_v2_iova_to_phys; + pgtable->pgtbl.ops.map_pages = iommu_v2_map_pages; + pgtable->pgtbl.ops.unmap_pages = iommu_v2_unmap_pages; + pgtable->pgtbl.ops.iova_to_phys = iommu_v2_iova_to_phys; cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES_V2, cfg->ias = ias, cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE, cfg->tlb = &v2_flush_ops; - return &pgtable->iop; + return &pgtable->pgtbl; } struct io_pgtable_init_fns io_pgtable_amd_iommu_v2_init_fns = { diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 4d71a0d4e6b2..10a81af6e4a6 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2270,7 +2270,7 @@ void protection_domain_free(struct protection_domain *domain) WARN_ON(!list_empty(&domain->dev_list)); if (domain->iop.pgtbl_cfg.tlb) - free_io_pgtable_ops(&domain->iop.iop.ops); + free_io_pgtable_ops(&domain->iop.pgtbl.ops); if (domain->id) domain_id_free(domain->id); @@ -2391,7 +2391,7 @@ static struct iommu_domain *do_iommu_domain_alloc(unsigned int type, domain->domain.geometry.aperture_start = 0; domain->domain.geometry.aperture_end = dma_max_address(); domain->domain.geometry.force_aperture = true; - domain->domain.pgsize_bitmap = domain->iop.iop.cfg.pgsize_bitmap; + domain->domain.pgsize_bitmap = domain->iop.pgtbl.cfg.pgsize_bitmap; if (iommu) { domain->domain.type = type; @@ -2535,7 +2535,7 @@ static int amd_iommu_iotlb_sync_map(struct iommu_domain *dom, unsigned long iova, size_t size) { struct protection_domain *domain = to_pdomain(dom); - struct io_pgtable_ops *ops = &domain->iop.iop.ops; + struct io_pgtable_ops *ops = &domain->iop.pgtbl.ops; if (ops->map_pages) domain_flush_np_cache(domain, iova, size); @@ -2547,7 +2547,7 @@ static int amd_iommu_map_pages(struct iommu_domain *dom, unsigned long iova, int iommu_prot, gfp_t gfp, size_t *mapped) { struct protection_domain *domain = to_pdomain(dom); - struct io_pgtable_ops *ops = &domain->iop.iop.ops; + struct io_pgtable_ops *ops = &domain->iop.pgtbl.ops; int prot = 0; int ret = -EINVAL; @@ -2594,7 +2594,7 @@ static size_t amd_iommu_unmap_pages(struct iommu_domain *dom, unsigned long iova struct iommu_iotlb_gather *gather) { struct protection_domain *domain = to_pdomain(dom); - struct io_pgtable_ops *ops = &domain->iop.iop.ops; + struct io_pgtable_ops *ops = &domain->iop.pgtbl.ops; size_t r; if ((domain->pd_mode == PD_MODE_V1) && @@ -2613,7 +2613,7 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, dma_addr_t iova) { struct protection_domain *domain = to_pdomain(dom); - struct io_pgtable_ops *ops = &domain->iop.iop.ops; + struct io_pgtable_ops *ops = &domain->iop.pgtbl.ops; return ops->iova_to_phys(ops, iova); } @@ -2691,7 +2691,7 @@ static int amd_iommu_read_and_clear_dirty(struct iommu_domain *domain, struct iommu_dirty_bitmap *dirty) { struct protection_domain *pdomain = to_pdomain(domain); - struct io_pgtable_ops *ops = &pdomain->iop.iop.ops; + struct io_pgtable_ops *ops = &pdomain->iop.pgtbl.ops; unsigned long lflags; if (!ops || 
!ops->read_and_clear_dirty) -- Gitee From 82de1542b6c1fc1a81e8ace86ccb9ede2b871caf Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 29 Aug 2024 21:06:16 -0300 Subject: [PATCH 79/99] iommu/amd: Remove amd_io_pgtable::pgtbl_cfg ANBZ: #13617 commit 977fc27ca7f8a83b67ccd91264ac56b0fb996f51 upstream. This struct is already in iop.cfg, we don't need two. AMD is using this API sort of wrong, the cfg is supposed to be passed in and then the allocation function will allocate ops memory and copy the passed config into the new memory. Keep it kind of wrong and pass in the cfg memory that is already part of the pagetable struct. Reviewed-by: Vasant Hegde Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/7-v2-831cdc4d00f3+1a315-amd_iopgtbl_jgg@nvidia.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/amd_iommu_types.h | 3 +-- drivers/iommu/amd/iommu.c | 5 +++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index ce7825b4d631..7aa4f1983e40 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -529,7 +529,7 @@ struct amd_irte_ops; struct protection_domain, iop) #define io_pgtable_cfg_to_data(x) \ - container_of((x), struct amd_io_pgtable, pgtbl_cfg) + container_of((x), struct amd_io_pgtable, pgtbl.cfg) struct gcr3_tbl_info { u64 *gcr3_tbl; /* Guest CR3 table */ @@ -539,7 +539,6 @@ struct gcr3_tbl_info { }; struct amd_io_pgtable { - struct io_pgtable_cfg pgtbl_cfg; struct io_pgtable pgtbl; int mode; u64 *root; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 10a81af6e4a6..e20dc2f610cc 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2269,7 +2269,7 @@ void protection_domain_free(struct protection_domain *domain) WARN_ON(!list_empty(&domain->dev_list)); - if (domain->iop.pgtbl_cfg.tlb) + if (domain->iop.pgtbl.cfg.tlb) free_io_pgtable_ops(&domain->iop.pgtbl.ops); if (domain->id) @@ -2327,7 +2327,8 @@ struct protection_domain *protection_domain_alloc(unsigned int type) goto out_err; } - pgtbl_ops = alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl_cfg, domain); + pgtbl_ops = + alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl.cfg, domain); if (!pgtbl_ops) goto out_err; -- Gitee From f520531fe53d0df0cb44ff6114594d7ab0d8c659 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 29 Aug 2024 21:06:17 -0300 Subject: [PATCH 80/99] iommu/amd: Store the nid in io_pgtable_cfg instead of the domain ANBZ: #13617 commit 47f218d108950984b24af81f66356ceda380eb74 upstream. We already have memory in the union here that is being wasted in AMD's case, use it to store the nid. Putting the nid here further isolates the io_pgtable code from the struct protection_domain. Fixup protection_domain_alloc so that the NID from the device is provided, at this point dev is never NULL for AMD so this will now allocate the first table pointer on the correct NUMA node. 
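Threading the node ID through the pagetable config, rather than reaching back into the protection domain, looks like this in miniature (stand-in types and plain malloc; the driver itself uses cfg->amd.nid with iommu_alloc_page_node()):

#include <stdio.h>
#include <stdlib.h>

struct pgtbl_cfg {
        int nid;                        /* NUMA node hint, set at domain alloc */
};

static void *alloc_table_near(const struct pgtbl_cfg *cfg, size_t sz)
{
        /* The driver calls iommu_alloc_page_node(cfg->amd.nid, ...);
         * plain malloc stands in here. */
        printf("allocate %zu bytes near node %d\n", sz, cfg->nid);
        return malloc(sz);
}

int main(void)
{
        struct pgtbl_cfg cfg = { .nid = 1 };    /* from dev_to_node(dev) */
        void *root = alloc_table_near(&cfg, 4096);

        free(root);
        return 0;
}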
Signed-off-by: Jason Gunthorpe Reviewed-by: Vasant Hegde Link: https://lore.kernel.org/r/8-v2-831cdc4d00f3+1a315-amd_iopgtbl_jgg@nvidia.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue [ Jay Chen: fix rebase conflicts in ../drivers/iommu/amd/io_pgtable_v2.c ] Signed-off-by: Jay Chen --- drivers/iommu/amd/amd_iommu.h | 2 +- drivers/iommu/amd/amd_iommu_types.h | 1 - drivers/iommu/amd/io_pgtable.c | 8 +++++--- drivers/iommu/amd/io_pgtable_v2.c | 5 ++--- drivers/iommu/amd/iommu.c | 12 +++++++----- drivers/iommu/amd/pasid.c | 2 +- include/linux/io-pgtable.h | 4 ++++ 7 files changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 79656aae32c0..fa0af030d8b3 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -45,7 +45,7 @@ extern enum io_pgtable_fmt amd_iommu_pgtable; extern int amd_iommu_gpt_level; /* Protection domain ops */ -struct protection_domain *protection_domain_alloc(unsigned int type); +struct protection_domain *protection_domain_alloc(unsigned int type, int nid); void protection_domain_free(struct protection_domain *domain); struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev, struct mm_struct *mm); diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 7aa4f1983e40..30eb07acb8b1 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -571,7 +571,6 @@ struct protection_domain { struct amd_io_pgtable iop; spinlock_t lock; /* mostly used to lock the page table*/ u16 id; /* the domain id written to the device table */ - int nid; /* Node ID */ enum protection_domain_mode pd_mode; /* Track page table type */ bool dirty_tracking; /* dirty tracking is enabled in the domain */ unsigned dev_cnt; /* devices assigned to this domain */ diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 53de1146928e..4492a5800b35 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -141,11 +141,12 @@ static bool increase_address_space(struct protection_domain *domain, unsigned long address, gfp_t gfp) { + struct io_pgtable_cfg *cfg = &domain->iop.pgtbl.cfg; unsigned long flags; bool ret = true; u64 *pte; - pte = iommu_alloc_page_node(domain->nid, gfp); + pte = iommu_alloc_page_node(cfg->amd.nid, gfp); if (!pte) return false; @@ -181,6 +182,7 @@ static u64 *alloc_pte(struct protection_domain *domain, gfp_t gfp, bool *updated) { + struct io_pgtable_cfg *cfg = &domain->iop.pgtbl.cfg; int level, end_lvl; u64 *pte, *page; @@ -232,7 +234,7 @@ static u64 *alloc_pte(struct protection_domain *domain, if (!IOMMU_PTE_PRESENT(__pte) || pte_level == PAGE_MODE_NONE) { - page = iommu_alloc_page_node(domain->nid, gfp); + page = iommu_alloc_page_node(cfg->amd.nid, gfp); if (!page) return NULL; @@ -559,7 +561,7 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo { struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg); - pgtable->root = iommu_alloc_page(GFP_KERNEL); + pgtable->root = iommu_alloc_page_node(cfg->amd.nid, GFP_KERNEL); if (!pgtable->root) return NULL; pgtable->mode = PAGE_MODE_3_LEVEL; diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c index c2c3d748baa6..6c9157bd7601 100644 --- a/drivers/iommu/amd/io_pgtable_v2.c +++ b/drivers/iommu/amd/io_pgtable_v2.c @@ -251,7 +251,7 @@ static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova, while (mapped_size < size) { map_size = 
get_alloc_page_size(pgsize); - pte = v2_alloc_pte(pdom->nid, pdom->iop.pgd, + pte = v2_alloc_pte(cfg->amd.nid, pdom->iop.pgd, iova, map_size, gfp, &updated); if (!pte) { ret = -ENOMEM; @@ -359,10 +359,9 @@ static void v2_free_pgtable(struct io_pgtable *iop) static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) { struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg); - struct protection_domain *pdom = (struct protection_domain *)cookie; int ias = IOMMU_IN_ADDR_BIT_SIZE; - pgtable->pgd = iommu_alloc_page_node(pdom->nid, GFP_KERNEL); + pgtable->pgd = iommu_alloc_page_node(cfg->amd.nid, GFP_KERNEL); if (!pgtable->pgd) return NULL; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index e20dc2f610cc..7a8224868665 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2032,6 +2032,7 @@ static int do_attach(struct iommu_dev_data *dev_data, struct protection_domain *domain) { struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); + struct io_pgtable_cfg *cfg = &domain->iop.pgtbl.cfg; int ret = 0; /* Update data structures */ @@ -2039,8 +2040,8 @@ static int do_attach(struct iommu_dev_data *dev_data, list_add(&dev_data->list, &domain->dev_list); /* Update NUMA Node ID */ - if (domain->nid == NUMA_NO_NODE) - domain->nid = dev_to_node(dev_data->dev); + if (cfg->amd.nid == NUMA_NO_NODE) + cfg->amd.nid = dev_to_node(dev_data->dev); /* Do reference counting */ domain->dev_iommu[iommu->index] += 1; @@ -2278,7 +2279,7 @@ void protection_domain_free(struct protection_domain *domain) kfree(domain); } -struct protection_domain *protection_domain_alloc(unsigned int type) +struct protection_domain *protection_domain_alloc(unsigned int type, int nid) { struct io_pgtable_ops *pgtbl_ops; struct protection_domain *domain; @@ -2295,7 +2296,7 @@ struct protection_domain *protection_domain_alloc(unsigned int type) spin_lock_init(&domain->lock); INIT_LIST_HEAD(&domain->dev_list); INIT_LIST_HEAD(&domain->dev_data_list); - domain->nid = NUMA_NO_NODE; + domain->iop.pgtbl.cfg.amd.nid = nid; switch (type) { /* No need to allocate io pgtable ops in passthrough mode */ @@ -2385,7 +2386,8 @@ static struct iommu_domain *do_iommu_domain_alloc(unsigned int type, if (dirty_tracking && !amd_iommu_hd_support(iommu)) return ERR_PTR(-EOPNOTSUPP); - domain = protection_domain_alloc(type); + domain = protection_domain_alloc(type, + dev ? dev_to_node(dev) : NUMA_NO_NODE); if (!domain) return ERR_PTR(-ENOMEM); diff --git a/drivers/iommu/amd/pasid.c b/drivers/iommu/amd/pasid.c index a68215f2b3e1..0657b9373be5 100644 --- a/drivers/iommu/amd/pasid.c +++ b/drivers/iommu/amd/pasid.c @@ -181,7 +181,7 @@ struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev, struct protection_domain *pdom; int ret; - pdom = protection_domain_alloc(IOMMU_DOMAIN_SVA); + pdom = protection_domain_alloc(IOMMU_DOMAIN_SVA, dev_to_node(dev)); if (!pdom) return ERR_PTR(-ENOMEM); diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index f9a81761bfce..b1ecfc3cd5bc 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -171,6 +171,10 @@ struct io_pgtable_cfg { u64 ttbr[4]; u32 n_ttbrs; } apple_dart_cfg; + + struct { + int nid; + } amd; }; }; -- Gitee From 601bb370d01401a70eaca0afe4893ce95d8b2096 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 29 Aug 2024 21:06:18 -0300 Subject: [PATCH 81/99] iommu/amd: Narrow the use of struct protection_domain to invalidation ANBZ: #13617 commit 9ac0b3380acdece01fa1b361687e3cd988831c55 upstream. 
The AMD io_pgtable stuff doesn't implement the tlb ops callbacks, instead it invokes the invalidation ops directly on the struct protection_domain. Narrow the use of struct protection_domain to only those few code paths. Make everything else properly use struct amd_io_pgtable through the call chains, which is the correct modular type for an io-pgtable module. Signed-off-by: Jason Gunthorpe Reviewed-by: Vasant Hegde Link: https://lore.kernel.org/r/9-v2-831cdc4d00f3+1a315-amd_iopgtbl_jgg@nvidia.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue [ Jay Chen: fix rebase conflicts in ../drivers/iommu/amd/io_pgtable_v2.c ] Signed-off-by: Jay Chen --- drivers/iommu/amd/io_pgtable.c | 33 +++++++++++++++++-------------- drivers/iommu/amd/io_pgtable_v2.c | 11 +++++++---- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 4492a5800b35..1cf3d580a551 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -137,11 +137,13 @@ static void free_sub_pt(u64 *root, int mode, struct list_head *freelist) * another level increases the size of the address space by 9 bits to a size up * to 64 bits. */ -static bool increase_address_space(struct protection_domain *domain, +static bool increase_address_space(struct amd_io_pgtable *pgtable, unsigned long address, gfp_t gfp) { - struct io_pgtable_cfg *cfg = &domain->iop.pgtbl.cfg; + struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg; + struct protection_domain *domain = + container_of(pgtable, struct protection_domain, iop); unsigned long flags; bool ret = true; u64 *pte; @@ -152,17 +154,17 @@ static bool increase_address_space(struct protection_domain *domain, spin_lock_irqsave(&domain->lock, flags); - if (address <= PM_LEVEL_SIZE(domain->iop.mode)) + if (address <= PM_LEVEL_SIZE(pgtable->mode)) goto out; ret = false; - if (WARN_ON_ONCE(domain->iop.mode == PAGE_MODE_6_LEVEL)) + if (WARN_ON_ONCE(pgtable->mode == PAGE_MODE_6_LEVEL)) goto out; - *pte = PM_LEVEL_PDE(domain->iop.mode, iommu_virt_to_phys(domain->iop.root)); + *pte = PM_LEVEL_PDE(pgtable->mode, iommu_virt_to_phys(pgtable->root)); - domain->iop.root = pte; - domain->iop.mode += 1; + pgtable->root = pte; + pgtable->mode += 1; amd_iommu_update_and_flush_device_table(domain); pte = NULL; @@ -175,31 +177,31 @@ static bool increase_address_space(struct protection_domain *domain, return ret; } -static u64 *alloc_pte(struct protection_domain *domain, +static u64 *alloc_pte(struct amd_io_pgtable *pgtable, unsigned long address, unsigned long page_size, u64 **pte_page, gfp_t gfp, bool *updated) { - struct io_pgtable_cfg *cfg = &domain->iop.pgtbl.cfg; + struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg; int level, end_lvl; u64 *pte, *page; BUG_ON(!is_power_of_2(page_size)); - while (address > PM_LEVEL_SIZE(domain->iop.mode)) { + while (address > PM_LEVEL_SIZE(pgtable->mode)) { /* * Return an error if there is no memory to update the * page-table. 
*/ - if (!increase_address_space(domain, address, gfp)) + if (!increase_address_space(pgtable, address, gfp)) return NULL; } - level = domain->iop.mode - 1; - pte = &domain->iop.root[PM_LEVEL_INDEX(level, address)]; + level = pgtable->mode - 1; + pte = &pgtable->root[PM_LEVEL_INDEX(level, address)]; address = PAGE_SIZE_ALIGN(address, page_size); end_lvl = PAGE_SIZE_LEVEL(page_size); @@ -348,7 +350,7 @@ static int iommu_v1_map_pages(struct io_pgtable_ops *ops, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) { - struct protection_domain *dom = io_pgtable_ops_to_domain(ops); + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); LIST_HEAD(freelist); bool updated = false; u64 __pte, *pte; @@ -365,7 +367,7 @@ static int iommu_v1_map_pages(struct io_pgtable_ops *ops, unsigned long iova, while (pgcount > 0) { count = PAGE_SIZE_PTE_COUNT(pgsize); - pte = alloc_pte(dom, iova, pgsize, NULL, gfp, &updated); + pte = alloc_pte(pgtable, iova, pgsize, NULL, gfp, &updated); ret = -ENOMEM; if (!pte) @@ -402,6 +404,7 @@ static int iommu_v1_map_pages(struct io_pgtable_ops *ops, unsigned long iova, out: if (updated) { + struct protection_domain *dom = io_pgtable_ops_to_domain(ops); unsigned long flags; spin_lock_irqsave(&dom->lock, flags); diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c index 6c9157bd7601..ed25a163b9d5 100644 --- a/drivers/iommu/amd/io_pgtable_v2.c +++ b/drivers/iommu/amd/io_pgtable_v2.c @@ -233,8 +233,8 @@ static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) { - struct protection_domain *pdom = io_pgtable_ops_to_domain(ops); - struct io_pgtable_cfg *cfg = &pdom->iop.pgtbl.cfg; + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); + struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg; u64 *pte; unsigned long map_size; unsigned long mapped_size = 0; @@ -251,7 +251,7 @@ static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova, while (mapped_size < size) { map_size = get_alloc_page_size(pgsize); - pte = v2_alloc_pte(cfg->amd.nid, pdom->iop.pgd, + pte = v2_alloc_pte(cfg->amd.nid, pgtable->pgd, iova, map_size, gfp, &updated); if (!pte) { ret = -ENOMEM; @@ -266,8 +266,11 @@ static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova, } out: - if (updated) + if (updated) { + struct protection_domain *pdom = io_pgtable_ops_to_domain(ops); + amd_iommu_domain_flush_pages(pdom, o_iova, size); + } if (mapped) *mapped += mapped_size; -- Gitee From 9a0467b402982c2b37be877c5b5e50d44c19ad2d Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 29 Aug 2024 21:06:19 -0300 Subject: [PATCH 82/99] iommu/amd: Remove conditions from domain free paths ANBZ: #13617 commit 485534bfccb23e1c3e5915aca9acce1ecdc07a3f upstream. Don't use tlb as some flag to indicate if protection_domain_alloc() completed. Have protection_domain_alloc() unwind itself in the normal kernel style and require protection_domain_free() only be called on successful results of protection_domain_alloc(). Also, the amd_iommu_domain_free() op is never called by the core code with a NULL argument, so remove all the NULL tests as well. 
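The unwind style being adopted — each failure point jumps to a label that releases exactly what was acquired before it — as a self-contained sketch, with stand-ins for domain_id_alloc() and alloc_io_pgtable_ops():

#include <stdlib.h>

struct domain {
        int id;
        void *pgtbl;
};

static int fake_id_alloc(void)          /* stands in for domain_id_alloc() */
{
        return 42;                      /* 0 would mean failure */
}

static struct domain *domain_alloc(void)
{
        struct domain *d = calloc(1, sizeof(*d));

        if (!d)
                return NULL;

        d->id = fake_id_alloc();
        if (!d->id)
                goto err_free;

        d->pgtbl = malloc(4096);        /* stands in for alloc_io_pgtable_ops() */
        if (!d->pgtbl)
                goto err_id;

        return d;

err_id:
        d->id = 0;                      /* stands in for domain_id_free() */
err_free:
        free(d);
        return NULL;
}

int main(void)
{
        struct domain *d = domain_alloc();

        if (d) {
                free(d->pgtbl);
                free(d);
        }
        return 0;
}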
Reviewed-by: Vasant Hegde Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/10-v2-831cdc4d00f3+1a315-amd_iopgtbl_jgg@nvidia.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/iommu.c | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 7a8224868665..d7a141f3dbdf 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2265,17 +2265,9 @@ static void cleanup_domain(struct protection_domain *domain) void protection_domain_free(struct protection_domain *domain) { - if (!domain) - return; - WARN_ON(!list_empty(&domain->dev_list)); - - if (domain->iop.pgtbl.cfg.tlb) - free_io_pgtable_ops(&domain->iop.pgtbl.ops); - - if (domain->id) - domain_id_free(domain->id); - + free_io_pgtable_ops(&domain->iop.pgtbl.ops); + domain_id_free(domain->id); kfree(domain); } @@ -2291,7 +2283,7 @@ struct protection_domain *protection_domain_alloc(unsigned int type, int nid) domain->id = domain_id_alloc(); if (!domain->id) - goto out_err; + goto err_free; spin_lock_init(&domain->lock); INIT_LIST_HEAD(&domain->dev_list); @@ -2314,7 +2306,7 @@ struct protection_domain *protection_domain_alloc(unsigned int type, int nid) pgtable = AMD_IOMMU_V1; break; default: - goto out_err; + goto err_id; } switch (pgtable) { @@ -2325,17 +2317,19 @@ struct protection_domain *protection_domain_alloc(unsigned int type, int nid) domain->pd_mode = PD_MODE_V2; break; default: - goto out_err; + goto err_id; } pgtbl_ops = alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl.cfg, domain); if (!pgtbl_ops) - goto out_err; + goto err_id; return domain; -out_err: - protection_domain_free(domain); +err_id: + domain_id_free(domain->id); +err_free: + kfree(domain); return NULL; } @@ -2437,9 +2431,6 @@ void amd_iommu_domain_free(struct iommu_domain *dom) struct protection_domain *domain; unsigned long flags; - if (!dom) - return; - domain = to_pdomain(dom); spin_lock_irqsave(&domain->lock, flags); -- Gitee From 423acfdf08f9dd7a97b79706b1543e6fbf1e9cd3 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 29 Aug 2024 21:06:20 -0300 Subject: [PATCH 83/99] iommu/amd: Fix typo of , instead of ; ANBZ: #13617 commit a06dcb6b7897152e4dafdc9d4d84e9a35d0ae94a upstream. Generates the same code, but is not the expected C style. 
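Why the stray commas compiled and even behaved: the comma operator evaluates its operands left to right, and each operand here was a complete assignment, so only style was at stake. A compilable demonstration:

#include <assert.h>

int main(void)
{
        int a, b, c;

        a = 1,   /* comma operator: legal, and the same effect here... */
        b = 2,
        c = 3;

        assert(a == 1 && b == 2 && c == 3);

        a = 4;   /* ...as the conventional semicolons */
        b = 5;
        c = 6;

        assert(a == 4 && b == 5 && c == 6);
        return 0;
}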
Fixes: aaac38f61487 ("iommu/amd: Initial support for AMD IOMMU v2 page table") Reviewed-by: Vasant Hegde Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/11-v2-831cdc4d00f3+1a315-amd_iopgtbl_jgg@nvidia.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/io_pgtable_v2.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c index ed25a163b9d5..a5e50a64a37b 100644 --- a/drivers/iommu/amd/io_pgtable_v2.c +++ b/drivers/iommu/amd/io_pgtable_v2.c @@ -375,9 +375,9 @@ static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo pgtable->pgtbl.ops.unmap_pages = iommu_v2_unmap_pages; pgtable->pgtbl.ops.iova_to_phys = iommu_v2_iova_to_phys; - cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES_V2, - cfg->ias = ias, - cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE, + cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES_V2; + cfg->ias = ias; + cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE; cfg->tlb = &v2_flush_ops; return &pgtable->pgtbl; -- Gitee From a9c148ef151bb23f9dc62c19ff426b5eb020a8ad Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 29 Aug 2024 21:06:21 -0300 Subject: [PATCH 84/99] iommu/amd: Remove the confusing dummy iommu_flush_ops tlb ops ANBZ: #13617 commit c435209f7203d90676e9eeae6c1b2d375fbf0304 upstream. The iommu driver is supposed to provide these ops to its io_pgtable implementation so that it can hook the invalidations and do the right thing. They are called by wrapper functions like io_pgtable_tlb_add_page() etc, which the AMD code never calls. Instead it directly calls the AMD IOMMU invalidation functions by casting to the struct protection_domain. Remove it all. Reviewed-by: Vasant Hegde Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/12-v2-831cdc4d00f3+1a315-amd_iopgtbl_jgg@nvidia.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/io_pgtable.c | 22 ---------------------- drivers/iommu/amd/io_pgtable_v2.c | 22 ---------------------- 2 files changed, 44 deletions(-) diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 1cf3d580a551..14f62c420e4a 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -24,27 +24,6 @@ #include "amd_iommu.h" #include "../iommu-pages.h" -static void v1_tlb_flush_all(void *cookie) -{ -} - -static void v1_tlb_flush_walk(unsigned long iova, size_t size, - size_t granule, void *cookie) -{ -} - -static void v1_tlb_add_page(struct iommu_iotlb_gather *gather, - unsigned long iova, size_t granule, - void *cookie) -{ -} - -static const struct iommu_flush_ops v1_flush_ops = { - .tlb_flush_all = v1_tlb_flush_all, - .tlb_flush_walk = v1_tlb_flush_walk, - .tlb_add_page = v1_tlb_add_page, -}; - /* * Helper function to get the first pte of a large mapping */ @@ -572,7 +551,6 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES; cfg->ias = IOMMU_IN_ADDR_BIT_SIZE; cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE; - cfg->tlb = &v1_flush_ops; pgtable->pgtbl.ops.map_pages = iommu_v1_map_pages; pgtable->pgtbl.ops.unmap_pages = iommu_v1_unmap_pages; diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c index a5e50a64a37b..0ee4f45ec14e 100644 --- a/drivers/iommu/amd/io_pgtable_v2.c +++ b/drivers/iommu/amd/io_pgtable_v2.c @@ -326,27 +326,6 @@ static phys_addr_t iommu_v2_iova_to_phys(struct io_pgtable_ops *ops, unsigned lo /* * ---------------------------------------------------- */ 
-static void v2_tlb_flush_all(void *cookie) -{ -} - -static void v2_tlb_flush_walk(unsigned long iova, size_t size, - size_t granule, void *cookie) -{ -} - -static void v2_tlb_add_page(struct iommu_iotlb_gather *gather, - unsigned long iova, size_t granule, - void *cookie) -{ -} - -static const struct iommu_flush_ops v2_flush_ops = { - .tlb_flush_all = v2_tlb_flush_all, - .tlb_flush_walk = v2_tlb_flush_walk, - .tlb_add_page = v2_tlb_add_page, -}; - static void v2_free_pgtable(struct io_pgtable *iop) { struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, pgtbl); @@ -378,7 +357,6 @@ static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES_V2; cfg->ias = ias; cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE; - cfg->tlb = &v2_flush_ops; return &pgtable->pgtbl; } -- Gitee From 2b6398d6c7a84b8e66463ef10d1166da27448a2f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 29 Aug 2024 21:06:22 -0300 Subject: [PATCH 85/99] iommu/amd: Correct the reported page sizes from the V1 table ANBZ: #13617 commit 7e515866299d1d01db6c2bbbc8045218c099ba1f upstream. The HW only has 52 bits of physical address support, the supported page sizes should not have bits set beyond this. Further the spec says that the 6th level does not support any "default page size for translation entries" meaning leafs in the 6th level are not allowed too. Rework the definition to use GENMASK to build the range of supported pages from the top of physical to 4k. Nothing ever uses such large pages, so this is a cosmetic/documentation improvement only. Reported-by: Joao Martins Reviewed-by: Vasant Hegde Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/13-v2-831cdc4d00f3+1a315-amd_iopgtbl_jgg@nvidia.com Signed-off-by: Joerg Roedel Signed-off-by: Shuai Xue --- drivers/iommu/amd/amd_iommu_types.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 30eb07acb8b1..35aa4ff020f5 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -290,8 +290,9 @@ * that we support. * * 512GB Pages are not supported due to a hardware bug + * Page sizes >= the 52 bit max physical address of the CPU are not supported. */ -#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) +#define AMD_IOMMU_PGSIZES (GENMASK_ULL(51, 12) ^ SZ_512G) /* 4K, 2MB, 1G page sizes are supported */ #define AMD_IOMMU_PGSIZES_V2 (PAGE_SIZE | (1ULL << 21) | (1ULL << 30)) -- Gitee From 094acb0232c3f407d1d3f03eb9d10763c263a557 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 4 Sep 2024 13:36:02 +0100 Subject: [PATCH 86/99] ARM: 9417/1: dma-mapping: Pass device to arm_iommu_create_mapping() ANBZ: #13617 commit 9e8354b399e99cec3e3546035bee3347a6df2f24 upstream. All users of ARM IOMMU mappings create them for a particular device, so change the interface to accept the device rather than forcing a vague indirection through a bus type. This prepares for making a similar change to iommu_domain_alloc() itself. Signed-off-by: Robin Murphy Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Vasant Hegde Acked-by: Michael S. 
Tsirkin Acked-by: Jeff Johnson Signed-off-by: Jason Gunthorpe Signed-off-by: Russell King (Oracle) Signed-off-by: Shuai Xue --- arch/arm/include/asm/dma-iommu.h | 2 +- arch/arm/mm/dma-mapping.c | 8 ++++---- drivers/gpu/drm/exynos/exynos_drm_dma.c | 2 +- drivers/iommu/ipmmu-vmsa.c | 3 +-- drivers/iommu/mtk_iommu_v1.c | 3 +-- drivers/media/platform/ti/omap3isp/isp.c | 2 +- 6 files changed, 9 insertions(+), 11 deletions(-) diff --git a/arch/arm/include/asm/dma-iommu.h b/arch/arm/include/asm/dma-iommu.h index 82ec1ccf1fee..2ce4c5683e6d 100644 --- a/arch/arm/include/asm/dma-iommu.h +++ b/arch/arm/include/asm/dma-iommu.h @@ -24,7 +24,7 @@ struct dma_iommu_mapping { }; struct dma_iommu_mapping * -arm_iommu_create_mapping(const struct bus_type *bus, dma_addr_t base, u64 size); +arm_iommu_create_mapping(struct device *dev, dma_addr_t base, u64 size); void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping); diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 5ac482d7ff94..42c035b356d6 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1535,7 +1535,7 @@ static const struct dma_map_ops iommu_ops = { /** * arm_iommu_create_mapping - * @bus: pointer to the bus holding the client device (for IOMMU calls) + * @dev: pointer to the client device (for IOMMU calls) * @base: start address of the valid IO address space * @size: maximum size of the valid IO address space * @@ -1547,7 +1547,7 @@ static const struct dma_map_ops iommu_ops = { * arm_iommu_attach_device function. */ struct dma_iommu_mapping * -arm_iommu_create_mapping(const struct bus_type *bus, dma_addr_t base, u64 size) +arm_iommu_create_mapping(struct device *dev, dma_addr_t base, u64 size) { unsigned int bits = size >> PAGE_SHIFT; unsigned int bitmap_size = BITS_TO_LONGS(bits) * sizeof(long); @@ -1588,7 +1588,7 @@ arm_iommu_create_mapping(const struct bus_type *bus, dma_addr_t base, u64 size) spin_lock_init(&mapping->lock); - mapping->domain = iommu_domain_alloc(bus); + mapping->domain = iommu_domain_alloc(dev->bus); if (!mapping->domain) goto err4; @@ -1721,7 +1721,7 @@ static void arm_setup_iommu_dma_ops(struct device *dev) dma_base = dma_range_map_min(dev->dma_range_map); size = dma_range_map_max(dev->dma_range_map) - dma_base; } - mapping = arm_iommu_create_mapping(dev->bus, dma_base, size); + mapping = arm_iommu_create_mapping(dev, dma_base, size); if (IS_ERR(mapping)) { pr_warn("Failed to create %llu-byte IOMMU mapping for device %s\n", size, dev_name(dev)); diff --git a/drivers/gpu/drm/exynos/exynos_drm_dma.c b/drivers/gpu/drm/exynos/exynos_drm_dma.c index e2c7373f20c6..6a6761935224 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dma.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dma.c @@ -110,7 +110,7 @@ int exynos_drm_register_dma(struct drm_device *drm, struct device *dev, void *mapping = NULL; if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) - mapping = arm_iommu_create_mapping(&platform_bus_type, + mapping = arm_iommu_create_mapping(dev, EXYNOS_DEV_ADDR_START, EXYNOS_DEV_ADDR_SIZE); else if (IS_ENABLED(CONFIG_IOMMU_DMA)) mapping = iommu_get_domain_for_dev(priv->dma_dev); diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index b657cc09605f..ff55b8c30712 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -804,8 +804,7 @@ static int ipmmu_init_arm_mapping(struct device *dev) if (!mmu->mapping) { struct dma_iommu_mapping *mapping; - mapping = arm_iommu_create_mapping(&platform_bus_type, - SZ_1G, SZ_2G); + mapping = arm_iommu_create_mapping(dev, SZ_1G, 
SZ_2G); if (IS_ERR(mapping)) { dev_err(mmu->dev, "failed to create ARM IOMMU mapping\n"); ret = PTR_ERR(mapping); diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index c6ea5b4baff3..ee4e55b6b190 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -433,8 +433,7 @@ static int mtk_iommu_v1_create_mapping(struct device *dev, mtk_mapping = data->mapping; if (!mtk_mapping) { /* MTK iommu support 4GB iova address space. */ - mtk_mapping = arm_iommu_create_mapping(&platform_bus_type, - 0, 1ULL << 32); + mtk_mapping = arm_iommu_create_mapping(dev, 0, 1ULL << 32); if (IS_ERR(mtk_mapping)) return PTR_ERR(mtk_mapping); diff --git a/drivers/media/platform/ti/omap3isp/isp.c b/drivers/media/platform/ti/omap3isp/isp.c index 1cda23244c7b..91101ba88ef0 100644 --- a/drivers/media/platform/ti/omap3isp/isp.c +++ b/drivers/media/platform/ti/omap3isp/isp.c @@ -1965,7 +1965,7 @@ static int isp_attach_iommu(struct isp_device *isp) * Create the ARM mapping, used by the ARM DMA mapping core to allocate * VAs. This will allocate a corresponding IOMMU domain. */ - mapping = arm_iommu_create_mapping(&platform_bus_type, SZ_1G, SZ_2G); + mapping = arm_iommu_create_mapping(isp->dev, SZ_1G, SZ_2G); if (IS_ERR(mapping)) { dev_err(isp->dev, "failed to create ARM IOMMU mapping\n"); return PTR_ERR(mapping); -- Gitee From c248ca653aef0d5be1b277b30203671b64cf3d6e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 4 Sep 2024 11:02:43 +0300 Subject: [PATCH 87/99] iommu/tegra241-cmdqv: Fix ioremap() error handling in probe() ANBZ: #13617 commit 086a3c40ebd02a4ac38121cf909326407b2883bc upstream. The ioremap() function doesn't return error pointers; it returns NULL on error, so update the error handling. Also just return directly instead of calling iounmap() on the NULL pointer. Calling iounmap(NULL) doesn't cause a problem on ARM, but on other architectures it can trigger a warning, so it's a bad habit. Fixes: 918eb5c856f6 ("iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV") Signed-off-by: Dan Carpenter Reviewed-by: Nicolin Chen Link: https://lore.kernel.org/r/5a6c1e9a-0724-41b1-86d4-36335d3768ea@stanley.mountain Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index 03fd13c21dcc..240b54192177 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -772,9 +772,9 @@ __tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res, static_assert(offsetof(struct tegra241_cmdqv, smmu) == 0); base = ioremap(res->start, resource_size(res)); - if (IS_ERR(base)) { - dev_err(smmu->dev, "failed to ioremap: %ld\n", PTR_ERR(base)); - goto iounmap; + if (!base) { + dev_err(smmu->dev, "failed to ioremap\n"); + return NULL; } regval = readl(base + TEGRA241_CMDQV_CONFIG); -- Gitee From 7c3e1801cb680985f591fa682055607e8b3fa85a Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 29 Aug 2024 10:19:59 -0300 Subject: [PATCH 88/99] iommufd: Check the domain owner of the parent before creating a nesting domain ANBZ: #13617 commit 73183ad6ea51029d04b098286dcee98d715015f1 upstream. This check was missed: before we can pass a struct iommu_domain to a driver callback we need to validate that the domain was created by that driver.
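The added guard is the usual owner check before trusting a driver-private layout; schematically, with stand-in types rather than the iommufd structures:

#include <stdio.h>

struct ops { const char *name; };       /* stands in for struct iommu_ops */

struct domain {
        const struct ops *owner;        /* driver that created the domain */
};

static int nested_alloc(const struct domain *parent, const struct ops *ops)
{
        /* A parent created by another driver has a private layout this
         * driver must not reinterpret -- reject it up front. */
        if (parent->owner != ops)
                return -22;             /* -EINVAL */
        return 0;
}

int main(void)
{
        static const struct ops amd = { "amd" }, smmu = { "smmu" };
        struct domain parent = { .owner = &smmu };

        printf("%d\n", nested_alloc(&parent, &amd));    /* -22: wrong owner */
        printf("%d\n", nested_alloc(&parent, &smmu));   /*   0: owner matches */
        return 0;
}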
Fixes: bd529dbb661d ("iommufd: Add a nested HW pagetable object") Link: https://patch.msgid.link/r/0-v1-c8770519edde+1a-iommufd_nesting_ops_jgg@nvidia.com Reviewed-by: Nicolin Chen Signed-off-by: Jason Gunthorpe Signed-off-by: Shuai Xue --- drivers/iommu/iommufd/hw_pagetable.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index aefde4443671..d06bf6e6c19f 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -225,7 +225,8 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx, if ((flags & ~IOMMU_HWPT_FAULT_ID_VALID) || !user_data->len || !ops->domain_alloc_user) return ERR_PTR(-EOPNOTSUPP); - if (parent->auto_domain || !parent->nest_parent) + if (parent->auto_domain || !parent->nest_parent || + parent->common.domain->owner != ops) return ERR_PTR(-EINVAL); hwpt_nested = __iommufd_object_alloc( -- Gitee From 65716cc7c123d40201aa1baef3445bac0da892f9 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 4 Sep 2024 19:40:42 -0700 Subject: [PATCH 89/99] iommu/tegra241-cmdqv: Drop static at local variable ANBZ: #13617 commit 2408b81f817ba6c278c5453eb9b43a167f35d471 upstream. This is likely a typo. Drop it. Fixes: 918eb5c856f6 ("iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV") Signed-off-by: Nicolin Chen Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/13fd3accb5b7ed6ec11cc6b7435f79f84af9f45f.1725503154.git.nicolinc@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index 240b54192177..0766dc2789cb 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -761,8 +761,8 @@ static struct arm_smmu_device * __tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res, int irq) { - static struct arm_smmu_device *new_smmu; struct tegra241_cmdqv *cmdqv = NULL; + struct arm_smmu_device *new_smmu; struct tegra241_vintf *vintf; void __iomem *base; u32 regval; -- Gitee From 92c149254e0756e32e66077502128a2ef474e912 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 4 Sep 2024 19:40:43 -0700 Subject: [PATCH 90/99] iommu/tegra241-cmdqv: Do not allocate vcmdq until dma_set_mask_and_coherent ANBZ: #13617 commit 483e0bd8883a40fd3dd3193997a4014337698d72 upstream. It's observed that, when the first 4GB of system memory was reserved, all VCMDQ allocations failed (even with the smallest qsz in the last attempt): arm-smmu-v3: found companion CMDQV device: NVDA200C:00 arm-smmu-v3: option mask 0x10 arm-smmu-v3: failed to allocate queue (0x8000 bytes) for vcmdq0 acpi NVDA200C:00: tegra241_cmdqv: Falling back to standard SMMU CMDQ arm-smmu-v3: ias 48-bit, oas 48-bit (features 0x001e1fbf) arm-smmu-v3: allocated 524288 entries for cmdq arm-smmu-v3: allocated 524288 entries for evtq arm-smmu-v3: allocated 524288 entries for priq This is because the 4GB reserved memory shifted the entire DMA zone from a lower 32-bit range (on a system without the 4GB carveout) to a higher range, while the dev->coherent_dma_mask was set to DMA_BIT_MASK(32) by default. The dma_set_mask_and_coherent() call is done in arm_smmu_device_hw_probe() of the SMMU driver. So any DMA allocation from tegra241_cmdqv_probe() must wait until the coherent_dma_mask is correctly set.
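As an illustrative sketch of the ordering constraint (the variables q, qsz and dma here are hypothetical, not names from the patch):

	void *q;
	dma_addr_t dma;
	size_t qsz = 0x8000;	/* hypothetical queue size, as in the log */

	/* coherent_dma_mask is still the 32-bit default, so this must be
	 * satisfied from the (possibly empty) low DMA zone and fails: */
	q = dmam_alloc_coherent(dev, qsz, &dma, GFP_KERNEL);

	dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));

	/* after the mask is widened, the same allocation can succeed */
	q = dmam_alloc_coherent(dev, qsz, &dma, GFP_KERNEL);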
Move the vintf/vcmdq structure initialization routine into a different op, "init_structures". Call it at the end of arm_smmu_init_structures(), where standard SMMU queues get allocated. Most of the impl_ops aren't ready until the vintf/vcmdq structures are initialized. So replace the full impl_ops with an init_ops in __tegra241_cmdqv_probe(), and switch to tegra241_cmdqv_impl_ops later in arm_smmu_init_structures(). Note that tegra241_cmdqv_impl_ops does not link to the new init_structures op after this switch, since there is no point in having it once it's done. Fixes: 918eb5c856f6 ("iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV") Reported-by: Matt Ochs Signed-off-by: Nicolin Chen Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/530993c3aafa1b0fc3d879b8119e13c629d12e2b.1725503154.git.nicolinc@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 9 +- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 1 + .../iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 83 ++++++++++++------- 3 files changed, 60 insertions(+), 33 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 62d5c19031b1..1cabd5a28095 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3755,7 +3755,14 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu) if (ret) return ret; - return arm_smmu_init_strtab(smmu); + ret = arm_smmu_init_strtab(smmu); + if (ret) + return ret; + + if (smmu->impl_ops && smmu->impl_ops->init_structures) + return smmu->impl_ops->init_structures(smmu); + + return 0; } static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index e746470cc767..c06a0cd56475 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -643,6 +643,7 @@ struct arm_smmu_strtab_cfg { struct arm_smmu_impl_ops { int (*device_reset)(struct arm_smmu_device *smmu); void (*device_remove)(struct arm_smmu_device *smmu); + int (*init_structures)(struct arm_smmu_device *smmu); struct arm_smmu_cmdq *(*get_secondary_cmdq)( struct arm_smmu_device *smmu, struct arm_smmu_cmdq_ent *ent); }; diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index 0766dc2789cb..fcd13d301fff 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -755,18 +755,65 @@ tegra241_cmdqv_find_acpi_resource(struct device *dev, int *irq) return res; } +static int tegra241_cmdqv_init_structures(struct arm_smmu_device *smmu) +{ + struct tegra241_cmdqv *cmdqv = + container_of(smmu, struct tegra241_cmdqv, smmu); + struct tegra241_vintf *vintf; + int lidx; + int ret; + + vintf = kzalloc(sizeof(*vintf), GFP_KERNEL); + if (!vintf) + goto out_fallback; + + /* Init VINTF0 for in-kernel use */ + ret = tegra241_cmdqv_init_vintf(cmdqv, 0, vintf); + if (ret) { + dev_err(cmdqv->dev, "failed to init vintf0: %d\n", ret); + goto free_vintf; + } + + /* Preallocate logical VCMDQs to VINTF0 */ + for (lidx = 0; lidx < cmdqv->num_lvcmdqs_per_vintf; lidx++) { + struct tegra241_vcmdq *vcmdq; + + vcmdq = tegra241_vintf_alloc_lvcmdq(vintf, lidx); + if (IS_ERR(vcmdq)) + goto free_lvcmdq; + } + + /* Now, we are ready to run all the impl ops */ + smmu->impl_ops = &tegra241_cmdqv_impl_ops; + return 0; +
+free_lvcmdq: + for (lidx--; lidx >= 0; lidx--) + tegra241_vintf_free_lvcmdq(vintf, lidx); + tegra241_cmdqv_deinit_vintf(cmdqv, vintf->idx); +free_vintf: + kfree(vintf); +out_fallback: + dev_info(smmu->impl_dev, "Falling back to standard SMMU CMDQ\n"); + smmu->options &= ~ARM_SMMU_OPT_TEGRA241_CMDQV; + tegra241_cmdqv_remove(smmu); + return 0; +} + struct dentry *cmdqv_debugfs_dir; static struct arm_smmu_device * __tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res, int irq) { + static const struct arm_smmu_impl_ops init_ops = { + .init_structures = tegra241_cmdqv_init_structures, + .device_remove = tegra241_cmdqv_remove, + }; struct tegra241_cmdqv *cmdqv = NULL; struct arm_smmu_device *new_smmu; - struct tegra241_vintf *vintf; void __iomem *base; u32 regval; - int lidx; int ret; static_assert(offsetof(struct tegra241_cmdqv, smmu) == 0); @@ -815,26 +862,6 @@ __tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res, ida_init(&cmdqv->vintf_ids); - vintf = kzalloc(sizeof(*vintf), GFP_KERNEL); - if (!vintf) - goto destroy_ids; - - /* Init VINTF0 for in-kernel use */ - ret = tegra241_cmdqv_init_vintf(cmdqv, 0, vintf); - if (ret) { - dev_err(cmdqv->dev, "failed to init vintf0: %d\n", ret); - goto free_vintf; - } - - /* Preallocate logical VCMDQs to VINTF0 */ - for (lidx = 0; lidx < cmdqv->num_lvcmdqs_per_vintf; lidx++) { - struct tegra241_vcmdq *vcmdq; - - vcmdq = tegra241_vintf_alloc_lvcmdq(vintf, lidx); - if (IS_ERR(vcmdq)) - goto free_lvcmdq; - } - #ifdef CONFIG_IOMMU_DEBUGFS if (!cmdqv_debugfs_dir) { cmdqv_debugfs_dir = @@ -844,19 +871,11 @@ __tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res, } #endif - new_smmu->impl_ops = &tegra241_cmdqv_impl_ops; + /* Provide init-level ops only, until tegra241_cmdqv_init_structures */ + new_smmu->impl_ops = &init_ops; return new_smmu; -free_lvcmdq: - for (lidx--; lidx >= 0; lidx--) - tegra241_vintf_free_lvcmdq(vintf, lidx); - tegra241_cmdqv_deinit_vintf(cmdqv, vintf->idx); -free_vintf: - kfree(vintf); -destroy_ids: - ida_destroy(&cmdqv->vintf_ids); - kfree(cmdqv->vintfs); free_irq: if (cmdqv->irq > 0) free_irq(cmdqv->irq, cmdqv); -- Gitee From a4f025e4ddc0a3698ffae4fa28bb93d46f378606 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 27 Aug 2024 09:59:38 -0700 Subject: [PATCH 91/99] iommufd: Reorder struct forward declarations ANBZ: #13617 commit 3e6a7e3cda773adacc2fa4b9623d0a8c0f904d50 upstream. Reorder struct forward declarations to alphabetic order to simplify maintenance, as upcoming patches will add more to the list. No functional change intended. 
Link: https://patch.msgid.link/r/c5dd87100f6f01389b838c63237e28c5dd373358.1724776335.git.nicolinc@nvidia.com Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe Signed-off-by: Shuai Xue --- include/linux/iommufd.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index c2f2f6b9148e..30f832a60ccb 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -11,12 +11,12 @@ #include struct device; -struct iommufd_device; -struct page; -struct iommufd_ctx; -struct iommufd_access; struct file; struct iommu_group; +struct iommufd_access; +struct iommufd_ctx; +struct iommufd_device; +struct page; struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, struct device *dev, u32 *id); -- Gitee From 22aedfe3ca1f3f097736c20ca4f75625b5df7f5d Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 6 Sep 2024 12:47:48 -0300 Subject: [PATCH 92/99] iommu/arm-smmu-v3: Add arm_smmu_strtab_l1/2_idx() ANBZ: #13617 commit ce410410f1a7db0259ca9282a285fb80fd553b8c upstream. Don't open code the calculations of the indexes for each level, provide two functions to do that math and call them in all the places. Update all the places computing indexes. Calculate the L1 table size directly based on the max required index from the cap. Remove STRTAB_L1_SZ_SHIFT in favour of STRTAB_NUM_L2_STES. Use STRTAB_NUM_L2_STES to replace remaining open coded 1 << STRTAB_SPLIT. Tested-by: Nicolin Chen Reviewed-by: Nicolin Chen Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/1-v4-6416877274e1+1af-smmuv3_tidy_jgg@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 45 +++++++++------------ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 14 ++++++- 2 files changed, 32 insertions(+), 27 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 1cabd5a28095..d83e41fb66b9 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1710,17 +1710,15 @@ static void arm_smmu_init_initial_stes(struct arm_smmu_ste *strtab, static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) { size_t size; - void *strtab; dma_addr_t l2ptr_dma; struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; - struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT]; + struct arm_smmu_strtab_l1_desc *desc; + desc = &cfg->l1_desc[arm_smmu_strtab_l1_idx(sid)]; if (desc->l2ptr) return 0; - size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3); - strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS]; - + size = STRTAB_NUM_L2_STES * sizeof(struct arm_smmu_ste); desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &l2ptr_dma, GFP_KERNEL); if (!desc->l2ptr) { @@ -1730,8 +1728,9 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) return -ENOMEM; } - arm_smmu_init_initial_stes(desc->l2ptr, 1 << STRTAB_SPLIT); - arm_smmu_write_strtab_l1_desc(strtab, l2ptr_dma); + arm_smmu_init_initial_stes(desc->l2ptr, STRTAB_NUM_L2_STES); + arm_smmu_write_strtab_l1_desc(&cfg->strtab[arm_smmu_strtab_l1_idx(sid)], + l2ptr_dma); return 0; } @@ -2486,12 +2485,9 @@ arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid) struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) { - unsigned int idx1, idx2; - /* Two-level walk */ - idx1 = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS; - 
idx2 = sid & ((1 << STRTAB_SPLIT) - 1); - return &cfg->l1_desc[idx1].l2ptr[idx2]; + return &cfg->l1_desc[arm_smmu_strtab_l1_idx(sid)] + .l2ptr[arm_smmu_strtab_l2_idx(sid)]; } else { /* Simple linear lookup */ return (struct arm_smmu_ste *)&cfg @@ -3195,12 +3191,9 @@ struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode) static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid) { - unsigned long limit = smmu->strtab_cfg.num_l1_ents; - if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) - limit *= 1UL << STRTAB_SPLIT; - - return sid < limit; + return arm_smmu_strtab_l1_idx(sid) < smmu->strtab_cfg.num_l1_ents; + return sid < smmu->strtab_cfg.num_l1_ents; } static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid) @@ -3654,19 +3647,18 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) { void *strtab; u64 reg; - u32 size, l1size; + u32 l1size; struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; + unsigned int last_sid_idx = + arm_smmu_strtab_l1_idx((1 << smmu->sid_bits) - 1); /* Calculate the L1 size, capped to the SIDSIZE. */ - size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3); - size = min(size, smmu->sid_bits - STRTAB_SPLIT); - cfg->num_l1_ents = 1 << size; - - size += STRTAB_SPLIT; - if (size < smmu->sid_bits) + cfg->num_l1_ents = min(last_sid_idx + 1, STRTAB_MAX_L1_ENTRIES); + if (cfg->num_l1_ents <= last_sid_idx) dev_warn(smmu->dev, "2-level strtab only covers %u/%u bits of SID\n", - size, smmu->sid_bits); + ilog2(cfg->num_l1_ents * STRTAB_NUM_L2_STES), + smmu->sid_bits); l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3); strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma, GFP_KERNEL); @@ -3681,7 +3673,8 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) /* Configure strtab_base_cfg for 2 levels */ reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL); - reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size); + reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, + ilog2(cfg->num_l1_ents) + STRTAB_SPLIT); reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT); cfg->strtab_base_cfg = reg; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index c06a0cd56475..b8dbabb6b80f 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -204,7 +204,6 @@ struct arm_smmu_device; * 2lvl: 128k L1 entries, * 256 lazy entries per table (each table covers a PCI bus) */ -#define STRTAB_L1_SZ_SHIFT 20 #define STRTAB_SPLIT 8 #define STRTAB_L1_DESC_DWORDS 1 @@ -217,6 +216,19 @@ struct arm_smmu_ste { __le64 data[STRTAB_STE_DWORDS]; }; +#define STRTAB_NUM_L2_STES (1 << STRTAB_SPLIT) +#define STRTAB_MAX_L1_ENTRIES (1 << 17) + +static inline u32 arm_smmu_strtab_l1_idx(u32 sid) +{ + return sid / STRTAB_NUM_L2_STES; +} + +static inline u32 arm_smmu_strtab_l2_idx(u32 sid) +{ + return sid % STRTAB_NUM_L2_STES; +} + #define STRTAB_STE_0_V (1UL << 0) #define STRTAB_STE_0_CFG GENMASK_ULL(3, 1) #define STRTAB_STE_0_CFG_ABORT 0 -- Gitee From ab33015bbdfef636e42acb7a8f2715d743fb553c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 6 Sep 2024 12:47:49 -0300 Subject: [PATCH 93/99] iommu/arm-smmu-v3: Add types for each level of the 2 level stream table ANBZ: #13617 commit abb4f9d323a8d53870cc842d3c5024f71c2d4951 upstream. Add types struct arm_smmu_strtab_l1 and l2 to represent the HW layout of the descriptors, and use them in most places; following patches will get the remaining places.
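For reference, a sketch of the two new types (the exact definitions are in the arm-smmu-v3.h hunk of this patch):

	struct arm_smmu_strtab_l2 {		/* one L2 leaf: 256 STEs */
		struct arm_smmu_ste stes[STRTAB_NUM_L2_STES];
	};

	struct arm_smmu_strtab_l1 {		/* one HW L1 descriptor */
		__le64 l2ptr;
	};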
The sizes of the l1 and l2 HW allocations are sizeof(struct arm_smmu_strtab_l1/2). This provides some more clarity than having raw __le64 *'s and sizes computed via macros. Remove STRTAB_L1_DESC_DWORDS. Tested-by: Nicolin Chen Reviewed-by: Nicolin Chen Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/2-v4-6416877274e1+1af-smmuv3_tidy_jgg@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 21 +++++++++++---------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 10 ++++++++-- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index d83e41fb66b9..a4c3441d44c7 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1496,7 +1496,8 @@ static void arm_smmu_free_cd_tables(struct arm_smmu_master *master) } /* Stream table manipulation functions */ -static void arm_smmu_write_strtab_l1_desc(__le64 *dst, dma_addr_t l2ptr_dma) +static void arm_smmu_write_strtab_l1_desc(struct arm_smmu_strtab_l1 *dst, + dma_addr_t l2ptr_dma) { u64 val = 0; @@ -1504,7 +1505,7 @@ static void arm_smmu_write_strtab_l1_desc(__le64 *dst, dma_addr_t l2ptr_dma) val |= l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK; /* The HW has 64 bit atomicity with stores to the L2 STE table */ - WRITE_ONCE(*dst, cpu_to_le64(val)); + WRITE_ONCE(dst->l2ptr, cpu_to_le64(val)); } struct arm_smmu_ste_writer { @@ -1709,18 +1710,17 @@ static void arm_smmu_init_initial_stes(struct arm_smmu_ste *strtab, static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) { - size_t size; dma_addr_t l2ptr_dma; struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; struct arm_smmu_strtab_l1_desc *desc; + __le64 *dst; desc = &cfg->l1_desc[arm_smmu_strtab_l1_idx(sid)]; if (desc->l2ptr) return 0; - size = STRTAB_NUM_L2_STES * sizeof(struct arm_smmu_ste); - desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &l2ptr_dma, - GFP_KERNEL); + desc->l2ptr = dmam_alloc_coherent(smmu->dev, sizeof(*desc->l2ptr), + &l2ptr_dma, GFP_KERNEL); if (!desc->l2ptr) { dev_err(smmu->dev, "failed to allocate l2 stream table for SID %u\n", @@ -1728,8 +1728,9 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) return -ENOMEM; } - arm_smmu_init_initial_stes(desc->l2ptr, STRTAB_NUM_L2_STES); - arm_smmu_write_strtab_l1_desc(&cfg->strtab[arm_smmu_strtab_l1_idx(sid)], + arm_smmu_init_initial_stes(desc->l2ptr->stes, STRTAB_NUM_L2_STES); + dst = &cfg->strtab[arm_smmu_strtab_l1_idx(sid)]; + arm_smmu_write_strtab_l1_desc((struct arm_smmu_strtab_l1 *)dst, l2ptr_dma); return 0; } @@ -2487,7 +2488,7 @@ arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid) if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) { /* Two-level walk */ return &cfg->l1_desc[arm_smmu_strtab_l1_idx(sid)] - .l2ptr[arm_smmu_strtab_l2_idx(sid)]; + .l2ptr->stes[arm_smmu_strtab_l2_idx(sid)]; } else { /* Simple linear lookup */ return (struct arm_smmu_ste *)&cfg @@ -3660,7 +3661,7 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) ilog2(cfg->num_l1_ents * STRTAB_NUM_L2_STES), smmu->sid_bits); - l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3); + l1size = cfg->num_l1_ents * sizeof(struct arm_smmu_strtab_l1); strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma, GFP_KERNEL); if (!strtab) { diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index b8dbabb6b80f..8eaf05ac3168 100644 ---
a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -206,7 +206,6 @@ struct arm_smmu_device; */ #define STRTAB_SPLIT 8 -#define STRTAB_L1_DESC_DWORDS 1 #define STRTAB_L1_DESC_SPAN GENMASK_ULL(4, 0) #define STRTAB_L1_DESC_L2PTR_MASK GENMASK_ULL(51, 6) @@ -217,6 +216,13 @@ struct arm_smmu_ste { }; #define STRTAB_NUM_L2_STES (1 << STRTAB_SPLIT) +struct arm_smmu_strtab_l2 { + struct arm_smmu_ste stes[STRTAB_NUM_L2_STES]; +}; + +struct arm_smmu_strtab_l1 { + __le64 l2ptr; +}; #define STRTAB_MAX_L1_ENTRIES (1 << 17) static inline u32 arm_smmu_strtab_l1_idx(u32 sid) @@ -608,7 +614,7 @@ struct arm_smmu_priq { /* High-level stream table and context descriptor structures */ struct arm_smmu_strtab_l1_desc { - struct arm_smmu_ste *l2ptr; + struct arm_smmu_strtab_l2 *l2ptr; }; struct arm_smmu_ctx_desc { -- Gitee From e913608ad908fef1cc378050e39edf8a2920d73d Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 6 Sep 2024 12:47:50 -0300 Subject: [PATCH 94/99] iommu/arm-smmu-v3: Reorganize struct arm_smmu_strtab_cfg ANBZ: #13617 commit 85196f54743d97b0678e7889df72fdcc58ab2b02 upstream. The members here are being used for both the linear and the 2 level case, with the meaning of each item slightly different in the two cases. Split it into a clean union where both cases have their own struct with their own logical names and correct types. Adjust all the users to detect linear/2lvl and use the right sub structure and types consistently. Remove STRTAB_STE_DWORDS by changing the last places to use sizeof(struct arm_smmu_ste). Tested-by: Nicolin Chen Reviewed-by: Nicolin Chen Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/3-v4-6416877274e1+1af-smmuv3_tidy_jgg@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 78 ++++++++++----------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 22 +++--- 2 files changed, 50 insertions(+), 50 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index a4c3441d44c7..8ab26da78b30 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1712,25 +1712,24 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) { dma_addr_t l2ptr_dma; struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; - struct arm_smmu_strtab_l1_desc *desc; - __le64 *dst; + struct arm_smmu_strtab_l2 **l2table; - desc = &cfg->l1_desc[arm_smmu_strtab_l1_idx(sid)]; - if (desc->l2ptr) + l2table = &cfg->l2.l2ptrs[arm_smmu_strtab_l1_idx(sid)]; + if (*l2table) return 0; - desc->l2ptr = dmam_alloc_coherent(smmu->dev, sizeof(*desc->l2ptr), - &l2ptr_dma, GFP_KERNEL); - if (!desc->l2ptr) { + *l2table = dmam_alloc_coherent(smmu->dev, sizeof(**l2table), + &l2ptr_dma, GFP_KERNEL); + if (!*l2table) { dev_err(smmu->dev, "failed to allocate l2 stream table for SID %u\n", sid); return -ENOMEM; } - arm_smmu_init_initial_stes(desc->l2ptr->stes, STRTAB_NUM_L2_STES); - dst = &cfg->strtab[arm_smmu_strtab_l1_idx(sid)]; - arm_smmu_write_strtab_l1_desc((struct arm_smmu_strtab_l1 *)dst, + arm_smmu_init_initial_stes((*l2table)->stes, + ARRAY_SIZE((*l2table)->stes)); + arm_smmu_write_strtab_l1_desc(&cfg->l2.l1tab[arm_smmu_strtab_l1_idx(sid)], l2ptr_dma); return 0; } @@ -2487,12 +2486,11 @@ arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid) if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) { /* Two-level walk */ - return &cfg->l1_desc[arm_smmu_strtab_l1_idx(sid)] - 
.l2ptr->stes[arm_smmu_strtab_l2_idx(sid)]; + return &cfg->l2.l2ptrs[arm_smmu_strtab_l1_idx(sid)] + ->stes[arm_smmu_strtab_l2_idx(sid)]; } else { /* Simple linear lookup */ - return (struct arm_smmu_ste *)&cfg - ->strtab[sid * STRTAB_STE_DWORDS]; + return &cfg->linear.table[sid]; } } @@ -3193,8 +3191,8 @@ struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode) static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid) { if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) - return arm_smmu_strtab_l1_idx(sid) < smmu->strtab_cfg.num_l1_ents; - return sid < smmu->strtab_cfg.num_l1_ents; + return arm_smmu_strtab_l1_idx(sid) < smmu->strtab_cfg.l2.num_l1_ents; + return sid < smmu->strtab_cfg.linear.num_ents; } static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid) @@ -3646,7 +3644,6 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu) static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) { - void *strtab; u64 reg; u32 l1size; struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; @@ -3654,34 +3651,33 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) arm_smmu_strtab_l1_idx((1 << smmu->sid_bits) - 1); /* Calculate the L1 size, capped to the SIDSIZE. */ - cfg->num_l1_ents = min(last_sid_idx + 1, STRTAB_MAX_L1_ENTRIES); - if (cfg->num_l1_ents <= last_sid_idx) + cfg->l2.num_l1_ents = min(last_sid_idx + 1, STRTAB_MAX_L1_ENTRIES); + if (cfg->l2.num_l1_ents <= last_sid_idx) dev_warn(smmu->dev, "2-level strtab only covers %u/%u bits of SID\n", - ilog2(cfg->num_l1_ents * STRTAB_NUM_L2_STES), + ilog2(cfg->l2.num_l1_ents * STRTAB_NUM_L2_STES), smmu->sid_bits); - l1size = cfg->num_l1_ents * sizeof(struct arm_smmu_strtab_l1); - strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma, - GFP_KERNEL); - if (!strtab) { + l1size = cfg->l2.num_l1_ents * sizeof(struct arm_smmu_strtab_l1); + cfg->l2.l1tab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->l2.l1_dma, + GFP_KERNEL); + if (!cfg->l2.l1tab) { dev_err(smmu->dev, "failed to allocate l1 stream table (%u bytes)\n", l1size); return -ENOMEM; } - cfg->strtab = strtab; /* Configure strtab_base_cfg for 2 levels */ reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL); reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, - ilog2(cfg->num_l1_ents) + STRTAB_SPLIT); + ilog2(cfg->l2.num_l1_ents) + STRTAB_SPLIT); reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT); cfg->strtab_base_cfg = reg; - cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents, - sizeof(*cfg->l1_desc), GFP_KERNEL); - if (!cfg->l1_desc) + cfg->l2.l2ptrs = devm_kcalloc(smmu->dev, cfg->l2.num_l1_ents, + sizeof(*cfg->l2.l2ptrs), GFP_KERNEL); + if (!cfg->l2.l2ptrs) return -ENOMEM; return 0; @@ -3689,29 +3685,28 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu) { - void *strtab; u64 reg; u32 size; struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; - size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3); - strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma, - GFP_KERNEL); - if (!strtab) { + size = (1 << smmu->sid_bits) * sizeof(struct arm_smmu_ste); + cfg->linear.table = dmam_alloc_coherent(smmu->dev, size, + &cfg->linear.ste_dma, + GFP_KERNEL); + if (!cfg->linear.table) { dev_err(smmu->dev, "failed to allocate linear stream table (%u bytes)\n", size); return -ENOMEM; } - cfg->strtab = strtab; - cfg->num_l1_ents = 1 << smmu->sid_bits; + cfg->linear.num_ents = 1 << smmu->sid_bits; /* Configure 
strtab_base_cfg for a linear table covering all SIDs */ reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR); reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits); cfg->strtab_base_cfg = reg; - arm_smmu_init_initial_stes(strtab, cfg->num_l1_ents); + arm_smmu_init_initial_stes(cfg->linear.table, cfg->linear.num_ents); return 0; } @@ -3720,16 +3715,17 @@ static int arm_smmu_init_strtab(struct arm_smmu_device *smmu) u64 reg; int ret; - if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) + if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) { ret = arm_smmu_init_strtab_2lvl(smmu); - else + reg = smmu->strtab_cfg.l2.l1_dma & STRTAB_BASE_ADDR_MASK; + } else { ret = arm_smmu_init_strtab_linear(smmu); - + reg = smmu->strtab_cfg.linear.ste_dma & STRTAB_BASE_ADDR_MASK; + } if (ret) return ret; /* Set the strtab base address */ - reg = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK; reg |= STRTAB_BASE_RA; smmu->strtab_cfg.strtab_base = reg; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 8eaf05ac3168..654fcdd642da 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -613,10 +613,6 @@ struct arm_smmu_priq { }; /* High-level stream table and context descriptor structures */ -struct arm_smmu_strtab_l1_desc { - struct arm_smmu_strtab_l2 *l2ptr; -}; - struct arm_smmu_ctx_desc { u16 asid; }; @@ -649,11 +645,19 @@ struct arm_smmu_s2_cfg { }; struct arm_smmu_strtab_cfg { - __le64 *strtab; - dma_addr_t strtab_dma; - struct arm_smmu_strtab_l1_desc *l1_desc; - unsigned int num_l1_ents; - + union { + struct { + struct arm_smmu_ste *table; + dma_addr_t ste_dma; + unsigned int num_ents; + } linear; + struct { + struct arm_smmu_strtab_l1 *l1tab; + struct arm_smmu_strtab_l2 **l2ptrs; + dma_addr_t l1_dma; + unsigned int num_l1_ents; + } l2; + }; u64 strtab_base; u32 strtab_base_cfg; }; -- Gitee From ad5c33d759366390f39c97dccc9a647fd3266c87 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 6 Sep 2024 12:47:51 -0300 Subject: [PATCH 95/99] iommu/arm-smmu-v3: Remove strtab_base/cfg ANBZ: #13617 commit 8c153ef95697242b72646d2c4cf6c4b23ccf35a3 upstream. These values can be computed from the other values already stored in the config. Move the calculation to arm_smmu_write_strtab() and do it directly before writing the registers. This moves all the logic to calculate the two registers into one function from three and saves an unimportant 16 bytes from the arm_smmu_device. 
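Condensed, the write-time computation looks like this (a sketch of the new arm_smmu_write_strtab() from the hunk below; the reg setup is elided):

	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
		dma = cfg->l2.l1_dma;
	else
		dma = cfg->linear.ste_dma;
	writeq_relaxed((dma & STRTAB_BASE_ADDR_MASK) | STRTAB_BASE_RA,
		       smmu->base + ARM_SMMU_STRTAB_BASE);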
Suggested-by: Nicolin Chen Reviewed-by: Nicolin Chen Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/4-v4-6416877274e1+1af-smmuv3_tidy_jgg@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 55 ++++++++++----------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 2 - 2 files changed, 27 insertions(+), 30 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 8ab26da78b30..03f722d60876 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3644,7 +3644,6 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu) static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) { - u64 reg; u32 l1size; struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; unsigned int last_sid_idx = @@ -3668,13 +3667,6 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) return -ENOMEM; } - /* Configure strtab_base_cfg for 2 levels */ - reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL); - reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, - ilog2(cfg->l2.num_l1_ents) + STRTAB_SPLIT); - reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT); - cfg->strtab_base_cfg = reg; - cfg->l2.l2ptrs = devm_kcalloc(smmu->dev, cfg->l2.num_l1_ents, sizeof(*cfg->l2.l2ptrs), GFP_KERNEL); if (!cfg->l2.l2ptrs) @@ -3685,7 +3677,6 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu) { - u64 reg; u32 size; struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; @@ -3701,34 +3692,21 @@ static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu) } cfg->linear.num_ents = 1 << smmu->sid_bits; - /* Configure strtab_base_cfg for a linear table covering all SIDs */ - reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR); - reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits); - cfg->strtab_base_cfg = reg; - arm_smmu_init_initial_stes(cfg->linear.table, cfg->linear.num_ents); return 0; } static int arm_smmu_init_strtab(struct arm_smmu_device *smmu) { - u64 reg; int ret; - if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) { + if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) ret = arm_smmu_init_strtab_2lvl(smmu); - reg = smmu->strtab_cfg.l2.l1_dma & STRTAB_BASE_ADDR_MASK; - } else { + else ret = arm_smmu_init_strtab_linear(smmu); - reg = smmu->strtab_cfg.linear.ste_dma & STRTAB_BASE_ADDR_MASK; - } if (ret) return ret; - /* Set the strtab base address */ - reg |= STRTAB_BASE_RA; - smmu->strtab_cfg.strtab_base = reg; - ida_init(&smmu->vmid_map); return 0; @@ -3944,6 +3922,30 @@ static int arm_smmu_device_disable(struct arm_smmu_device *smmu) return ret; } +static void arm_smmu_write_strtab(struct arm_smmu_device *smmu) +{ + struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; + dma_addr_t dma; + u32 reg; + + if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) { + reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, + STRTAB_BASE_CFG_FMT_2LVL) | + FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, + ilog2(cfg->l2.num_l1_ents) + STRTAB_SPLIT) | + FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT); + dma = cfg->l2.l1_dma; + } else { + reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, + STRTAB_BASE_CFG_FMT_LINEAR) | + FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits); + dma = cfg->linear.ste_dma; + } + writeq_relaxed((dma & STRTAB_BASE_ADDR_MASK) | STRTAB_BASE_RA, + smmu->base + ARM_SMMU_STRTAB_BASE); + writel_relaxed(reg, smmu->base + 
ARM_SMMU_STRTAB_BASE_CFG); +} + static int arm_smmu_device_reset(struct arm_smmu_device *smmu) { int ret; @@ -3979,10 +3981,7 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu) writel_relaxed(reg, smmu->base + ARM_SMMU_CR2); /* Stream table */ - writeq_relaxed(smmu->strtab_cfg.strtab_base, - smmu->base + ARM_SMMU_STRTAB_BASE); - writel_relaxed(smmu->strtab_cfg.strtab_base_cfg, - smmu->base + ARM_SMMU_STRTAB_BASE_CFG); + arm_smmu_write_strtab(smmu); /* Command queue */ writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 654fcdd642da..d763280310fc 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -658,8 +658,6 @@ struct arm_smmu_strtab_cfg { unsigned int num_l1_ents; } l2; }; - u64 strtab_base; - u32 strtab_base_cfg; }; struct arm_smmu_impl_ops { -- Gitee From e40b4517eb15314157faff8ac9465a78814b4dea Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 6 Sep 2024 12:47:52 -0300 Subject: [PATCH 96/99] iommu/arm-smmu-v3: Do not use devm for the cd table allocations ANBZ: #13617 commit 47b2de35cab2b683f69d03515c2658c2d8515323 upstream. The master->cd_table is entirely contained within the struct arm_smmu_master which is guaranteed to be freed by the core code under arm_smmu_release_device(). There is no reason to use devm here, arm_smmu_free_cd_tables() is reliably called to free the CD related memory. Remove it and save some memory. Tested-by: Nicolin Chen Reviewed-by: Nicolin Chen Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/5-v4-6416877274e1+1af-smmuv3_tidy_jgg@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 29 +++++++++------------ 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 03f722d60876..53939a546dc4 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1222,8 +1222,8 @@ static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu, { size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3); - l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, - &l1_desc->l2ptr_dma, GFP_KERNEL); + l1_desc->l2ptr = dma_alloc_coherent(smmu->dev, size, + &l1_desc->l2ptr_dma, GFP_KERNEL); if (!l1_desc->l2ptr) { dev_warn(smmu->dev, "failed to allocate context descriptor table\n"); @@ -1437,17 +1437,17 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master) cd_table->num_l1_ents = DIV_ROUND_UP(max_contexts, CTXDESC_L2_ENTRIES); - cd_table->l1_desc = devm_kcalloc(smmu->dev, cd_table->num_l1_ents, - sizeof(*cd_table->l1_desc), - GFP_KERNEL); + cd_table->l1_desc = kcalloc(cd_table->num_l1_ents, + sizeof(*cd_table->l1_desc), + GFP_KERNEL); if (!cd_table->l1_desc) return -ENOMEM; l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3); } - cd_table->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cd_table->cdtab_dma, - GFP_KERNEL); + cd_table->cdtab = dma_alloc_coherent(smmu->dev, l1size, + &cd_table->cdtab_dma, GFP_KERNEL); if (!cd_table->cdtab) { dev_warn(smmu->dev, "failed to allocate context descriptor\n"); ret = -ENOMEM; @@ -1458,7 +1458,7 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master) err_free_l1: if (cd_table->l1_desc) { - devm_kfree(smmu->dev, cd_table->l1_desc); + kfree(cd_table->l1_desc); 
cd_table->l1_desc = NULL; } return ret; @@ -1478,21 +1478,18 @@ static void arm_smmu_free_cd_tables(struct arm_smmu_master *master) if (!cd_table->l1_desc[i].l2ptr) continue; - dmam_free_coherent(smmu->dev, size, - cd_table->l1_desc[i].l2ptr, - cd_table->l1_desc[i].l2ptr_dma); + dma_free_coherent(smmu->dev, size, + cd_table->l1_desc[i].l2ptr, + cd_table->l1_desc[i].l2ptr_dma); } - devm_kfree(smmu->dev, cd_table->l1_desc); - cd_table->l1_desc = NULL; + kfree(cd_table->l1_desc); l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3); } else { l1size = cd_table->num_l1_ents * (CTXDESC_CD_DWORDS << 3); } - dmam_free_coherent(smmu->dev, l1size, cd_table->cdtab, cd_table->cdtab_dma); - cd_table->cdtab_dma = 0; - cd_table->cdtab = NULL; + dma_free_coherent(smmu->dev, l1size, cd_table->cdtab, cd_table->cdtab_dma); } /* Stream table manipulation functions */ -- Gitee From e0e808197c85ce704dfb0ce9ca5fb8dc0afdd17b Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 6 Sep 2024 12:47:53 -0300 Subject: [PATCH 97/99] iommu/arm-smmu-v3: Shrink the cdtab l1_desc array ANBZ: #13617 commit c0a25a96dee9c3af01fbcad227871fc0f222900b upstream. The top of the 2 level CD table is (at most) 1024 entries big, and two high order allocations are required. One of __le64 which is programmed into the HW (8k) and one of struct arm_smmu_l1_ctx_desc which holds the CPU pointer (16k). There are two copies of the l2ptr_dma, one is stored in the struct arm_smmu_l1_ctx_desc, and another is encoded in the __le64 for the HW to use. Instead of storing two copies just decode the value from the __le64. Tested-by: Nicolin Chen Reviewed-by: Nicolin Chen Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/6-v4-6416877274e1+1af-smmuv3_tidy_jgg@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 42 +++++++++------------ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 1 - 2 files changed, 18 insertions(+), 25 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 53939a546dc4..1e7a9937ce11 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1217,29 +1217,17 @@ static void arm_smmu_sync_cd(struct arm_smmu_master *master, arm_smmu_cmdq_batch_submit(smmu, &cmds); } -static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu, - struct arm_smmu_l1_ctx_desc *l1_desc) +static void arm_smmu_write_cd_l1_desc(__le64 *dst, dma_addr_t l2ptr_dma) { - size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3); + u64 val = (l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) | CTXDESC_L1_DESC_V; - l1_desc->l2ptr = dma_alloc_coherent(smmu->dev, size, - &l1_desc->l2ptr_dma, GFP_KERNEL); - if (!l1_desc->l2ptr) { - dev_warn(smmu->dev, - "failed to allocate context descriptor table\n"); - return -ENOMEM; - } - return 0; + /* The HW has 64 bit atomicity with stores to the L2 CD table */ + WRITE_ONCE(*dst, cpu_to_le64(val)); } -static void arm_smmu_write_cd_l1_desc(__le64 *dst, - struct arm_smmu_l1_ctx_desc *l1_desc) +static dma_addr_t arm_smmu_cd_l1_get_desc(const __le64 *src) { - u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) | - CTXDESC_L1_DESC_V; - - /* The HW has 64 bit atomicity with stores to the L2 CD table */ - WRITE_ONCE(*dst, cpu_to_le64(val)); + return le64_to_cpu(*src) & CTXDESC_L1_DESC_L2PTR_MASK; } struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, @@ -1281,13 +1269,18 @@ static struct arm_smmu_cd 
*arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master, l1_desc = &cd_table->l1_desc[idx]; if (!l1_desc->l2ptr) { - __le64 *l1ptr; - - if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc)) + dma_addr_t l2ptr_dma; + size_t size; + + size = CTXDESC_L2_ENTRIES * sizeof(struct arm_smmu_cd); + l1_desc->l2ptr = dma_alloc_coherent(smmu->dev, size, + &l2ptr_dma, + GFP_KERNEL); + if (!l1_desc->l2ptr) return NULL; - l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS; - arm_smmu_write_cd_l1_desc(l1ptr, l1_desc); + arm_smmu_write_cd_l1_desc(&cd_table->cdtab[idx], + l2ptr_dma); /* An invalid L1CD can be cached */ arm_smmu_sync_cd(master, ssid, false); } @@ -1480,7 +1473,8 @@ static void arm_smmu_free_cd_tables(struct arm_smmu_master *master) dma_free_coherent(smmu->dev, size, cd_table->l1_desc[i].l2ptr, - cd_table->l1_desc[i].l2ptr_dma); + arm_smmu_cd_l1_get_desc( + &cd_table->cdtab[i])); } kfree(cd_table->l1_desc); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index d763280310fc..72451ad4e7b9 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -619,7 +619,6 @@ struct arm_smmu_ctx_desc { struct arm_smmu_l1_ctx_desc { struct arm_smmu_cd *l2ptr; - dma_addr_t l2ptr_dma; }; struct arm_smmu_ctx_desc_cfg { -- Gitee From 757acb9429aa90bf96580088194dc55e9131d9cc Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 6 Sep 2024 12:47:54 -0300 Subject: [PATCH 98/99] iommu/arm-smmu-v3: Add types for each level of the CD table ANBZ: #13617 commit 7c567eb1e1d2a835140091ff8d4b73ac5454ba7b upstream. As well as indexing helpers arm_smmu_cdtab_l1/2_idx(). Remove CTXDESC_L1_DESC_DWORDS and CTXDESC_CD_DWORDS replacing them all with type specific calculations. Tested-by: Nicolin Chen Reviewed-by: Nicolin Chen Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/7-v4-6416877274e1+1af-smmuv3_tidy_jgg@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 45 +++++++++++---------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 23 +++++++++-- 2 files changed, 44 insertions(+), 24 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 1e7a9937ce11..108912cf95b3 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1217,17 +1217,18 @@ static void arm_smmu_sync_cd(struct arm_smmu_master *master, arm_smmu_cmdq_batch_submit(smmu, &cmds); } -static void arm_smmu_write_cd_l1_desc(__le64 *dst, dma_addr_t l2ptr_dma) +static void arm_smmu_write_cd_l1_desc(struct arm_smmu_cdtab_l1 *dst, + dma_addr_t l2ptr_dma) { u64 val = (l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) | CTXDESC_L1_DESC_V; /* The HW has 64 bit atomicity with stores to the L2 CD table */ - WRITE_ONCE(*dst, cpu_to_le64(val)); + WRITE_ONCE(dst->l2ptr, cpu_to_le64(val)); } -static dma_addr_t arm_smmu_cd_l1_get_desc(const __le64 *src) +static dma_addr_t arm_smmu_cd_l1_get_desc(const struct arm_smmu_cdtab_l1 *src) { - return le64_to_cpu(*src) & CTXDESC_L1_DESC_L2PTR_MASK; + return le64_to_cpu(src->l2ptr) & CTXDESC_L1_DESC_L2PTR_MASK; } struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, @@ -1240,13 +1241,12 @@ struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, return NULL; if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR) - return (struct arm_smmu_cd *)(cd_table->cdtab + - ssid * CTXDESC_CD_DWORDS); + return &((struct arm_smmu_cd 
*)cd_table->cdtab)[ssid]; - l1_desc = &cd_table->l1_desc[ssid / CTXDESC_L2_ENTRIES]; + l1_desc = &cd_table->l1_desc[arm_smmu_cdtab_l1_idx(ssid)]; if (!l1_desc->l2ptr) return NULL; - return &l1_desc->l2ptr[ssid % CTXDESC_L2_ENTRIES]; + return &l1_desc->l2ptr->cds[arm_smmu_cdtab_l2_idx(ssid)]; } static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master, @@ -1264,11 +1264,12 @@ static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master, } if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_64K_L2) { - unsigned int idx = ssid / CTXDESC_L2_ENTRIES; + unsigned int idx = arm_smmu_cdtab_l1_idx(ssid); struct arm_smmu_l1_ctx_desc *l1_desc; l1_desc = &cd_table->l1_desc[idx]; if (!l1_desc->l2ptr) { + struct arm_smmu_cdtab_l1 *dst; dma_addr_t l2ptr_dma; size_t size; @@ -1279,8 +1280,8 @@ static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master, if (!l1_desc->l2ptr) return NULL; - arm_smmu_write_cd_l1_desc(&cd_table->cdtab[idx], - l2ptr_dma); + dst = &((struct arm_smmu_cdtab_l1 *)cd_table->cdtab)[idx]; + arm_smmu_write_cd_l1_desc(dst, l2ptr_dma); /* An invalid L1CD can be cached */ arm_smmu_sync_cd(master, ssid, false); } @@ -1424,7 +1425,7 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master) cd_table->s1fmt = STRTAB_STE_0_S1FMT_LINEAR; cd_table->num_l1_ents = max_contexts; - l1size = max_contexts * (CTXDESC_CD_DWORDS << 3); + l1size = max_contexts * sizeof(struct arm_smmu_cd); } else { cd_table->s1fmt = STRTAB_STE_0_S1FMT_64K_L2; cd_table->num_l1_ents = DIV_ROUND_UP(max_contexts, @@ -1436,7 +1437,7 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master) if (!cd_table->l1_desc) return -ENOMEM; - l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3); + l1size = cd_table->num_l1_ents * sizeof(struct arm_smmu_cdtab_l1); } cd_table->cdtab = dma_alloc_coherent(smmu->dev, l1size, @@ -1460,27 +1461,29 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master) static void arm_smmu_free_cd_tables(struct arm_smmu_master *master) { int i; - size_t size, l1size; + size_t l1size; struct arm_smmu_device *smmu = master->smmu; struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; if (cd_table->l1_desc) { - size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3); - for (i = 0; i < cd_table->num_l1_ents; i++) { + dma_addr_t dma_handle; + if (!cd_table->l1_desc[i].l2ptr) continue; - dma_free_coherent(smmu->dev, size, + dma_handle = arm_smmu_cd_l1_get_desc(&( + (struct arm_smmu_cdtab_l1 *)cd_table->cdtab)[i]); + dma_free_coherent(smmu->dev, + sizeof(*cd_table->l1_desc[i].l2ptr), cd_table->l1_desc[i].l2ptr, - arm_smmu_cd_l1_get_desc( - &cd_table->cdtab[i])); + dma_handle); } kfree(cd_table->l1_desc); - l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3); + l1size = cd_table->num_l1_ents * sizeof(struct arm_smmu_cdtab_l1); } else { - l1size = cd_table->num_l1_ents * (CTXDESC_CD_DWORDS << 3); + l1size = cd_table->num_l1_ents * sizeof(struct arm_smmu_cd); } dma_free_coherent(smmu->dev, l1size, cd_table->cdtab, cd_table->cdtab_dma); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 72451ad4e7b9..9cda3c9bd5e4 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -301,7 +301,6 @@ static inline u32 arm_smmu_strtab_l2_idx(u32 sid) */ #define CTXDESC_L2_ENTRIES 1024 -#define CTXDESC_L1_DESC_DWORDS 1 #define CTXDESC_L1_DESC_V (1UL << 0) #define CTXDESC_L1_DESC_L2PTR_MASK GENMASK_ULL(51, 12) @@ 
-311,6 +310,24 @@ struct arm_smmu_cd { __le64 data[CTXDESC_CD_DWORDS]; }; +struct arm_smmu_cdtab_l2 { + struct arm_smmu_cd cds[CTXDESC_L2_ENTRIES]; +}; + +struct arm_smmu_cdtab_l1 { + __le64 l2ptr; +}; + +static inline unsigned int arm_smmu_cdtab_l1_idx(unsigned int ssid) +{ + return ssid / CTXDESC_L2_ENTRIES; +} + +static inline unsigned int arm_smmu_cdtab_l2_idx(unsigned int ssid) +{ + return ssid % CTXDESC_L2_ENTRIES; +} + #define CTXDESC_CD_0_TCR_T0SZ GENMASK_ULL(5, 0) #define CTXDESC_CD_0_TCR_TG0 GENMASK_ULL(7, 6) #define CTXDESC_CD_0_TCR_IRGN0 GENMASK_ULL(9, 8) @@ -341,7 +358,7 @@ struct arm_smmu_cd { * When the SMMU only supports linear context descriptor tables, pick a * reasonable size limit (64kB). */ -#define CTXDESC_LINEAR_CDMAX ilog2(SZ_64K / (CTXDESC_CD_DWORDS << 3)) +#define CTXDESC_LINEAR_CDMAX ilog2(SZ_64K / sizeof(struct arm_smmu_cd)) /* Command queue */ #define CMDQ_ENT_SZ_SHIFT 4 @@ -618,7 +635,7 @@ struct arm_smmu_ctx_desc { }; struct arm_smmu_l1_ctx_desc { - struct arm_smmu_cd *l2ptr; + struct arm_smmu_cdtab_l2 *l2ptr; }; struct arm_smmu_ctx_desc_cfg { -- Gitee From c2771bb7331197e1f35dd879c7518eb679ba78fa Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 6 Sep 2024 12:47:55 -0300 Subject: [PATCH 99/99] iommu/arm-smmu-v3: Reorganize struct arm_smmu_ctx_desc_cfg ANBZ: #13617 commit e3b1be2e73dbe599f8b8886e120d206aa87e90f9 upstream. The members here are being used for both the linear and the 2 level case, with the meaning of each item slightly different in the two cases. Split it into a clean union where both cases have their own struct with their own logical names and correct types. Adjust all the users to detect linear/2lvl and use the right sub structure and types consistently. Remove CTXDESC_CD_DWORDS by changing the last places to use sizeof(struct arm_smmu_cd). 
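For illustration, lookups now select the variant explicitly (condensed from arm_smmu_get_cd_ptr() in the hunk below; cd_table is the master's struct arm_smmu_ctx_desc_cfg and l2 a struct arm_smmu_cdtab_l2 pointer):

	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
		return &cd_table->linear.table[ssid];
	l2 = cd_table->l2.l2ptrs[arm_smmu_cdtab_l1_idx(ssid)];
	if (!l2)
		return NULL;
	return &l2->cds[arm_smmu_cdtab_l2_idx(ssid)];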
Tested-by: Nicolin Chen Reviewed-by: Nicolin Chen Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/8-v4-6416877274e1+1af-smmuv3_tidy_jgg@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Shuai Xue --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 115 ++++++++++---------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 24 ++-- 2 files changed, 72 insertions(+), 67 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 108912cf95b3..58bd779f214a 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1234,19 +1234,19 @@ static dma_addr_t arm_smmu_cd_l1_get_desc(const struct arm_smmu_cdtab_l1 *src) struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid) { - struct arm_smmu_l1_ctx_desc *l1_desc; + struct arm_smmu_cdtab_l2 *l2; struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; - if (!cd_table->cdtab) + if (!arm_smmu_cdtab_allocated(cd_table)) return NULL; if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR) - return &((struct arm_smmu_cd *)cd_table->cdtab)[ssid]; + return &cd_table->linear.table[ssid]; - l1_desc = &cd_table->l1_desc[arm_smmu_cdtab_l1_idx(ssid)]; - if (!l1_desc->l2ptr) + l2 = cd_table->l2.l2ptrs[arm_smmu_cdtab_l1_idx(ssid)]; + if (!l2) return NULL; - return &l1_desc->l2ptr->cds[arm_smmu_cdtab_l2_idx(ssid)]; + return &l2->cds[arm_smmu_cdtab_l2_idx(ssid)]; } static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master, @@ -1258,30 +1258,25 @@ static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master, might_sleep(); iommu_group_mutex_assert(master->dev); - if (!cd_table->cdtab) { + if (!arm_smmu_cdtab_allocated(cd_table)) { if (arm_smmu_alloc_cd_tables(master)) return NULL; } if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_64K_L2) { unsigned int idx = arm_smmu_cdtab_l1_idx(ssid); - struct arm_smmu_l1_ctx_desc *l1_desc; + struct arm_smmu_cdtab_l2 **l2ptr = &cd_table->l2.l2ptrs[idx]; - l1_desc = &cd_table->l1_desc[idx]; - if (!l1_desc->l2ptr) { - struct arm_smmu_cdtab_l1 *dst; + if (!*l2ptr) { dma_addr_t l2ptr_dma; - size_t size; - size = CTXDESC_L2_ENTRIES * sizeof(struct arm_smmu_cd); - l1_desc->l2ptr = dma_alloc_coherent(smmu->dev, size, - &l2ptr_dma, - GFP_KERNEL); - if (!l1_desc->l2ptr) + *l2ptr = dma_alloc_coherent(smmu->dev, sizeof(**l2ptr), + &l2ptr_dma, GFP_KERNEL); + if (!*l2ptr) return NULL; - dst = &((struct arm_smmu_cdtab_l1 *)cd_table->cdtab)[idx]; - arm_smmu_write_cd_l1_desc(dst, l2ptr_dma); + arm_smmu_write_cd_l1_desc(&cd_table->l2.l1tab[idx], + l2ptr_dma); /* An invalid L1CD can be cached */ arm_smmu_sync_cd(master, ssid, false); } @@ -1401,7 +1396,7 @@ void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid) struct arm_smmu_cd target = {}; struct arm_smmu_cd *cdptr; - if (!master->cd_table.cdtab) + if (!arm_smmu_cdtab_allocated(&master->cd_table)) return; cdptr = arm_smmu_get_cd_ptr(master, ssid); if (WARN_ON(!cdptr)) @@ -1423,70 +1418,70 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master) if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) || max_contexts <= CTXDESC_L2_ENTRIES) { cd_table->s1fmt = STRTAB_STE_0_S1FMT_LINEAR; - cd_table->num_l1_ents = max_contexts; + cd_table->linear.num_ents = max_contexts; - l1size = max_contexts * sizeof(struct arm_smmu_cd); + l1size = max_contexts * sizeof(struct arm_smmu_cd), + cd_table->linear.table = dma_alloc_coherent(smmu->dev, l1size, + &cd_table->cdtab_dma, + GFP_KERNEL); + if 
(!cd_table->linear.table) + return -ENOMEM; } else { cd_table->s1fmt = STRTAB_STE_0_S1FMT_64K_L2; - cd_table->num_l1_ents = DIV_ROUND_UP(max_contexts, - CTXDESC_L2_ENTRIES); + cd_table->l2.num_l1_ents = + DIV_ROUND_UP(max_contexts, CTXDESC_L2_ENTRIES); - cd_table->l1_desc = kcalloc(cd_table->num_l1_ents, - sizeof(*cd_table->l1_desc), - GFP_KERNEL); - if (!cd_table->l1_desc) + cd_table->l2.l2ptrs = kcalloc(cd_table->l2.num_l1_ents, + sizeof(*cd_table->l2.l2ptrs), + GFP_KERNEL); + if (!cd_table->l2.l2ptrs) return -ENOMEM; - l1size = cd_table->num_l1_ents * sizeof(struct arm_smmu_cdtab_l1); - } - - cd_table->cdtab = dma_alloc_coherent(smmu->dev, l1size, - &cd_table->cdtab_dma, GFP_KERNEL); - if (!cd_table->cdtab) { - dev_warn(smmu->dev, "failed to allocate context descriptor\n"); - ret = -ENOMEM; - goto err_free_l1; + l1size = cd_table->l2.num_l1_ents * sizeof(struct arm_smmu_cdtab_l1); + cd_table->l2.l1tab = dma_alloc_coherent(smmu->dev, l1size, + &cd_table->cdtab_dma, + GFP_KERNEL); + if (!cd_table->l2.l2ptrs) { + ret = -ENOMEM; + goto err_free_l2ptrs; + } } - return 0; -err_free_l1: - if (cd_table->l1_desc) { - kfree(cd_table->l1_desc); - cd_table->l1_desc = NULL; - } +err_free_l2ptrs: + kfree(cd_table->l2.l2ptrs); + cd_table->l2.l2ptrs = NULL; return ret; } static void arm_smmu_free_cd_tables(struct arm_smmu_master *master) { int i; - size_t l1size; struct arm_smmu_device *smmu = master->smmu; struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; - if (cd_table->l1_desc) { - for (i = 0; i < cd_table->num_l1_ents; i++) { - dma_addr_t dma_handle; - - if (!cd_table->l1_desc[i].l2ptr) + if (cd_table->s1fmt != STRTAB_STE_0_S1FMT_LINEAR) { + for (i = 0; i < cd_table->l2.num_l1_ents; i++) { + if (!cd_table->l2.l2ptrs[i]) continue; - dma_handle = arm_smmu_cd_l1_get_desc(&( - (struct arm_smmu_cdtab_l1 *)cd_table->cdtab)[i]); dma_free_coherent(smmu->dev, - sizeof(*cd_table->l1_desc[i].l2ptr), - cd_table->l1_desc[i].l2ptr, - dma_handle); + sizeof(*cd_table->l2.l2ptrs[i]), + cd_table->l2.l2ptrs[i], + arm_smmu_cd_l1_get_desc(&cd_table->l2.l1tab[i])); } - kfree(cd_table->l1_desc); + kfree(cd_table->l2.l2ptrs); - l1size = cd_table->num_l1_ents * sizeof(struct arm_smmu_cdtab_l1); + dma_free_coherent(smmu->dev, + cd_table->l2.num_l1_ents * + sizeof(struct arm_smmu_cdtab_l1), + cd_table->l2.l1tab, cd_table->cdtab_dma); } else { - l1size = cd_table->num_l1_ents * sizeof(struct arm_smmu_cd); + dma_free_coherent(smmu->dev, + cd_table->linear.num_ents * + sizeof(struct arm_smmu_cd), + cd_table->linear.table, cd_table->cdtab_dma); } - - dma_free_coherent(smmu->dev, l1size, cd_table->cdtab, cd_table->cdtab_dma); } /* Stream table manipulation functions */ @@ -3351,7 +3346,7 @@ static void arm_smmu_release_device(struct device *dev) arm_smmu_disable_pasid(master); arm_smmu_remove_master(master); - if (master->cd_table.cdtab) + if (arm_smmu_cdtab_allocated(&master->cd_table)) arm_smmu_free_cd_tables(master); kfree(master); } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 9cda3c9bd5e4..e41fb067d06d 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -634,15 +634,19 @@ struct arm_smmu_ctx_desc { u16 asid; }; -struct arm_smmu_l1_ctx_desc { - struct arm_smmu_cdtab_l2 *l2ptr; -}; - struct arm_smmu_ctx_desc_cfg { - __le64 *cdtab; + union { + struct { + struct arm_smmu_cd *table; + unsigned int num_ents; + } linear; + struct { + struct arm_smmu_cdtab_l1 *l1tab; + struct arm_smmu_cdtab_l2 **l2ptrs; 
+ unsigned int num_l1_ents; + } l2; + }; dma_addr_t cdtab_dma; - struct arm_smmu_l1_ctx_desc *l1_desc; - unsigned int num_l1_ents; unsigned int used_ssids; u8 in_ste; u8 s1fmt; @@ -650,6 +654,12 @@ struct arm_smmu_ctx_desc_cfg { u8 s1cdmax; }; +static inline bool +arm_smmu_cdtab_allocated(struct arm_smmu_ctx_desc_cfg *cfg) +{ + return cfg->linear.table || cfg->l2.l1tab; +} + /* True if the cd table has SSIDS > 0 in use. */ static inline bool arm_smmu_ssids_in_use(struct arm_smmu_ctx_desc_cfg *cd_table) { -- Gitee