diff --git a/MAINTAINERS b/MAINTAINERS
index f329b6ac04cb3b910c18d0d922c46e7a1aba42d1..6046cb477fffbdd6220dfb97944bd0ab7cc0c002 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10998,6 +10998,7 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git
 F:	drivers/iommu/dma-iommu.c
 F:	drivers/iommu/dma-iommu.h
 F:	drivers/iommu/iova.c
+F:	include/linux/iommu-dma.h
 F:	include/linux/iova.h
 
 IOMMU SUBSYSTEM
@@ -21263,6 +21264,7 @@
 M:	Thierry Reding
 R:	Krishna Reddy
 L:	linux-tegra@vger.kernel.org
 S:	Supported
+F:	drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
 F:	drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
 F:	drivers/iommu/tegra*
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index c81183935e97070b3e8cac9f6a24bbd09cf639ed..7fcf3e9b7103051bf3ee06424231a05fad37c832 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -929,7 +929,7 @@ const struct dma_map_ops alpha_pci_ops = {
 	.dma_supported = alpha_pci_supported,
 	.mmap = dma_common_mmap,
 	.get_sgtable = dma_common_get_sgtable,
-	.alloc_pages = dma_common_alloc_pages,
+	.alloc_pages_op = dma_common_alloc_pages,
 	.free_pages = dma_common_free_pages,
 };
 EXPORT_SYMBOL(alpha_pci_ops);
diff --git a/arch/arm/include/asm/dma-iommu.h b/arch/arm/include/asm/dma-iommu.h
index 82ec1ccf1fee4c110881045b7f057fc6caea23b7..2ce4c5683e6d3cfae6116b359e1f09dbd101b400 100644
--- a/arch/arm/include/asm/dma-iommu.h
+++ b/arch/arm/include/asm/dma-iommu.h
@@ -24,7 +24,7 @@ struct dma_iommu_mapping {
 };
 
 struct dma_iommu_mapping *
-arm_iommu_create_mapping(const struct bus_type *bus, dma_addr_t base, u64 size);
+arm_iommu_create_mapping(struct device *dev, dma_addr_t base, u64 size);
 
 void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping);
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 5ac482d7ff94c430c9445ba24c07dc09b1191277..42c035b356d6c9d44639eb94990981d7f89a3ad5 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1535,7 +1535,7 @@ static const struct dma_map_ops iommu_ops = {
 
 /**
  * arm_iommu_create_mapping
- * @bus: pointer to the bus holding the client device (for IOMMU calls)
+ * @dev: pointer to the client device (for IOMMU calls)
  * @base: start address of the valid IO address space
  * @size: maximum size of the valid IO address space
  *
@@ -1547,7 +1547,7 @@ static const struct dma_map_ops iommu_ops = {
  * arm_iommu_attach_device function.
  */
 struct dma_iommu_mapping *
-arm_iommu_create_mapping(const struct bus_type *bus, dma_addr_t base, u64 size)
+arm_iommu_create_mapping(struct device *dev, dma_addr_t base, u64 size)
 {
 	unsigned int bits = size >> PAGE_SHIFT;
 	unsigned int bitmap_size = BITS_TO_LONGS(bits) * sizeof(long);
@@ -1588,7 +1588,7 @@ arm_iommu_create_mapping(const struct bus_type *bus, dma_addr_t base, u64 size)
 
 	spin_lock_init(&mapping->lock);
 
-	mapping->domain = iommu_domain_alloc(bus);
+	mapping->domain = iommu_domain_alloc(dev->bus);
 	if (!mapping->domain)
 		goto err4;
@@ -1721,7 +1721,7 @@ static void arm_setup_iommu_dma_ops(struct device *dev)
 		dma_base = dma_range_map_min(dev->dma_range_map);
 		size = dma_range_map_max(dev->dma_range_map) - dma_base;
 	}
-	mapping = arm_iommu_create_mapping(dev->bus, dma_base, size);
+	mapping = arm_iommu_create_mapping(dev, dma_base, size);
 	if (IS_ERR(mapping)) {
 		pr_warn("Failed to create %llu-byte IOMMU mapping for device %s\n",
 			size, dev_name(dev));
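
A minimal sketch of how an ARM_DMA_USE_IOMMU caller migrates to the new
signature. The probe function and address range are hypothetical; only the
arm_iommu_*() calls come from this patch:

    static int example_probe(struct platform_device *pdev)
    {
            struct dma_iommu_mapping *mapping;

            /* before: arm_iommu_create_mapping(&platform_bus_type, base, size) */
            mapping = arm_iommu_create_mapping(&pdev->dev, 0x40000000, SZ_256M);
            if (IS_ERR(mapping))
                    return PTR_ERR(mapping);

            if (arm_iommu_attach_device(&pdev->dev, mapping)) {
                    arm_iommu_release_mapping(mapping);
                    return -ENODEV;
            }
            return 0;
    }
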
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 1463dc657a98114a22ea8f6493daba1b6af28bfd..8a0f8604348b877e6cccd08a833638c50fa39ae7 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -32,10 +32,6 @@
 #include
 #include
 
-#ifdef CONFIG_PSWIOTLB
-#include
-#endif
-
 #include
 #include
 #include
@@ -502,13 +498,6 @@ void __init mem_init(void)
 
 	swiotlb_init(swiotlb, SWIOTLB_VERBOSE);
 
-#ifdef CONFIG_PSWIOTLB
-	/* enable pswiotlb default */
-	if ((pswiotlb_force_disable != true) &&
-	    is_phytium_ps_socs())
-		pswiotlb_init(1, PSWIOTLB_VERBOSE);
-#endif
-
 	/* this will put all unused low memory onto the freelists */
 	memblock_free_all();
diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c
index eabddb89d221fe8dcab6ea9a0170afaf666ef697..c97b089b99029d154d7acfeb8d6a579a20c232ff 100644
--- a/arch/mips/jazz/jazzdma.c
+++ b/arch/mips/jazz/jazzdma.c
@@ -617,7 +617,7 @@ const struct dma_map_ops jazz_dma_ops = {
 	.sync_sg_for_device = jazz_dma_sync_sg_for_device,
 	.mmap = dma_common_mmap,
 	.get_sgtable = dma_common_get_sgtable,
-	.alloc_pages = dma_common_alloc_pages,
+	.alloc_pages_op = dma_common_alloc_pages,
 	.free_pages = dma_common_free_pages,
 };
 EXPORT_SYMBOL(jazz_dma_ops);
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index 8920862ffd7916ef3afcde6714a1676ae6a1f647..f0ae39e77e374143599d67702b4e9a8e12b60f1f 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -216,6 +216,6 @@ const struct dma_map_ops dma_iommu_ops = {
 	.get_required_mask = dma_iommu_get_required_mask,
 	.mmap = dma_common_mmap,
 	.get_sgtable = dma_common_get_sgtable,
-	.alloc_pages = dma_common_alloc_pages,
+	.alloc_pages_op = dma_common_alloc_pages,
 	.free_pages = dma_common_free_pages,
 };
diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c
index d6b5f5ecd5152bddf6050ac63d1dc9b3bf34e61f..56dc6b29a3e7699a2695dfc23f0d07b50e6205a8 100644
--- a/arch/powerpc/platforms/ps3/system-bus.c
+++ b/arch/powerpc/platforms/ps3/system-bus.c
@@ -695,7 +695,7 @@ static const struct dma_map_ops ps3_sb_dma_ops = {
 	.unmap_page = ps3_unmap_page,
 	.mmap = dma_common_mmap,
 	.get_sgtable = dma_common_get_sgtable,
-	.alloc_pages = dma_common_alloc_pages,
+	.alloc_pages_op = dma_common_alloc_pages,
 	.free_pages = dma_common_free_pages,
 };
 
@@ -709,7 +709,7 @@ static const struct dma_map_ops ps3_ioc0_dma_ops = {
 	.unmap_page = ps3_unmap_page,
 	.mmap = dma_common_mmap,
 	.get_sgtable = dma_common_get_sgtable,
-	.alloc_pages = dma_common_alloc_pages,
+	.alloc_pages_op = dma_common_alloc_pages,
 	.free_pages = dma_common_free_pages,
 };
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
index 2dc9cbc4bcd8fea42fc12e19a93880dcde3edafb..0c90fc4c379631d671c742664d347bb638796865 100644
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -611,7 +611,7 @@ static const struct dma_map_ops vio_dma_mapping_ops = {
 	.get_required_mask = dma_iommu_get_required_mask,
 	.mmap = dma_common_mmap,
 	.get_sgtable = dma_common_get_sgtable,
-	.alloc_pages = dma_common_alloc_pages,
+	.alloc_pages_op = dma_common_alloc_pages,
 	.free_pages = dma_common_free_pages,
 };
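
All of the hunks above are the same mechanical rename of the dma_map_ops
member, presumably to avoid colliding with the alloc_pages macro (an
assumption, not stated in this diff). A minimal sketch of an ops table after
the rename; the example_* callbacks are hypothetical:

    static const struct dma_map_ops example_dma_ops = {
            .map_page       = example_map_page,     /* hypothetical */
            .unmap_page     = example_unmap_page,   /* hypothetical */
            .mmap           = dma_common_mmap,
            .get_sgtable    = dma_common_get_sgtable,
            .alloc_pages_op = dma_common_alloc_pages, /* was .alloc_pages */
            .free_pages     = dma_common_free_pages,
    };
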
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index a2258c894244a8369bf9e0fe9b9cfb2dc9bfb9e5..ffe5a04e9de4dfe1eb927a218e9d2e5710249a0c 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -17,6 +17,11 @@
 
 #define ARCH_APICTIMER_STOPS_ON_C3	1
 
+/* Macros for apic_extnmi which controls external NMI masking */
+#define APIC_EXTNMI_BSP		0 /* Default */
+#define APIC_EXTNMI_ALL		1
+#define APIC_EXTNMI_NONE	2
+
 /*
  * Debugging macros
  */
@@ -24,22 +29,22 @@
 #define APIC_VERBOSE 1
 #define APIC_DEBUG   2
 
-/* Macros for apic_extnmi which controls external NMI masking */
-#define APIC_EXTNMI_BSP		0 /* Default */
-#define APIC_EXTNMI_ALL		1
-#define APIC_EXTNMI_NONE	2
-
 /*
- * Define the default level of output to be very little
- * This can be turned up by using apic=verbose for more
- * information and apic=debug for _lots_ of information.
- * apic_verbosity is defined in apic.c
+ * Define the default level of output to be very little. This can be turned
+ * up by using apic=verbose for more information and apic=debug for _lots_
+ * of information. apic_verbosity is defined in apic.c
  */
-#define apic_printk(v, s, a...) do {       \
-		if ((v) <= apic_verbosity) \
-			printk(s, ##a);    \
-	} while (0)
-
+#define apic_printk(v, s, a...)			\
+do {						\
+	if ((v) <= apic_verbosity)		\
+		printk(s, ##a);			\
+} while (0)
+
+#define apic_pr_verbose(s, a...)	apic_printk(APIC_VERBOSE, KERN_INFO s, ##a)
+#define apic_pr_debug(s, a...)		apic_printk(APIC_DEBUG, KERN_DEBUG s, ##a)
+#define apic_pr_debug_cont(s, a...)	apic_printk(APIC_DEBUG, KERN_CONT s, ##a)
+/* Unconditional debug prints for code which is guarded by apic_verbosity already */
+#define apic_dbg(s, a...)		printk(KERN_DEBUG s, ##a)
 
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
 extern void x86_32_probe_apic(void);
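
Hypothetical call sites showing how the new shorthand macros relate to the
open-coded form they wrap:

    static void example_report(unsigned int id, unsigned int vector)
    {
            /* the old open-coded form and the new shorthand are equivalent */
            apic_printk(APIC_VERBOSE, KERN_INFO "mapping APIC id %u\n", id);
            apic_pr_verbose("mapping APIC id %u\n", id);

            /* continuing a debug line with KERN_CONT */
            apic_pr_debug("routing entry:");
            apic_pr_debug_cont(" vector=0x%02x\n", vector);
    }
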
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index 56a917df410d30b49ed3dedc709b32f8c34decfd..842a0ec5eaa9e32b492c926b24aa72d221d0f878 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -676,7 +676,7 @@ static const struct dma_map_ops gart_dma_ops = {
 	.get_sgtable = dma_common_get_sgtable,
 	.dma_supported = dma_direct_supported,
 	.get_required_mask = dma_direct_get_required_mask,
-	.alloc_pages = dma_direct_alloc_pages,
+	.alloc_pages_op = dma_direct_alloc_pages,
 	.free_pages = dma_direct_free_pages,
 };
diff --git a/drivers/base/core.c b/drivers/base/core.c
index a31a7997b7c41716175f3fa879af1729bddf8d13..7327871344164f2f09ef151d77864809ccadc2d1 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -32,10 +32,6 @@
 #include
 #include /* for dma_default_coherent */
 
-#ifdef CONFIG_PSWIOTLB
-#include
-#endif
-
 #include "base.h"
 #include "physical_location.h"
 #include "power/power.h"
@@ -3173,11 +3169,6 @@ void device_initialize(struct device *dev)
 	dev->dma_coherent = dma_default_coherent;
 #endif
 	swiotlb_dev_init(dev);
-#ifdef CONFIG_PSWIOTLB
-	if ((pswiotlb_force_disable != true) &&
-	    is_phytium_ps_socs())
-		pswiotlb_dev_init(dev);
-#endif
 }
 EXPORT_SYMBOL_GPL(device_initialize);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dma.c b/drivers/gpu/drm/exynos/exynos_drm_dma.c
index e2c7373f20c6b791f9706c1dad0758f363c63073..6a6761935224ceb3aeb30649e5fc433199bbd955 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dma.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dma.c
@@ -110,7 +110,7 @@ int exynos_drm_register_dma(struct drm_device *drm, struct device *dev,
 	void *mapping = NULL;
 
 	if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU))
-		mapping = arm_iommu_create_mapping(&platform_bus_type,
+		mapping = arm_iommu_create_mapping(dev,
 			EXYNOS_DEV_ADDR_START, EXYNOS_DEV_ADDR_SIZE);
 	else if (IS_ENABLED(CONFIG_IOMMU_DMA))
 		mapping = iommu_get_domain_for_dev(priv->dma_dev);
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 2d92a58c76e2950ac6f0da7047adb80ea0b2cebb..f7432745d9bbd3248eb350bd8f2e8d7939cd2d75 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -151,7 +151,7 @@ config OF_IOMMU
 # IOMMU-agnostic DMA-mapping layer
 config IOMMU_DMA
 	def_bool ARM64 || IA64 || X86 || LOONGARCH || S390
-	select DMA_OPS
+	select DMA_OPS_HELPERS
 	select IOMMU_API
 	select IOMMU_IOVA
 	select IRQ_MSI_IOMMU
@@ -425,6 +425,17 @@ config ARM_SMMU_V3_KUNIT_TEST
 	  Enable this option to unit-test arm-smmu-v3 driver functions.
 
 	  If unsure, say N.
+
+config TEGRA241_CMDQV
+	bool "NVIDIA Tegra241 CMDQ-V extension support for ARM SMMUv3"
+	depends on ACPI
+	help
+	  Support for NVIDIA CMDQ-Virtualization extension for ARM SMMUv3. The
+	  CMDQ-V extension is similar to v3.3 ECMDQ for multi command queues
+	  support, except with virtualization capabilities.
+
+	  Say Y here if your system is NVIDIA Tegra241 (Grace) or it has the same
+	  CMDQ-V extension.
 
 endif
 
 config S390_IOMMU
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 4abfab24585d2c06f4bc36e253a02583cca76c14..fa0af030d8b3ff5fbe712568996b13e8a9869448 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -45,7 +45,7 @@ extern enum io_pgtable_fmt amd_iommu_pgtable;
 extern int amd_iommu_gpt_level;
 
 /* Protection domain ops */
-struct protection_domain *protection_domain_alloc(unsigned int type);
+struct protection_domain *protection_domain_alloc(unsigned int type, int nid);
 void protection_domain_free(struct protection_domain *domain);
 struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev,
 						struct mm_struct *mm);
@@ -87,14 +87,10 @@ int amd_iommu_complete_ppr(struct device *dev, u32 pasid, int status, int tag);
 void amd_iommu_flush_all_caches(struct amd_iommu *iommu);
 void amd_iommu_update_and_flush_device_table(struct protection_domain *domain);
 void amd_iommu_domain_update(struct protection_domain *domain);
-void amd_iommu_dev_update_dte(struct iommu_dev_data *dev_data, bool set);
-void amd_iommu_domain_flush_complete(struct protection_domain *domain);
 void amd_iommu_domain_flush_pages(struct protection_domain *domain,
 				  u64 address, size_t size);
 void amd_iommu_dev_flush_pasid_pages(struct iommu_dev_data *dev_data,
 				     ioasid_t pasid, u64 address, size_t size);
-void amd_iommu_dev_flush_pasid_all(struct iommu_dev_data *dev_data,
-				   ioasid_t pasid);
 
 #ifdef CONFIG_IRQ_REMAP
 int amd_iommu_create_irq_domain(struct amd_iommu *iommu);
@@ -121,11 +117,6 @@ static inline bool check_feature2(u64 mask)
 	return (amd_iommu_efr2 & mask);
 }
 
-static inline int check_feature_gpt_level(void)
-{
-	return ((amd_iommu_efr >> FEATURE_GATS_SHIFT) & FEATURE_GATS_MASK);
-}
-
 static inline bool amd_iommu_gt_ppr_supported(void)
 {
 	return (check_feature(FEATURE_GT) &&
@@ -143,19 +134,6 @@ static inline void *iommu_phys_to_virt(unsigned long paddr)
 	return phys_to_virt(__sme_clr(paddr));
 }
 
-static inline
-void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root)
-{
-	domain->iop.root = (u64 *)(root & PAGE_MASK);
-	domain->iop.mode = root & 7; /* lowest 3 bits encode pgtable mode */
-}
-
-static inline
-void amd_iommu_domain_clr_pt_root(struct protection_domain *domain)
-{
-	amd_iommu_domain_set_pt_root(domain, 0);
-}
-
 static inline int get_pci_sbdf_id(struct pci_dev *pdev)
 {
 	int seg = pci_domain_nr(pdev->bus);
@@ -185,7 +163,6 @@ static inline struct protection_domain *to_pdomain(struct iommu_domain *dom)
 }
 
 bool translation_pre_enabled(struct amd_iommu *iommu);
-bool amd_iommu_is_attach_deferred(struct device *dev);
 int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line);
 
 #ifdef CONFIG_DMI
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 2b76b5dedc1d9bd324c9abd69451b3cafa280378..35aa4ff020f506e8c750b830a3276330d0031439 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -8,6 +8,7 @@
 #ifndef _ASM_X86_AMD_IOMMU_TYPES_H
 #define _ASM_X86_AMD_IOMMU_TYPES_H
 
+#include
 #include
 #include
 #include
@@ -95,26 +96,21 @@
 #define FEATURE_GA		BIT_ULL(7)
 #define FEATURE_HE		BIT_ULL(8)
 #define FEATURE_PC		BIT_ULL(9)
-#define FEATURE_GATS_SHIFT	(12)
-#define FEATURE_GATS_MASK	(3ULL)
+#define FEATURE_GATS		GENMASK_ULL(13, 12)
+#define FEATURE_GLX		GENMASK_ULL(15, 14)
 #define FEATURE_GAM_VAPIC	BIT_ULL(21)
+#define FEATURE_PASMAX		GENMASK_ULL(36, 32)
 #define FEATURE_GIOSUP		BIT_ULL(48)
 #define FEATURE_HASUP		BIT_ULL(49)
 #define FEATURE_EPHSUP		BIT_ULL(50)
 #define FEATURE_HDSUP		BIT_ULL(52)
 #define FEATURE_SNP		BIT_ULL(63)
 
-#define FEATURE_PASID_SHIFT	32
-#define FEATURE_PASID_MASK	(0x1fULL << FEATURE_PASID_SHIFT)
-
-#define FEATURE_GLXVAL_SHIFT	14
-#define FEATURE_GLXVAL_MASK	(0x03ULL << FEATURE_GLXVAL_SHIFT)
 
 /* Extended Feature 2 Bits */
-#define FEATURE_SNPAVICSUP_SHIFT	5
-#define FEATURE_SNPAVICSUP_MASK		(0x07ULL << FEATURE_SNPAVICSUP_SHIFT)
+#define FEATURE_SNPAVICSUP	GENMASK_ULL(7, 5)
 #define FEATURE_SNPAVICSUP_GAM(x) \
-	((x & FEATURE_SNPAVICSUP_MASK) >> FEATURE_SNPAVICSUP_SHIFT == 0x1)
+	(FIELD_GET(FEATURE_SNPAVICSUP, x) == 0x1)
 
 /* Note:
  * The current driver only support 16-bit PASID.
@@ -294,8 +290,9 @@
  * that we support.
  *
  * 512GB Pages are not supported due to a hardware bug
+ * Page sizes >= the 52 bit max physical address of the CPU are not supported.
  */
-#define AMD_IOMMU_PGSIZES	((~0xFFFUL) & ~(2ULL << 38))
+#define AMD_IOMMU_PGSIZES	(GENMASK_ULL(51, 12) ^ SZ_512G)
 
 /* 4K, 2MB, 1G page sizes are supported */
 #define AMD_IOMMU_PGSIZES_V2	(PAGE_SIZE | (1ULL << 21) | (1ULL << 30))
@@ -419,10 +416,6 @@
 #define DTE_GCR3_VAL_B(x)	(((x) >> 15) & 0x0ffffULL)
 #define DTE_GCR3_VAL_C(x)	(((x) >> 31) & 0x1fffffULL)
 
-#define DTE_GCR3_INDEX_A	0
-#define DTE_GCR3_INDEX_B	1
-#define DTE_GCR3_INDEX_C	1
-
 #define DTE_GCR3_SHIFT_A	58
 #define DTE_GCR3_SHIFT_B	16
 #define DTE_GCR3_SHIFT_C	43
@@ -527,7 +520,7 @@ struct amd_irte_ops;
 #define AMD_IOMMU_FLAG_TRANS_PRE_ENABLED	(1 << 0)
 
 #define io_pgtable_to_data(x) \
-	container_of((x), struct amd_io_pgtable, iop)
+	container_of((x), struct amd_io_pgtable, pgtbl)
 
 #define io_pgtable_ops_to_data(x) \
 	io_pgtable_to_data(io_pgtable_ops_to_pgtable(x))
@@ -537,7 +530,7 @@ struct amd_irte_ops;
 		     struct protection_domain, iop)
 
 #define io_pgtable_cfg_to_data(x) \
-	container_of((x), struct amd_io_pgtable, pgtbl_cfg)
+	container_of((x), struct amd_io_pgtable, pgtbl.cfg)
 
 struct gcr3_tbl_info {
 	u64	*gcr3_tbl;	/* Guest CR3 table */
@@ -547,8 +540,7 @@ struct gcr3_tbl_info {
 };
 
 struct amd_io_pgtable {
-	struct io_pgtable_cfg	pgtbl_cfg;
-	struct io_pgtable	iop;
+	struct io_pgtable	pgtbl;
 	int			mode;
 	u64			*root;
 	u64			*pgd;	/* v2 pgtable pgd pointer */
@@ -580,7 +572,6 @@ struct protection_domain {
 	struct amd_io_pgtable iop;
 	spinlock_t lock;	/* mostly used to lock the page table*/
 	u16 id;			/* the domain id written to the device table */
-	int nid;		/* Node ID */
 	enum protection_domain_mode pd_mode; /* Track page table type */
 	bool dirty_tracking;	/* dirty tracking is enabled in the domain */
 	unsigned dev_cnt;	/* devices assigned to this domain */
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index d7347e19aabb72cccacadcc4ad6d96d2f7c1c5bf..d161de2dd740a8290cf60b49d81a25d7014886ed 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -2041,14 +2041,12 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
 		int glxval;
 		u64 pasmax;
 
-		pasmax = amd_iommu_efr & FEATURE_PASID_MASK;
-		pasmax >>= FEATURE_PASID_SHIFT;
+		pasmax = FIELD_GET(FEATURE_PASMAX, amd_iommu_efr);
 		iommu->iommu.max_pasids = (1 << (pasmax + 1)) - 1;
 
 		BUG_ON(iommu->iommu.max_pasids & ~PASID_MASK);
 
-		glxval = amd_iommu_efr & FEATURE_GLXVAL_MASK;
-		glxval >>= FEATURE_GLXVAL_SHIFT;
+		glxval = FIELD_GET(FEATURE_GLX, amd_iommu_efr);
 
 		if (amd_iommu_max_glx_val == -1)
 			amd_iommu_max_glx_val = glxval;
@@ -3095,7 +3093,7 @@ static int __init early_amd_iommu_init(void)
 
 	/* 5 level guest page table */
 	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
-	    check_feature_gpt_level() == GUEST_PGTABLE_5_LEVEL)
+	    FIELD_GET(FEATURE_GATS, amd_iommu_efr) == GUEST_PGTABLE_5_LEVEL)
 		amd_iommu_gpt_level = PAGE_MODE_5_LEVEL;
 
 	/* Disable any previously enabled IOMMUs */
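
An illustration (not from the patch) of the GENMASK_ULL()/FIELD_GET() pattern
that replaces the open-coded shift-and-mask pairs above; the EXAMPLE_ name is
hypothetical:

    #include <linux/bitfield.h>
    #include <linux/bits.h>

    #define EXAMPLE_GATS	GENMASK_ULL(13, 12)	/* 2-bit field at bits 13:12 */

    static inline u64 example_gats(u64 efr)
    {
            /*
             * FIELD_GET() masks and then shifts right by the mask's lowest
             * set bit, so this equals the removed (efr >> 12) & 0x3.
             */
            return FIELD_GET(EXAMPLE_GATS, efr);
    }
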
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 9d9a7fde59e75dcbdbff9fecd5cda23fc9c3faf7..14f62c420e4a9c9e034635121eaf4abd3ff35071 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -24,27 +24,6 @@
 #include "amd_iommu.h"
 #include "../iommu-pages.h"
 
-static void v1_tlb_flush_all(void *cookie)
-{
-}
-
-static void v1_tlb_flush_walk(unsigned long iova, size_t size,
-			      size_t granule, void *cookie)
-{
-}
-
-static void v1_tlb_add_page(struct iommu_iotlb_gather *gather,
-			    unsigned long iova, size_t granule,
-			    void *cookie)
-{
-}
-
-static const struct iommu_flush_ops v1_flush_ops = {
-	.tlb_flush_all	= v1_tlb_flush_all,
-	.tlb_flush_walk = v1_tlb_flush_walk,
-	.tlb_add_page	= v1_tlb_add_page,
-};
-
 /*
  * Helper function to get the first pte of a large mapping
  */
@@ -132,56 +111,40 @@ static void free_sub_pt(u64 *root, int mode, struct list_head *freelist)
 	}
 }
 
-void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
-				  u64 *root, int mode)
-{
-	u64 pt_root;
-
-	/* lowest 3 bits encode pgtable mode */
-	pt_root = mode & 7;
-	pt_root |= (u64)root;
-
-	amd_iommu_domain_set_pt_root(domain, pt_root);
-}
-
 /*
  * This function is used to add another level to an IO page table. Adding
  * another level increases the size of the address space by 9 bits to a size up
  * to 64 bits.
  */
-static bool increase_address_space(struct protection_domain *domain,
+static bool increase_address_space(struct amd_io_pgtable *pgtable,
 				   unsigned long address,
 				   gfp_t gfp)
 {
+	struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
+	struct protection_domain *domain =
+		container_of(pgtable, struct protection_domain, iop);
 	unsigned long flags;
 	bool ret = true;
 	u64 *pte;
 
-	pte = iommu_alloc_page_node(domain->nid, gfp);
+	pte = iommu_alloc_page_node(cfg->amd.nid, gfp);
 	if (!pte)
 		return false;
 
 	spin_lock_irqsave(&domain->lock, flags);
 
-	if (address <= PM_LEVEL_SIZE(domain->iop.mode))
+	if (address <= PM_LEVEL_SIZE(pgtable->mode))
 		goto out;
 
 	ret = false;
-	if (WARN_ON_ONCE(domain->iop.mode == PAGE_MODE_6_LEVEL))
+	if (WARN_ON_ONCE(pgtable->mode == PAGE_MODE_6_LEVEL))
 		goto out;
 
-	*pte = PM_LEVEL_PDE(domain->iop.mode, iommu_virt_to_phys(domain->iop.root));
+	*pte = PM_LEVEL_PDE(pgtable->mode, iommu_virt_to_phys(pgtable->root));
 
-	domain->iop.root = pte;
-	domain->iop.mode += 1;
+	pgtable->root = pte;
+	pgtable->mode += 1;
 
 	amd_iommu_update_and_flush_device_table(domain);
-	amd_iommu_domain_flush_complete(domain);
-
-	/*
-	 * Device Table needs to be updated and flushed before the new root can
-	 * be published.
-	 */
-	amd_iommu_domain_set_pgtable(domain, pte, domain->iop.mode);
 
 	pte = NULL;
 	ret = true;
@@ -193,30 +156,31 @@ static bool increase_address_space(struct protection_domain *domain,
 	return ret;
 }
 
-static u64 *alloc_pte(struct protection_domain *domain,
+static u64 *alloc_pte(struct amd_io_pgtable *pgtable,
 		      unsigned long address,
 		      unsigned long page_size,
 		      u64 **pte_page,
 		      gfp_t gfp,
 		      bool *updated)
 {
+	struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
 	int level, end_lvl;
 	u64 *pte, *page;
 
 	BUG_ON(!is_power_of_2(page_size));
 
-	while (address > PM_LEVEL_SIZE(domain->iop.mode)) {
+	while (address > PM_LEVEL_SIZE(pgtable->mode)) {
 		/*
 		 * Return an error if there is no memory to update the
 		 * page-table.
 		 */
-		if (!increase_address_space(domain, address, gfp))
+		if (!increase_address_space(pgtable, address, gfp))
 			return NULL;
 	}
 
-	level = domain->iop.mode - 1;
-	pte   = &domain->iop.root[PM_LEVEL_INDEX(level, address)];
+	level = pgtable->mode - 1;
+	pte   = &pgtable->root[PM_LEVEL_INDEX(level, address)];
 	address = PAGE_SIZE_ALIGN(address, page_size);
 	end_lvl = PAGE_SIZE_LEVEL(page_size);
@@ -251,7 +215,7 @@ static u64 *alloc_pte(struct amd_io_pgtable *pgtable,
 
 		if (!IOMMU_PTE_PRESENT(__pte) ||
 		    pte_level == PAGE_MODE_NONE) {
-			page = iommu_alloc_page_node(domain->nid, gfp);
+			page = iommu_alloc_page_node(cfg->amd.nid, gfp);
 
 			if (!page)
 				return NULL;
@@ -365,7 +329,7 @@ static int iommu_v1_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
 			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
 			      int prot, gfp_t gfp, size_t *mapped)
 {
-	struct protection_domain *dom = io_pgtable_ops_to_domain(ops);
+	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
 	LIST_HEAD(freelist);
 	bool updated = false;
 	u64 __pte, *pte;
@@ -382,7 +346,7 @@ static int iommu_v1_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
 
 	while (pgcount > 0) {
 		count = PAGE_SIZE_PTE_COUNT(pgsize);
-		pte = alloc_pte(dom, iova, pgsize, NULL, gfp, &updated);
+		pte = alloc_pte(pgtable, iova, pgsize, NULL, gfp, &updated);
 
 		ret = -ENOMEM;
 		if (!pte)
@@ -419,6 +383,7 @@ static int iommu_v1_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
 out:
 	if (updated) {
+		struct protection_domain *dom = io_pgtable_ops_to_domain(ops);
 		unsigned long flags;
 
 		spin_lock_irqsave(&dom->lock, flags);
@@ -560,27 +525,17 @@ static int iommu_v1_read_and_clear_dirty(struct io_pgtable_ops *ops,
  */
 static void v1_free_pgtable(struct io_pgtable *iop)
 {
-	struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop);
-	struct protection_domain *dom;
+	struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, pgtbl);
 	LIST_HEAD(freelist);
 
 	if (pgtable->mode == PAGE_MODE_NONE)
 		return;
 
-	dom = container_of(pgtable, struct protection_domain, iop);
-
 	/* Page-table is not visible to IOMMU anymore, so free it */
 	BUG_ON(pgtable->mode < PAGE_MODE_NONE ||
 	       pgtable->mode > PAGE_MODE_6_LEVEL);
 
 	free_sub_pt(pgtable->root, pgtable->mode, &freelist);
-
-	/* Update data structure */
-	amd_iommu_domain_clr_pt_root(dom);
-
-	/* Make changes visible to IOMMUs */
-	amd_iommu_domain_update(dom);
-
 	iommu_put_pages_list(&freelist);
 }
 
@@ -588,17 +543,21 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
 {
 	struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);
 
-	cfg->pgsize_bitmap  = AMD_IOMMU_PGSIZES,
-	cfg->ias            = IOMMU_IN_ADDR_BIT_SIZE,
-	cfg->oas            = IOMMU_OUT_ADDR_BIT_SIZE,
-	cfg->tlb            = &v1_flush_ops;
+	pgtable->root = iommu_alloc_page_node(cfg->amd.nid, GFP_KERNEL);
+	if (!pgtable->root)
+		return NULL;
+	pgtable->mode = PAGE_MODE_3_LEVEL;
+
+	cfg->pgsize_bitmap  = AMD_IOMMU_PGSIZES;
+	cfg->ias            = IOMMU_IN_ADDR_BIT_SIZE;
+	cfg->oas            = IOMMU_OUT_ADDR_BIT_SIZE;
 
-	pgtable->iop.ops.map_pages    = iommu_v1_map_pages;
-	pgtable->iop.ops.unmap_pages  = iommu_v1_unmap_pages;
-	pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys;
-	pgtable->iop.ops.read_and_clear_dirty = iommu_v1_read_and_clear_dirty;
+	pgtable->pgtbl.ops.map_pages    = iommu_v1_map_pages;
+	pgtable->pgtbl.ops.unmap_pages  = iommu_v1_unmap_pages;
+	pgtable->pgtbl.ops.iova_to_phys = iommu_v1_iova_to_phys;
+	pgtable->pgtbl.ops.read_and_clear_dirty = iommu_v1_read_and_clear_dirty;
 
-	return &pgtable->iop;
+	return &pgtable->pgtbl;
 }
 
 struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns = {
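
A simplified sketch (with stand-in types, not the driver's real definitions)
of how the single embedded "pgtbl" member, which replaces the separate
pgtbl_cfg/iop pair above, lets an ops pointer be mapped back to the
containing structure with two container_of() steps:

    #include <linux/container_of.h>
    #include <linux/types.h>

    struct example_io_pgtable {                 /* stand-in for io_pgtable */
            struct example_cfg { int nid; } cfg;
            struct example_ops { void *priv; } ops;
    };

    struct example_amd_io_pgtable {             /* stand-in for amd_io_pgtable */
            struct example_io_pgtable pgtbl;    /* single embedded object */
            int mode;
            u64 *root;
    };

    static struct example_amd_io_pgtable *
    example_ops_to_data(struct example_ops *ops)
    {
            /* ops -> io_pgtable -> amd_io_pgtable */
            struct example_io_pgtable *iop =
                    container_of(ops, struct example_io_pgtable, ops);

            return container_of(iop, struct example_amd_io_pgtable, pgtbl);
    }
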
diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c
index 3965da0376ce9259ff8810d23f398e760cd8d6d0..0ee4f45ec14ec95442ed83cfd03c62695569a961 100644
--- a/drivers/iommu/amd/io_pgtable_v2.c
+++ b/drivers/iommu/amd/io_pgtable_v2.c
@@ -233,8 +233,8 @@ static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
 			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
 			      int prot, gfp_t gfp, size_t *mapped)
 {
-	struct protection_domain *pdom = io_pgtable_ops_to_domain(ops);
-	struct io_pgtable_cfg *cfg = &pdom->iop.iop.cfg;
+	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
+	struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
 	u64 *pte;
 	unsigned long map_size;
 	unsigned long mapped_size = 0;
@@ -251,7 +251,7 @@ static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
 
 	while (mapped_size < size) {
 		map_size = get_alloc_page_size(pgsize);
-		pte = v2_alloc_pte(pdom->nid, pdom->iop.pgd,
+		pte = v2_alloc_pte(cfg->amd.nid, pgtable->pgd,
 				   iova, map_size, gfp, &updated);
 		if (!pte) {
 			ret = -ENOMEM;
@@ -266,8 +266,11 @@ static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
 	}
 
 out:
-	if (updated)
+	if (updated) {
+		struct protection_domain *pdom = io_pgtable_ops_to_domain(ops);
+
 		amd_iommu_domain_flush_pages(pdom, o_iova, size);
+	}
 
 	if (mapped)
 		*mapped += mapped_size;
@@ -281,7 +284,7 @@ static unsigned long iommu_v2_unmap_pages(struct io_pgtable_ops *ops,
 					  struct iommu_iotlb_gather *gather)
 {
 	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
-	struct io_pgtable_cfg *cfg = &pgtable->iop.cfg;
+	struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
 	unsigned long unmap_size;
 	unsigned long unmapped = 0;
 	size_t size = pgcount << __ffs(pgsize);
@@ -323,30 +326,9 @@ static phys_addr_t iommu_v2_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova)
 /*
  * ----------------------------------------------------
  */
-static void v2_tlb_flush_all(void *cookie)
-{
-}
-
-static void v2_tlb_flush_walk(unsigned long iova, size_t size,
-			      size_t granule, void *cookie)
-{
-}
-
-static void v2_tlb_add_page(struct iommu_iotlb_gather *gather,
-			    unsigned long iova, size_t granule,
-			    void *cookie)
-{
-}
-
-static const struct iommu_flush_ops v2_flush_ops = {
-	.tlb_flush_all	= v2_tlb_flush_all,
-	.tlb_flush_walk = v2_tlb_flush_walk,
-	.tlb_add_page	= v2_tlb_add_page,
-};
-
 static void v2_free_pgtable(struct io_pgtable *iop)
 {
-	struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop);
+	struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, pgtbl);
 
 	if (!pgtable || !pgtable->pgd)
 		return;
@@ -359,26 +341,24 @@ static void v2_free_pgtable(struct io_pgtable *iop)
 static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
 {
 	struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);
-	struct protection_domain *pdom = (struct protection_domain *)cookie;
 	int ias = IOMMU_IN_ADDR_BIT_SIZE;
 
-	pgtable->pgd = iommu_alloc_page_node(pdom->nid, GFP_ATOMIC);
+	pgtable->pgd = iommu_alloc_page_node(cfg->amd.nid, GFP_KERNEL);
 	if (!pgtable->pgd)
 		return NULL;
 
 	if (get_pgtable_level() == PAGE_MODE_5_LEVEL)
 		ias = 57;
 
-	pgtable->iop.ops.map_pages    = iommu_v2_map_pages;
-	pgtable->iop.ops.unmap_pages  = iommu_v2_unmap_pages;
-	pgtable->iop.ops.iova_to_phys = iommu_v2_iova_to_phys;
+	pgtable->pgtbl.ops.map_pages    = iommu_v2_map_pages;
+	pgtable->pgtbl.ops.unmap_pages  = iommu_v2_unmap_pages;
+	pgtable->pgtbl.ops.iova_to_phys = iommu_v2_iova_to_phys;
-	cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES_V2,
-	cfg->ias           = ias,
-	cfg->oas           = IOMMU_OUT_ADDR_BIT_SIZE,
-	cfg->tlb           = &v2_flush_ops;
+	cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES_V2;
+	cfg->ias           = ias;
+	cfg->oas           = IOMMU_OUT_ADDR_BIT_SIZE;
 
-	return &pgtable->iop;
+	return &pgtable->pgtbl;
 }
 
 struct io_pgtable_init_fns io_pgtable_amd_iommu_v2_init_fns = {
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 79c18b3978663c8f70f3abef9618bbcd3dc740a5..d7a141f3dbdf9c3f02a1ff7a5d7f16870deef0b1 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -52,8 +52,6 @@
 #define HT_RANGE_START		(0xfd00000000ULL)
 #define HT_RANGE_END		(0xffffffffffULL)
 
-#define DEFAULT_PGTABLE_LEVEL	PAGE_MODE_3_LEVEL
-
 static DEFINE_SPINLOCK(pd_bitmap_lock);
 
 LIST_HEAD(ioapic_map);
@@ -825,10 +823,12 @@ static void iommu_poll_events(struct amd_iommu *iommu)
 
 	while (head != tail) {
 		iommu_print_event(iommu, iommu->evt_buf + head);
+
+		/* Update head pointer of hardware ring-buffer */
 		head = (head + EVENT_ENTRY_SIZE) % EVT_BUFFER_SIZE;
+		writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
 	}
-
-	writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
 }
 
 #ifdef CONFIG_IRQ_REMAP
@@ -1255,6 +1255,22 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
 	return ret;
 }
 
+static void domain_flush_complete(struct protection_domain *domain)
+{
+	int i;
+
+	for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
+		if (domain && !domain->dev_iommu[i])
+			continue;
+
+		/*
+		 * Devices of this domain are behind this IOMMU
+		 * We need to wait for completion of all commands.
+		 */
+		iommu_completion_wait(amd_iommus[i]);
+	}
+}
+
 static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
 {
 	struct iommu_cmd cmd;
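
The iommu_poll_events() hunk above moves the head-pointer write inside the
loop, so the hardware sees consumer progress after every entry rather than
only after the whole batch. A generic illustration of that pattern (not
driver code; example_handle() is hypothetical):

    static void example_drain_events(void __iomem *head_reg, u8 *buf,
                                     u32 head, u32 tail)
    {
            while (head != tail) {
                    example_handle(buf + head);     /* hypothetical handler */
                    head = (head + EVENT_ENTRY_SIZE) % EVT_BUFFER_SIZE;
                    /* publish progress to the hardware after each entry */
                    writel(head, head_reg);
            }
    }
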
@@ -1492,7 +1508,7 @@ void amd_iommu_domain_flush_pages(struct protection_domain *domain,
 		__domain_flush_pages(domain, address, size);
 
 		/* Wait until IOMMU TLB and all device IOTLB flushes are complete */
-		amd_iommu_domain_flush_complete(domain);
+		domain_flush_complete(domain);
 
 		return;
 	}
@@ -1532,7 +1548,7 @@ void amd_iommu_domain_flush_pages(struct protection_domain *domain,
 	}
 
 	/* Wait until IOMMU TLB and all device IOTLB flushes are complete */
-	amd_iommu_domain_flush_complete(domain);
+	domain_flush_complete(domain);
 }
 
 /* Flush the whole IO/TLB for a given protection domain - including PDE */
@@ -1558,29 +1574,13 @@ void amd_iommu_dev_flush_pasid_pages(struct iommu_dev_data *dev_data,
 	iommu_completion_wait(iommu);
 }
 
-void amd_iommu_dev_flush_pasid_all(struct iommu_dev_data *dev_data,
-				   ioasid_t pasid)
+static void dev_flush_pasid_all(struct iommu_dev_data *dev_data,
+				ioasid_t pasid)
 {
 	amd_iommu_dev_flush_pasid_pages(dev_data, 0,
 					CMD_INV_IOMMU_ALL_PAGES_ADDRESS, pasid);
 }
 
-void amd_iommu_domain_flush_complete(struct protection_domain *domain)
-{
-	int i;
-
-	for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
-		if (domain && !domain->dev_iommu[i])
-			continue;
-
-		/*
-		 * Devices of this domain are behind this IOMMU
-		 * We need to wait for completion of all commands.
-		 */
-		iommu_completion_wait(amd_iommus[i]);
-	}
-}
-
 /* Flush the not present cache if it exists */
 static void domain_flush_np_cache(struct protection_domain *domain,
 				  dma_addr_t iova, size_t size)
@@ -1598,15 +1598,7 @@ static void domain_flush_np_cache(struct protection_domain *domain,
 /*
  * This function flushes the DTEs for all devices in domain
  */
-static void domain_flush_devices(struct protection_domain *domain)
-{
-	struct iommu_dev_data *dev_data;
-
-	list_for_each_entry(dev_data, &domain->dev_list, list)
-		device_flush_dte(dev_data);
-}
-
-static void update_device_table(struct protection_domain *domain)
+void amd_iommu_update_and_flush_device_table(struct protection_domain *domain)
 {
 	struct iommu_dev_data *dev_data;
@@ -1616,12 +1608,11 @@ static void update_device_table(struct protection_domain *domain)
 		set_dte_entry(iommu, dev_data);
 		clone_aliases(iommu, dev_data->dev);
 	}
-}
 
-void amd_iommu_update_and_flush_device_table(struct protection_domain *domain)
-{
-	update_device_table(domain);
-	domain_flush_devices(domain);
+	list_for_each_entry(dev_data, &domain->dev_list, list)
+		device_flush_dte(dev_data);
+
+	domain_flush_complete(domain);
 }
 
 void amd_iommu_domain_update(struct protection_domain *domain)
@@ -1825,7 +1816,7 @@ static int update_gcr3(struct iommu_dev_data *dev_data,
 	else
 		*pte = 0;
 
-	amd_iommu_dev_flush_pasid_all(dev_data, pasid);
+	dev_flush_pasid_all(dev_data, pasid);
 	return 0;
 }
@@ -1971,7 +1962,7 @@ static void clear_dte_entry(struct amd_iommu *iommu, u16 devid)
 }
 
 /* Update and flush DTE for the given device */
-void amd_iommu_dev_update_dte(struct iommu_dev_data *dev_data, bool set)
+static void dev_update_dte(struct iommu_dev_data *dev_data, bool set)
 {
 	struct amd_iommu *iommu = get_amd_iommu_from_dev(dev_data->dev);
@@ -2041,6 +2032,7 @@ static int do_attach(struct iommu_dev_data *dev_data,
 		     struct protection_domain *domain)
 {
 	struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
+	struct io_pgtable_cfg *cfg = &domain->iop.pgtbl.cfg;
 	int ret = 0;
 
 	/* Update data structures */
@@ -2048,8 +2040,8 @@ static int do_attach(struct iommu_dev_data *dev_data,
 	list_add(&dev_data->list, &domain->dev_list);
 
 	/* Update NUMA Node ID */
-	if (domain->nid == NUMA_NO_NODE)
-		domain->nid = dev_to_node(dev_data->dev);
+	if (cfg->amd.nid == NUMA_NO_NODE)
+		cfg->amd.nid = dev_to_node(dev_data->dev);
 
 	/* Do reference counting */
 	domain->dev_iommu[iommu->index] += 1;
@@ -2071,7 +2063,7 @@ static void do_detach(struct iommu_dev_data *dev_data)
 	struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
 
 	/* Clear DTE and flush the entry */
-	amd_iommu_dev_update_dte(dev_data, false);
+	dev_update_dte(dev_data, false);
 
 	/* Flush IOTLB and wait for the flushes to finish */
 	amd_iommu_domain_flush_all(domain);
@@ -2194,11 +2186,12 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev)
 		dev_err(dev, "Failed to initialize - trying to proceed anyway\n");
 		iommu_dev = ERR_PTR(ret);
 		iommu_ignore_device(iommu, dev);
-	} else {
-		amd_iommu_set_pci_msi_domain(dev, iommu);
-		iommu_dev = &iommu->iommu;
+		goto out_err;
 	}
 
+	amd_iommu_set_pci_msi_domain(dev, iommu);
+	iommu_dev = &iommu->iommu;
+
 	/*
 	 * If IOMMU and device supports PASID then it will contain max
 	 * supported PASIDs, else it will be zero.
 	 */
@@ -2210,8 +2203,12 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev)
 					       pci_max_pasids(to_pci_dev(dev)));
 	}
 
+out_err:
 	iommu_completion_wait(iommu);
 
+	if (dev_is_pci(dev))
+		pci_prepare_ats(to_pci_dev(dev), PAGE_SHIFT);
+
 	return iommu_dev;
 }
@@ -2268,53 +2265,17 @@ static void cleanup_domain(struct protection_domain *domain)
 
 void protection_domain_free(struct protection_domain *domain)
 {
-	if (!domain)
-		return;
-
-	if (domain->iop.pgtbl_cfg.tlb)
-		free_io_pgtable_ops(&domain->iop.iop.ops);
-
-	if (domain->iop.root)
-		iommu_free_page(domain->iop.root);
-
-	if (domain->id)
-		domain_id_free(domain->id);
-
+	WARN_ON(!list_empty(&domain->dev_list));
+	free_io_pgtable_ops(&domain->iop.pgtbl.ops);
+	domain_id_free(domain->id);
 	kfree(domain);
 }
 
-static int protection_domain_init_v1(struct protection_domain *domain, int mode)
-{
-	u64 *pt_root = NULL;
-
-	BUG_ON(mode < PAGE_MODE_NONE || mode > PAGE_MODE_6_LEVEL);
-
-	if (mode != PAGE_MODE_NONE) {
-		pt_root = iommu_alloc_page(GFP_KERNEL);
-		if (!pt_root)
-			return -ENOMEM;
-	}
-
-	domain->pd_mode = PD_MODE_V1;
-	amd_iommu_domain_set_pgtable(domain, pt_root, mode);
-
-	return 0;
-}
-
-static int protection_domain_init_v2(struct protection_domain *pdom)
-{
-	pdom->pd_mode = PD_MODE_V2;
-	pdom->domain.pgsize_bitmap = AMD_IOMMU_PGSIZES_V2;
-
-	return 0;
-}
-
-struct protection_domain *protection_domain_alloc(unsigned int type)
+struct protection_domain *protection_domain_alloc(unsigned int type, int nid)
 {
 	struct io_pgtable_ops *pgtbl_ops;
 	struct protection_domain *domain;
 	int pgtable;
-	int ret;
 
 	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
 	if (!domain)
@@ -2322,12 +2283,12 @@ struct protection_domain *protection_domain_alloc(unsigned int type)
 
 	domain->id = domain_id_alloc();
 	if (!domain->id)
-		goto out_err;
+		goto err_free;
 
 	spin_lock_init(&domain->lock);
 	INIT_LIST_HEAD(&domain->dev_list);
 	INIT_LIST_HEAD(&domain->dev_data_list);
-	domain->nid = NUMA_NO_NODE;
+	domain->iop.pgtbl.cfg.amd.nid = nid;
 
 	switch (type) {
 	/* No need to allocate io pgtable ops in passthrough mode */
@@ -2345,31 +2306,30 @@ struct protection_domain *protection_domain_alloc(unsigned int type)
 		pgtable = AMD_IOMMU_V1;
 		break;
 	default:
-		goto out_err;
+		goto err_id;
 	}
 
 	switch (pgtable) {
 	case AMD_IOMMU_V1:
-		ret = protection_domain_init_v1(domain, DEFAULT_PGTABLE_LEVEL);
+		domain->pd_mode = PD_MODE_V1;
 		break;
 	case AMD_IOMMU_V2:
-		ret = protection_domain_init_v2(domain);
+		domain->pd_mode = PD_MODE_V2;
 		break;
 	default:
-		ret = -EINVAL;
-		break;
+		goto err_id;
 	}
 
-	if (ret)
-		goto out_err;
-
-	pgtbl_ops = alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl_cfg, domain);
+	pgtbl_ops =
+		alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl.cfg, domain);
 	if (!pgtbl_ops)
-		goto out_err;
+		goto err_id;
 
 	return domain;
-out_err:
-	protection_domain_free(domain);
+err_id:
+	domain_id_free(domain->id);
+err_free:
+	kfree(domain);
 	return NULL;
 }
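
The rework above replaces a catch-all protection_domain_free() on the error
path with labels that unwind exactly what has been acquired. A minimal
sketch of that labeled-unwind idiom (all example_* names are hypothetical):

    static struct example *example_alloc(void)
    {
            struct example *e = kzalloc(sizeof(*e), GFP_KERNEL);

            if (!e)
                    return NULL;

            e->id = example_id_alloc();          /* hypothetical helper */
            if (!e->id)
                    goto err_free;

            e->table = example_table_alloc();    /* hypothetical helper */
            if (!e->table)
                    goto err_id;

            return e;

    err_id:
            example_id_free(e->id);              /* undo only what succeeded */
    err_free:
            kfree(e);
            return NULL;
    }
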
@@ -2420,17 +2380,18 @@ static struct iommu_domain *do_iommu_domain_alloc(unsigned int type,
 	if (dirty_tracking && !amd_iommu_hd_support(iommu))
 		return ERR_PTR(-EOPNOTSUPP);
 
-	domain = protection_domain_alloc(type);
+	domain = protection_domain_alloc(type,
+					 dev ? dev_to_node(dev) : NUMA_NO_NODE);
 	if (!domain)
 		return ERR_PTR(-ENOMEM);
 
 	domain->domain.geometry.aperture_start = 0;
 	domain->domain.geometry.aperture_end   = dma_max_address();
 	domain->domain.geometry.force_aperture = true;
+	domain->domain.pgsize_bitmap = domain->iop.pgtbl.cfg.pgsize_bitmap;
 
 	if (iommu) {
 		domain->domain.type = type;
-		domain->domain.pgsize_bitmap = iommu->iommu.ops->pgsize_bitmap;
 		domain->domain.ops = iommu->iommu.ops->default_domain_ops;
 
 		if (dirty_tracking)
@@ -2470,9 +2431,6 @@ void amd_iommu_domain_free(struct iommu_domain *dom)
 	struct protection_domain *domain;
 	unsigned long flags;
 
-	if (!dom)
-		return;
-
 	domain = to_pdomain(dom);
 
 	spin_lock_irqsave(&domain->lock, flags);
@@ -2484,6 +2442,29 @@ void amd_iommu_domain_free(struct iommu_domain *dom)
 	protection_domain_free(domain);
 }
 
+static int blocked_domain_attach_device(struct iommu_domain *domain,
+					struct device *dev)
+{
+	struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
+
+	if (dev_data->domain)
+		detach_device(dev);
+
+	/* Clear DTE and flush the entry */
+	spin_lock(&dev_data->lock);
+	dev_update_dte(dev_data, false);
+	spin_unlock(&dev_data->lock);
+
+	return 0;
+}
+
+static struct iommu_domain blocked_domain = {
+	.type = IOMMU_DOMAIN_BLOCKED,
+	.ops = &(const struct iommu_domain_ops) {
+		.attach_dev = blocked_domain_attach_device,
+	}
+};
+
 static int amd_iommu_attach_device(struct iommu_domain *dom,
 				   struct device *dev)
 {
@@ -2539,7 +2520,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
 	}
 
 	/* Update device table */
-	amd_iommu_dev_update_dte(dev_data, true);
+	dev_update_dte(dev_data, true);
 
 	return ret;
 }
@@ -2548,7 +2529,7 @@ static int amd_iommu_iotlb_sync_map(struct iommu_domain *dom,
 				    unsigned long iova, size_t size)
 {
 	struct protection_domain *domain = to_pdomain(dom);
-	struct io_pgtable_ops *ops = &domain->iop.iop.ops;
+	struct io_pgtable_ops *ops = &domain->iop.pgtbl.ops;
 
 	if (ops->map_pages)
 		domain_flush_np_cache(domain, iova, size);
@@ -2560,7 +2541,7 @@ static int amd_iommu_map_pages(struct iommu_domain *dom, unsigned long iova,
 			       int iommu_prot, gfp_t gfp, size_t *mapped)
 {
 	struct protection_domain *domain = to_pdomain(dom);
-	struct io_pgtable_ops *ops = &domain->iop.iop.ops;
+	struct io_pgtable_ops *ops = &domain->iop.pgtbl.ops;
 	int prot = 0;
 	int ret = -EINVAL;
@@ -2607,7 +2588,7 @@ static size_t amd_iommu_unmap_pages(struct iommu_domain *dom, unsigned long iova,
 				    struct iommu_iotlb_gather *gather)
 {
 	struct protection_domain *domain = to_pdomain(dom);
-	struct io_pgtable_ops *ops = &domain->iop.iop.ops;
+	struct io_pgtable_ops *ops = &domain->iop.pgtbl.ops;
 	size_t r;
 
 	if ((domain->pd_mode == PD_MODE_V1) &&
@@ -2626,7 +2607,7 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
 					  dma_addr_t iova)
 {
 	struct protection_domain *domain = to_pdomain(dom);
-	struct io_pgtable_ops *ops = &domain->iop.iop.ops;
+	struct io_pgtable_ops *ops = &domain->iop.pgtbl.ops;
 
 	return ops->iova_to_phys(ops, iova);
 }
@@ -2704,7 +2685,7 @@ static int amd_iommu_read_and_clear_dirty(struct iommu_domain *domain,
 					  struct iommu_dirty_bitmap *dirty)
 {
 	struct protection_domain *pdomain = to_pdomain(domain);
-	struct io_pgtable_ops *ops = &pdomain->iop.iop.ops;
+	struct io_pgtable_ops *ops = &pdomain->iop.pgtbl.ops;
 	unsigned long lflags;
 
 	if (!ops || !ops->read_and_clear_dirty)
@@ -2779,7 +2760,7 @@ static void amd_iommu_get_resv_regions(struct device *dev,
 	list_add_tail(&region->list, head);
 }
 
-bool amd_iommu_is_attach_deferred(struct device *dev)
+static bool amd_iommu_is_attach_deferred(struct device *dev)
 {
 	struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
@@ -2881,6 +2862,7 @@ static int amd_iommu_dev_disable_feature(struct device *dev,
 
 const struct iommu_ops amd_iommu_ops = {
 	.capable = amd_iommu_capable,
+	.blocked_domain = &blocked_domain,
 	.domain_alloc = amd_iommu_domain_alloc,
 	.domain_alloc_user = amd_iommu_domain_alloc_user,
 	.domain_alloc_sva = amd_iommu_domain_alloc_sva,
@@ -2889,7 +2871,6 @@ const struct iommu_ops amd_iommu_ops = {
 	.device_group = amd_iommu_device_group,
 	.get_resv_regions = amd_iommu_get_resv_regions,
 	.is_attach_deferred = amd_iommu_is_attach_deferred,
-	.pgsize_bitmap = AMD_IOMMU_PGSIZES,
 	.def_domain_type = amd_iommu_def_domain_type,
 	.dev_enable_feat = amd_iommu_dev_enable_feature,
 	.dev_disable_feat = amd_iommu_dev_disable_feature,
diff --git a/drivers/iommu/amd/pasid.c b/drivers/iommu/amd/pasid.c
index a68215f2b3e1d6a4a422bffa9d2081ee3a3bed20..0657b9373be5475fdcd765048d3f359106fd8b2b 100644
--- a/drivers/iommu/amd/pasid.c
+++ b/drivers/iommu/amd/pasid.c
@@ -181,7 +181,7 @@ struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev,
 	struct protection_domain *pdom;
 	int ret;
 
-	pdom = protection_domain_alloc(IOMMU_DOMAIN_SVA);
+	pdom = protection_domain_alloc(IOMMU_DOMAIN_SVA, dev_to_node(dev));
 	if (!pdom)
 		return ERR_PTR(-ENOMEM);
diff --git a/drivers/iommu/arm/arm-smmu-v3/Makefile b/drivers/iommu/arm/arm-smmu-v3/Makefile
index 355173d1441d2f86044a181a3bf045dffc707480..dc98c88b48c827084faecc0e847af262b1e17373 100644
--- a/drivers/iommu/arm/arm-smmu-v3/Makefile
+++ b/drivers/iommu/arm/arm-smmu-v3/Makefile
@@ -2,5 +2,6 @@
 obj-$(CONFIG_ARM_SMMU_V3) += arm_smmu_v3.o
 arm_smmu_v3-y := arm-smmu-v3.o
 arm_smmu_v3-$(CONFIG_ARM_SMMU_V3_SVA) += arm-smmu-v3-sva.o
+arm_smmu_v3-$(CONFIG_TEGRA241_CMDQV) += tegra241-cmdqv.o
 
 obj-$(CONFIG_ARM_SMMU_V3_KUNIT_TEST) += arm-smmu-v3-test.o
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c
index cceb737a700126455425c7aab8b27fe88adf78a3..84baa021370a86efb5f7b3e496b1be02e8ddcb2c 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c
@@ -30,6 +30,11 @@ static struct mm_struct sva_mm = {
 	.pgd = (void *)0xdaedbeefdeadbeefULL,
 };
 
+enum arm_smmu_test_master_feat {
+	ARM_SMMU_MASTER_TEST_ATS = BIT(0),
+	ARM_SMMU_MASTER_TEST_STALL = BIT(1),
+};
+
 static bool arm_smmu_entry_differs_in_used_bits(const __le64 *entry,
 						const __le64 *used_bits,
 						const __le64 *target,
@@ -164,16 +169,22 @@ static const dma_addr_t fake_cdtab_dma_addr = 0xF0F0F0F0F0F0;
 
 static void arm_smmu_test_make_cdtable_ste(struct arm_smmu_ste *ste,
 					   unsigned int s1dss,
-					   const dma_addr_t dma_addr)
+					   const dma_addr_t dma_addr,
+					   enum arm_smmu_test_master_feat feat)
 {
+	bool ats_enabled = feat & ARM_SMMU_MASTER_TEST_ATS;
+	bool stall_enabled = feat & ARM_SMMU_MASTER_TEST_STALL;
+
 	struct arm_smmu_master master = {
+		.ats_enabled = ats_enabled,
 		.cd_table.cdtab_dma = dma_addr,
 		.cd_table.s1cdmax = 0xFF,
 		.cd_table.s1fmt = STRTAB_STE_0_S1FMT_64K_L2,
 		.smmu = &smmu,
+		.stall_enabled = stall_enabled,
 	};
 
-	arm_smmu_make_cdtable_ste(ste, &master, true, s1dss);
+	arm_smmu_make_cdtable_ste(ste, &master, ats_enabled, s1dss);
 }
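
An illustration (not test code) of how the bit-flag enum introduced above is
meant to be combined and decoded:

    static void example_feat_decode(void)
    {
            enum arm_smmu_test_master_feat feat =
                    ARM_SMMU_MASTER_TEST_ATS | ARM_SMMU_MASTER_TEST_STALL;

            bool ats_enabled = feat & ARM_SMMU_MASTER_TEST_ATS;     /* true */
            bool stall_enabled = feat & ARM_SMMU_MASTER_TEST_STALL; /* true */
    }
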
@@ -204,7 +215,7 @@ static void arm_smmu_v3_write_ste_test_cdtable_to_abort(struct kunit *test)
 	struct arm_smmu_ste ste;
 
 	arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0,
-				       fake_cdtab_dma_addr);
+				       fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
 	arm_smmu_v3_test_ste_expect_hitless_transition(test, &ste, &abort_ste,
 						       NUM_EXPECTED_SYNCS(2));
 }
@@ -214,7 +225,7 @@ static void arm_smmu_v3_write_ste_test_abort_to_cdtable(struct kunit *test)
 	struct arm_smmu_ste ste;
 
 	arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0,
-				       fake_cdtab_dma_addr);
+				       fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
 	arm_smmu_v3_test_ste_expect_hitless_transition(test, &abort_ste, &ste,
 						       NUM_EXPECTED_SYNCS(2));
 }
@@ -224,7 +235,7 @@ static void arm_smmu_v3_write_ste_test_cdtable_to_bypass(struct kunit *test)
 	struct arm_smmu_ste ste;
 
 	arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0,
-				       fake_cdtab_dma_addr);
+				       fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
 	arm_smmu_v3_test_ste_expect_hitless_transition(test, &ste, &bypass_ste,
 						       NUM_EXPECTED_SYNCS(3));
 }
@@ -234,7 +245,7 @@ static void arm_smmu_v3_write_ste_test_bypass_to_cdtable(struct kunit *test)
 	struct arm_smmu_ste ste;
 
 	arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0,
-				       fake_cdtab_dma_addr);
+				       fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
 	arm_smmu_v3_test_ste_expect_hitless_transition(test, &bypass_ste, &ste,
 						       NUM_EXPECTED_SYNCS(3));
 }
@@ -245,9 +256,9 @@ static void arm_smmu_v3_write_ste_test_cdtable_s1dss_change(struct kunit *test)
 	struct arm_smmu_ste s1dss_bypass;
 
 	arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0,
-				       fake_cdtab_dma_addr);
+				       fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
 	arm_smmu_test_make_cdtable_ste(&s1dss_bypass, STRTAB_STE_1_S1DSS_BYPASS,
-				       fake_cdtab_dma_addr);
+				       fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
 
 	/*
 	 * Flipping s1dss on a CD table STE only involves changes to the second
@@ -265,7 +276,7 @@ arm_smmu_v3_write_ste_test_s1dssbypass_to_stebypass(struct kunit *test)
 	struct arm_smmu_ste s1dss_bypass;
 
 	arm_smmu_test_make_cdtable_ste(&s1dss_bypass, STRTAB_STE_1_S1DSS_BYPASS,
-				       fake_cdtab_dma_addr);
+				       fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
 	arm_smmu_v3_test_ste_expect_hitless_transition(
 		test, &s1dss_bypass, &bypass_ste, NUM_EXPECTED_SYNCS(2));
 }
@@ -276,16 +287,20 @@ arm_smmu_v3_write_ste_test_stebypass_to_s1dssbypass(struct kunit *test)
 	struct arm_smmu_ste s1dss_bypass;
 
 	arm_smmu_test_make_cdtable_ste(&s1dss_bypass, STRTAB_STE_1_S1DSS_BYPASS,
-				       fake_cdtab_dma_addr);
+				       fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
 	arm_smmu_v3_test_ste_expect_hitless_transition(
 		test, &bypass_ste, &s1dss_bypass, NUM_EXPECTED_SYNCS(2));
 }
 
 static void arm_smmu_test_make_s2_ste(struct arm_smmu_ste *ste,
-				      bool ats_enabled)
+				      enum arm_smmu_test_master_feat feat)
 {
+	bool ats_enabled = feat & ARM_SMMU_MASTER_TEST_ATS;
+	bool stall_enabled = feat & ARM_SMMU_MASTER_TEST_STALL;
 	struct arm_smmu_master master = {
+		.ats_enabled = ats_enabled,
 		.smmu = &smmu,
+		.stall_enabled = stall_enabled,
 	};
 	struct io_pgtable io_pgtable = {};
 	struct arm_smmu_domain smmu_domain = {
@@ -308,7 +323,7 @@ static void arm_smmu_v3_write_ste_test_s2_to_abort(struct kunit *test)
 {
 	struct arm_smmu_ste ste;
 
-	arm_smmu_test_make_s2_ste(&ste, true);
+	arm_smmu_test_make_s2_ste(&ste, ARM_SMMU_MASTER_TEST_ATS);
 	arm_smmu_v3_test_ste_expect_hitless_transition(test, &ste, &abort_ste,
 						       NUM_EXPECTED_SYNCS(2));
 }
@@ -317,7 +332,7 @@ static void arm_smmu_v3_write_ste_test_abort_to_s2(struct kunit *test)
 {
 	struct arm_smmu_ste ste;
 
-	arm_smmu_test_make_s2_ste(&ste, true);
+	arm_smmu_test_make_s2_ste(&ste, ARM_SMMU_MASTER_TEST_ATS);
 	arm_smmu_v3_test_ste_expect_hitless_transition(test, &abort_ste, &ste,
 						       NUM_EXPECTED_SYNCS(2));
 }
@@ -326,7 +341,7 @@ static void
arm_smmu_v3_write_ste_test_s2_to_bypass(struct kunit *test)
 {
 	struct arm_smmu_ste ste;
 
-	arm_smmu_test_make_s2_ste(&ste, true);
+	arm_smmu_test_make_s2_ste(&ste, ARM_SMMU_MASTER_TEST_ATS);
 	arm_smmu_v3_test_ste_expect_hitless_transition(test, &ste, &bypass_ste,
 						       NUM_EXPECTED_SYNCS(2));
 }
@@ -335,7 +350,7 @@ static void arm_smmu_v3_write_ste_test_bypass_to_s2(struct kunit *test)
 {
 	struct arm_smmu_ste ste;
 
-	arm_smmu_test_make_s2_ste(&ste, true);
+	arm_smmu_test_make_s2_ste(&ste, ARM_SMMU_MASTER_TEST_ATS);
 	arm_smmu_v3_test_ste_expect_hitless_transition(test, &bypass_ste, &ste,
 						       NUM_EXPECTED_SYNCS(2));
 }
@@ -346,8 +361,8 @@ static void arm_smmu_v3_write_ste_test_s1_to_s2(struct kunit *test)
 	struct arm_smmu_ste s2_ste;
 
 	arm_smmu_test_make_cdtable_ste(&s1_ste, STRTAB_STE_1_S1DSS_SSID0,
-				       fake_cdtab_dma_addr);
-	arm_smmu_test_make_s2_ste(&s2_ste, true);
+				       fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
+	arm_smmu_test_make_s2_ste(&s2_ste, ARM_SMMU_MASTER_TEST_ATS);
 	arm_smmu_v3_test_ste_expect_hitless_transition(test, &s1_ste, &s2_ste,
 						       NUM_EXPECTED_SYNCS(3));
 }
@@ -358,8 +373,8 @@ static void arm_smmu_v3_write_ste_test_s2_to_s1(struct kunit *test)
 	struct arm_smmu_ste s2_ste;
 
 	arm_smmu_test_make_cdtable_ste(&s1_ste, STRTAB_STE_1_S1DSS_SSID0,
-				       fake_cdtab_dma_addr);
-	arm_smmu_test_make_s2_ste(&s2_ste, true);
+				       fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
+	arm_smmu_test_make_s2_ste(&s2_ste, ARM_SMMU_MASTER_TEST_ATS);
 	arm_smmu_v3_test_ste_expect_hitless_transition(test, &s2_ste, &s1_ste,
 						       NUM_EXPECTED_SYNCS(3));
 }
@@ -375,9 +390,9 @@ static void arm_smmu_v3_write_ste_test_non_hitless(struct kunit *test)
 	 * s1 dss field in the same update.
 	 */
 	arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0,
-				       fake_cdtab_dma_addr);
+				       fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
 	arm_smmu_test_make_cdtable_ste(&ste_2, STRTAB_STE_1_S1DSS_BYPASS,
-				       0x4B4B4b4B4B);
+				       0x4B4B4b4B4B, ARM_SMMU_MASTER_TEST_ATS);
 	arm_smmu_v3_test_ste_expect_non_hitless_transition(
 		test, &ste, &ste_2, NUM_EXPECTED_SYNCS(3));
 }
@@ -503,6 +518,30 @@ static void arm_smmu_test_make_sva_release_cd(struct arm_smmu_cd *cd,
 	arm_smmu_make_sva_cd(cd, &master, NULL, asid);
 }
 
+static void arm_smmu_v3_write_ste_test_s1_to_s2_stall(struct kunit *test)
+{
+	struct arm_smmu_ste s1_ste;
+	struct arm_smmu_ste s2_ste;
+
+	arm_smmu_test_make_cdtable_ste(&s1_ste, STRTAB_STE_1_S1DSS_SSID0,
+				       fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_STALL);
+	arm_smmu_test_make_s2_ste(&s2_ste, ARM_SMMU_MASTER_TEST_STALL);
+	arm_smmu_v3_test_ste_expect_hitless_transition(test, &s1_ste, &s2_ste,
+						       NUM_EXPECTED_SYNCS(3));
+}
+
+static void arm_smmu_v3_write_ste_test_s2_to_s1_stall(struct kunit *test)
+{
+	struct arm_smmu_ste s1_ste;
+	struct arm_smmu_ste s2_ste;
+
+	arm_smmu_test_make_cdtable_ste(&s1_ste, STRTAB_STE_1_S1DSS_SSID0,
+				       fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_STALL);
+	arm_smmu_test_make_s2_ste(&s2_ste, ARM_SMMU_MASTER_TEST_STALL);
+	arm_smmu_v3_test_ste_expect_hitless_transition(test, &s2_ste, &s1_ste,
+						       NUM_EXPECTED_SYNCS(3));
+}
+
 static void arm_smmu_v3_write_cd_test_sva_clear(struct kunit *test)
 {
 	struct arm_smmu_cd cd = {};
@@ -547,6 +586,8 @@ static struct kunit_case arm_smmu_v3_test_cases[] = {
 	KUNIT_CASE(arm_smmu_v3_write_ste_test_non_hitless),
 	KUNIT_CASE(arm_smmu_v3_write_cd_test_s1_clear),
 	KUNIT_CASE(arm_smmu_v3_write_cd_test_s1_change_asid),
+	KUNIT_CASE(arm_smmu_v3_write_ste_test_s1_to_s2_stall),
+	KUNIT_CASE(arm_smmu_v3_write_ste_test_s2_to_s1_stall),
 	KUNIT_CASE(arm_smmu_v3_write_cd_test_sva_clear),
 	KUNIT_CASE(arm_smmu_v3_write_cd_test_sva_release),
 	{},
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 2478178dfda7aa390b2701c5b4b905ffd089873a..58bd779f214ae494c0179d70caaaeb2b4dc39fa5 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -346,14 +346,30 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
 	return 0;
 }
 
-static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
+static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu,
+					       struct arm_smmu_cmdq_ent *ent)
 {
-	return &smmu->cmdq;
+	struct arm_smmu_cmdq *cmdq = NULL;
+
+	if (smmu->impl_ops && smmu->impl_ops->get_secondary_cmdq)
+		cmdq = smmu->impl_ops->get_secondary_cmdq(smmu, ent);
+
+	return cmdq ?: &smmu->cmdq;
+}
+
+static bool arm_smmu_cmdq_needs_busy_polling(struct arm_smmu_device *smmu,
+					     struct arm_smmu_cmdq *cmdq)
+{
+	if (cmdq == &smmu->cmdq)
+		return false;
+
+	return smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV;
 }
 
 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
-					 struct arm_smmu_queue *q, u32 prod)
+					 struct arm_smmu_cmdq *cmdq, u32 prod)
 {
+	struct arm_smmu_queue *q = &cmdq->q;
 	struct arm_smmu_cmdq_ent ent = {
 		.opcode = CMDQ_OP_CMD_SYNC,
 	};
@@ -368,10 +384,12 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
 	}
 
 	arm_smmu_cmdq_build_cmd(cmd, &ent);
+	if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
+		u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
 }
 
-static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
-				     struct arm_smmu_queue *q)
+void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
+			      struct arm_smmu_cmdq *cmdq)
 {
 	static const char * const cerror_str[] = {
 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
@@ -379,6 +397,7 @@ static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
 		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
 	};
+	struct arm_smmu_queue *q = &cmdq->q;
 	int i;
 	u64 cmd[CMDQ_ENT_DWORDS];
@@ -421,13 +440,15 @@ static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
 
 	/* Convert the erroneous command into a CMD_SYNC */
 	arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
+	if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
+		u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
 
 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
 }
 
 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
 {
-	__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
+	__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq);
 }
 
 /*
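
A hypothetical implementation hook in the style the new dispatch expects: an
impl_ops->get_secondary_cmdq() callback may return a dedicated queue for
some commands, or NULL to fall back to the main SMMU CMDQ. Everything named
example_* here is invented for illustration; only the callback shape and the
CMDQ_OP_* opcodes come from the driver:

    static struct arm_smmu_cmdq *
    example_get_secondary_cmdq(struct arm_smmu_device *smmu,
                               struct arm_smmu_cmdq_ent *ent)
    {
            struct example_impl *impl = example_impl_of(smmu); /* hypothetical */

            /* Route invalidation-class commands to a dedicated queue. */
            switch (ent->opcode) {
            case CMDQ_OP_TLBI_NH_ASID:
            case CMDQ_OP_TLBI_NH_VA:
            case CMDQ_OP_ATC_INV:
                    return &impl->vcmdq;    /* hypothetical dedicated queue */
            default:
                    return NULL;            /* main queue handles the rest */
            }
    }
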
@@ -592,11 +613,11 @@ static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
 
 /* Wait for the command queue to become non-full */
 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
+					     struct arm_smmu_cmdq *cmdq,
 					     struct arm_smmu_ll_queue *llq)
 {
 	unsigned long flags;
 	struct arm_smmu_queue_poll qp;
-	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
 	int ret = 0;
 
 	/*
@@ -627,11 +648,11 @@ static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
  * Must be called with the cmdq lock held in some capacity.
  */
 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
+					  struct arm_smmu_cmdq *cmdq,
 					  struct arm_smmu_ll_queue *llq)
 {
 	int ret = 0;
 	struct arm_smmu_queue_poll qp;
-	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
 	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
 
 	queue_poll_init(smmu, &qp);
@@ -651,10 +672,10 @@ static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
  * Must be called with the cmdq lock held in some capacity.
  */
 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
+					       struct arm_smmu_cmdq *cmdq,
 					       struct arm_smmu_ll_queue *llq)
 {
 	struct arm_smmu_queue_poll qp;
-	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
 	u32 prod = llq->prod;
 	int ret = 0;
@@ -701,12 +722,14 @@ static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
 }
 
 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
+					 struct arm_smmu_cmdq *cmdq,
 					 struct arm_smmu_ll_queue *llq)
 {
-	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
-		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
+	if (smmu->options & ARM_SMMU_OPT_MSIPOLL &&
+	    !arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
+		return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq);
 
-	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
+	return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq);
 }
 
 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
@@ -743,13 +766,13 @@ static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
  * CPU will appear before any of the commands from the other CPU.
  */
 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
+				       struct arm_smmu_cmdq *cmdq,
 				       u64 *cmds, int n, bool sync)
 {
 	u64 cmd_sync[CMDQ_ENT_DWORDS];
 	u32 prod;
 	unsigned long flags;
 	bool owner;
-	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
 	struct arm_smmu_ll_queue llq, head;
 	int ret = 0;
@@ -763,7 +786,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
 
 	while (!queue_has_space(&llq, n + sync)) {
 		local_irq_restore(flags);
-		if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
+		if (arm_smmu_cmdq_poll_until_not_full(smmu, cmdq, &llq))
 			dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
 		local_irq_save(flags);
 	}
@@ -789,7 +812,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
 	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
 	if (sync) {
 		prod = queue_inc_prod_n(&llq, n);
-		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
+		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, cmdq, prod);
 		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
 
 		/*
If we are inserting a CMD_SYNC, we must wait for it to complete */ if (sync) { llq.prod = queue_inc_prod_n(&llq, n); - ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq); + ret = arm_smmu_cmdq_poll_until_sync(smmu, cmdq, &llq); if (ret) { dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n", @@ -874,7 +897,8 @@ static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, return -EINVAL; } - return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync); + return arm_smmu_cmdq_issue_cmdlist( + smmu, arm_smmu_get_cmdq(smmu, ent), cmd, 1, sync); } static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, @@ -889,21 +913,33 @@ static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu, return __arm_smmu_cmdq_issue_cmd(smmu, ent, true); } +static void arm_smmu_cmdq_batch_init(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq_batch *cmds, + struct arm_smmu_cmdq_ent *ent) +{ + cmds->num = 0; + cmds->cmdq = arm_smmu_get_cmdq(smmu, ent); +} + static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu, struct arm_smmu_cmdq_batch *cmds, struct arm_smmu_cmdq_ent *cmd) { + bool unsupported_cmd = !arm_smmu_cmdq_supports_cmd(cmds->cmdq, cmd); + bool force_sync = (cmds->num == CMDQ_BATCH_ENTRIES - 1) && + (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC); int index; - if (cmds->num == CMDQ_BATCH_ENTRIES - 1 && - (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) { - arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true); - cmds->num = 0; + if (force_sync || unsupported_cmd) { + arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds, + cmds->num, true); + arm_smmu_cmdq_batch_init(smmu, cmds, cmd); } if (cmds->num == CMDQ_BATCH_ENTRIES) { - arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false); - cmds->num = 0; + arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds, + cmds->num, false); + arm_smmu_cmdq_batch_init(smmu, cmds, cmd); } index = cmds->num * CMDQ_ENT_DWORDS; @@ -919,7 +955,8 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu, static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu, struct arm_smmu_cmdq_batch *cmds) { - return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true); + return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds, + cmds->num, true); } static void arm_smmu_page_response(struct device *dev, struct iopf_fault *unused, @@ -1012,7 +1049,8 @@ void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits) used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR | STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI | - STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2R); + STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2S | + STRTAB_STE_2_S2R); used_bits[3] |= cpu_to_le64(STRTAB_STE_3_S2TTB_MASK); } @@ -1170,7 +1208,7 @@ static void arm_smmu_sync_cd(struct arm_smmu_master *master, }, }; - cmds.num = 0; + arm_smmu_cmdq_batch_init(smmu, &cmds, &cmd); for (i = 0; i < master->num_streams; i++) { cmd.cfgi.sid = master->streams[i].id; arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); @@ -1179,48 +1217,36 @@ static void arm_smmu_sync_cd(struct arm_smmu_master *master, arm_smmu_cmdq_batch_submit(smmu, &cmds); } -static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu, - struct arm_smmu_l1_ctx_desc *l1_desc) +static void arm_smmu_write_cd_l1_desc(struct arm_smmu_cdtab_l1 *dst, + dma_addr_t l2ptr_dma) { - size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3); + u64 val = (l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) | CTXDESC_L1_DESC_V; - l1_desc->l2ptr = 
dmam_alloc_coherent(smmu->dev, size, - &l1_desc->l2ptr_dma, GFP_KERNEL); - if (!l1_desc->l2ptr) { - dev_warn(smmu->dev, - "failed to allocate context descriptor table\n"); - return -ENOMEM; - } - return 0; + /* The HW has 64 bit atomicity with stores to the L2 CD table */ + WRITE_ONCE(dst->l2ptr, cpu_to_le64(val)); } -static void arm_smmu_write_cd_l1_desc(__le64 *dst, - struct arm_smmu_l1_ctx_desc *l1_desc) +static dma_addr_t arm_smmu_cd_l1_get_desc(const struct arm_smmu_cdtab_l1 *src) { - u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) | - CTXDESC_L1_DESC_V; - - /* The HW has 64 bit atomicity with stores to the L2 CD table */ - WRITE_ONCE(*dst, cpu_to_le64(val)); + return le64_to_cpu(src->l2ptr) & CTXDESC_L1_DESC_L2PTR_MASK; } struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid) { - struct arm_smmu_l1_ctx_desc *l1_desc; + struct arm_smmu_cdtab_l2 *l2; struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; - if (!cd_table->cdtab) + if (!arm_smmu_cdtab_allocated(cd_table)) return NULL; if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR) - return (struct arm_smmu_cd *)(cd_table->cdtab + - ssid * CTXDESC_CD_DWORDS); + return &cd_table->linear.table[ssid]; - l1_desc = &cd_table->l1_desc[ssid / CTXDESC_L2_ENTRIES]; - if (!l1_desc->l2ptr) + l2 = cd_table->l2.l2ptrs[arm_smmu_cdtab_l1_idx(ssid)]; + if (!l2) return NULL; - return &l1_desc->l2ptr[ssid % CTXDESC_L2_ENTRIES]; + return &l2->cds[arm_smmu_cdtab_l2_idx(ssid)]; } static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master, @@ -1232,24 +1258,25 @@ static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master, might_sleep(); iommu_group_mutex_assert(master->dev); - if (!cd_table->cdtab) { + if (!arm_smmu_cdtab_allocated(cd_table)) { if (arm_smmu_alloc_cd_tables(master)) return NULL; } if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_64K_L2) { - unsigned int idx = ssid / CTXDESC_L2_ENTRIES; - struct arm_smmu_l1_ctx_desc *l1_desc; + unsigned int idx = arm_smmu_cdtab_l1_idx(ssid); + struct arm_smmu_cdtab_l2 **l2ptr = &cd_table->l2.l2ptrs[idx]; - l1_desc = &cd_table->l1_desc[idx]; - if (!l1_desc->l2ptr) { - __le64 *l1ptr; + if (!*l2ptr) { + dma_addr_t l2ptr_dma; - if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc)) + *l2ptr = dma_alloc_coherent(smmu->dev, sizeof(**l2ptr), + &l2ptr_dma, GFP_KERNEL); + if (!*l2ptr) return NULL; - l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS; - arm_smmu_write_cd_l1_desc(l1ptr, l1_desc); + arm_smmu_write_cd_l1_desc(&cd_table->l2.l1tab[idx], + l2ptr_dma); /* An invalid L1CD can be cached */ arm_smmu_sync_cd(master, ssid, false); } @@ -1369,7 +1396,7 @@ void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid) struct arm_smmu_cd target = {}; struct arm_smmu_cd *cdptr; - if (!master->cd_table.cdtab) + if (!arm_smmu_cdtab_allocated(&master->cd_table)) return; cdptr = arm_smmu_get_cd_ptr(master, ssid); if (WARN_ON(!cdptr)) @@ -1391,74 +1418,75 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master) if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) || max_contexts <= CTXDESC_L2_ENTRIES) { cd_table->s1fmt = STRTAB_STE_0_S1FMT_LINEAR; - cd_table->num_l1_ents = max_contexts; + cd_table->linear.num_ents = max_contexts; - l1size = max_contexts * (CTXDESC_CD_DWORDS << 3); + l1size = max_contexts * sizeof(struct arm_smmu_cd); + cd_table->linear.table = dma_alloc_coherent(smmu->dev, l1size, + &cd_table->cdtab_dma, + GFP_KERNEL); + if (!cd_table->linear.table) + return -ENOMEM; } else { cd_table->s1fmt =
STRTAB_STE_0_S1FMT_64K_L2; - cd_table->num_l1_ents = DIV_ROUND_UP(max_contexts, - CTXDESC_L2_ENTRIES); + cd_table->l2.num_l1_ents = + DIV_ROUND_UP(max_contexts, CTXDESC_L2_ENTRIES); - cd_table->l1_desc = devm_kcalloc(smmu->dev, cd_table->num_l1_ents, - sizeof(*cd_table->l1_desc), - GFP_KERNEL); - if (!cd_table->l1_desc) + cd_table->l2.l2ptrs = kcalloc(cd_table->l2.num_l1_ents, + sizeof(*cd_table->l2.l2ptrs), + GFP_KERNEL); + if (!cd_table->l2.l2ptrs) return -ENOMEM; - l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3); - } - - cd_table->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cd_table->cdtab_dma, - GFP_KERNEL); - if (!cd_table->cdtab) { - dev_warn(smmu->dev, "failed to allocate context descriptor\n"); - ret = -ENOMEM; - goto err_free_l1; + l1size = cd_table->l2.num_l1_ents * sizeof(struct arm_smmu_cdtab_l1); + cd_table->l2.l1tab = dma_alloc_coherent(smmu->dev, l1size, + &cd_table->cdtab_dma, + GFP_KERNEL); + if (!cd_table->l2.l1tab) { + ret = -ENOMEM; + goto err_free_l2ptrs; + } } return 0; -err_free_l1: - if (cd_table->l1_desc) { - devm_kfree(smmu->dev, cd_table->l1_desc); - cd_table->l1_desc = NULL; - } +err_free_l2ptrs: + kfree(cd_table->l2.l2ptrs); + cd_table->l2.l2ptrs = NULL; return ret; } static void arm_smmu_free_cd_tables(struct arm_smmu_master *master) { int i; - size_t size, l1size; struct arm_smmu_device *smmu = master->smmu; struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; - if (cd_table->l1_desc) { - size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3); - - for (i = 0; i < cd_table->num_l1_ents; i++) { - if (!cd_table->l1_desc[i].l2ptr) + if (cd_table->s1fmt != STRTAB_STE_0_S1FMT_LINEAR) { + for (i = 0; i < cd_table->l2.num_l1_ents; i++) { + if (!cd_table->l2.l2ptrs[i]) continue; - dmam_free_coherent(smmu->dev, size, - cd_table->l1_desc[i].l2ptr, - cd_table->l1_desc[i].l2ptr_dma); + dma_free_coherent(smmu->dev, + sizeof(*cd_table->l2.l2ptrs[i]), + cd_table->l2.l2ptrs[i], + arm_smmu_cd_l1_get_desc(&cd_table->l2.l1tab[i])); } - devm_kfree(smmu->dev, cd_table->l1_desc); - cd_table->l1_desc = NULL; + kfree(cd_table->l2.l2ptrs); - l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3); + dma_free_coherent(smmu->dev, + cd_table->l2.num_l1_ents * + sizeof(struct arm_smmu_cdtab_l1), + cd_table->l2.l1tab, cd_table->cdtab_dma); } else { - l1size = cd_table->num_l1_ents * (CTXDESC_CD_DWORDS << 3); + dma_free_coherent(smmu->dev, + cd_table->linear.num_ents * + sizeof(struct arm_smmu_cd), + cd_table->linear.table, cd_table->cdtab_dma); } - - dmam_free_coherent(smmu->dev, l1size, cd_table->cdtab, cd_table->cdtab_dma); - cd_table->cdtab_dma = 0; - cd_table->cdtab = NULL; } /* Stream table manipulation functions */ -static void arm_smmu_write_strtab_l1_desc(__le64 *dst, dma_addr_t l2ptr_dma) +static void arm_smmu_write_strtab_l1_desc(struct arm_smmu_strtab_l1 *dst, + dma_addr_t l2ptr_dma) { u64 val = 0; @@ -1466,7 +1494,7 @@ static void arm_smmu_write_strtab_l1_desc(__le64 *dst, dma_addr_t l2ptr_dma) val |= l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK; /* The HW has 64 bit atomicity with stores to the L2 STE table */ - WRITE_ONCE(*dst, cpu_to_le64(val)); + WRITE_ONCE(dst->l2ptr, cpu_to_le64(val)); } struct arm_smmu_ste_writer { @@ -1646,6 +1674,7 @@ void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, STRTAB_STE_2_S2ENDI | #endif STRTAB_STE_2_S2PTW | + (master->stall_enabled ?
STRTAB_STE_2_S2S : 0) | STRTAB_STE_2_S2R); target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s2_cfg.vttbr & @@ -1670,29 +1699,27 @@ static void arm_smmu_init_initial_stes(struct arm_smmu_ste *strtab, static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) { - size_t size; - void *strtab; dma_addr_t l2ptr_dma; struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; - struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT]; + struct arm_smmu_strtab_l2 **l2table; - if (desc->l2ptr) + l2table = &cfg->l2.l2ptrs[arm_smmu_strtab_l1_idx(sid)]; + if (*l2table) return 0; - size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3); - strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS]; - - desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &l2ptr_dma, - GFP_KERNEL); - if (!desc->l2ptr) { + *l2table = dmam_alloc_coherent(smmu->dev, sizeof(**l2table), + &l2ptr_dma, GFP_KERNEL); + if (!*l2table) { dev_err(smmu->dev, "failed to allocate l2 stream table for SID %u\n", sid); return -ENOMEM; } - arm_smmu_init_initial_stes(desc->l2ptr, 1 << STRTAB_SPLIT); - arm_smmu_write_strtab_l1_desc(strtab, l2ptr_dma); + arm_smmu_init_initial_stes((*l2table)->stes, + ARRAY_SIZE((*l2table)->stes)); + arm_smmu_write_strtab_l1_desc(&cfg->l2.l1tab[arm_smmu_strtab_l1_idx(sid)], + l2ptr_dma); return 0; } @@ -1750,10 +1777,6 @@ static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt) return -EOPNOTSUPP; } - /* Stage-2 is always pinned at the moment */ - if (evt[1] & EVTQ_1_S2) - return -EFAULT; - if (!(evt[1] & EVTQ_1_STALL)) return -EOPNOTSUPP; @@ -1788,7 +1811,7 @@ static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt) goto out_unlock; } - iommu_report_device_fault(master->dev, &fault_evt); + ret = iommu_report_device_fault(master->dev, &fault_evt); out_unlock: mutex_unlock(&smmu->streams_mutex); return ret; @@ -2032,7 +2055,7 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master, arm_smmu_atc_inv_to_cmd(ssid, 0, 0, &cmd); - cmds.num = 0; + arm_smmu_cmdq_batch_init(master->smmu, &cmds, &cmd); for (i = 0; i < master->num_streams; i++) { cmd.atc.sid = master->streams[i].id; arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd); @@ -2047,7 +2070,9 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, struct arm_smmu_master_domain *master_domain; int i; unsigned long flags; - struct arm_smmu_cmdq_ent cmd; + struct arm_smmu_cmdq_ent cmd = { + .opcode = CMDQ_OP_ATC_INV, + }; struct arm_smmu_cmdq_batch cmds; if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS)) @@ -2070,7 +2095,7 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, if (!atomic_read(&smmu_domain->nr_ats_masters)) return 0; - cmds.num = 0; + arm_smmu_cmdq_batch_init(smmu_domain->smmu, &cmds, &cmd); spin_lock_irqsave(&smmu_domain->devices_lock, flags); list_for_each_entry(master_domain, &smmu_domain->devices, @@ -2152,7 +2177,7 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd, num_pages++; } - cmds.num = 0; + arm_smmu_cmdq_batch_init(smmu, &cmds, cmd); while (iova < end) { if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) { @@ -2449,16 +2474,12 @@ arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid) struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) { - unsigned int idx1, idx2; - /* Two-level walk */ - idx1 = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS; - idx2 = sid & ((1 << STRTAB_SPLIT) - 1); - return &cfg->l1_desc[idx1].l2ptr[idx2]; + return 
&cfg->l2.l2ptrs[arm_smmu_strtab_l1_idx(sid)] + ->stes[arm_smmu_strtab_l2_idx(sid)]; } else { /* Simple linear lookup */ - return (struct arm_smmu_ste *)&cfg - ->strtab[sid * STRTAB_STE_DWORDS]; + return &cfg->linear.table[sid]; } } @@ -3073,8 +3094,8 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags, return ERR_PTR(-EOPNOTSUPP); smmu_domain = arm_smmu_domain_alloc(); - if (!smmu_domain) - return ERR_PTR(-ENOMEM); + if (IS_ERR(smmu_domain)) + return ERR_CAST(smmu_domain); smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED; smmu_domain->domain.ops = arm_smmu_ops.default_domain_ops; @@ -3158,12 +3179,9 @@ struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode) static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid) { - unsigned long limit = smmu->strtab_cfg.num_l1_ents; if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) - limit *= 1UL << STRTAB_SPLIT; - - return sid < limit; + return arm_smmu_strtab_l1_idx(sid) < smmu->strtab_cfg.l2.num_l1_ents; + return sid < smmu->strtab_cfg.linear.num_ents; } static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid) @@ -3300,6 +3318,12 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev) smmu->features & ARM_SMMU_FEAT_STALL_FORCE) master->stall_enabled = true; + if (dev_is_pci(dev)) { + unsigned int stu = __ffs(smmu->pgsize_bitmap); + + pci_prepare_ats(to_pci_dev(dev), stu); + } + return &smmu->iommu; err_free_master: @@ -3322,7 +3346,7 @@ static void arm_smmu_release_device(struct device *dev) arm_smmu_disable_pasid(master); arm_smmu_remove_master(master); - if (master->cd_table.cdtab) + if (arm_smmu_cdtab_allocated(&master->cd_table)) arm_smmu_free_cd_tables(master); kfree(master); } @@ -3512,12 +3536,10 @@ static struct iommu_dirty_ops arm_smmu_dirty_ops = { }; /* Probing and initialisation functions */ -static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, - struct arm_smmu_queue *q, - void __iomem *page, - unsigned long prod_off, - unsigned long cons_off, - size_t dwords, const char *name) +int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, + struct arm_smmu_queue *q, void __iomem *page, + unsigned long prod_off, unsigned long cons_off, + size_t dwords, const char *name) { size_t qsz; @@ -3555,9 +3577,9 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, return 0; } -static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu) +int arm_smmu_cmdq_init(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq) { - struct arm_smmu_cmdq *cmdq = &smmu->cmdq; unsigned int nents = 1 << cmdq->q.llq.max_n_shift; atomic_set(&cmdq->owner_prod, 0); @@ -3582,7 +3604,7 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu) if (ret) return ret; - ret = arm_smmu_cmdq_init(smmu); + ret = arm_smmu_cmdq_init(smmu, &smmu->cmdq); if (ret) return ret; @@ -3611,42 +3633,32 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu) static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) { - void *strtab; - u64 reg; - u32 size, l1size; + u32 l1size; struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; + unsigned int last_sid_idx = + arm_smmu_strtab_l1_idx((1 << smmu->sid_bits) - 1); /* Calculate the L1 size, capped to the SIDSIZE.
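[Editor's note] The arithmetic behind arm_smmu_strtab_l1_idx()/arm_smmu_strtab_l2_idx() and the range check in the corrected arm_smmu_sid_in_range() is worth spelling out. A hypothetical standalone demo (STRTAB_SPLIT is 8 in this driver; names here are local stand-ins):

```c
#include <assert.h>

#define STRTAB_SPLIT		8
#define STRTAB_NUM_L2_STES	(1u << STRTAB_SPLIT)

static unsigned int l1_idx(unsigned int sid) { return sid / STRTAB_NUM_L2_STES; }
static unsigned int l2_idx(unsigned int sid) { return sid % STRTAB_NUM_L2_STES; }

int main(void)
{
	unsigned int num_l1_ents = 256;	/* covers SIDs 0..0xffff */

	/* SID 0x1234 -> L1 slot 0x12, STE 0x34 within that L2 table */
	assert(l1_idx(0x1234) == 0x12 && l2_idx(0x1234) == 0x34);
	/* the 2-level range check mirrors arm_smmu_sid_in_range() above */
	assert(l1_idx(0xffff) < num_l1_ents);
	assert(!(l1_idx(0x10000) < num_l1_ents));
	return 0;
}
```
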
*/ - size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3); - size = min(size, smmu->sid_bits - STRTAB_SPLIT); - cfg->num_l1_ents = 1 << size; - - size += STRTAB_SPLIT; - if (size < smmu->sid_bits) + cfg->l2.num_l1_ents = min(last_sid_idx + 1, STRTAB_MAX_L1_ENTRIES); + if (cfg->l2.num_l1_ents <= last_sid_idx) dev_warn(smmu->dev, "2-level strtab only covers %u/%u bits of SID\n", - size, smmu->sid_bits); + ilog2(cfg->l2.num_l1_ents * STRTAB_NUM_L2_STES), + smmu->sid_bits); - l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3); - strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma, - GFP_KERNEL); - if (!strtab) { + l1size = cfg->l2.num_l1_ents * sizeof(struct arm_smmu_strtab_l1); + cfg->l2.l1tab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->l2.l1_dma, + GFP_KERNEL); + if (!cfg->l2.l1tab) { dev_err(smmu->dev, "failed to allocate l1 stream table (%u bytes)\n", l1size); return -ENOMEM; } - cfg->strtab = strtab; - /* Configure strtab_base_cfg for 2 levels */ - reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL); - reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size); - reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT); - cfg->strtab_base_cfg = reg; - - cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents, - sizeof(*cfg->l1_desc), GFP_KERNEL); - if (!cfg->l1_desc) + cfg->l2.l2ptrs = devm_kcalloc(smmu->dev, cfg->l2.num_l1_ents, + sizeof(*cfg->l2.l2ptrs), GFP_KERNEL); + if (!cfg->l2.l2ptrs) return -ENOMEM; return 0; @@ -3654,50 +3666,36 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu) { - void *strtab; - u64 reg; u32 size; struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; - size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3); - strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma, - GFP_KERNEL); - if (!strtab) { + size = (1 << smmu->sid_bits) * sizeof(struct arm_smmu_ste); + cfg->linear.table = dmam_alloc_coherent(smmu->dev, size, + &cfg->linear.ste_dma, + GFP_KERNEL); + if (!cfg->linear.table) { dev_err(smmu->dev, "failed to allocate linear stream table (%u bytes)\n", size); return -ENOMEM; } - cfg->strtab = strtab; - cfg->num_l1_ents = 1 << smmu->sid_bits; - - /* Configure strtab_base_cfg for a linear table covering all SIDs */ - reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR); - reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits); - cfg->strtab_base_cfg = reg; + cfg->linear.num_ents = 1 << smmu->sid_bits; - arm_smmu_init_initial_stes(strtab, cfg->num_l1_ents); + arm_smmu_init_initial_stes(cfg->linear.table, cfg->linear.num_ents); return 0; } static int arm_smmu_init_strtab(struct arm_smmu_device *smmu) { - u64 reg; int ret; if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) ret = arm_smmu_init_strtab_2lvl(smmu); else ret = arm_smmu_init_strtab_linear(smmu); - if (ret) return ret; - /* Set the strtab base address */ - reg = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK; - reg |= STRTAB_BASE_RA; - smmu->strtab_cfg.strtab_base = reg; - ida_init(&smmu->vmid_map); return 0; @@ -3714,7 +3712,14 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu) if (ret) return ret; - return arm_smmu_init_strtab(smmu); + ret = arm_smmu_init_strtab(smmu); + if (ret) + return ret; + + if (smmu->impl_ops && smmu->impl_ops->init_structures) + return smmu->impl_ops->init_structures(smmu); + + return 0; } static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val, @@ -3906,6 +3911,30 @@ static int 
arm_smmu_device_disable(struct arm_smmu_device *smmu) return ret; } +static void arm_smmu_write_strtab(struct arm_smmu_device *smmu) +{ + struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; + dma_addr_t dma; + u32 reg; + + if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) { + reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, + STRTAB_BASE_CFG_FMT_2LVL) | + FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, + ilog2(cfg->l2.num_l1_ents) + STRTAB_SPLIT) | + FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT); + dma = cfg->l2.l1_dma; + } else { + reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, + STRTAB_BASE_CFG_FMT_LINEAR) | + FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits); + dma = cfg->linear.ste_dma; + } + writeq_relaxed((dma & STRTAB_BASE_ADDR_MASK) | STRTAB_BASE_RA, + smmu->base + ARM_SMMU_STRTAB_BASE); + writel_relaxed(reg, smmu->base + ARM_SMMU_STRTAB_BASE_CFG); +} + static int arm_smmu_device_reset(struct arm_smmu_device *smmu) { int ret; @@ -3941,10 +3970,7 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu) writel_relaxed(reg, smmu->base + ARM_SMMU_CR2); /* Stream table */ - writeq_relaxed(smmu->strtab_cfg.strtab_base, - smmu->base + ARM_SMMU_STRTAB_BASE); - writel_relaxed(smmu->strtab_cfg.strtab_base_cfg, - smmu->base + ARM_SMMU_STRTAB_BASE_CFG); + arm_smmu_write_strtab(smmu); /* Command queue */ writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE); @@ -4031,6 +4057,14 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu) return ret; } + if (smmu->impl_ops && smmu->impl_ops->device_reset) { + ret = smmu->impl_ops->device_reset(smmu); + if (ret) { + dev_err(smmu->dev, "failed to reset impl\n"); + return ret; + } + } + return 0; } @@ -4320,18 +4354,55 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) } #ifdef CONFIG_ACPI -static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu) +#ifdef CONFIG_TEGRA241_CMDQV +static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node, + struct arm_smmu_device *smmu) +{ + const char *uid = kasprintf(GFP_KERNEL, "%u", node->identifier); + struct acpi_device *adev; + + /* Look for an NVDA200C node whose _UID matches the SMMU node ID */ + adev = acpi_dev_get_first_match_dev("NVDA200C", uid, -1); + if (adev) { + /* Tegra241 CMDQV driver is responsible for put_device() */ + smmu->impl_dev = &adev->dev; + smmu->options |= ARM_SMMU_OPT_TEGRA241_CMDQV; + dev_info(smmu->dev, "found companion CMDQV device: %s\n", + dev_name(smmu->impl_dev)); + } + kfree(uid); +} +#else +static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node, + struct arm_smmu_device *smmu) { - switch (model) { +} +#endif + +static int acpi_smmu_iort_probe_model(struct acpi_iort_node *node, + struct arm_smmu_device *smmu) +{ + struct acpi_iort_smmu_v3 *iort_smmu = + (struct acpi_iort_smmu_v3 *)node->node_data; + + switch (iort_smmu->model) { case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX: smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY; break; case ACPI_IORT_SMMU_V3_HISILICON_HI161X: smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH; break; + case ACPI_IORT_SMMU_V3_GENERIC: + /* + * Tegra241 implementation stores its SMMU options and impl_dev + * in DSDT. Thus, go through the ACPI tables unconditionally. 
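[Editor's note] The DSDT probe above builds a _UID string from the IORT node identifier and matches it against the NVDA200C _HID. acpi_dev_get_first_match_dev() and kasprintf() are real kernel APIs; the helper below is a hedged, simplified restatement of that lookup (error handling trimmed, function name is hypothetical):

```c
#include <linux/acpi.h>
#include <linux/slab.h>

/* Simplified restatement of the NVDA200C lookup; illustrative only. */
static struct device *cmdqv_find_companion(u32 iort_node_id)
{
	const char *uid = kasprintf(GFP_KERNEL, "%u", iort_node_id);
	struct acpi_device *adev;
	struct device *companion = NULL;

	if (!uid)
		return NULL;
	/* Match _HID "NVDA200C" whose _UID equals the SMMU's IORT node id */
	adev = acpi_dev_get_first_match_dev("NVDA200C", uid, -1);
	if (adev)
		companion = &adev->dev; /* holds a ref; put_device() later */
	kfree(uid);
	return companion;
}
```
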
+ */ + acpi_smmu_dsdt_probe_tegra241_cmdqv(node, smmu); + break; } dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options); + return 0; } static int arm_smmu_device_acpi_probe(struct platform_device *pdev, @@ -4346,8 +4417,6 @@ static int arm_smmu_device_acpi_probe(struct platform_device *pdev, /* Retrieve SMMUv3 specific data */ iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data; - acpi_smmu_get_options(iort_smmu->model, smmu); - if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE) smmu->features |= ARM_SMMU_FEAT_COHERENCY; @@ -4359,7 +4428,7 @@ static int arm_smmu_device_acpi_probe(struct platform_device *pdev, smmu->features |= ARM_SMMU_FEAT_HA; } - return 0; + return acpi_smmu_iort_probe_model(node, smmu); } #else static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev, @@ -4440,6 +4509,39 @@ static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu) iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list); } +static void arm_smmu_impl_remove(void *data) +{ + struct arm_smmu_device *smmu = data; + + if (smmu->impl_ops && smmu->impl_ops->device_remove) + smmu->impl_ops->device_remove(smmu); +} + +/* + * Probe all the compiled in implementations. Each one checks to see if it + * matches this HW and if so returns a devm_krealloc'd arm_smmu_device which + * replaces the callers. Otherwise the original is returned or ERR_PTR. + */ +static struct arm_smmu_device *arm_smmu_impl_probe(struct arm_smmu_device *smmu) +{ + struct arm_smmu_device *new_smmu = ERR_PTR(-ENODEV); + int ret; + + if (smmu->impl_dev && (smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV)) + new_smmu = tegra241_cmdqv_probe(smmu); + + if (new_smmu == ERR_PTR(-ENODEV)) + return smmu; + if (IS_ERR(new_smmu)) + return new_smmu; + + ret = devm_add_action_or_reset(new_smmu->dev, arm_smmu_impl_remove, + new_smmu); + if (ret) + return ERR_PTR(ret); + return new_smmu; +} + static int arm_smmu_device_probe(struct platform_device *pdev) { int irq, ret; @@ -4461,6 +4563,10 @@ static int arm_smmu_device_probe(struct platform_device *pdev) if (ret) return ret; + smmu = arm_smmu_impl_probe(smmu); + if (IS_ERR(smmu)) + return PTR_ERR(smmu); + /* Base address */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index b332536a00d438cc5fc6b42aa7c3e6b0a51870e0..e41fb067d06daf5fb35a4223ffb0855a4a2fe05c 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -14,6 +14,8 @@ #include #include +struct arm_smmu_device; + /* MMIO registers */ #define ARM_SMMU_IDR0 0x0 #define IDR0_ST_LVL GENMASK(28, 27) @@ -202,10 +204,8 @@ * 2lvl: 128k L1 entries, * 256 lazy entries per table (each table covers a PCI bus) */ -#define STRTAB_L1_SZ_SHIFT 20 #define STRTAB_SPLIT 8 -#define STRTAB_L1_DESC_DWORDS 1 #define STRTAB_L1_DESC_SPAN GENMASK_ULL(4, 0) #define STRTAB_L1_DESC_L2PTR_MASK GENMASK_ULL(51, 6) @@ -215,6 +215,26 @@ struct arm_smmu_ste { __le64 data[STRTAB_STE_DWORDS]; }; +#define STRTAB_NUM_L2_STES (1 << STRTAB_SPLIT) +struct arm_smmu_strtab_l2 { + struct arm_smmu_ste stes[STRTAB_NUM_L2_STES]; +}; + +struct arm_smmu_strtab_l1 { + __le64 l2ptr; +}; +#define STRTAB_MAX_L1_ENTRIES (1 << 17) + +static inline u32 arm_smmu_strtab_l1_idx(u32 sid) +{ + return sid / STRTAB_NUM_L2_STES; +} + +static inline u32 arm_smmu_strtab_l2_idx(u32 sid) +{ + return sid % STRTAB_NUM_L2_STES; +} + #define STRTAB_STE_0_V (1UL << 0) #define STRTAB_STE_0_CFG GENMASK_ULL(3, 1) 
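[Editor's note] STRTAB_MAX_L1_ENTRIES and the index helpers just defined drive the "capped to the SIDSIZE" sizing in arm_smmu_init_strtab_2lvl() earlier in the patch. A quick userspace sketch of that math (constants copied from the header, helper name hypothetical):

```c
#include <stdio.h>

#define STRTAB_SPLIT		8
#define STRTAB_NUM_L2_STES	(1u << STRTAB_SPLIT)
#define STRTAB_MAX_L1_ENTRIES	(1u << 17)

/* Mirrors: min(arm_smmu_strtab_l1_idx(max_sid) + 1, STRTAB_MAX_L1_ENTRIES) */
static unsigned int l1_entries(unsigned int sid_bits)
{
	unsigned long long max_sid = (1ull << sid_bits) - 1;
	unsigned long long n = max_sid / STRTAB_NUM_L2_STES + 1;

	return n < STRTAB_MAX_L1_ENTRIES ? (unsigned int)n
					 : STRTAB_MAX_L1_ENTRIES;
}

int main(void)
{
	printf("%u\n", l1_entries(16));	/* 256 L1 entries */
	printf("%u\n", l1_entries(32));	/* capped at 131072 */
	return 0;
}
```
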
#define STRTAB_STE_0_CFG_ABORT 0 @@ -267,6 +287,7 @@ struct arm_smmu_ste { #define STRTAB_STE_2_S2AA64 (1UL << 51) #define STRTAB_STE_2_S2ENDI (1UL << 52) #define STRTAB_STE_2_S2PTW (1UL << 54) +#define STRTAB_STE_2_S2S (1UL << 57) #define STRTAB_STE_2_S2R (1UL << 58) #define STRTAB_STE_3_S2TTB_MASK GENMASK_ULL(51, 4) @@ -280,7 +301,6 @@ struct arm_smmu_ste { */ #define CTXDESC_L2_ENTRIES 1024 -#define CTXDESC_L1_DESC_DWORDS 1 #define CTXDESC_L1_DESC_V (1UL << 0) #define CTXDESC_L1_DESC_L2PTR_MASK GENMASK_ULL(51, 12) @@ -290,6 +310,24 @@ struct arm_smmu_cd { __le64 data[CTXDESC_CD_DWORDS]; }; +struct arm_smmu_cdtab_l2 { + struct arm_smmu_cd cds[CTXDESC_L2_ENTRIES]; +}; + +struct arm_smmu_cdtab_l1 { + __le64 l2ptr; +}; + +static inline unsigned int arm_smmu_cdtab_l1_idx(unsigned int ssid) +{ + return ssid / CTXDESC_L2_ENTRIES; +} + +static inline unsigned int arm_smmu_cdtab_l2_idx(unsigned int ssid) +{ + return ssid % CTXDESC_L2_ENTRIES; +} + #define CTXDESC_CD_0_TCR_T0SZ GENMASK_ULL(5, 0) #define CTXDESC_CD_0_TCR_TG0 GENMASK_ULL(7, 6) #define CTXDESC_CD_0_TCR_IRGN0 GENMASK_ULL(9, 8) @@ -320,7 +358,7 @@ struct arm_smmu_cd { * When the SMMU only supports linear context descriptor tables, pick a * reasonable size limit (64kB). */ -#define CTXDESC_LINEAR_CDMAX ilog2(SZ_64K / (CTXDESC_CD_DWORDS << 3)) +#define CTXDESC_LINEAR_CDMAX ilog2(SZ_64K / sizeof(struct arm_smmu_cd)) /* Command queue */ #define CMDQ_ENT_SZ_SHIFT 4 @@ -566,10 +604,18 @@ struct arm_smmu_cmdq { atomic_long_t *valid_map; atomic_t owner_prod; atomic_t lock; + bool (*supports_cmd)(struct arm_smmu_cmdq_ent *ent); }; +static inline bool arm_smmu_cmdq_supports_cmd(struct arm_smmu_cmdq *cmdq, + struct arm_smmu_cmdq_ent *ent) +{ + return cmdq->supports_cmd ? cmdq->supports_cmd(ent) : true; +} + struct arm_smmu_cmdq_batch { u64 cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS]; + struct arm_smmu_cmdq *cmdq; int num; }; @@ -584,24 +630,23 @@ struct arm_smmu_priq { }; /* High-level stream table and context descriptor structures */ -struct arm_smmu_strtab_l1_desc { - struct arm_smmu_ste *l2ptr; -}; - struct arm_smmu_ctx_desc { u16 asid; }; -struct arm_smmu_l1_ctx_desc { - struct arm_smmu_cd *l2ptr; - dma_addr_t l2ptr_dma; -}; - struct arm_smmu_ctx_desc_cfg { - __le64 *cdtab; + union { + struct { + struct arm_smmu_cd *table; + unsigned int num_ents; + } linear; + struct { + struct arm_smmu_cdtab_l1 *l1tab; + struct arm_smmu_cdtab_l2 **l2ptrs; + unsigned int num_l1_ents; + } l2; + }; dma_addr_t cdtab_dma; - struct arm_smmu_l1_ctx_desc *l1_desc; - unsigned int num_l1_ents; unsigned int used_ssids; u8 in_ste; u8 s1fmt; @@ -609,6 +654,12 @@ struct arm_smmu_ctx_desc_cfg { u8 s1cdmax; }; +static inline bool +arm_smmu_cdtab_allocated(struct arm_smmu_ctx_desc_cfg *cfg) +{ + return cfg->linear.table || cfg->l2.l1tab; +} + /* True if the cd table has SSIDS > 0 in use. 
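[Editor's note] arm_smmu_cdtab_allocated() above can test either union member because both layouts put their table pointer first, so the two fields overlay the same storage and a zero-initialised cfg reads NULL either way. A hypothetical standalone model of that detection:

```c
#include <assert.h>
#include <stddef.h>

struct cd_cfg {
	union {
		struct { void *table; unsigned int num_ents; } linear;
		struct { void *l1tab; void **l2ptrs; unsigned int num_l1_ents; } l2;
	};
};

static int cdtab_allocated(const struct cd_cfg *cfg)
{
	/* linear.table and l2.l1tab alias the same first pointer */
	return cfg->linear.table || cfg->l2.l1tab;
}

int main(void)
{
	struct cd_cfg cfg = {0};
	int dummy;

	assert(!cdtab_allocated(&cfg));
	cfg.l2.l1tab = &dummy;		/* any allocation flips the check */
	assert(cdtab_allocated(&cfg));
	return 0;
}
```
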
*/ static inline bool arm_smmu_ssids_in_use(struct arm_smmu_ctx_desc_cfg *cd_table) { @@ -620,18 +671,35 @@ struct arm_smmu_s2_cfg { }; struct arm_smmu_strtab_cfg { - __le64 *strtab; - dma_addr_t strtab_dma; - struct arm_smmu_strtab_l1_desc *l1_desc; - unsigned int num_l1_ents; + union { + struct { + struct arm_smmu_ste *table; + dma_addr_t ste_dma; + unsigned int num_ents; + } linear; + struct { + struct arm_smmu_strtab_l1 *l1tab; + struct arm_smmu_strtab_l2 **l2ptrs; + dma_addr_t l1_dma; + unsigned int num_l1_ents; + } l2; + }; +}; - u64 strtab_base; - u32 strtab_base_cfg; +struct arm_smmu_impl_ops { + int (*device_reset)(struct arm_smmu_device *smmu); + void (*device_remove)(struct arm_smmu_device *smmu); + int (*init_structures)(struct arm_smmu_device *smmu); + struct arm_smmu_cmdq *(*get_secondary_cmdq)( + struct arm_smmu_device *smmu, struct arm_smmu_cmdq_ent *ent); }; /* An SMMUv3 instance */ struct arm_smmu_device { struct device *dev; + struct device *impl_dev; + const struct arm_smmu_impl_ops *impl_ops; + void __iomem *base; void __iomem *page1; @@ -664,6 +732,7 @@ struct arm_smmu_device { #define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1) #define ARM_SMMU_OPT_MSIPOLL (1 << 2) #define ARM_SMMU_OPT_CMDQ_FORCE_SYNC (1 << 3) +#define ARM_SMMU_OPT_TEGRA241_CMDQV (1 << 4) u32 options; struct arm_smmu_cmdq cmdq; @@ -815,6 +884,15 @@ void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, unsigned long iova, size_t size); +void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq); +int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, + struct arm_smmu_queue *q, void __iomem *page, + unsigned long prod_off, unsigned long cons_off, + size_t dwords, const char *name); +int arm_smmu_cmdq_init(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq); + #ifdef CONFIG_ARM_SMMU_V3_SVA bool arm_smmu_sva_supported(struct arm_smmu_device *smmu); bool arm_smmu_master_sva_supported(struct arm_smmu_master *master); @@ -860,10 +938,15 @@ static inline void arm_smmu_sva_notifier_synchronize(void) {} #define arm_smmu_sva_domain_alloc NULL -static inline void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, - struct device *dev, - ioasid_t id) +#endif /* CONFIG_ARM_SMMU_V3_SVA */ + +#ifdef CONFIG_TEGRA241_CMDQV +struct arm_smmu_device *tegra241_cmdqv_probe(struct arm_smmu_device *smmu); +#else /* CONFIG_TEGRA241_CMDQV */ +static inline struct arm_smmu_device * +tegra241_cmdqv_probe(struct arm_smmu_device *smmu) { + return ERR_PTR(-ENODEV); } -#endif /* CONFIG_ARM_SMMU_V3_SVA */ +#endif /* CONFIG_TEGRA241_CMDQV */ #endif /* _ARM_SMMU_V3_H */ diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c new file mode 100644 index 0000000000000000000000000000000000000000..fcd13d301fff68b59b7488a330e79f69cbf856cc --- /dev/null +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -0,0 +1,909 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2021-2024 NVIDIA CORPORATION & AFFILIATES. 
*/ + +#define dev_fmt(fmt) "tegra241_cmdqv: " fmt + +#include <linux/acpi.h> +#include <linux/debugfs.h> +#include <linux/dma-mapping.h> +#include <linux/interrupt.h> +#include <linux/iommu.h> +#include <linux/iopoll.h> + +#include <acpi/acpixf.h> + +#include "arm-smmu-v3.h" + +/* CMDQV register page base and size defines */ +#define TEGRA241_CMDQV_CONFIG_BASE (0) +#define TEGRA241_CMDQV_CONFIG_SIZE (SZ_64K) +#define TEGRA241_VCMDQ_PAGE0_BASE (TEGRA241_CMDQV_CONFIG_BASE + SZ_64K) +#define TEGRA241_VCMDQ_PAGE1_BASE (TEGRA241_VCMDQ_PAGE0_BASE + SZ_64K) +#define TEGRA241_VINTF_PAGE_BASE (TEGRA241_VCMDQ_PAGE1_BASE + SZ_64K) + +/* CMDQV global base regs */ +#define TEGRA241_CMDQV_CONFIG 0x0000 +#define CMDQV_EN BIT(0) + +#define TEGRA241_CMDQV_PARAM 0x0004 +#define CMDQV_NUM_VINTF_LOG2 GENMASK(11, 8) +#define CMDQV_NUM_VCMDQ_LOG2 GENMASK(7, 4) + +#define TEGRA241_CMDQV_STATUS 0x0008 +#define CMDQV_ENABLED BIT(0) + +#define TEGRA241_CMDQV_VINTF_ERR_MAP 0x0014 +#define TEGRA241_CMDQV_VINTF_INT_MASK 0x001C +#define TEGRA241_CMDQV_CMDQ_ERR_MAP(m) (0x0024 + 0x4*(m)) + +#define TEGRA241_CMDQV_CMDQ_ALLOC(q) (0x0200 + 0x4*(q)) +#define CMDQV_CMDQ_ALLOC_VINTF GENMASK(20, 15) +#define CMDQV_CMDQ_ALLOC_LVCMDQ GENMASK(7, 1) +#define CMDQV_CMDQ_ALLOCATED BIT(0) + +/* VINTF base regs */ +#define TEGRA241_VINTF(v) (0x1000 + 0x100*(v)) + +#define TEGRA241_VINTF_CONFIG 0x0000 +#define VINTF_HYP_OWN BIT(17) +#define VINTF_VMID GENMASK(16, 1) +#define VINTF_EN BIT(0) + +#define TEGRA241_VINTF_STATUS 0x0004 +#define VINTF_STATUS GENMASK(3, 1) +#define VINTF_ENABLED BIT(0) + +#define TEGRA241_VINTF_LVCMDQ_ERR_MAP_64(m) \ + (0x00C0 + 0x8*(m)) +#define LVCMDQ_ERR_MAP_NUM_64 2 + +/* VCMDQ base regs */ +/* -- PAGE0 -- */ +#define TEGRA241_VCMDQ_PAGE0(q) (TEGRA241_VCMDQ_PAGE0_BASE + 0x80*(q)) + +#define TEGRA241_VCMDQ_CONS 0x00000 +#define VCMDQ_CONS_ERR GENMASK(30, 24) + +#define TEGRA241_VCMDQ_PROD 0x00004 + +#define TEGRA241_VCMDQ_CONFIG 0x00008 +#define VCMDQ_EN BIT(0) + +#define TEGRA241_VCMDQ_STATUS 0x0000C +#define VCMDQ_ENABLED BIT(0) + +#define TEGRA241_VCMDQ_GERROR 0x00010 +#define TEGRA241_VCMDQ_GERRORN 0x00014 + +/* -- PAGE1 -- */ +#define TEGRA241_VCMDQ_PAGE1(q) (TEGRA241_VCMDQ_PAGE1_BASE + 0x80*(q)) +#define VCMDQ_ADDR GENMASK(47, 5) +#define VCMDQ_LOG2SIZE GENMASK(4, 0) +#define VCMDQ_LOG2SIZE_MAX 19 + +#define TEGRA241_VCMDQ_BASE 0x00000 +#define TEGRA241_VCMDQ_CONS_INDX_BASE 0x00008 + +/* VINTF logical-VCMDQ pages */ +#define TEGRA241_VINTFi_PAGE0(i) (TEGRA241_VINTF_PAGE_BASE + SZ_128K*(i)) +#define TEGRA241_VINTFi_PAGE1(i) (TEGRA241_VINTFi_PAGE0(i) + SZ_64K) +#define TEGRA241_VINTFi_LVCMDQ_PAGE0(i, q) \ + (TEGRA241_VINTFi_PAGE0(i) + 0x80*(q)) +#define TEGRA241_VINTFi_LVCMDQ_PAGE1(i, q) \ + (TEGRA241_VINTFi_PAGE1(i) + 0x80*(q)) + +/* MMIO helpers */ +#define REG_CMDQV(_cmdqv, _regname) \ + ((_cmdqv)->base + TEGRA241_CMDQV_##_regname) +#define REG_VINTF(_vintf, _regname) \ + ((_vintf)->base + TEGRA241_VINTF_##_regname) +#define REG_VCMDQ_PAGE0(_vcmdq, _regname) \ + ((_vcmdq)->page0 + TEGRA241_VCMDQ_##_regname) +#define REG_VCMDQ_PAGE1(_vcmdq, _regname) \ + ((_vcmdq)->page1 + TEGRA241_VCMDQ_##_regname) + + +static bool disable_cmdqv; +module_param(disable_cmdqv, bool, 0444); +MODULE_PARM_DESC(disable_cmdqv, + "This allows to disable CMDQV HW and use default SMMU internal CMDQ."); + +static bool bypass_vcmdq; +module_param(bypass_vcmdq, bool, 0444); +MODULE_PARM_DESC(bypass_vcmdq, + "This allows to bypass VCMDQ for debugging use or perf comparison."); + +/** + * struct tegra241_vcmdq - Virtual Command Queue + * @idx: Global index in the CMDQV + * @lidx: Local index in the VINTF + * @enabled: Enable status + * @cmdqv: Parent
CMDQV pointer + * @vintf: Parent VINTF pointer + * @cmdq: Command Queue struct + * @page0: MMIO Page0 base address + * @page1: MMIO Page1 base address + */ +struct tegra241_vcmdq { + u16 idx; + u16 lidx; + + bool enabled; + + struct tegra241_cmdqv *cmdqv; + struct tegra241_vintf *vintf; + struct arm_smmu_cmdq cmdq; + + void __iomem *page0; + void __iomem *page1; +}; + +/** + * struct tegra241_vintf - Virtual Interface + * @idx: Global index in the CMDQV + * @enabled: Enable status + * @hyp_own: Owned by hypervisor (in-kernel) + * @cmdqv: Parent CMDQV pointer + * @lvcmdqs: List of logical VCMDQ pointers + * @base: MMIO base address + */ +struct tegra241_vintf { + u16 idx; + + bool enabled; + bool hyp_own; + + struct tegra241_cmdqv *cmdqv; + struct tegra241_vcmdq **lvcmdqs; + + void __iomem *base; +}; + +/** + * struct tegra241_cmdqv - CMDQ-V for SMMUv3 + * @smmu: SMMUv3 device + * @dev: CMDQV device + * @base: MMIO base address + * @irq: IRQ number + * @num_vintfs: Total number of VINTFs + * @num_vcmdqs: Total number of VCMDQs + * @num_lvcmdqs_per_vintf: Number of logical VCMDQs per VINTF + * @vintf_ids: VINTF id allocator + * @vintfs: List of VINTFs + */ +struct tegra241_cmdqv { + struct arm_smmu_device smmu; + struct device *dev; + + void __iomem *base; + int irq; + + /* CMDQV Hardware Params */ + u16 num_vintfs; + u16 num_vcmdqs; + u16 num_lvcmdqs_per_vintf; + + struct ida vintf_ids; + + struct tegra241_vintf **vintfs; +}; + +/* Config and Polling Helpers */ + +static inline int tegra241_cmdqv_write_config(struct tegra241_cmdqv *cmdqv, + void __iomem *addr_config, + void __iomem *addr_status, + u32 regval, const char *header, + bool *out_enabled) +{ + bool en = regval & BIT(0); + int ret; + + writel(regval, addr_config); + ret = readl_poll_timeout(addr_status, regval, + en ? regval & BIT(0) : !(regval & BIT(0)), + 1, ARM_SMMU_POLL_TIMEOUT_US); + if (ret) + dev_err(cmdqv->dev, "%sfailed to %sable, STATUS=0x%08X\n", + header, en ? 
"en" : "dis", regval); + if (out_enabled) + WRITE_ONCE(*out_enabled, regval & BIT(0)); + return ret; +} + +static inline int cmdqv_write_config(struct tegra241_cmdqv *cmdqv, u32 regval) +{ + return tegra241_cmdqv_write_config(cmdqv, + REG_CMDQV(cmdqv, CONFIG), + REG_CMDQV(cmdqv, STATUS), + regval, "CMDQV: ", NULL); +} + +static inline int vintf_write_config(struct tegra241_vintf *vintf, u32 regval) +{ + char header[16]; + + snprintf(header, 16, "VINTF%u: ", vintf->idx); + return tegra241_cmdqv_write_config(vintf->cmdqv, + REG_VINTF(vintf, CONFIG), + REG_VINTF(vintf, STATUS), + regval, header, &vintf->enabled); +} + +static inline char *lvcmdq_error_header(struct tegra241_vcmdq *vcmdq, + char *header, int hlen) +{ + WARN_ON(hlen < 64); + if (WARN_ON(!vcmdq->vintf)) + return ""; + snprintf(header, hlen, "VINTF%u: VCMDQ%u/LVCMDQ%u: ", + vcmdq->vintf->idx, vcmdq->idx, vcmdq->lidx); + return header; +} + +static inline int vcmdq_write_config(struct tegra241_vcmdq *vcmdq, u32 regval) +{ + char header[64], *h = lvcmdq_error_header(vcmdq, header, 64); + + return tegra241_cmdqv_write_config(vcmdq->cmdqv, + REG_VCMDQ_PAGE0(vcmdq, CONFIG), + REG_VCMDQ_PAGE0(vcmdq, STATUS), + regval, h, &vcmdq->enabled); +} + +/* ISR Functions */ + +static void tegra241_vintf0_handle_error(struct tegra241_vintf *vintf) +{ + int i; + + for (i = 0; i < LVCMDQ_ERR_MAP_NUM_64; i++) { + u64 map = readq_relaxed(REG_VINTF(vintf, LVCMDQ_ERR_MAP_64(i))); + + while (map) { + unsigned long lidx = __ffs64(map); + struct tegra241_vcmdq *vcmdq = vintf->lvcmdqs[lidx]; + u32 gerror = readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR)); + + __arm_smmu_cmdq_skip_err(&vintf->cmdqv->smmu, &vcmdq->cmdq); + writel(gerror, REG_VCMDQ_PAGE0(vcmdq, GERRORN)); + map &= ~BIT_ULL(lidx); + } + } +} + +static irqreturn_t tegra241_cmdqv_isr(int irq, void *devid) +{ + struct tegra241_cmdqv *cmdqv = (struct tegra241_cmdqv *)devid; + void __iomem *reg_vintf_map = REG_CMDQV(cmdqv, VINTF_ERR_MAP); + char err_str[256]; + u64 vintf_map; + + /* Use readl_relaxed() as register addresses are not 64-bit aligned */ + vintf_map = (u64)readl_relaxed(reg_vintf_map + 0x4) << 32 | + (u64)readl_relaxed(reg_vintf_map); + + snprintf(err_str, sizeof(err_str), + "vintf_map: %016llx, vcmdq_map %08x:%08x:%08x:%08x", vintf_map, + readl_relaxed(REG_CMDQV(cmdqv, CMDQ_ERR_MAP(3))), + readl_relaxed(REG_CMDQV(cmdqv, CMDQ_ERR_MAP(2))), + readl_relaxed(REG_CMDQV(cmdqv, CMDQ_ERR_MAP(1))), + readl_relaxed(REG_CMDQV(cmdqv, CMDQ_ERR_MAP(0)))); + + dev_warn(cmdqv->dev, "unexpected error reported. %s\n", err_str); + + /* Handle VINTF0 and its LVCMDQs */ + if (vintf_map & BIT_ULL(0)) { + tegra241_vintf0_handle_error(cmdqv->vintfs[0]); + vintf_map &= ~BIT_ULL(0); + } + + return IRQ_HANDLED; +} + +/* Command Queue Function */ + +static bool tegra241_guest_vcmdq_supports_cmd(struct arm_smmu_cmdq_ent *ent) +{ + switch (ent->opcode) { + case CMDQ_OP_TLBI_NH_ASID: + case CMDQ_OP_TLBI_NH_VA: + case CMDQ_OP_ATC_INV: + return true; + default: + return false; + } +} + +static struct arm_smmu_cmdq * +tegra241_cmdqv_get_cmdq(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq_ent *ent) +{ + struct tegra241_cmdqv *cmdqv = + container_of(smmu, struct tegra241_cmdqv, smmu); + struct tegra241_vintf *vintf = cmdqv->vintfs[0]; + struct tegra241_vcmdq *vcmdq; + u16 lidx; + + if (READ_ONCE(bypass_vcmdq)) + return NULL; + + /* Use SMMU CMDQ if VINTF0 is uninitialized */ + if (!READ_ONCE(vintf->enabled)) + return NULL; + + /* + * Select a LVCMDQ to use. 
Here we use a temporal solution to + * balance out traffic on cmdq issuing: each cmdq has its own + * lock, if all cpus issue cmdlist using the same cmdq, only + * one CPU at a time can enter the process, while the others + * will be spinning at the same lock. + */ + lidx = smp_processor_id() % cmdqv->num_lvcmdqs_per_vintf; + vcmdq = vintf->lvcmdqs[lidx]; + if (!vcmdq || !READ_ONCE(vcmdq->enabled)) + return NULL; + + /* Unsupported CMD goes for smmu->cmdq pathway */ + if (!arm_smmu_cmdq_supports_cmd(&vcmdq->cmdq, ent)) + return NULL; + return &vcmdq->cmdq; +} + +/* HW Reset Functions */ + +static void tegra241_vcmdq_hw_deinit(struct tegra241_vcmdq *vcmdq) +{ + char header[64], *h = lvcmdq_error_header(vcmdq, header, 64); + u32 gerrorn, gerror; + + if (vcmdq_write_config(vcmdq, 0)) { + dev_err(vcmdq->cmdqv->dev, + "%sGERRORN=0x%X, GERROR=0x%X, CONS=0x%X\n", h, + readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERRORN)), + readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR)), + readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, CONS))); + } + writel_relaxed(0, REG_VCMDQ_PAGE0(vcmdq, PROD)); + writel_relaxed(0, REG_VCMDQ_PAGE0(vcmdq, CONS)); + writeq_relaxed(0, REG_VCMDQ_PAGE1(vcmdq, BASE)); + writeq_relaxed(0, REG_VCMDQ_PAGE1(vcmdq, CONS_INDX_BASE)); + + gerrorn = readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERRORN)); + gerror = readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR)); + if (gerror != gerrorn) { + dev_warn(vcmdq->cmdqv->dev, + "%suncleared error detected, resetting\n", h); + writel(gerror, REG_VCMDQ_PAGE0(vcmdq, GERRORN)); + } + + dev_dbg(vcmdq->cmdqv->dev, "%sdeinited\n", h); +} + +static int tegra241_vcmdq_hw_init(struct tegra241_vcmdq *vcmdq) +{ + char header[64], *h = lvcmdq_error_header(vcmdq, header, 64); + int ret; + + /* Reset VCMDQ */ + tegra241_vcmdq_hw_deinit(vcmdq); + + /* Configure and enable VCMDQ */ + writeq_relaxed(vcmdq->cmdq.q.q_base, REG_VCMDQ_PAGE1(vcmdq, BASE)); + + ret = vcmdq_write_config(vcmdq, VCMDQ_EN); + if (ret) { + dev_err(vcmdq->cmdqv->dev, + "%sGERRORN=0x%X, GERROR=0x%X, CONS=0x%X\n", h, + readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERRORN)), + readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR)), + readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, CONS))); + return ret; + } + + dev_dbg(vcmdq->cmdqv->dev, "%sinited\n", h); + return 0; +} + +static void tegra241_vintf_hw_deinit(struct tegra241_vintf *vintf) +{ + u16 lidx; + + for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++) + if (vintf->lvcmdqs && vintf->lvcmdqs[lidx]) + tegra241_vcmdq_hw_deinit(vintf->lvcmdqs[lidx]); + vintf_write_config(vintf, 0); +} + +static int tegra241_vintf_hw_init(struct tegra241_vintf *vintf, bool hyp_own) +{ + u32 regval; + u16 lidx; + int ret; + + /* Reset VINTF */ + tegra241_vintf_hw_deinit(vintf); + + /* Configure and enable VINTF */ + /* + * Note that HYP_OWN bit is wired to zero when running in guest kernel, + * whether enabling it here or not, as !HYP_OWN cmdq HWs only support a + * restricted set of supported commands. 
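[Editor's note] That restriction is what the supports_cmd hook models: a guest-owned VCMDQ accepts only invalidation opcodes, and anything else is routed back to the main CMDQ by arm_smmu_get_cmdq(). A hypothetical standalone allowlist check in the same spirit (enum values are stand-ins for the CMDQ_OP_* constants):

```c
#include <assert.h>

enum opcode { OP_TLBI_NH_ASID, OP_TLBI_NH_VA, OP_ATC_INV, OP_CFGI_CD };

/* Mirrors tegra241_guest_vcmdq_supports_cmd(): invalidations only. */
static int guest_vcmdq_supports(enum opcode op)
{
	switch (op) {
	case OP_TLBI_NH_ASID:
	case OP_TLBI_NH_VA:
	case OP_ATC_INV:
		return 1;
	default:
		return 0;	/* e.g. a CFGI must use the main CMDQ */
	}
}

int main(void)
{
	assert(guest_vcmdq_supports(OP_ATC_INV));
	assert(!guest_vcmdq_supports(OP_CFGI_CD));
	return 0;
}
```
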
+ */ + regval = FIELD_PREP(VINTF_HYP_OWN, hyp_own); + writel(regval, REG_VINTF(vintf, CONFIG)); + + ret = vintf_write_config(vintf, regval | VINTF_EN); + if (ret) + return ret; + /* + * As being mentioned above, HYP_OWN bit is wired to zero for a guest + * kernel, so read it back from HW to ensure that reflects in hyp_own + */ + vintf->hyp_own = !!(VINTF_HYP_OWN & readl(REG_VINTF(vintf, CONFIG))); + + for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++) { + if (vintf->lvcmdqs && vintf->lvcmdqs[lidx]) { + ret = tegra241_vcmdq_hw_init(vintf->lvcmdqs[lidx]); + if (ret) { + tegra241_vintf_hw_deinit(vintf); + return ret; + } + } + } + + return 0; +} + +static int tegra241_cmdqv_hw_reset(struct arm_smmu_device *smmu) +{ + struct tegra241_cmdqv *cmdqv = + container_of(smmu, struct tegra241_cmdqv, smmu); + u16 qidx, lidx, idx; + u32 regval; + int ret; + + /* Reset CMDQV */ + regval = readl_relaxed(REG_CMDQV(cmdqv, CONFIG)); + ret = cmdqv_write_config(cmdqv, regval & ~CMDQV_EN); + if (ret) + return ret; + ret = cmdqv_write_config(cmdqv, regval | CMDQV_EN); + if (ret) + return ret; + + /* Assign preallocated global VCMDQs to each VINTF as LVCMDQs */ + for (idx = 0, qidx = 0; idx < cmdqv->num_vintfs; idx++) { + for (lidx = 0; lidx < cmdqv->num_lvcmdqs_per_vintf; lidx++) { + regval = FIELD_PREP(CMDQV_CMDQ_ALLOC_VINTF, idx); + regval |= FIELD_PREP(CMDQV_CMDQ_ALLOC_LVCMDQ, lidx); + regval |= CMDQV_CMDQ_ALLOCATED; + writel_relaxed(regval, + REG_CMDQV(cmdqv, CMDQ_ALLOC(qidx++))); + } + } + + return tegra241_vintf_hw_init(cmdqv->vintfs[0], true); +} + +/* VCMDQ Resource Helpers */ + +static void tegra241_vcmdq_free_smmu_cmdq(struct tegra241_vcmdq *vcmdq) +{ + struct arm_smmu_queue *q = &vcmdq->cmdq.q; + size_t nents = 1 << q->llq.max_n_shift; + size_t qsz = nents << CMDQ_ENT_SZ_SHIFT; + + if (!q->base) + return; + dmam_free_coherent(vcmdq->cmdqv->smmu.dev, qsz, q->base, q->base_dma); +} + +static int tegra241_vcmdq_alloc_smmu_cmdq(struct tegra241_vcmdq *vcmdq) +{ + struct arm_smmu_device *smmu = &vcmdq->cmdqv->smmu; + struct arm_smmu_cmdq *cmdq = &vcmdq->cmdq; + struct arm_smmu_queue *q = &cmdq->q; + char name[16]; + int ret; + + snprintf(name, 16, "vcmdq%u", vcmdq->idx); + + q->llq.max_n_shift = VCMDQ_LOG2SIZE_MAX; + + /* Use the common helper to init the VCMDQ, and then... 
*/ + ret = arm_smmu_init_one_queue(smmu, q, vcmdq->page0, + TEGRA241_VCMDQ_PROD, TEGRA241_VCMDQ_CONS, + CMDQ_ENT_DWORDS, name); + if (ret) + return ret; + + /* ...override q_base to write VCMDQ_BASE registers */ + q->q_base = q->base_dma & VCMDQ_ADDR; + q->q_base |= FIELD_PREP(VCMDQ_LOG2SIZE, q->llq.max_n_shift); + + if (!vcmdq->vintf->hyp_own) + cmdq->supports_cmd = tegra241_guest_vcmdq_supports_cmd; + + return arm_smmu_cmdq_init(smmu, cmdq); +} + +/* VINTF Logical VCMDQ Resource Helpers */ + +static void tegra241_vintf_deinit_lvcmdq(struct tegra241_vintf *vintf, u16 lidx) +{ + vintf->lvcmdqs[lidx] = NULL; +} + +static int tegra241_vintf_init_lvcmdq(struct tegra241_vintf *vintf, u16 lidx, + struct tegra241_vcmdq *vcmdq) +{ + struct tegra241_cmdqv *cmdqv = vintf->cmdqv; + u16 idx = vintf->idx; + + vcmdq->idx = idx * cmdqv->num_lvcmdqs_per_vintf + lidx; + vcmdq->lidx = lidx; + vcmdq->cmdqv = cmdqv; + vcmdq->vintf = vintf; + vcmdq->page0 = cmdqv->base + TEGRA241_VINTFi_LVCMDQ_PAGE0(idx, lidx); + vcmdq->page1 = cmdqv->base + TEGRA241_VINTFi_LVCMDQ_PAGE1(idx, lidx); + + vintf->lvcmdqs[lidx] = vcmdq; + return 0; +} + +static void tegra241_vintf_free_lvcmdq(struct tegra241_vintf *vintf, u16 lidx) +{ + struct tegra241_vcmdq *vcmdq = vintf->lvcmdqs[lidx]; + char header[64]; + + tegra241_vcmdq_free_smmu_cmdq(vcmdq); + tegra241_vintf_deinit_lvcmdq(vintf, lidx); + + dev_dbg(vintf->cmdqv->dev, + "%sdeallocated\n", lvcmdq_error_header(vcmdq, header, 64)); + kfree(vcmdq); +} + +static struct tegra241_vcmdq * +tegra241_vintf_alloc_lvcmdq(struct tegra241_vintf *vintf, u16 lidx) +{ + struct tegra241_cmdqv *cmdqv = vintf->cmdqv; + struct tegra241_vcmdq *vcmdq; + char header[64]; + int ret; + + vcmdq = kzalloc(sizeof(*vcmdq), GFP_KERNEL); + if (!vcmdq) + return ERR_PTR(-ENOMEM); + + ret = tegra241_vintf_init_lvcmdq(vintf, lidx, vcmdq); + if (ret) + goto free_vcmdq; + + /* Build an arm_smmu_cmdq for each LVCMDQ */ + ret = tegra241_vcmdq_alloc_smmu_cmdq(vcmdq); + if (ret) + goto deinit_lvcmdq; + + dev_dbg(cmdqv->dev, + "%sallocated\n", lvcmdq_error_header(vcmdq, header, 64)); + return vcmdq; + +deinit_lvcmdq: + tegra241_vintf_deinit_lvcmdq(vintf, lidx); +free_vcmdq: + kfree(vcmdq); + return ERR_PTR(ret); +} + +/* VINTF Resource Helpers */ + +static void tegra241_cmdqv_deinit_vintf(struct tegra241_cmdqv *cmdqv, u16 idx) +{ + kfree(cmdqv->vintfs[idx]->lvcmdqs); + ida_free(&cmdqv->vintf_ids, idx); + cmdqv->vintfs[idx] = NULL; +} + +static int tegra241_cmdqv_init_vintf(struct tegra241_cmdqv *cmdqv, u16 max_idx, + struct tegra241_vintf *vintf) +{ + + u16 idx; + int ret; + + ret = ida_alloc_max(&cmdqv->vintf_ids, max_idx, GFP_KERNEL); + if (ret < 0) + return ret; + idx = ret; + + vintf->idx = idx; + vintf->cmdqv = cmdqv; + vintf->base = cmdqv->base + TEGRA241_VINTF(idx); + + vintf->lvcmdqs = kcalloc(cmdqv->num_lvcmdqs_per_vintf, + sizeof(*vintf->lvcmdqs), GFP_KERNEL); + if (!vintf->lvcmdqs) { + ida_free(&cmdqv->vintf_ids, idx); + return -ENOMEM; + } + + cmdqv->vintfs[idx] = vintf; + return ret; +} + +/* Remove Helpers */ + +static void tegra241_vintf_remove_lvcmdq(struct tegra241_vintf *vintf, u16 lidx) +{ + tegra241_vcmdq_hw_deinit(vintf->lvcmdqs[lidx]); + tegra241_vintf_free_lvcmdq(vintf, lidx); +} + +static void tegra241_cmdqv_remove_vintf(struct tegra241_cmdqv *cmdqv, u16 idx) +{ + struct tegra241_vintf *vintf = cmdqv->vintfs[idx]; + u16 lidx; + + /* Remove LVCMDQ resources */ + for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++) + if (vintf->lvcmdqs[lidx]) + tegra241_vintf_remove_lvcmdq(vintf, 
lidx); + + /* Remove VINTF resources */ + tegra241_vintf_hw_deinit(vintf); + + dev_dbg(cmdqv->dev, "VINTF%u: deallocated\n", vintf->idx); + tegra241_cmdqv_deinit_vintf(cmdqv, idx); + kfree(vintf); +} + +static void tegra241_cmdqv_remove(struct arm_smmu_device *smmu) +{ + struct tegra241_cmdqv *cmdqv = + container_of(smmu, struct tegra241_cmdqv, smmu); + u16 idx; + + /* Remove VINTF resources */ + for (idx = 0; idx < cmdqv->num_vintfs; idx++) { + if (cmdqv->vintfs[idx]) { + /* Only vintf0 should remain at this stage */ + WARN_ON(idx > 0); + tegra241_cmdqv_remove_vintf(cmdqv, idx); + } + } + + /* Remove cmdqv resources */ + ida_destroy(&cmdqv->vintf_ids); + + if (cmdqv->irq > 0) + free_irq(cmdqv->irq, cmdqv); + iounmap(cmdqv->base); + kfree(cmdqv->vintfs); + put_device(cmdqv->dev); /* smmu->impl_dev */ +} + +static struct arm_smmu_impl_ops tegra241_cmdqv_impl_ops = { + .get_secondary_cmdq = tegra241_cmdqv_get_cmdq, + .device_reset = tegra241_cmdqv_hw_reset, + .device_remove = tegra241_cmdqv_remove, +}; + +/* Probe Functions */ + +static int tegra241_cmdqv_acpi_is_memory(struct acpi_resource *res, void *data) +{ + struct resource_win win; + + return !acpi_dev_resource_address_space(res, &win); +} + +static int tegra241_cmdqv_acpi_get_irqs(struct acpi_resource *ares, void *data) +{ + struct resource r; + int *irq = data; + + if (*irq <= 0 && acpi_dev_resource_interrupt(ares, 0, &r)) + *irq = r.start; + return 1; /* No need to add resource to the list */ +} + +static struct resource * +tegra241_cmdqv_find_acpi_resource(struct device *dev, int *irq) +{ + struct acpi_device *adev = to_acpi_device(dev); + struct list_head resource_list; + struct resource_entry *rentry; + struct resource *res = NULL; + int ret; + + INIT_LIST_HEAD(&resource_list); + ret = acpi_dev_get_resources(adev, &resource_list, + tegra241_cmdqv_acpi_is_memory, NULL); + if (ret < 0) { + dev_err(dev, "failed to get memory resource: %d\n", ret); + return NULL; + } + + rentry = list_first_entry_or_null(&resource_list, + struct resource_entry, node); + if (!rentry) { + dev_err(dev, "failed to get memory resource entry\n"); + goto free_list; + } + + /* Caller must free the res */ + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) + goto free_list; + + *res = *rentry->res; + + acpi_dev_free_resource_list(&resource_list); + + INIT_LIST_HEAD(&resource_list); + + if (irq) + ret = acpi_dev_get_resources(adev, &resource_list, + tegra241_cmdqv_acpi_get_irqs, irq); + if (ret < 0 || !irq || *irq <= 0) + dev_warn(dev, "no interrupt. 
errors will not be reported\n"); + +free_list: + acpi_dev_free_resource_list(&resource_list); + return res; +} + +static int tegra241_cmdqv_init_structures(struct arm_smmu_device *smmu) +{ + struct tegra241_cmdqv *cmdqv = + container_of(smmu, struct tegra241_cmdqv, smmu); + struct tegra241_vintf *vintf; + int lidx; + int ret; + + vintf = kzalloc(sizeof(*vintf), GFP_KERNEL); + if (!vintf) + goto out_fallback; + + /* Init VINTF0 for in-kernel use */ + ret = tegra241_cmdqv_init_vintf(cmdqv, 0, vintf); + if (ret) { + dev_err(cmdqv->dev, "failed to init vintf0: %d\n", ret); + goto free_vintf; + } + + /* Preallocate logical VCMDQs to VINTF0 */ + for (lidx = 0; lidx < cmdqv->num_lvcmdqs_per_vintf; lidx++) { + struct tegra241_vcmdq *vcmdq; + + vcmdq = tegra241_vintf_alloc_lvcmdq(vintf, lidx); + if (IS_ERR(vcmdq)) + goto free_lvcmdq; + } + + /* Now, we are ready to run all the impl ops */ + smmu->impl_ops = &tegra241_cmdqv_impl_ops; + return 0; + +free_lvcmdq: + for (lidx--; lidx >= 0; lidx--) + tegra241_vintf_free_lvcmdq(vintf, lidx); + tegra241_cmdqv_deinit_vintf(cmdqv, vintf->idx); +free_vintf: + kfree(vintf); +out_fallback: + dev_info(smmu->impl_dev, "Falling back to standard SMMU CMDQ\n"); + smmu->options &= ~ARM_SMMU_OPT_TEGRA241_CMDQV; + tegra241_cmdqv_remove(smmu); + return 0; +} + +struct dentry *cmdqv_debugfs_dir; + +static struct arm_smmu_device * +__tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res, + int irq) +{ + static const struct arm_smmu_impl_ops init_ops = { + .init_structures = tegra241_cmdqv_init_structures, + .device_remove = tegra241_cmdqv_remove, + }; + struct tegra241_cmdqv *cmdqv = NULL; + struct arm_smmu_device *new_smmu; + void __iomem *base; + u32 regval; + int ret; + + static_assert(offsetof(struct tegra241_cmdqv, smmu) == 0); + + base = ioremap(res->start, resource_size(res)); + if (!base) { + dev_err(smmu->dev, "failed to ioremap\n"); + return NULL; + } + + regval = readl(base + TEGRA241_CMDQV_CONFIG); + if (disable_cmdqv) { + dev_info(smmu->dev, "Detected disable_cmdqv=true\n"); + writel(regval & ~CMDQV_EN, base + TEGRA241_CMDQV_CONFIG); + goto iounmap; + } + + cmdqv = devm_krealloc(smmu->dev, smmu, sizeof(*cmdqv), GFP_KERNEL); + if (!cmdqv) + goto iounmap; + new_smmu = &cmdqv->smmu; + + cmdqv->irq = irq; + cmdqv->base = base; + cmdqv->dev = smmu->impl_dev; + + if (cmdqv->irq > 0) { + ret = request_irq(irq, tegra241_cmdqv_isr, 0, "tegra241-cmdqv", + cmdqv); + if (ret) { + dev_err(cmdqv->dev, "failed to request irq (%d): %d\n", + cmdqv->irq, ret); + goto iounmap; + } + } + + regval = readl_relaxed(REG_CMDQV(cmdqv, PARAM)); + cmdqv->num_vintfs = 1 << FIELD_GET(CMDQV_NUM_VINTF_LOG2, regval); + cmdqv->num_vcmdqs = 1 << FIELD_GET(CMDQV_NUM_VCMDQ_LOG2, regval); + cmdqv->num_lvcmdqs_per_vintf = cmdqv->num_vcmdqs / cmdqv->num_vintfs; + + cmdqv->vintfs = + kcalloc(cmdqv->num_vintfs, sizeof(*cmdqv->vintfs), GFP_KERNEL); + if (!cmdqv->vintfs) + goto free_irq; + + ida_init(&cmdqv->vintf_ids); + +#ifdef CONFIG_IOMMU_DEBUGFS + if (!cmdqv_debugfs_dir) { + cmdqv_debugfs_dir = + debugfs_create_dir("tegra241_cmdqv", iommu_debugfs_dir); + debugfs_create_bool("bypass_vcmdq", 0644, cmdqv_debugfs_dir, + &bypass_vcmdq); + } +#endif + + /* Provide init-level ops only, until tegra241_cmdqv_init_structures */ + new_smmu->impl_ops = &init_ops; + + return new_smmu; + +free_irq: + if (cmdqv->irq > 0) + free_irq(cmdqv->irq, cmdqv); +iounmap: + iounmap(base); + return NULL; +} + +struct arm_smmu_device *tegra241_cmdqv_probe(struct arm_smmu_device *smmu) +{ + struct 
arm_smmu_device *new_smmu; + struct resource *res = NULL; + int irq; + + if (!smmu->dev->of_node) + res = tegra241_cmdqv_find_acpi_resource(smmu->impl_dev, &irq); + if (!res) + goto out_fallback; + + new_smmu = __tegra241_cmdqv_probe(smmu, res, irq); + kfree(res); + + if (new_smmu) + return new_smmu; + +out_fallback: + dev_info(smmu->impl_dev, "Falling back to standard SMMU CMDQ\n"); + smmu->options &= ~ARM_SMMU_OPT_TEGRA241_CMDQV; + put_device(smmu->impl_dev); + return ERR_PTR(-ENODEV); +} diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c index 4b2994b6126df5e2b9bf73b42d204d1660ccaa81..2fce4f6d4e1b234167313c22e530f4ad07930b5d 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c @@ -277,7 +277,7 @@ static int nvidia_smmu_init_context(struct arm_smmu_domain *smmu_domain, */ if (of_device_is_compatible(np, "nvidia,tegra234-smmu") || of_device_is_compatible(np, "nvidia,tegra194-smmu")) { - smmu->pgsize_bitmap = PAGE_SIZE; + smmu->pgsize_bitmap &= GENMASK(PAGE_SHIFT, 0); pgtbl_cfg->pgsize_bitmap = smmu->pgsize_bitmap; } diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index a7f7c69a5627ab75d4e47d93c3abdb46bf40e851..17d10685282cc24381064f7c46dbbe574439cc06 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -422,7 +422,7 @@ void arm_smmu_read_context_fault_info(struct arm_smmu_device *smmu, int idx, void arm_smmu_print_context_fault_info(struct arm_smmu_device *smmu, int idx, const struct arm_smmu_context_fault_info *cfi) { - dev_dbg(smmu->dev, + dev_err(smmu->dev, "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n", cfi->fsr, cfi->iova, cfi->fsynr, cfi->cbfrsynra, idx); diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 95918eeb55768e31b6e24497103529a03139e4bc..b20b734b90f58105d6da78d513e0856f9666bfa5 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -1057,9 +1058,8 @@ static void *iommu_dma_alloc_remap(struct device *dev, size_t size, return NULL; } -static struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev, - size_t size, enum dma_data_direction dir, gfp_t gfp, - unsigned long attrs) +struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev, size_t size, + enum dma_data_direction dir, gfp_t gfp, unsigned long attrs) { struct dma_sgt_handle *sh; @@ -1075,7 +1075,7 @@ static struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev, return &sh->sgt; } -static void iommu_dma_free_noncontiguous(struct device *dev, size_t size, +void iommu_dma_free_noncontiguous(struct device *dev, size_t size, struct sg_table *sgt, enum dma_data_direction dir) { struct dma_sgt_handle *sh = sgt_handle(sgt); @@ -1086,8 +1086,8 @@ static void iommu_dma_free_noncontiguous(struct device *dev, size_t size, kfree(sh); } -static void iommu_dma_sync_single_for_cpu(struct device *dev, - dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) +void iommu_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir) { phys_addr_t phys; @@ -1098,12 +1098,11 @@ static void iommu_dma_sync_single_for_cpu(struct device *dev, if (!dev_is_dma_coherent(dev)) arch_sync_dma_for_cpu(phys, size, dir); - if (is_swiotlb_buffer(dev, phys)) - swiotlb_sync_single_for_cpu(dev, phys, size, dir); + 
swiotlb_sync_single_for_cpu(dev, phys, size, dir); } -static void iommu_dma_sync_single_for_device(struct device *dev, - dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) +void iommu_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir) { phys_addr_t phys; @@ -1111,16 +1110,14 @@ static void iommu_dma_sync_single_for_device(struct device *dev, return; phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle); - if (is_swiotlb_buffer(dev, phys)) - swiotlb_sync_single_for_device(dev, phys, size, dir); + swiotlb_sync_single_for_device(dev, phys, size, dir); if (!dev_is_dma_coherent(dev)) arch_sync_dma_for_device(phys, size, dir); } -static void iommu_dma_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nelems, - enum dma_data_direction dir) +void iommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir) { struct scatterlist *sg; int i; @@ -1134,9 +1131,8 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev, arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir); } -static void iommu_dma_sync_sg_for_device(struct device *dev, - struct scatterlist *sgl, int nelems, - enum dma_data_direction dir) +void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir) { struct scatterlist *sg; int i; @@ -1151,9 +1147,9 @@ static void iommu_dma_sync_sg_for_device(struct device *dev, arch_sync_dma_for_device(sg_phys(sg), sg->length, dir); } -static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir, - unsigned long attrs) +dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + unsigned long attrs) { phys_addr_t phys = page_to_phys(page) + offset; bool coherent = dev_is_dma_coherent(dev); @@ -1206,12 +1202,12 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, arch_sync_dma_for_device(phys, size, dir); iova = __iommu_dma_map(dev, phys, size, prot, dma_mask); - if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(dev, phys)) + if (iova == DMA_MAPPING_ERROR) swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); return iova; } -static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, +void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction dir, unsigned long attrs) { struct iommu_domain *domain = iommu_get_dma_domain(dev); @@ -1226,8 +1222,7 @@ static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, __iommu_dma_unmap(dev, dma_handle, size); - if (unlikely(is_swiotlb_buffer(dev, phys))) - swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); + swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); } /* @@ -1365,8 +1360,8 @@ static int iommu_dma_map_sg_swiotlb(struct device *dev, struct scatterlist *sg, * impedance-matching, to be able to hand off a suitably-aligned list, * but still preserve the original offsets and sizes for the caller. 
*/ -static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, unsigned long attrs) +int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs) { struct iommu_domain *domain = iommu_get_dma_domain(dev); struct iommu_dma_cookie *cookie = domain->iova_cookie; @@ -1485,8 +1480,8 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, return ret; } -static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, unsigned long attrs) +void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs) { dma_addr_t end = 0, start; struct scatterlist *tmp; @@ -1535,7 +1530,7 @@ static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, __iommu_dma_unmap(dev, start, end - start); } -static dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, +dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, size_t size, enum dma_data_direction dir, unsigned long attrs) { return __iommu_dma_map(dev, phys, size, @@ -1543,7 +1538,7 @@ static dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, dma_get_mask(dev)); } -static void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, +void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir, unsigned long attrs) { __iommu_dma_unmap(dev, handle, size); @@ -1580,7 +1575,7 @@ static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr) dma_free_contiguous(dev, page, alloc_size); } -static void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr, +void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle, unsigned long attrs) { __iommu_dma_unmap(dev, handle, size); @@ -1624,8 +1619,8 @@ static void *iommu_dma_alloc_pages(struct device *dev, size_t size, return NULL; } -static void *iommu_dma_alloc(struct device *dev, size_t size, - dma_addr_t *handle, gfp_t gfp, unsigned long attrs) +void *iommu_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, + gfp_t gfp, unsigned long attrs) { bool coherent = dev_is_dma_coherent(dev); int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs); @@ -1658,7 +1653,7 @@ static void *iommu_dma_alloc(struct device *dev, size_t size, return cpu_addr; } -static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, +int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) { @@ -1689,7 +1684,7 @@ static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, vma->vm_page_prot); } -static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt, +int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) { @@ -1716,19 +1711,19 @@ static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt, return ret; } -static unsigned long iommu_dma_get_merge_boundary(struct device *dev) +unsigned long iommu_dma_get_merge_boundary(struct device *dev) { struct iommu_domain *domain = iommu_get_dma_domain(dev); return (1UL << __ffs(domain->pgsize_bitmap)) - 1; } -static size_t iommu_dma_opt_mapping_size(void) +size_t iommu_dma_opt_mapping_size(void) { return iova_rcache_range(); } -static size_t 
iommu_dma_max_mapping_size(struct device *dev) +size_t iommu_dma_max_mapping_size(struct device *dev) { if (dev_is_untrusted(dev)) return swiotlb_max_mapping_size(dev); @@ -1736,31 +1731,6 @@ static size_t iommu_dma_max_mapping_size(struct device *dev) return SIZE_MAX; } -static const struct dma_map_ops iommu_dma_ops = { - .flags = DMA_F_PCI_P2PDMA_SUPPORTED, - .alloc = iommu_dma_alloc, - .free = iommu_dma_free, - .alloc_pages = dma_common_alloc_pages, - .free_pages = dma_common_free_pages, - .alloc_noncontiguous = iommu_dma_alloc_noncontiguous, - .free_noncontiguous = iommu_dma_free_noncontiguous, - .mmap = iommu_dma_mmap, - .get_sgtable = iommu_dma_get_sgtable, - .map_page = iommu_dma_map_page, - .unmap_page = iommu_dma_unmap_page, - .map_sg = iommu_dma_map_sg, - .unmap_sg = iommu_dma_unmap_sg, - .sync_single_for_cpu = iommu_dma_sync_single_for_cpu, - .sync_single_for_device = iommu_dma_sync_single_for_device, - .sync_sg_for_cpu = iommu_dma_sync_sg_for_cpu, - .sync_sg_for_device = iommu_dma_sync_sg_for_device, - .map_resource = iommu_dma_map_resource, - .unmap_resource = iommu_dma_unmap_resource, - .get_merge_boundary = iommu_dma_get_merge_boundary, - .opt_mapping_size = iommu_dma_opt_mapping_size, - .max_mapping_size = iommu_dma_max_mapping_size, -}; - void iommu_setup_dma_ops(struct device *dev) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); @@ -1768,19 +1738,15 @@ void iommu_setup_dma_ops(struct device *dev) if (dev_is_pci(dev)) dev->iommu->pci_32bit_workaround = !iommu_dma_forcedac; - if (iommu_is_dma_domain(domain)) { - if (iommu_dma_init_domain(domain, dev)) - goto out_err; - dev->dma_ops = &iommu_dma_ops; - } else if (dev->dma_ops == &iommu_dma_ops) { - /* Clean up if we've switched *from* a DMA domain */ - dev->dma_ops = NULL; - } + dev->dma_iommu = iommu_is_dma_domain(domain); + if (dev->dma_iommu && iommu_dma_init_domain(domain, dev)) + goto out_err; return; out_err: - pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n", - dev_name(dev)); + pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n", + dev_name(dev)); + dev->dma_iommu = false; } static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index e9d2bff4659b7c09f4e14ea14032c861c088f7e1..30be786bff11e6da62c208387f26c9f435116f48 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -416,14 +416,12 @@ static struct iommu_group *fsl_pamu_device_group(struct device *dev) static struct iommu_device *fsl_pamu_probe_device(struct device *dev) { - int len; - /* * uboot must fill the fsl,liodn for platform devices to be supported by * the iommu. 
*/ if (!dev_is_pci(dev) && - !of_get_property(dev->of_node, "fsl,liodn", &len)) + !of_property_present(dev->of_node, "fsl,liodn")) return ERR_PTR(-ENODEV); return &pamu_iommu; diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index 0a6b9eda12a35919fdbb0fb21e6299b434b07a38..e329d264e56d243f7b74ccf63e2c3b70b7832db6 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -12,7 +12,6 @@ config DMAR_DEBUG config INTEL_IOMMU bool "Support for Intel IOMMU using DMA Remapping Devices" depends on PCI_MSI && ACPI && (X86 || IA64) - select DMA_OPS select IOMMU_API select IOMMU_IOVA select IOMMUFD_DRIVER if IOMMUFD diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c index e8418cdd8331be56efeb5a92668f26db9100c9f8..e5b89f728ad3b28685234dd2c1e2de4c46404ce9 100644 --- a/drivers/iommu/intel/cache.c +++ b/drivers/iommu/intel/cache.c @@ -190,6 +190,13 @@ int cache_tag_assign_domain(struct dmar_domain *domain, u16 did = domain_get_id_for_dev(domain, dev); int ret; + /* domain->qi_batch will be freed in iommu_free_domain() path. */ + if (!domain->qi_batch) { + domain->qi_batch = kzalloc(sizeof(*domain->qi_batch), GFP_KERNEL); + if (!domain->qi_batch) + return -ENOMEM; + } + ret = __cache_tag_assign_domain(domain, did, dev, pasid); if (ret || domain->domain.type != IOMMU_DOMAIN_NESTED) return ret; @@ -245,7 +252,8 @@ static unsigned long calculate_psi_aligned_address(unsigned long start, * shared_bits are all equal in both pfn and end_pfn. */ shared_bits = ~(pfn ^ end_pfn) & ~bitmask; - mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG; + mask = shared_bits ? __ffs(shared_bits) : MAX_AGAW_PFN_WIDTH; + aligned_pages = 1UL << mask; } *_pages = aligned_pages; @@ -254,6 +262,154 @@ static unsigned long calculate_psi_aligned_address(unsigned long start, return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask); } +static void qi_batch_flush_descs(struct intel_iommu *iommu, struct qi_batch *batch) +{ + if (!iommu || !batch->index) + return; + + qi_submit_sync(iommu, batch->descs, batch->index, 0); + + /* Reset the index value and clean the whole batch buffer. */ + memset(batch, 0, sizeof(*batch)); +} + +static void qi_batch_increment_index(struct intel_iommu *iommu, struct qi_batch *batch) +{ + if (++batch->index == QI_MAX_BATCHED_DESC_COUNT) + qi_batch_flush_descs(iommu, batch); +} + +static void qi_batch_add_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type, + struct qi_batch *batch) +{ + qi_desc_iotlb(iommu, did, addr, size_order, type, &batch->descs[batch->index]); + qi_batch_increment_index(iommu, batch); +} + +static void qi_batch_add_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, + u16 qdep, u64 addr, unsigned int mask, + struct qi_batch *batch) +{ + /* + * According to VT-d spec, software is recommended to not submit any Device-TLB + * invalidation requests while address remapping hardware is disabled. + */ + if (!(iommu->gcmd & DMA_GCMD_TE)) + return; + + qi_desc_dev_iotlb(sid, pfsid, qdep, addr, mask, &batch->descs[batch->index]); + qi_batch_increment_index(iommu, batch); +} + +static void qi_batch_add_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, + u64 addr, unsigned long npages, bool ih, + struct qi_batch *batch) +{ + /* + * npages == -1 means a PASID-selective invalidation, otherwise, + * a positive value for Page-selective-within-PASID invalidation. + * 0 is not a valid input. 
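+	 * For instance (illustrative): npages == 1 invalidates only the
+	 * single VTD_PAGE_SIZE page at @addr, while npages == -1 ignores
+	 * @addr and drops every mapping cached for the PASID.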
+ */ + if (!npages) + return; + + qi_desc_piotlb(did, pasid, addr, npages, ih, &batch->descs[batch->index]); + qi_batch_increment_index(iommu, batch); +} + +static void qi_batch_add_pasid_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, + u32 pasid, u16 qdep, u64 addr, + unsigned int size_order, struct qi_batch *batch) +{ + /* + * According to VT-d spec, software is recommended to not submit any + * Device-TLB invalidation requests while address remapping hardware + * is disabled. + */ + if (!(iommu->gcmd & DMA_GCMD_TE)) + return; + + qi_desc_dev_iotlb_pasid(sid, pfsid, pasid, qdep, addr, size_order, + &batch->descs[batch->index]); + qi_batch_increment_index(iommu, batch); +} + +static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *tag, + unsigned long addr, unsigned long pages, + unsigned long mask, int ih) +{ + struct intel_iommu *iommu = tag->iommu; + u64 type = DMA_TLB_PSI_FLUSH; + + if (domain->use_first_level) { + qi_batch_add_piotlb(iommu, tag->domain_id, tag->pasid, addr, + pages, ih, domain->qi_batch); + return; + } + + /* + * Fallback to domain selective flush if no PSI support or the size + * is too big. + */ + if (!cap_pgsel_inv(iommu->cap) || + mask > cap_max_amask_val(iommu->cap) || pages == -1) { + addr = 0; + mask = 0; + ih = 0; + type = DMA_TLB_DSI_FLUSH; + } + + if (ecap_qis(iommu->ecap)) + qi_batch_add_iotlb(iommu, tag->domain_id, addr | ih, mask, type, + domain->qi_batch); + else + __iommu_flush_iotlb(iommu, tag->domain_id, addr | ih, mask, type); +} + +static void cache_tag_flush_devtlb_psi(struct dmar_domain *domain, struct cache_tag *tag, + unsigned long addr, unsigned long mask) +{ + struct intel_iommu *iommu = tag->iommu; + struct device_domain_info *info; + u16 sid; + + info = dev_iommu_priv_get(tag->dev); + sid = PCI_DEVID(info->bus, info->devfn); + + if (tag->pasid == IOMMU_NO_PASID) { + qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, + addr, mask, domain->qi_batch); + if (info->dtlb_extra_inval) + qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, + addr, mask, domain->qi_batch); + return; + } + + qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid, + info->ats_qdep, addr, mask, domain->qi_batch); + if (info->dtlb_extra_inval) + qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid, + info->ats_qdep, addr, mask, + domain->qi_batch); +} + +static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_tag *tag) +{ + struct intel_iommu *iommu = tag->iommu; + struct device_domain_info *info; + u16 sid; + + info = dev_iommu_priv_get(tag->dev); + sid = PCI_DEVID(info->bus, info->devfn); + + qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0, + MAX_AGAW_PFN_WIDTH, domain->qi_batch); + if (info->dtlb_extra_inval) + qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0, + MAX_AGAW_PFN_WIDTH, domain->qi_batch); +} + /* * Invalidates a range of IOVA from @start (inclusive) to @end (inclusive) * when the memory mappings in the target domain have been modified. 
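Taken together, the qi_batch_* helpers above implement a plain accumulate-then-flush pattern: each add writes a descriptor at the current index, and the batch is submitted synchronously either when the ring fills or when a caller flushes explicitly (the flush_range/flush_all walkers below also flush whenever the walk moves to a different IOMMU, since one batch can only be submitted to one invalidation queue). A minimal user-space sketch of the same pattern, with the hypothetical submit_sync() and batch_add() standing in for qi_submit_sync() and the qi_batch_add_* helpers:

	#include <stdio.h>
	#include <string.h>

	#define MAX_BATCH 16	/* mirrors QI_MAX_BATCHED_DESC_COUNT */

	struct desc { unsigned long long qw[4]; };
	struct batch { struct desc descs[MAX_BATCH]; unsigned int index; };

	static void submit_sync(const struct desc *descs, unsigned int count)
	{
		(void)descs;	/* a real queue would consume these */
		printf("submitting %u descriptor(s)\n", count);
	}

	static void batch_flush(struct batch *b)
	{
		if (!b->index)
			return;
		submit_sync(b->descs, b->index);
		memset(b, 0, sizeof(*b));	/* reset index, clear stale descriptors */
	}

	static void batch_add(struct batch *b, unsigned long long qw0)
	{
		b->descs[b->index].qw[0] = qw0;
		if (++b->index == MAX_BATCH)
			batch_flush(b);		/* ring is full: submit and start over */
	}

	int main(void)
	{
		struct batch b = { 0 };
		int i;

		for (i = 0; i < 20; i++)	/* one auto-flush at 16, 4 left over */
			batch_add(&b, i);
		batch_flush(&b);		/* explicit drain, as the walkers do */
		return 0;
	}

The win over issuing qi_submit_sync() per descriptor is that the queue tail is written, and completion waited on, once per batch of up to sixteen descriptors rather than once per descriptor.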
@@ -261,6 +417,7 @@ static unsigned long calculate_psi_aligned_address(unsigned long start, void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start, unsigned long end, int ih) { + struct intel_iommu *iommu = NULL; unsigned long pages, mask, addr; struct cache_tag *tag; unsigned long flags; @@ -269,30 +426,14 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start, spin_lock_irqsave(&domain->cache_lock, flags); list_for_each_entry(tag, &domain->cache_tags, node) { - struct intel_iommu *iommu = tag->iommu; - struct device_domain_info *info; - u16 sid; + if (iommu && iommu != tag->iommu) + qi_batch_flush_descs(iommu, domain->qi_batch); + iommu = tag->iommu; switch (tag->type) { case CACHE_TAG_IOTLB: case CACHE_TAG_NESTING_IOTLB: - if (domain->use_first_level) { - qi_flush_piotlb(iommu, tag->domain_id, - tag->pasid, addr, pages, ih); - } else { - /* - * Fallback to domain selective flush if no - * PSI support or the size is too big. - */ - if (!cap_pgsel_inv(iommu->cap) || - mask > cap_max_amask_val(iommu->cap)) - iommu->flush.flush_iotlb(iommu, tag->domain_id, - 0, 0, DMA_TLB_DSI_FLUSH); - else - iommu->flush.flush_iotlb(iommu, tag->domain_id, - addr | ih, mask, - DMA_TLB_PSI_FLUSH); - } + cache_tag_flush_iotlb(domain, tag, addr, pages, mask, ih); break; case CACHE_TAG_NESTING_DEVTLB: /* @@ -306,23 +447,13 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start, mask = MAX_AGAW_PFN_WIDTH; fallthrough; case CACHE_TAG_DEVTLB: - info = dev_iommu_priv_get(tag->dev); - sid = PCI_DEVID(info->bus, info->devfn); - - if (tag->pasid == IOMMU_NO_PASID) - qi_flush_dev_iotlb(iommu, sid, info->pfsid, - info->ats_qdep, addr, mask); - else - qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid, - tag->pasid, info->ats_qdep, - addr, mask); - - quirk_extra_dev_tlb_flush(info, addr, mask, tag->pasid, info->ats_qdep); + cache_tag_flush_devtlb_psi(domain, tag, addr, mask); break; } trace_cache_tag_flush_range(tag, start, end, addr, pages, mask); } + qi_batch_flush_descs(iommu, domain->qi_batch); spin_unlock_irqrestore(&domain->cache_lock, flags); } @@ -332,39 +463,30 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start, */ void cache_tag_flush_all(struct dmar_domain *domain) { + struct intel_iommu *iommu = NULL; struct cache_tag *tag; unsigned long flags; spin_lock_irqsave(&domain->cache_lock, flags); list_for_each_entry(tag, &domain->cache_tags, node) { - struct intel_iommu *iommu = tag->iommu; - struct device_domain_info *info; - u16 sid; + if (iommu && iommu != tag->iommu) + qi_batch_flush_descs(iommu, domain->qi_batch); + iommu = tag->iommu; switch (tag->type) { case CACHE_TAG_IOTLB: case CACHE_TAG_NESTING_IOTLB: - if (domain->use_first_level) - qi_flush_piotlb(iommu, tag->domain_id, - tag->pasid, 0, -1, 0); - else - iommu->flush.flush_iotlb(iommu, tag->domain_id, - 0, 0, DMA_TLB_DSI_FLUSH); + cache_tag_flush_iotlb(domain, tag, 0, -1, 0, 0); break; case CACHE_TAG_DEVTLB: case CACHE_TAG_NESTING_DEVTLB: - info = dev_iommu_priv_get(tag->dev); - sid = PCI_DEVID(info->bus, info->devfn); - - qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, - 0, MAX_AGAW_PFN_WIDTH); - quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH, - IOMMU_NO_PASID, info->ats_qdep); + cache_tag_flush_devtlb_all(domain, tag); break; } trace_cache_tag_flush_all(tag); } + qi_batch_flush_descs(iommu, domain->qi_batch); spin_unlock_irqrestore(&domain->cache_lock, flags); } @@ -382,6 +504,7 @@ void cache_tag_flush_all(struct dmar_domain *domain) void 
cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start, unsigned long end) { + struct intel_iommu *iommu = NULL; unsigned long pages, mask, addr; struct cache_tag *tag; unsigned long flags; @@ -390,7 +513,9 @@ void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start, spin_lock_irqsave(&domain->cache_lock, flags); list_for_each_entry(tag, &domain->cache_tags, node) { - struct intel_iommu *iommu = tag->iommu; + if (iommu && iommu != tag->iommu) + qi_batch_flush_descs(iommu, domain->qi_batch); + iommu = tag->iommu; if (!cap_caching_mode(iommu->cap) || domain->use_first_level) { iommu_flush_write_buffer(iommu); @@ -398,22 +523,11 @@ void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start, } if (tag->type == CACHE_TAG_IOTLB || - tag->type == CACHE_TAG_NESTING_IOTLB) { - /* - * Fallback to domain selective flush if no - * PSI support or the size is too big. - */ - if (!cap_pgsel_inv(iommu->cap) || - mask > cap_max_amask_val(iommu->cap)) - iommu->flush.flush_iotlb(iommu, tag->domain_id, - 0, 0, DMA_TLB_DSI_FLUSH); - else - iommu->flush.flush_iotlb(iommu, tag->domain_id, - addr, mask, - DMA_TLB_PSI_FLUSH); - } + tag->type == CACHE_TAG_NESTING_IOTLB) + cache_tag_flush_iotlb(domain, tag, addr, pages, mask, 0); trace_cache_tag_flush_range_np(tag, start, end, addr, pages, mask); } + qi_batch_flush_descs(iommu, domain->qi_batch); spin_unlock_irqrestore(&domain->cache_lock, flags); } diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 36c98d87677b3a7a0ba005ab21b55aaf32497ef6..26554a3a2fa1652eabb86cc4789676a6218fdced 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1583,24 +1583,10 @@ void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, unsigned int size_order, u64 type) { - u8 dw = 0, dr = 0; struct qi_desc desc; - int ih = 0; - - if (cap_write_drain(iommu->cap)) - dw = 1; - - if (cap_read_drain(iommu->cap)) - dr = 1; - - desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw) - | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE; - desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih) - | QI_IOTLB_AM(size_order); - desc.qw2 = 0; - desc.qw3 = 0; + qi_desc_iotlb(iommu, did, addr, size_order, type, &desc); qi_submit_sync(iommu, &desc, 1, 0); } @@ -1618,20 +1603,7 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, if (!(iommu->gcmd & DMA_GCMD_TE)) return; - if (mask) { - addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1; - desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE; - } else - desc.qw1 = QI_DEV_IOTLB_ADDR(addr); - - if (qdep >= QI_DEV_IOTLB_MAX_INVS) - qdep = 0; - - desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) | - QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid); - desc.qw2 = 0; - desc.qw3 = 0; - + qi_desc_dev_iotlb(sid, pfsid, qdep, addr, mask, &desc); qi_submit_sync(iommu, &desc, 1, 0); } @@ -1651,28 +1623,7 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr, return; } - if (npages == -1) { - desc.qw0 = QI_EIOTLB_PASID(pasid) | - QI_EIOTLB_DID(did) | - QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | - QI_EIOTLB_TYPE; - desc.qw1 = 0; - } else { - int mask = ilog2(__roundup_pow_of_two(npages)); - unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask)); - - if (WARN_ON_ONCE(!IS_ALIGNED(addr, align))) - addr = ALIGN_DOWN(addr, align); - - desc.qw0 = QI_EIOTLB_PASID(pasid) | - QI_EIOTLB_DID(did) | - QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | - QI_EIOTLB_TYPE; - desc.qw1 
= QI_EIOTLB_ADDR(addr) | - QI_EIOTLB_IH(ih) | - QI_EIOTLB_AM(mask); - } - + qi_desc_piotlb(did, pasid, addr, npages, ih, &desc); qi_submit_sync(iommu, &desc, 1, 0); } @@ -1680,7 +1631,6 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr, void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid, u32 pasid, u16 qdep, u64 addr, unsigned int size_order) { - unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1); struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0}; /* @@ -1692,40 +1642,9 @@ void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid, if (!(iommu->gcmd & DMA_GCMD_TE)) return; - desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) | - QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE | - QI_DEV_IOTLB_PFSID(pfsid); - - /* - * If S bit is 0, we only flush a single page. If S bit is set, - * The least significant zero bit indicates the invalidation address - * range. VT-d spec 6.5.2.6. - * e.g. address bit 12[0] indicates 8KB, 13[0] indicates 16KB. - * size order = 0 is PAGE_SIZE 4KB - * Max Invs Pending (MIP) is set to 0 for now until we have DIT in - * ECAP. - */ - if (!IS_ALIGNED(addr, VTD_PAGE_SIZE << size_order)) - pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n", - addr, size_order); - - /* Take page address */ - desc.qw1 = QI_DEV_EIOTLB_ADDR(addr); - - if (size_order) { - /* - * Existing 0s in address below size_order may be the least - * significant bit, we must set them to 1s to avoid having - * smaller size than desired. - */ - desc.qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1, - VTD_PAGE_SHIFT); - /* Clear size_order bit to indicate size */ - desc.qw1 &= ~mask; - /* Set the S bit to indicate flushing more than 1 page */ - desc.qw1 |= QI_DEV_EIOTLB_SIZE; - } - + qi_desc_dev_iotlb_pasid(sid, pfsid, pasid, + qdep, addr, size_order, + &desc); qi_submit_sync(iommu, &desc, 1, 0); } diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 2e2bd8c1bcf5ff181300b1c59a11e169cc5410bf..9c07050e8e263188bd24eb207fa04aaf57a51a5c 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -167,15 +167,6 @@ static void device_rbtree_remove(struct device_domain_info *info) spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags); } -/* - * This domain is a statically identity mapping domain. - * 1. This domain creats a static 1:1 mapping to all usable memory. - * 2. It maps to each iommu if successful. - * 3. Each iommu mapps to this domain if successful. 
- */ -static struct dmar_domain *si_domain; -static int hw_pass_through = 1; - struct dmar_rmrr_unit { struct list_head list; /* list of rmrr units */ struct acpi_dmar_header *hdr; /* ACPI header */ @@ -293,11 +284,6 @@ static int __init intel_iommu_setup(char *str) } __setup("intel_iommu=", intel_iommu_setup); -static int domain_type_is_si(struct dmar_domain *domain) -{ - return domain->domain.type == IOMMU_DOMAIN_IDENTITY; -} - static int domain_pfn_supported(struct dmar_domain *domain, unsigned long pfn) { int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; @@ -492,7 +478,6 @@ void domain_update_iommu_cap(struct dmar_domain *domain) domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw); domain->domain.pgsize_bitmap |= domain_super_pgsize_bitmap(domain); - domain_update_iotlb(domain); } struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, @@ -1211,9 +1196,8 @@ static void __iommu_flush_context(struct intel_iommu *iommu, raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } -/* return value determine if we need a write buffer flush */ -static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, - u64 addr, unsigned int size_order, u64 type) +void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type) { int tlb_offset = ecap_iotlb_offset(iommu->ecap); u64 val = 0, val_iva = 0; @@ -1282,32 +1266,6 @@ domain_lookup_dev_info(struct dmar_domain *domain, return NULL; } -void domain_update_iotlb(struct dmar_domain *domain) -{ - struct dev_pasid_info *dev_pasid; - struct device_domain_info *info; - bool has_iotlb_device = false; - unsigned long flags; - - spin_lock_irqsave(&domain->lock, flags); - list_for_each_entry(info, &domain->devices, link) { - if (info->ats_enabled) { - has_iotlb_device = true; - break; - } - } - - list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) { - info = dev_iommu_priv_get(dev_pasid->dev); - if (info->ats_enabled) { - has_iotlb_device = true; - break; - } - } - domain->has_iotlb_device = has_iotlb_device; - spin_unlock_irqrestore(&domain->lock, flags); -} - /* * The extra devTLB flush quirk impacts those QAT devices with PCI device * IDs ranging from 0x4940 to 0x4943. It is exempted from risky_device() @@ -1334,20 +1292,9 @@ static void iommu_enable_pci_caps(struct device_domain_info *info) return; pdev = to_pci_dev(info->dev); - - /* The PCIe spec, in its wisdom, declares that the behaviour of - the device if you enable PASID support after ATS support is - undefined. So always enable PASID support on devices which - have it, even if we can't yet know if we're ever going to - use it. 
*/ - if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1)) - info->pasid_enabled = 1; - if (info->ats_supported && pci_ats_page_aligned(pdev) && - !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) { + !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) info->ats_enabled = 1; - domain_update_iotlb(info->domain); - } } static void iommu_disable_pci_caps(struct device_domain_info *info) @@ -1362,12 +1309,6 @@ static void iommu_disable_pci_caps(struct device_domain_info *info) if (info->ats_enabled) { pci_disable_ats(pdev); info->ats_enabled = 0; - domain_update_iotlb(info->domain); - } - - if (info->pasid_enabled) { - pci_disable_pasid(pdev); - info->pasid_enabled = 0; } } @@ -1536,7 +1477,6 @@ static struct dmar_domain *alloc_domain(unsigned int type) domain->nid = NUMA_NO_NODE; if (first_level_by_default(type)) domain->use_first_level = true; - domain->has_iotlb_device = false; INIT_LIST_HEAD(&domain->devices); INIT_LIST_HEAD(&domain->dev_pasids); INIT_LIST_HEAD(&domain->cache_tags); @@ -1644,9 +1584,65 @@ static void domain_exit(struct dmar_domain *domain) if (WARN_ON(!list_empty(&domain->devices))) return; + kfree(domain->qi_batch); kfree(domain); } +/* + * For kdump cases, old valid entries may be cached due to the + * in-flight DMA and copied pgtable, but there is no unmapping + * behaviour for them, thus we need an explicit cache flush for + * the newly-mapped device. For kdump, at this point, the device + * is supposed to finish reset at its driver probe stage, so no + * in-flight DMA will exist, and we don't need to worry anymore + * hereafter. + */ +static void copied_context_tear_down(struct intel_iommu *iommu, + struct context_entry *context, + u8 bus, u8 devfn) +{ + u16 did_old; + + if (!context_copied(iommu, bus, devfn)) + return; + + assert_spin_locked(&iommu->lock); + + did_old = context_domain_id(context); + context_clear_entry(context); + + if (did_old < cap_ndoms(iommu->cap)) { + iommu->flush.flush_context(iommu, did_old, + (((u16)bus) << 8) | devfn, + DMA_CCMD_MASK_NOBIT, + DMA_CCMD_DEVICE_INVL); + iommu->flush.flush_iotlb(iommu, did_old, 0, 0, + DMA_TLB_DSI_FLUSH); + } + + clear_context_copied(iommu, bus, devfn); +} + +/* + * It's a non-present to present mapping. If hardware doesn't cache + * non-present entry we only need to flush the write-buffer. 
If the hardware + * _does_ cache non-present entries, then it does so in the special + * domain #0, which we have to flush: + */ +static void context_present_cache_flush(struct intel_iommu *iommu, u16 did, + u8 bus, u8 devfn) +{ + if (cap_caching_mode(iommu->cap)) { + iommu->flush.flush_context(iommu, 0, + (((u16)bus) << 8) | devfn, + DMA_CCMD_MASK_NOBIT, + DMA_CCMD_DEVICE_INVL); + iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); + } else { + iommu_flush_write_buffer(iommu); + } +} + static int domain_context_mapping_one(struct dmar_domain *domain, struct intel_iommu *iommu, u8 bus, u8 devfn) @@ -1659,9 +1655,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain, struct context_entry *context; int agaw, ret; - if (hw_pass_through && domain_type_is_si(domain)) - translation = CONTEXT_TT_PASS_THROUGH; - pr_debug("Set context mapping for %02x:%02x.%d\n", bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); @@ -1675,83 +1668,35 @@ static int domain_context_mapping_one(struct dmar_domain *domain, if (context_present(context) && !context_copied(iommu, bus, devfn)) goto out_unlock; - /* - * For kdump cases, old valid entries may be cached due to the - * in-flight DMA and copied pgtable, but there is no unmapping - * behaviour for them, thus we need an explicit cache flush for - * the newly-mapped device. For kdump, at this point, the device - * is supposed to finish reset at its driver probe stage, so no - * in-flight DMA will exist, and we don't need to worry anymore - * hereafter. - */ - if (context_copied(iommu, bus, devfn)) { - u16 did_old = context_domain_id(context); - - if (did_old < cap_ndoms(iommu->cap)) { - iommu->flush.flush_context(iommu, did_old, - (((u16)bus) << 8) | devfn, - DMA_CCMD_MASK_NOBIT, - DMA_CCMD_DEVICE_INVL); - iommu->flush.flush_iotlb(iommu, did_old, 0, 0, - DMA_TLB_DSI_FLUSH); - } - - clear_context_copied(iommu, bus, devfn); - } - + copied_context_tear_down(iommu, context, bus, devfn); context_clear_entry(context); - context_set_domain_id(context, did); - if (translation != CONTEXT_TT_PASS_THROUGH) { - /* - * Skip top levels of page tables for iommu which has - * less agaw than default. Unnecessary for PT mode. - */ - for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { - ret = -ENOMEM; - pgd = phys_to_virt(dma_pte_addr(pgd)); - if (!dma_pte_present(pgd)) - goto out_unlock; - } - - if (info && info->ats_supported) - translation = CONTEXT_TT_DEV_IOTLB; - else - translation = CONTEXT_TT_MULTI_LEVEL; + context_set_domain_id(context, did); - context_set_address_root(context, virt_to_phys(pgd)); - context_set_address_width(context, agaw); - } else { - /* - * In pass through mode, AW must be programmed to - * indicate the largest AGAW value supported by - * hardware. And ASR is ignored by hardware. - */ - context_set_address_width(context, iommu->msagaw); + /* + * Skip top levels of page tables for iommu which has + * less agaw than default. Unnecessary for PT mode. 
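+	 * For example (illustrative): a 4-level page table built for a
+	 * 48-bit domain attached behind an IOMMU that only supports a
+	 * 3-level, 39-bit walk descends one level here, so the context
+	 * entry ends up pointing at the 3-level subtree.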
+ */ + for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { + ret = -ENOMEM; + pgd = phys_to_virt(dma_pte_addr(pgd)); + if (!dma_pte_present(pgd)) + goto out_unlock; } + if (info && info->ats_supported) + translation = CONTEXT_TT_DEV_IOTLB; + else + translation = CONTEXT_TT_MULTI_LEVEL; + + context_set_address_root(context, virt_to_phys(pgd)); + context_set_address_width(context, agaw); context_set_translation_type(context, translation); context_set_fault_enable(context); context_set_present(context); if (!ecap_coherent(iommu->ecap)) clflush_cache_range(context, sizeof(*context)); - - /* - * It's a non-present to present mapping. If hardware doesn't cache - * non-present entry we only need to flush the write-buffer. If the - * _does_ cache non-present entries, then it does so in the special - * domain #0, which we have to flush: - */ - if (cap_caching_mode(iommu->cap)) { - iommu->flush.flush_context(iommu, 0, - (((u16)bus) << 8) | devfn, - DMA_CCMD_MASK_NOBIT, - DMA_CCMD_DEVICE_INVL); - iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); - } else { - iommu_flush_write_buffer(iommu); - } - + context_present_cache_flush(iommu, did, bus, devfn); ret = 0; out_unlock: @@ -1956,6 +1901,7 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 { struct intel_iommu *iommu = info->iommu; struct context_entry *context; + u16 did; spin_lock(&iommu->lock); context = iommu_context_addr(iommu, bus, devfn, 0); @@ -1964,10 +1910,11 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 return; } + did = context_domain_id(context); context_clear_entry(context); __iommu_flush_cache(iommu, context, sizeof(*context)); spin_unlock(&iommu->lock); - intel_context_flush_present(info, context, true); + intel_context_flush_present(info, context, did, true); } static int domain_setup_first_level(struct intel_iommu *iommu, @@ -2010,80 +1957,6 @@ static bool dev_is_real_dma_subdevice(struct device *dev) pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev); } -static int iommu_domain_identity_map(struct dmar_domain *domain, - unsigned long first_vpfn, - unsigned long last_vpfn) -{ - /* - * RMRR range might have overlap with physical memory range, - * clear it first - */ - dma_pte_clear_range(domain, first_vpfn, last_vpfn); - - return __domain_mapping(domain, first_vpfn, - first_vpfn, last_vpfn - first_vpfn + 1, - DMA_PTE_READ|DMA_PTE_WRITE, GFP_KERNEL); -} - -static int md_domain_init(struct dmar_domain *domain, int guest_width); - -static int __init si_domain_init(int hw) -{ - struct dmar_rmrr_unit *rmrr; - struct device *dev; - int i, nid, ret; - - si_domain = alloc_domain(IOMMU_DOMAIN_IDENTITY); - if (!si_domain) - return -EFAULT; - - if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { - domain_exit(si_domain); - si_domain = NULL; - return -EFAULT; - } - - if (hw) - return 0; - - for_each_online_node(nid) { - unsigned long start_pfn, end_pfn; - int i; - - for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) { - ret = iommu_domain_identity_map(si_domain, - mm_to_dma_pfn_start(start_pfn), - mm_to_dma_pfn_end(end_pfn-1)); - if (ret) - return ret; - } - } - - /* - * Identity map the RMRRs so that devices with RMRRs could also use - * the si_domain. 
- */ - for_each_rmrr_units(rmrr) { - for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, - i, dev) { - unsigned long long start = rmrr->base_address; - unsigned long long end = rmrr->end_address; - - if (WARN_ON(end < start || - end >> agaw_to_width(si_domain->agaw))) - continue; - - ret = iommu_domain_identity_map(si_domain, - mm_to_dma_pfn_start(start >> PAGE_SHIFT), - mm_to_dma_pfn_end(end >> PAGE_SHIFT)); - if (ret) - return ret; - } - } - - return 0; -} - static int dmar_domain_attach_device(struct dmar_domain *domain, struct device *dev) { @@ -2106,8 +1979,6 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, if (!sm_supported(iommu)) ret = domain_context_mapping(domain, dev); - else if (hw_pass_through && domain_type_is_si(domain)) - ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID); else if (domain->use_first_level) ret = domain_setup_first_level(iommu, domain, dev, IOMMU_NO_PASID); else @@ -2116,8 +1987,7 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, if (ret) goto out_block_translation; - if (sm_supported(info->iommu) || !domain_type_is_si(info->domain)) - iommu_enable_pci_caps(info); + iommu_enable_pci_caps(info); ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID); if (ret) @@ -2161,6 +2031,16 @@ static bool device_rmrr_is_relaxable(struct device *dev) static int device_def_domain_type(struct device *dev) { + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + + /* + * Hardware does not support the passthrough translation mode. + * Always use a dynamic mapping domain. + */ + if (!ecap_pass_through(iommu->ecap)) + return IOMMU_DOMAIN_DMA; + if (dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(dev); @@ -2451,8 +2331,6 @@ static int __init init_dmars(void) } } - if (!ecap_pass_through(iommu->ecap)) - hw_pass_through = 0; intel_svm_check(iommu); } @@ -2468,10 +2346,6 @@ static int __init init_dmars(void) check_tylersburg_isoch(); - ret = si_domain_init(hw_pass_through); - if (ret) - goto free_iommu; - /* * for each drhd * enable fault log @@ -2517,10 +2391,6 @@ static int __init init_dmars(void) disable_dmar_iommu(iommu); free_dmar_iommu(iommu); } - if (si_domain) { - domain_exit(si_domain); - si_domain = NULL; - } return ret; } @@ -2895,12 +2765,6 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) if (ret) goto out; - if (hw_pass_through && !ecap_pass_through(iommu->ecap)) { - pr_warn("%s: Doesn't support hardware pass through.\n", - iommu->name); - return -ENXIO; - } - sp = domain_update_iommu_superpage(NULL, iommu) - 1; if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) { pr_warn("%s: Doesn't support large page.\n", @@ -3169,43 +3033,6 @@ int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) return 0; } -static int intel_iommu_memory_notifier(struct notifier_block *nb, - unsigned long val, void *v) -{ - struct memory_notify *mhp = v; - unsigned long start_vpfn = mm_to_dma_pfn_start(mhp->start_pfn); - unsigned long last_vpfn = mm_to_dma_pfn_end(mhp->start_pfn + - mhp->nr_pages - 1); - - switch (val) { - case MEM_GOING_ONLINE: - if (iommu_domain_identity_map(si_domain, - start_vpfn, last_vpfn)) { - pr_warn("Failed to build identity map for [%lx-%lx]\n", - start_vpfn, last_vpfn); - return NOTIFY_BAD; - } - break; - - case MEM_OFFLINE: - case MEM_CANCEL_ONLINE: - { - LIST_HEAD(freelist); - - domain_unmap(si_domain, start_vpfn, last_vpfn, &freelist); - iommu_put_pages_list(&freelist); - } - break; - } - - return NOTIFY_OK; -} - 
-static struct notifier_block intel_iommu_memory_nb = { - .notifier_call = intel_iommu_memory_notifier, - .priority = 0 -}; - static void intel_disable_iommus(void) { struct intel_iommu *iommu = NULL; @@ -3543,12 +3370,7 @@ int __init intel_iommu_init(void) iommu_pmu_register(iommu); } - up_read(&dmar_global_lock); - - if (si_domain && !hw_pass_through) - register_memory_notifier(&intel_iommu_memory_nb); - down_read(&dmar_global_lock); if (probe_acpi_namespace_devices()) pr_warn("ACPI name space devices didn't probe correctly\n"); @@ -3695,7 +3517,6 @@ static struct dmar_domain *paging_domain_alloc(struct device *dev, bool first_st xa_init(&domain->iommu_array); domain->nid = dev_to_node(dev); - domain->has_iotlb_device = info->ats_enabled; domain->use_first_level = first_stage; /* calculate the address width */ @@ -3764,8 +3585,6 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) domain->geometry.force_aperture = true; return domain; - case IOMMU_DOMAIN_IDENTITY: - return &si_domain->domain; default: return NULL; } @@ -3832,8 +3651,7 @@ static void intel_iommu_domain_free(struct iommu_domain *domain) WARN_ON(dmar_domain->nested_parent && !list_empty(&dmar_domain->s1_domains)); - if (domain != &si_domain->domain) - domain_exit(dmar_domain); + domain_exit(dmar_domain); } int prepare_domain_attach_device(struct iommu_domain *domain, @@ -3883,11 +3701,9 @@ int prepare_domain_attach_device(struct iommu_domain *domain, static int intel_iommu_attach_device(struct iommu_domain *domain, struct device *dev) { - struct device_domain_info *info = dev_iommu_priv_get(dev); int ret; - if (info->domain) - device_block_translation(dev); + device_block_translation(dev); ret = prepare_domain_attach_device(domain, dev); if (ret) @@ -4164,6 +3980,7 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) dev_iommu_priv_set(dev, info); if (pdev && pci_ats_supported(pdev)) { + pci_prepare_ats(pdev, VTD_PAGE_SHIFT); ret = device_rbtree_insert(iommu, info); if (ret) goto free; @@ -4185,6 +4002,16 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) intel_iommu_debugfs_create_dev(info); + /* + * The PCIe spec, in its wisdom, declares that the behaviour of the + * device is undefined if you enable PASID support after ATS support. + * So always enable PASID support on devices which have it, even if + * we can't yet know if we're ever going to use it. 
+ */ + if (info->pasid_supported && + !pci_enable_pasid(pdev, info->pasid_supported & ~1)) + info->pasid_enabled = 1; + return &iommu->iommu; free_table: intel_pasid_free_table(dev); @@ -4201,6 +4028,11 @@ static void intel_iommu_release_device(struct device *dev) struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; + if (info->pasid_enabled) { + pci_disable_pasid(to_pci_dev(dev)); + info->pasid_enabled = 0; + } + mutex_lock(&iommu->iopf_lock); if (dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev))) device_rbtree_remove(info); @@ -4322,6 +4154,7 @@ static int context_flip_pri(struct device_domain_info *info, bool enable) struct intel_iommu *iommu = info->iommu; u8 bus = info->bus, devfn = info->devfn; struct context_entry *context; + u16 did; spin_lock(&iommu->lock); if (context_copied(iommu, bus, devfn)) { @@ -4334,6 +4167,7 @@ static int context_flip_pri(struct device_domain_info *info, bool enable) spin_unlock(&iommu->lock); return -ENODEV; } + did = context_domain_id(context); if (enable) context_set_sm_pre(context); @@ -4342,7 +4176,7 @@ static int context_flip_pri(struct device_domain_info *info, bool enable) if (!ecap_coherent(iommu->ecap)) clflush_cache_range(context, sizeof(*context)); - intel_context_flush_present(info, context, true); + intel_context_flush_present(info, context, did, true); spin_unlock(&iommu->lock); return 0; @@ -4493,11 +4327,17 @@ static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid, struct iommu_domain *domain) { struct device_domain_info *info = dev_iommu_priv_get(dev); - struct dmar_domain *dmar_domain = to_dmar_domain(domain); struct dev_pasid_info *curr, *dev_pasid = NULL; struct intel_iommu *iommu = info->iommu; + struct dmar_domain *dmar_domain; unsigned long flags; + if (domain->type == IOMMU_DOMAIN_IDENTITY) { + intel_pasid_tear_down_entry(iommu, dev, pasid, false); + return; + } + + dmar_domain = to_dmar_domain(domain); spin_lock_irqsave(&dmar_domain->lock, flags); list_for_each_entry(curr, &dmar_domain->dev_pasids, link_domain) { if (curr->dev == dev && curr->pasid == pasid) { @@ -4552,9 +4392,7 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain, if (ret) goto out_detach_iommu; - if (domain_type_is_si(dmar_domain)) - ret = intel_pasid_setup_pass_through(iommu, dev, pasid); - else if (dmar_domain->use_first_level) + if (dmar_domain->use_first_level) ret = domain_setup_first_level(iommu, dmar_domain, dev, pasid); else @@ -4724,9 +4562,111 @@ static const struct iommu_dirty_ops intel_dirty_ops = { .read_and_clear_dirty = intel_iommu_read_and_clear_dirty, }; +static int context_setup_pass_through(struct device *dev, u8 bus, u8 devfn) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + struct context_entry *context; + + spin_lock(&iommu->lock); + context = iommu_context_addr(iommu, bus, devfn, 1); + if (!context) { + spin_unlock(&iommu->lock); + return -ENOMEM; + } + + if (context_present(context) && !context_copied(iommu, bus, devfn)) { + spin_unlock(&iommu->lock); + return 0; + } + + copied_context_tear_down(iommu, context, bus, devfn); + context_clear_entry(context); + context_set_domain_id(context, FLPT_DEFAULT_DID); + + /* + * In pass through mode, AW must be programmed to indicate the largest + * AGAW value supported by hardware. And ASR is ignored by hardware. 
+ */ + context_set_address_width(context, iommu->msagaw); + context_set_translation_type(context, CONTEXT_TT_PASS_THROUGH); + context_set_fault_enable(context); + context_set_present(context); + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(context, sizeof(*context)); + context_present_cache_flush(iommu, FLPT_DEFAULT_DID, bus, devfn); + spin_unlock(&iommu->lock); + + return 0; +} + +static int context_setup_pass_through_cb(struct pci_dev *pdev, u16 alias, void *data) +{ + struct device *dev = data; + + if (dev != &pdev->dev) + return 0; + + return context_setup_pass_through(dev, PCI_BUS_NUM(alias), alias & 0xff); +} + +static int device_setup_pass_through(struct device *dev) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + + if (!dev_is_pci(dev)) + return context_setup_pass_through(dev, info->bus, info->devfn); + + return pci_for_each_dma_alias(to_pci_dev(dev), + context_setup_pass_through_cb, dev); +} + +static int identity_domain_attach_dev(struct iommu_domain *domain, struct device *dev) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + int ret; + + device_block_translation(dev); + + if (dev_is_real_dma_subdevice(dev)) + return 0; + + if (sm_supported(iommu)) { + ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID); + if (!ret) + iommu_enable_pci_caps(info); + } else { + ret = device_setup_pass_through(dev); + } + + return ret; +} + +static int identity_domain_set_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + + if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev)) + return -EOPNOTSUPP; + + return intel_pasid_setup_pass_through(iommu, dev, pasid); +} + +static struct iommu_domain identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &(const struct iommu_domain_ops) { + .attach_dev = identity_domain_attach_dev, + .set_dev_pasid = identity_domain_set_dev_pasid, + }, +}; + const struct iommu_ops intel_iommu_ops = { .blocked_domain = &blocking_domain, .release_domain = &blocking_domain, + .identity_domain = &identity_domain, .capable = intel_iommu_capable, .hw_info = intel_iommu_hw_info, .domain_alloc = intel_iommu_domain_alloc, diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 2086688743f09cbf4f1e1a537328279bb5a2db96..fe9cbfbe38f7f75bcd3ab6ce95bc464873308bc5 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -584,11 +584,23 @@ struct iommu_domain_info { * to VT-d spec, section 9.3 */ }; +/* + * We start simply by using a fixed size for the batched descriptors. This + * size is currently sufficient for our needs. Future improvements could + * involve dynamically allocating the batch buffer based on actual demand, + * allowing us to adjust the batch size for optimal performance in different + * scenarios. 
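+ * With four u64s per descriptor (32 bytes), a 16-entry batch costs
+ * roughly 512 bytes (plus the index) per domain that allocates one.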
+ */ +#define QI_MAX_BATCHED_DESC_COUNT 16 +struct qi_batch { + struct qi_desc descs[QI_MAX_BATCHED_DESC_COUNT]; + unsigned int index; +}; + struct dmar_domain { int nid; /* node id */ struct xarray iommu_array; /* Attached IOMMU array */ - u8 has_iotlb_device: 1; u8 iommu_coherency: 1; /* indicate coherency of iommu access */ u8 force_snooping : 1; /* Create IOPTEs with snoop control */ u8 set_pte_snp:1; @@ -609,6 +621,7 @@ struct dmar_domain { spinlock_t cache_lock; /* Protect the cache tag list */ struct list_head cache_tags; /* Cache tag list */ + struct qi_batch *qi_batch; /* Batched QI descriptors */ int iommu_superpage;/* Level of superpages supported: 0 == 4KiB (no superpages), 1 == 2MiB, @@ -1067,6 +1080,115 @@ static inline unsigned long nrpages_to_size(unsigned long npages) return npages << VTD_PAGE_SHIFT; } +static inline void qi_desc_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type, + struct qi_desc *desc) +{ + u8 dw = 0, dr = 0; + int ih = 0; + + if (cap_write_drain(iommu->cap)) + dw = 1; + + if (cap_read_drain(iommu->cap)) + dr = 1; + + desc->qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw) + | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE; + desc->qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih) + | QI_IOTLB_AM(size_order); + desc->qw2 = 0; + desc->qw3 = 0; +} + +static inline void qi_desc_dev_iotlb(u16 sid, u16 pfsid, u16 qdep, u64 addr, + unsigned int mask, struct qi_desc *desc) +{ + if (mask) { + addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1; + desc->qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE; + } else { + desc->qw1 = QI_DEV_IOTLB_ADDR(addr); + } + + if (qdep >= QI_DEV_IOTLB_MAX_INVS) + qdep = 0; + + desc->qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) | + QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid); + desc->qw2 = 0; + desc->qw3 = 0; +} + +static inline void qi_desc_piotlb(u16 did, u32 pasid, u64 addr, + unsigned long npages, bool ih, + struct qi_desc *desc) +{ + if (npages == -1) { + desc->qw0 = QI_EIOTLB_PASID(pasid) | + QI_EIOTLB_DID(did) | + QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | + QI_EIOTLB_TYPE; + desc->qw1 = 0; + } else { + int mask = ilog2(__roundup_pow_of_two(npages)); + unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask)); + + if (WARN_ON_ONCE(!IS_ALIGNED(addr, align))) + addr = ALIGN_DOWN(addr, align); + + desc->qw0 = QI_EIOTLB_PASID(pasid) | + QI_EIOTLB_DID(did) | + QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | + QI_EIOTLB_TYPE; + desc->qw1 = QI_EIOTLB_ADDR(addr) | + QI_EIOTLB_IH(ih) | + QI_EIOTLB_AM(mask); + } +} + +static inline void qi_desc_dev_iotlb_pasid(u16 sid, u16 pfsid, u32 pasid, + u16 qdep, u64 addr, + unsigned int size_order, + struct qi_desc *desc) +{ + unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1); + + desc->qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) | + QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE | + QI_DEV_IOTLB_PFSID(pfsid); + + /* + * If S bit is 0, we only flush a single page. If S bit is set, + * The least significant zero bit indicates the invalidation address + * range. VT-d spec 6.5.2.6. + * e.g. address bit 12[0] indicates 8KB, 13[0] indicates 16KB. + * size order = 0 is PAGE_SIZE 4KB + * Max Invs Pending (MIP) is set to 0 for now until we have DIT in + * ECAP. 
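+	 * Worked example (illustrative): size_order == 2 with 4KB pages is
+	 * a 16KB range. GENMASK first sets address bits 13:12; clearing
+	 * bit 13 (the size_order bit) then leaves 12[1] 13[0], so with the
+	 * S bit set the least significant zero bit (13) encodes exactly 16KB.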
+ */ + if (!IS_ALIGNED(addr, VTD_PAGE_SIZE << size_order)) + pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n", + addr, size_order); + + /* Take page address */ + desc->qw1 = QI_DEV_EIOTLB_ADDR(addr); + + if (size_order) { + /* + * Existing 0s in address below size_order may be the least + * significant bit, we must set them to 1s to avoid having + * smaller size than desired. + */ + desc->qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1, + VTD_PAGE_SHIFT); + /* Clear size_order bit to indicate size */ + desc->qw1 &= ~mask; + /* Set the S bit to indicate flushing more than 1 page */ + desc->qw1 |= QI_DEV_EIOTLB_SIZE; + } +} + /* Convert value to context PASID directory size field coding. */ #define context_pdts(pds) (((pds) & 0x7) << 9) @@ -1098,13 +1220,15 @@ void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did, u64 granu, int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc, unsigned int count, unsigned long options); + +void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type); /* * Options used in qi_submit_sync: * QI_OPT_WAIT_DRAIN - Wait for PRQ drain completion, spec 6.5.2.8. */ #define QI_OPT_WAIT_DRAIN BIT(0) -void domain_update_iotlb(struct dmar_domain *domain); int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu); void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu); void device_block_translation(struct device *dev); @@ -1154,7 +1278,7 @@ void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start, void intel_context_flush_present(struct device_domain_info *info, struct context_entry *context, - bool affect_domains); + u16 did, bool affect_domains); #ifdef CONFIG_INTEL_IOMMU_SVM void intel_svm_check(struct intel_iommu *iommu); diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index e59403767faea1d9360ca2d311c8bbb5369e2588..fcf56e262b07ba3920284db2ec2fd372c48e1c73 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -1356,12 +1356,11 @@ static void intel_irq_remapping_prepare_irte(struct intel_ir_data *data, case X86_IRQ_ALLOC_TYPE_IOAPIC: /* Set source-id of interrupt request */ set_ioapic_sid(irte, info->devid); - apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: Set IRTE entry (P:%d FPD:%d Dst_Mode:%d Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X Avail:%X Vector:%02X Dest:%08X SID:%04X SQ:%X SVT:%X)\n", - info->devid, irte->present, irte->fpd, - irte->dst_mode, irte->redir_hint, - irte->trigger_mode, irte->dlvry_mode, - irte->avail, irte->vector, irte->dest_id, - irte->sid, irte->sq, irte->svt); + apic_pr_verbose("IOAPIC[%d]: Set IRTE entry (P:%d FPD:%d Dst_Mode:%d Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X Avail:%X Vector:%02X Dest:%08X SID:%04X SQ:%X SVT:%X)\n", + info->devid, irte->present, irte->fpd, irte->dst_mode, + irte->redir_hint, irte->trigger_mode, irte->dlvry_mode, + irte->avail, irte->vector, irte->dest_id, irte->sid, + irte->sq, irte->svt); sub_handle = info->ioapic.pin; break; case X86_IRQ_ALLOC_TYPE_HPET: diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c index 16a2bcf5cfeb9d2ea745e0aaf09bbc4df5cf6449..433c58944401f9f4c15ba816d5e2b177a120bb18 100644 --- a/drivers/iommu/intel/nested.c +++ b/drivers/iommu/intel/nested.c @@ -66,8 +66,6 @@ static int intel_nested_attach_dev(struct iommu_domain *domain, list_add(&info->link, &dmar_domain->devices); spin_unlock_irqrestore(&dmar_domain->lock, flags); - 
domain_update_iotlb(dmar_domain); - return 0; unassign_tag: cache_tag_unassign_domain(dmar_domain, dev, IOMMU_NO_PASID); @@ -85,6 +83,7 @@ static void intel_nested_domain_free(struct iommu_domain *domain) spin_lock(&s2_domain->s1_lock); list_del(&dmar_domain->s2_link); spin_unlock(&s2_domain->s1_lock); + kfree(dmar_domain->qi_batch); kfree(dmar_domain); } diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 5792c817cefa56872cf3434368e967a5a62e6dd9..2e5fa0a232999fad15ae91b8bb581f64307552b2 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -264,9 +264,7 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev, else iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); - /* Device IOTLB doesn't need to be flushed in caching mode. */ - if (!cap_caching_mode(iommu->cap)) - devtlb_invalidation_with_pasid(iommu, dev, pasid); + devtlb_invalidation_with_pasid(iommu, dev, pasid); } /* @@ -493,9 +491,7 @@ int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu, iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); - /* Device IOTLB doesn't need to be flushed in caching mode. */ - if (!cap_caching_mode(iommu->cap)) - devtlb_invalidation_with_pasid(iommu, dev, pasid); + devtlb_invalidation_with_pasid(iommu, dev, pasid); return 0; } @@ -572,9 +568,7 @@ void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu, pasid_cache_invalidation_with_pasid(iommu, did, pasid); qi_flush_piotlb(iommu, did, pasid, 0, -1, 0); - /* Device IOTLB doesn't need to be flushed in caching mode. */ - if (!cap_caching_mode(iommu->cap)) - devtlb_invalidation_with_pasid(iommu, dev, pasid); + devtlb_invalidation_with_pasid(iommu, dev, pasid); } /** @@ -683,6 +677,7 @@ static void device_pasid_table_teardown(struct device *dev, u8 bus, u8 devfn) struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; struct context_entry *context; + u16 did; spin_lock(&iommu->lock); context = iommu_context_addr(iommu, bus, devfn, false); @@ -691,10 +686,11 @@ static void device_pasid_table_teardown(struct device *dev, u8 bus, u8 devfn) return; } + did = context_domain_id(context); context_clear_entry(context); __iommu_flush_cache(iommu, context, sizeof(*context)); spin_unlock(&iommu->lock); - intel_context_flush_present(info, context, false); + intel_context_flush_present(info, context, did, false); } static int pci_pasid_table_teardown(struct pci_dev *pdev, u16 alias, void *data) @@ -885,10 +881,9 @@ static void __context_flush_dev_iotlb(struct device_domain_info *info) */ void intel_context_flush_present(struct device_domain_info *info, struct context_entry *context, - bool flush_domains) + u16 did, bool flush_domains) { struct intel_iommu *iommu = info->iommu; - u16 did = context_domain_id(context); struct pasid_entry *pte; int i; diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 0e3a9b38bef210793cc9841f09d6e3d54c474932..078d1e32a24eeb1f60f28859a1678ddddc82f297 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -184,7 +184,10 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) static void intel_mm_free_notifier(struct mmu_notifier *mn) { - kfree(container_of(mn, struct dmar_domain, notifier)); + struct dmar_domain *domain = container_of(mn, struct dmar_domain, notifier); + + kfree(domain->qi_batch); + kfree(domain); } static const struct mmu_notifier_ops intel_mmuops = { @@ -311,7 +314,7 @@ void 
intel_drain_pasid_prq(struct device *dev, u32 pasid) domain = info->domain; pdev = to_pci_dev(dev); sid = PCI_DEVID(info->bus, info->devfn); - did = domain_id_iommu(domain, iommu); + did = domain ? domain_id_iommu(domain, iommu) : FLPT_DEFAULT_DID; qdep = pci_ats_queue_depth(pdev); /* diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index cd679c13752e00c30766fd060ff77b8017934645..4674e618797c155581e0254347edcb9a9c92a416 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -115,6 +115,59 @@ static struct iopf_group *iopf_group_alloc(struct iommu_fault_param *iopf_param, return group; } +static struct iommu_attach_handle *find_fault_handler(struct device *dev, + struct iopf_fault *evt) +{ + struct iommu_fault *fault = &evt->fault; + struct iommu_attach_handle *attach_handle; + + if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) { + attach_handle = iommu_attach_handle_get(dev->iommu_group, + fault->prm.pasid, 0); + if (IS_ERR(attach_handle)) { + const struct iommu_ops *ops = dev_iommu_ops(dev); + + if (!ops->user_pasid_table) + return NULL; + /* + * The iommu driver for this device supports user- + * managed PASID table. Therefore page faults for + * any PASID should go through the NESTING domain + * attached to the device RID. + */ + attach_handle = iommu_attach_handle_get( + dev->iommu_group, IOMMU_NO_PASID, + IOMMU_DOMAIN_NESTED); + if (IS_ERR(attach_handle)) + return NULL; + } + } else { + attach_handle = iommu_attach_handle_get(dev->iommu_group, + IOMMU_NO_PASID, 0); + + if (IS_ERR(attach_handle)) + return NULL; + } + + if (!attach_handle->domain->iopf_handler) + return NULL; + + return attach_handle; +} + +static void iopf_error_response(struct device *dev, struct iopf_fault *evt) +{ + const struct iommu_ops *ops = dev_iommu_ops(dev); + struct iommu_fault *fault = &evt->fault; + struct iommu_page_response resp = { + .pasid = fault->prm.pasid, + .grpid = fault->prm.grpid, + .code = IOMMU_PAGE_RESP_INVALID + }; + + ops->page_response(dev, evt, &resp); +} + /** * iommu_report_device_fault() - Report fault event to device driver * @dev: the device @@ -153,23 +206,39 @@ static struct iopf_group *iopf_group_alloc(struct iommu_fault_param *iopf_param, * handling framework should guarantee that the iommu domain could only be * freed after the device has stopped generating page faults (or the iommu * hardware has been set to block the page faults) and the pending page faults - * have been flushed. + * have been flushed. If no page fault handler is attached or no iopf params + * are set up, ops->page_response() is called to complete the event. + * + * Returns 0 on success, or an error in case of a bad/failed iopf setup.
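[Editor's note — sketch of a driver-side caller, not part of the patch; the dev_dbg() text is illustrative. With the new int return, an IOMMU driver's page-request-queue handler can observe faults that could not be routed; the helper has already replied IOMMU_PAGE_RESP_INVALID for such page requests:

	ret = iommu_report_device_fault(dev, &evt);
	if (ret)
		dev_dbg(dev, "page request %u not routed: %d\n",
			evt.fault.prm.grpid, ret);
]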
*/ -void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) +int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) { + struct iommu_attach_handle *attach_handle; struct iommu_fault *fault = &evt->fault; struct iommu_fault_param *iopf_param; struct iopf_group abort_group = {}; struct iopf_group *group; + attach_handle = find_fault_handler(dev, evt); + if (!attach_handle) + goto err_bad_iopf; + + /* + * Something has gone wrong if a fault-capable domain is attached but no + * iopf_param is set up. + */ iopf_param = iopf_get_dev_fault_param(dev); if (WARN_ON(!iopf_param)) - return; + goto err_bad_iopf; if (!(fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) { - report_partial_fault(iopf_param, fault); + int ret; + + ret = report_partial_fault(iopf_param, fault); iopf_put_dev_fault_param(iopf_param); /* A request that is not the last does not need to be ack'd */ + + return ret; } /* @@ -184,38 +253,7 @@ void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) if (group == &abort_group) goto err_abort; - if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) { - group->attach_handle = iommu_attach_handle_get(dev->iommu_group, - fault->prm.pasid, - 0); - if (IS_ERR(group->attach_handle)) { - const struct iommu_ops *ops = dev_iommu_ops(dev); - - if (!ops->user_pasid_table) - goto err_abort; - - /* - * The iommu driver for this device supports user- - * managed PASID table. Therefore page faults for - * any PASID should go through the NESTING domain - * attached to the device RID. - */ - group->attach_handle = - iommu_attach_handle_get(dev->iommu_group, - IOMMU_NO_PASID, - IOMMU_DOMAIN_NESTED); - if (IS_ERR(group->attach_handle)) - goto err_abort; - } - } else { - group->attach_handle = - iommu_attach_handle_get(dev->iommu_group, IOMMU_NO_PASID, 0); - if (IS_ERR(group->attach_handle)) - goto err_abort; - } - - if (!group->attach_handle->domain->iopf_handler) - goto err_abort; + group->attach_handle = attach_handle; /* * On success iopf_handler must call iopf_group_response() and @@ -224,7 +262,7 @@ void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) if (group->attach_handle->domain->iopf_handler(group)) goto err_abort; - return; + return 0; err_abort: dev_warn_ratelimited(dev, "iopf with pasid %d aborted\n", @@ -234,6 +272,14 @@ void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) __iopf_free_group(group); else iopf_free_group(group); + + return 0; + +err_bad_iopf: + if (fault->type == IOMMU_FAULT_PAGE_REQ) + iopf_error_response(dev, evt); + + return -EINVAL; } EXPORT_SYMBOL_GPL(iommu_report_device_fault); diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index fd9a5c136b5d758631d67c527426164aa21447a1..8967b0e38851a90aa162aea5cdec65855383f2e7 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -286,13 +286,13 @@ static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, int num_entries, sizeof(*ptep) * num_entries, DMA_TO_DEVICE); } -static void __arm_lpae_clear_pte(arm_lpae_iopte *ptep, struct io_pgtable_cfg *cfg) +static void __arm_lpae_clear_pte(arm_lpae_iopte *ptep, struct io_pgtable_cfg *cfg, int num_entries) { + for (int i = 0; i < num_entries; i++) + ptep[i] = 0; - *ptep = 0; - - if (!cfg->coherent_walk) - __arm_lpae_sync_pte(ptep, 1, cfg); + if (!cfg->coherent_walk && num_entries) + __arm_lpae_sync_pte(ptep, num_entries, cfg); } static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, @@ -665,26 +665,29 @@ static
size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, max_entries = arm_lpae_max_entries(unmap_idx_start, data); num_entries = min_t(int, pgcount, max_entries); - while (i < num_entries) { - pte = READ_ONCE(*ptep); + /* Find and handle non-leaf entries */ + for (i = 0; i < num_entries; i++) { + pte = READ_ONCE(ptep[i]); if (WARN_ON(!pte)) break; - __arm_lpae_clear_pte(ptep, &iop->cfg); - if (!iopte_leaf(pte, lvl, iop->fmt)) { + __arm_lpae_clear_pte(&ptep[i], &iop->cfg, 1); + /* Also flush any partial walks */ io_pgtable_tlb_flush_walk(iop, iova + i * size, size, ARM_LPAE_GRANULE(data)); __arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data)); - } else if (!iommu_iotlb_gather_queued(gather)) { - io_pgtable_tlb_add_page(iop, gather, iova + i * size, size); } - - ptep++; - i++; } + /* Clear the remaining entries */ + __arm_lpae_clear_pte(ptep, &iop->cfg, i); + + if (gather && !iommu_iotlb_gather_queued(gather)) + for (int j = 0; j < i; j++) + io_pgtable_tlb_add_page(iop, gather, iova + j * size, size); + return i * size; } else if (iopte_leaf(pte, lvl, iop->fmt)) { /* diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 59ae9a4ad0170f16d3cd3cc7812a68f89ad1bb51..ba2aed1b7014c461d7ee318e8c10ae6adaa5e8e8 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -1,12 +1,12 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES */ +#include #include #include -#include #include -#include "../iommu-priv.h" +#include "../iommu-priv.h" #include "io_pagetable.h" #include "iommufd_private.h" @@ -327,8 +327,9 @@ static int iommufd_group_setup_msi(struct iommufd_group *igroup, return 0; } -static int iommufd_hwpt_paging_attach(struct iommufd_hwpt_paging *hwpt_paging, - struct iommufd_device *idev) +static int +iommufd_device_attach_reserved_iova(struct iommufd_device *idev, + struct iommufd_hwpt_paging *hwpt_paging) { int rc; @@ -354,6 +355,7 @@ static int iommufd_hwpt_paging_attach(struct iommufd_hwpt_paging *hwpt_paging, int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, struct iommufd_device *idev) { + struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt); int rc; mutex_lock(&idev->igroup->lock); @@ -363,8 +365,8 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, goto err_unlock; } - if (hwpt_is_paging(hwpt)) { - rc = iommufd_hwpt_paging_attach(to_hwpt_paging(hwpt), idev); + if (hwpt_paging) { + rc = iommufd_device_attach_reserved_iova(idev, hwpt_paging); if (rc) goto err_unlock; } @@ -387,9 +389,8 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, mutex_unlock(&idev->igroup->lock); return 0; err_unresv: - if (hwpt_is_paging(hwpt)) - iopt_remove_reserved_iova(&to_hwpt_paging(hwpt)->ioas->iopt, - idev->dev); + if (hwpt_paging) + iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev); err_unlock: mutex_unlock(&idev->igroup->lock); return rc; @@ -399,6 +400,7 @@ struct iommufd_hw_pagetable * iommufd_hw_pagetable_detach(struct iommufd_device *idev) { struct iommufd_hw_pagetable *hwpt = idev->igroup->hwpt; + struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt); mutex_lock(&idev->igroup->lock); list_del(&idev->group_item); @@ -406,9 +408,8 @@ iommufd_hw_pagetable_detach(struct iommufd_device *idev) iommufd_hwpt_detach_device(hwpt, idev); idev->igroup->hwpt = NULL; } - if (hwpt_is_paging(hwpt)) - iopt_remove_reserved_iova(&to_hwpt_paging(hwpt)->ioas->iopt, - idev->dev); + if (hwpt_paging) + 
iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev); mutex_unlock(&idev->igroup->lock); /* Caller must destroy hwpt */ @@ -451,17 +452,17 @@ iommufd_group_remove_reserved_iova(struct iommufd_group *igroup, } static int -iommufd_group_do_replace_paging(struct iommufd_group *igroup, - struct iommufd_hwpt_paging *hwpt_paging) +iommufd_group_do_replace_reserved_iova(struct iommufd_group *igroup, + struct iommufd_hwpt_paging *hwpt_paging) { - struct iommufd_hw_pagetable *old_hwpt = igroup->hwpt; + struct iommufd_hwpt_paging *old_hwpt_paging; struct iommufd_device *cur; int rc; lockdep_assert_held(&igroup->lock); - if (!hwpt_is_paging(old_hwpt) || - hwpt_paging->ioas != to_hwpt_paging(old_hwpt)->ioas) { + old_hwpt_paging = find_hwpt_paging(igroup->hwpt); + if (!old_hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas) { list_for_each_entry(cur, &igroup->device_list, group_item) { rc = iopt_table_enforce_dev_resv_regions( &hwpt_paging->ioas->iopt, cur->dev, NULL); @@ -484,6 +485,8 @@ static struct iommufd_hw_pagetable * iommufd_device_do_replace(struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt) { + struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt); + struct iommufd_hwpt_paging *old_hwpt_paging; struct iommufd_group *igroup = idev->igroup; struct iommufd_hw_pagetable *old_hwpt; unsigned int num_devices; @@ -507,9 +510,8 @@ iommufd_device_do_replace(struct iommufd_device *idev, } old_hwpt = igroup->hwpt; - if (hwpt_is_paging(hwpt)) { - rc = iommufd_group_do_replace_paging(igroup, - to_hwpt_paging(hwpt)); + if (hwpt_paging) { + rc = iommufd_group_do_replace_reserved_iova(igroup, hwpt_paging); if (rc) goto err_unlock; } @@ -518,11 +520,10 @@ iommufd_device_do_replace(struct iommufd_device *idev, if (rc) goto err_unresv; - if (hwpt_is_paging(old_hwpt) && - (!hwpt_is_paging(hwpt) || - to_hwpt_paging(hwpt)->ioas != to_hwpt_paging(old_hwpt)->ioas)) - iommufd_group_remove_reserved_iova(igroup, - to_hwpt_paging(old_hwpt)); + old_hwpt_paging = find_hwpt_paging(old_hwpt); + if (old_hwpt_paging && + (!hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas)) + iommufd_group_remove_reserved_iova(igroup, old_hwpt_paging); igroup->hwpt = hwpt; @@ -540,9 +541,8 @@ iommufd_device_do_replace(struct iommufd_device *idev, /* Caller must destroy old_hwpt */ return old_hwpt; err_unresv: - if (hwpt_is_paging(hwpt)) - iommufd_group_remove_reserved_iova(igroup, - to_hwpt_paging(old_hwpt)); + if (hwpt_paging) + iommufd_group_remove_reserved_iova(igroup, hwpt_paging); err_unlock: mutex_unlock(&idev->igroup->lock); return ERR_PTR(rc); diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c index 54d6cd20a6730d4ea82d9aa89b290ffc2f7b003e..df03411c8728933a3305e39bca5d07d5de82ddae 100644 --- a/drivers/iommu/iommufd/fault.c +++ b/drivers/iommu/iommufd/fault.c @@ -3,14 +3,14 @@ */ #define pr_fmt(fmt) "iommufd: " fmt +#include #include #include +#include #include #include -#include #include #include -#include #include #include "../iommu-priv.h" @@ -128,7 +128,7 @@ iommufd_device_get_attach_handle(struct iommufd_device *idev) struct iommu_attach_handle *handle; handle = iommu_attach_handle_get(idev->igroup->group, IOMMU_NO_PASID, 0); - if (!handle) + if (IS_ERR(handle)) return NULL; return to_iommufd_handle(handle); @@ -305,6 +305,16 @@ static ssize_t iommufd_fault_fops_write(struct file *filep, const char __user *b if (rc) break; + static_assert((int)IOMMUFD_PAGE_RESP_SUCCESS == + (int)IOMMU_PAGE_RESP_SUCCESS); + static_assert((int)IOMMUFD_PAGE_RESP_INVALID == + 
(int)IOMMU_PAGE_RESP_INVALID); + if (response.code != IOMMUFD_PAGE_RESP_SUCCESS && + response.code != IOMMUFD_PAGE_RESP_INVALID) { + rc = -EINVAL; + break; + } + group = xa_erase(&fault->response, response.cookie); if (!group) { rc = -EINVAL; diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index 5ea1e6e79dff8ced1d83b5fe5d262505a3274e88..d06bf6e6c19fd207a1b84d018920865b12a5b571 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -225,7 +225,8 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx, if ((flags & ~IOMMU_HWPT_FAULT_ID_VALID) || !user_data->len || !ops->domain_alloc_user) return ERR_PTR(-EOPNOTSUPP); - if (parent->auto_domain || !parent->nest_parent) + if (parent->auto_domain || !parent->nest_parent || + parent->common.domain->owner != ops) return ERR_PTR(-EINVAL); hwpt_nested = __iommufd_object_alloc( @@ -247,7 +248,8 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx, } hwpt->domain->owner = ops; - if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) { + if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED || + !hwpt->domain->ops->cache_invalidate_user)) { rc = -EINVAL; goto out_abort; } diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c index 9f193c933de6ef6ee58ef6e2dbb01c98c2d82a8d..4bf7ccd39d465c997aaae031ee308347326e3c38 100644 --- a/drivers/iommu/iommufd/io_pagetable.c +++ b/drivers/iommu/iommufd/io_pagetable.c @@ -8,17 +8,17 @@ * The datastructure uses the iopt_pages to optimize the storage of the PFNs * between the domains and xarray. */ +#include +#include +#include #include #include -#include #include -#include #include -#include #include -#include "io_pagetable.h" #include "double_span.h" +#include "io_pagetable.h" struct iopt_pages_list { struct iopt_pages *pages; diff --git a/drivers/iommu/iommufd/io_pagetable.h b/drivers/iommu/iommufd/io_pagetable.h index 0ec3509b7e339243e1c70a62c2bb0300191e5075..c61d74471684eefd0bb11052d99b68e688249449 100644 --- a/drivers/iommu/iommufd/io_pagetable.h +++ b/drivers/iommu/iommufd/io_pagetable.h @@ -6,8 +6,8 @@ #define __IO_PAGETABLE_H #include -#include #include +#include #include #include "iommufd_private.h" diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c index 157a89b993e438b5558af501e209a9928ac42236..2c4b2bb11e78ce4de21450a3b2e3c0efe4da7594 100644 --- a/drivers/iommu/iommufd/ioas.c +++ b/drivers/iommu/iommufd/ioas.c @@ -3,8 +3,8 @@ * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES */ #include -#include #include +#include #include #include "io_pagetable.h" diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 92efe30a8f0d00fa92197261b26c69c09fd4fd9a..5d3768d7709924d05464ad2c2c3c4e505fb31dcf 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -4,13 +4,14 @@ #ifndef __IOMMUFD_PRIVATE_H #define __IOMMUFD_PRIVATE_H -#include -#include -#include -#include #include #include +#include +#include +#include +#include #include + #include "../iommu-priv.h" struct iommu_domain; @@ -324,6 +325,25 @@ to_hwpt_paging(struct iommufd_hw_pagetable *hwpt) return container_of(hwpt, struct iommufd_hwpt_paging, common); } +static inline struct iommufd_hwpt_nested * +to_hwpt_nested(struct iommufd_hw_pagetable *hwpt) +{ + return container_of(hwpt, struct iommufd_hwpt_nested, common); +} + +static inline struct iommufd_hwpt_paging * +find_hwpt_paging(struct iommufd_hw_pagetable *hwpt) +{ + 
switch (hwpt->obj.type) { + case IOMMUFD_OBJ_HWPT_PAGING: + return to_hwpt_paging(hwpt); + case IOMMUFD_OBJ_HWPT_NESTED: + return to_hwpt_nested(hwpt)->parent; + default: + return NULL; + } +} + static inline struct iommufd_hwpt_paging * iommufd_get_hwpt_paging(struct iommufd_ucmd *ucmd, u32 id) { diff --git a/drivers/iommu/iommufd/iommufd_test.h b/drivers/iommu/iommufd/iommufd_test.h index acbbba1c66716e9fb67273436d8f39967b127bfa..f4bc23a92f9a2e7e30cae4d3a90913621963eab6 100644 --- a/drivers/iommu/iommufd/iommufd_test.h +++ b/drivers/iommu/iommufd/iommufd_test.h @@ -4,8 +4,8 @@ #ifndef _UAPI_IOMMUFD_TEST_H #define _UAPI_IOMMUFD_TEST_H -#include #include +#include enum { IOMMU_TEST_OP_ADD_RESERVED = 1, diff --git a/drivers/iommu/iommufd/iova_bitmap.c b/drivers/iommu/iommufd/iova_bitmap.c index 6b09b7856789010c77257d6697f77e31809106fc..2cdc4f542df4723bd8656f215bfb9712c981b09c 100644 --- a/drivers/iommu/iommufd/iova_bitmap.c +++ b/drivers/iommu/iommufd/iova_bitmap.c @@ -3,10 +3,10 @@ * Copyright (c) 2022, Oracle and/or its affiliates. * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved */ +#include #include #include #include -#include #define BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE) diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 83bbd7c5d160847044101234425e29e031407f98..b5f5d27ee9634e6e5710363bb84f0e62b94a65b3 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -8,15 +8,15 @@ */ #define pr_fmt(fmt) "iommufd: " fmt +#include #include #include -#include -#include +#include #include +#include #include -#include +#include #include -#include #include "io_pagetable.h" #include "iommufd_private.h" diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index 117f644a0c5b75c6701001b08b4986627c3bc8f2..93d806c9c073180edcdadf8fd7569d234feedf7a 100644 --- a/drivers/iommu/iommufd/pages.c +++ b/drivers/iommu/iommufd/pages.c @@ -45,16 +45,16 @@ * last_iova + 1 can overflow. An iopt_pages index will always be much less than * ULONG_MAX so last_index + 1 cannot overflow. 
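[Editor's note on the find_hwpt_paging() helper added to iommufd_private.h above — usage sketch, not part of the patch. Both HWPT flavours resolve to the paging object that owns the IOAS (a PAGING hwpt is returned directly, a NESTED one via its parent), so the attach, detach and replace paths in device.c collapse into one pattern instead of type-switching:

	struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);

	if (hwpt_paging)
		iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev);
]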
*/ +#include +#include +#include +#include #include #include -#include #include -#include -#include -#include -#include "io_pagetable.h" #include "double_span.h" +#include "io_pagetable.h" #ifndef CONFIG_IOMMUFD_TEST #define TEMP_MEMORY_LIMIT 65536 diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index f95e32e2913330d201c0ab6eb792625a538b1797..540437be168a0d85bb321a23d059036a9d107a0c 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -3,13 +3,14 @@ * * Kernel side components to support tools/testing/selftests/iommu */ -#include -#include -#include -#include #include +#include #include +#include +#include #include +#include +#include #include #include "../iommu-priv.h" @@ -273,7 +274,7 @@ static int mock_domain_read_and_clear_dirty(struct iommu_domain *domain, return 0; } -const struct iommu_dirty_ops dirty_ops = { +static const struct iommu_dirty_ops dirty_ops = { .set_dirty_tracking = mock_domain_set_dirty_tracking, .read_and_clear_dirty = mock_domain_read_and_clear_dirty, }; @@ -1342,7 +1343,7 @@ static int iommufd_test_dirty(struct iommufd_ucmd *ucmd, unsigned int mockpt_id, unsigned long page_size, void __user *uptr, u32 flags) { - unsigned long bitmap_size, i, max; + unsigned long i, max; struct iommu_test_cmd *cmd = ucmd->cmd; struct iommufd_hw_pagetable *hwpt; struct mock_iommu_domain *mock; @@ -1363,15 +1364,14 @@ static int iommufd_test_dirty(struct iommufd_ucmd *ucmd, unsigned int mockpt_id, } max = length / page_size; - bitmap_size = DIV_ROUND_UP(max, BITS_PER_BYTE); - - tmp = kvzalloc(bitmap_size, GFP_KERNEL_ACCOUNT); + tmp = kvzalloc(DIV_ROUND_UP(max, BITS_PER_LONG) * sizeof(unsigned long), + GFP_KERNEL_ACCOUNT); if (!tmp) { rc = -ENOMEM; goto out_put; } - if (copy_from_user(tmp, uptr, bitmap_size)) { + if (copy_from_user(tmp, uptr, DIV_ROUND_UP(max, BITS_PER_BYTE))) { rc = -EFAULT; goto out_free; } diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index b657cc09605f4d482e08c7851b3bd664611f18a1..ff55b8c307126945d5583241d20ed2ba4597842a 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -804,8 +804,7 @@ static int ipmmu_init_arm_mapping(struct device *dev) if (!mmu->mapping) { struct dma_iommu_mapping *mapping; - mapping = arm_iommu_create_mapping(&platform_bus_type, - SZ_1G, SZ_2G); + mapping = arm_iommu_create_mapping(dev, SZ_1G, SZ_2G); if (IS_ERR(mapping)) { dev_err(mmu->dev, "failed to create ARM IOMMU mapping\n"); ret = PTR_ERR(mapping); diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index c6ea5b4baff3be9d3b518175ba5816e69f94e132..ee4e55b6b1900308bd06961d71269953e36e1e51 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -433,8 +433,7 @@ static int mtk_iommu_v1_create_mapping(struct device *dev, mtk_mapping = data->mapping; if (!mtk_mapping) { /* MTK iommu support 4GB iova address space. */ - mtk_mapping = arm_iommu_create_mapping(&platform_bus_type, - 0, 1ULL << 32); + mtk_mapping = arm_iommu_create_mapping(dev, 0, 1ULL << 32); if (IS_ERR(mtk_mapping)) return PTR_ERR(mtk_mapping); diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 78d61da75257c258cfaed37869220d32ad555eeb..e7a6a1611d193bce7ea71537a7e339904a9341b5 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -214,7 +214,7 @@ void of_iommu_get_resv_regions(struct device *dev, struct list_head *list) * that represent reservations in the IOVA space, which are regions that should * not be mapped.
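[Editor's note — equivalence sketch for the of_iommu.c hunk just below, not part of the patch. For a pure existence test the old and new calls have the same truth value; of_property_present() simply returns bool instead of a struct property pointer:

	struct property *prop = of_find_property(it.node, "reg", NULL);	/* old */
	bool reg_present = of_property_present(it.node, "reg");		/* new */
]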
*/ - if (of_find_property(it.node, "reg", NULL)) { + if (of_property_present(it.node, "reg")) { err = of_address_to_resource(it.node, 0, &phys); if (err < 0) { dev_err(dev, "failed to parse memory region %pOF: %d\n", diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 4365d9936e68676f50da245e287005675ae1f44e..7f633bb5efef166b551f3d632bd221d9b5dd1bd1 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -837,7 +837,7 @@ static int tegra_smmu_configure(struct tegra_smmu *smmu, struct device *dev, const struct iommu_ops *ops = smmu->iommu.ops; int err; - err = iommu_fwspec_init(dev, of_fwnode_handle(dev->of_node)); + err = iommu_fwspec_init(dev, dev_fwnode(smmu->dev)); if (err < 0) { dev_err(dev, "failed to initialize fwspec: %d\n", err); return err; diff --git a/drivers/media/platform/ti/omap3isp/isp.c b/drivers/media/platform/ti/omap3isp/isp.c index 1cda23244c7bc70a8f3092ad9e434e2a0df31bb3..91101ba88ef01f44ea0a9e764204f9630160938a 100644 --- a/drivers/media/platform/ti/omap3isp/isp.c +++ b/drivers/media/platform/ti/omap3isp/isp.c @@ -1965,7 +1965,7 @@ static int isp_attach_iommu(struct isp_device *isp) * Create the ARM mapping, used by the ARM DMA mapping core to allocate * VAs. This will allocate a corresponding IOMMU domain. */ - mapping = arm_iommu_create_mapping(&platform_bus_type, SZ_1G, SZ_2G); + mapping = arm_iommu_create_mapping(isp->dev, SZ_1G, SZ_2G); if (IS_ERR(mapping)) { dev_err(isp->dev, "failed to create ARM IOMMU mapping\n"); return PTR_ERR(mapping); diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index 9ce0d20a6c581204bb4c10e4a11b10e03d3e17a2..feef537257d0570c0971cdecb18a9e2f475b59eb 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -1022,7 +1022,7 @@ static const struct dma_map_ops ccio_ops = { .map_sg = ccio_map_sg, .unmap_sg = ccio_unmap_sg, .get_sgtable = dma_common_get_sgtable, - .alloc_pages = dma_common_alloc_pages, + .alloc_pages_op = dma_common_alloc_pages, .free_pages = dma_common_free_pages, }; diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index 05e7103d1d407a97298bc68196ddde7ee4730283..6f5b919280ff06400b68c6ab20c1b98711d2aca3 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -1090,7 +1090,7 @@ static const struct dma_map_ops sba_ops = { .map_sg = sba_map_sg, .unmap_sg = sba_unmap_sg, .get_sgtable = dma_common_get_sgtable, - .alloc_pages = dma_common_alloc_pages, + .alloc_pages_op = dma_common_alloc_pages, .free_pages = dma_common_free_pages, }; diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c index f9cc2e10b676a96a0a61701751fc6b12791405fc..0fe7c589e760604a879ed077bd55dc7b028dd9d1 100644 --- a/drivers/pci/ats.c +++ b/drivers/pci/ats.c @@ -46,6 +46,39 @@ bool pci_ats_supported(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(pci_ats_supported); +/** + * pci_prepare_ats - Set up the PS (page shift) for ATS + * @dev: the PCI device + * @ps: the IOMMU page shift + * + * This must be done by the IOMMU driver on the PF before any VFs are created to + * ensure that the VF can have ATS enabled. + * + * Returns 0 on success, or negative on failure.
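[Editor's note — hypothetical call site, not part of the patch. An IOMMU driver would fix the PF's Smallest Translation Unit before SR-IOV VFs are created, so that ATS can later be enabled on the VFs with a matching page shift; the surrounding probe logic is assumed:

	if (pdev->is_physfn) {
		ret = pci_prepare_ats(pdev, PAGE_SHIFT);
		if (ret)
			pci_info(pdev, "ATS page shift setup failed: %d\n", ret);
	}
]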
+ */ +int pci_prepare_ats(struct pci_dev *dev, int ps) +{ + u16 ctrl; + + if (!pci_ats_supported(dev)) + return -EINVAL; + + if (WARN_ON(dev->ats_enabled)) + return -EBUSY; + + if (ps < PCI_ATS_MIN_STU) + return -EINVAL; + + if (dev->is_virtfn) + return 0; + + dev->ats_stu = ps; + ctrl = PCI_ATS_CTRL_STU(dev->ats_stu - PCI_ATS_MIN_STU); + pci_write_config_word(dev, dev->ats_cap + PCI_ATS_CTRL, ctrl); + return 0; +} +EXPORT_SYMBOL_GPL(pci_prepare_ats); + /** * pci_enable_ats - enable the ATS capability * @dev: the PCI device diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 0935b77baf7d368b87203a3599ec9057094d173e..244941814f97ec84562d9feae34457b349b701e8 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -36,9 +36,6 @@ #include #endif #include "pci.h" -#ifdef CONFIG_PSWIOTLB -#include -#endif DEFINE_MUTEX(pci_slot_mutex); @@ -4565,15 +4562,6 @@ void __weak pcibios_set_master(struct pci_dev *dev) */ void pci_set_master(struct pci_dev *dev) { -#ifdef CONFIG_PSWIOTLB - if ((pswiotlb_force_disable != true) && - is_phytium_ps_socs()) { - dev->dev.can_use_pswiotlb = pswiotlb_is_dev_in_passthroughlist(dev); - dev_info(&dev->dev, "The device %s use pswiotlb because vendor 0x%04x %s in pswiotlb passthroughlist\n", - dev->dev.can_use_pswiotlb ? "would" : "would NOT", - dev->vendor, dev->dev.can_use_pswiotlb ? "is NOT" : "is"); - } -#endif __pci_set_master(dev, true); pcibios_set_master(dev); } diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index bfab2fd8c18f0e1519b16e19d35e87cd5d3e156b..534d4ab7642a94ece45cc3682c4f15a4fa912513 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -21,9 +21,6 @@ #include #include #include "pci.h" -#ifdef CONFIG_PSWIOTLB -#include -#endif #define CARDBUS_LATENCY_TIMER 176 /* secondary latency timer */ #define CARDBUS_RESERVE_BUSNR 3 @@ -2569,13 +2566,6 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus) dma_set_max_seg_size(&dev->dev, 65536); dma_set_seg_boundary(&dev->dev, 0xffffffff); -#ifdef CONFIG_PSWIOTLB - if ((pswiotlb_force_disable != true) && - is_phytium_ps_socs()) { - pswiotlb_store_local_node(dev, bus); - dma_set_seg_boundary(&dev->dev, 0xffffffffffff); - } -#endif pcie_failed_link_retrain(dev); diff --git a/drivers/xen/grant-dma-ops.c b/drivers/xen/grant-dma-ops.c index 76f6f26265a3b8ba4b7a192bd6d4cdf0ba867d10..29257d2639dbf0178aa792151f39628a8b180731 100644 --- a/drivers/xen/grant-dma-ops.c +++ b/drivers/xen/grant-dma-ops.c @@ -282,7 +282,7 @@ static int xen_grant_dma_supported(struct device *dev, u64 mask) static const struct dma_map_ops xen_grant_dma_ops = { .alloc = xen_grant_dma_alloc, .free = xen_grant_dma_free, - .alloc_pages = xen_grant_dma_alloc_pages, + .alloc_pages_op = xen_grant_dma_alloc_pages, .free_pages = xen_grant_dma_free_pages, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 042253460b983e4c86c644883af073e7b9b0f503..ef56a2500ed69a70d8628cc3f83f8b969014aeb7 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -96,7 +96,8 @@ static inline int range_straddles_page_boundary(phys_addr_t p, size_t size) return 0; } -static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr) +static struct io_tlb_pool *xen_swiotlb_find_pool(struct device *dev, + dma_addr_t dma_addr) { unsigned long bfn = XEN_PFN_DOWN(dma_to_phys(dev, dma_addr)); unsigned long xen_pfn = bfn_to_local_pfn(bfn); @@ -107,8 +108,8 @@ static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr) * in our 
domain. Therefore _only_ check address within our domain. */ if (pfn_valid(PFN_DOWN(paddr))) - return is_swiotlb_buffer(dev, paddr); - return 0; + return swiotlb_find_pool(dev, paddr); + return NULL; } #ifdef CONFIG_X86 @@ -238,8 +239,9 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, * Ensure that the address returned is DMA'ble */ if (unlikely(!dma_capable(dev, dev_addr, size, true))) { - swiotlb_tbl_unmap_single(dev, map, size, dir, - attrs | DMA_ATTR_SKIP_CPU_SYNC); + __swiotlb_tbl_unmap_single(dev, map, size, dir, + attrs | DMA_ATTR_SKIP_CPU_SYNC, + swiotlb_find_pool(dev, map)); return DMA_MAPPING_ERROR; } @@ -265,6 +267,7 @@ static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { phys_addr_t paddr = xen_dma_to_phys(hwdev, dev_addr); + struct io_tlb_pool *pool; BUG_ON(dir == DMA_NONE); @@ -276,8 +279,10 @@ static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, } /* NOTE: We use dev_addr here, not paddr! */ - if (is_xen_swiotlb_buffer(hwdev, dev_addr)) - swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs); + pool = xen_swiotlb_find_pool(hwdev, dev_addr); + if (pool) + __swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, + attrs, pool); } static void @@ -285,6 +290,7 @@ xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir) { phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr); + struct io_tlb_pool *pool; if (!dev_is_dma_coherent(dev)) { if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr)))) @@ -293,8 +299,9 @@ xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr, xen_dma_sync_for_cpu(dev, dma_addr, size, dir); } - if (is_xen_swiotlb_buffer(dev, dma_addr)) - swiotlb_sync_single_for_cpu(dev, paddr, size, dir); + pool = xen_swiotlb_find_pool(dev, dma_addr); + if (pool) + __swiotlb_sync_single_for_cpu(dev, paddr, size, dir, pool); } static void @@ -302,9 +309,11 @@ xen_swiotlb_sync_single_for_device(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir) { phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr); + struct io_tlb_pool *pool; - if (is_xen_swiotlb_buffer(dev, dma_addr)) - swiotlb_sync_single_for_device(dev, paddr, size, dir); + pool = xen_swiotlb_find_pool(dev, dma_addr); + if (pool) + __swiotlb_sync_single_for_device(dev, paddr, size, dir, pool); if (!dev_is_dma_coherent(dev)) { if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr)))) @@ -414,7 +423,7 @@ const struct dma_map_ops xen_swiotlb_dma_ops = { .dma_supported = xen_swiotlb_dma_supported, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, - .alloc_pages = dma_common_alloc_pages, + .alloc_pages_op = dma_common_alloc_pages, .free_pages = dma_common_free_pages, .max_mapping_size = swiotlb_max_mapping_size, }; diff --git a/include/linux/device.h b/include/linux/device.h index cc04849e5ed660c07526de6cfb567ccd6f8b5fb8..8c009a0be5798491e7fb10dbd91b033eb7df3c4c 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -654,8 +654,6 @@ struct device_physical_location { * @dma_io_tlb_pools: List of transient swiotlb memory pools. * @dma_io_tlb_lock: Protects changes to the list of active pools. * @dma_uses_io_tlb: %true if device has used the software IO TLB. - * @dma_p_io_tlb_mem: Phytium Software IO TLB allocator. Not for driver use. - * @dma_uses_p_io_tlb: %true if device has used the Phytium software IO TLB. * @archdata: For arch-specific additions. 
* @of_node: Associated device tree node. * @fwnode: Associated device node supplied by platform firmware. @@ -694,6 +692,9 @@ struct device_physical_location { * and optionall (if the coherent mask is large enough) also * for dma allocations. This flag is managed by the dma ops * instance from ->dma_supported. + * @dma_skip_sync: DMA sync operations can be skipped for coherent buffers. + * @dma_iommu: Device is using default IOMMU implementation for DMA and + * doesn't rely on dma_ops structure. * * At the lowest level, every device in a Linux system is represented by an * instance of struct device. The device structure contains the information @@ -762,11 +763,6 @@ struct device { #ifdef CONFIG_SWIOTLB struct io_tlb_mem *dma_io_tlb_mem; #endif -#ifdef CONFIG_PSWIOTLB - struct p_io_tlb_mem *dma_p_io_tlb_mem; - bool dma_uses_p_io_tlb; - bool can_use_pswiotlb; -#endif #ifdef CONFIG_SWIOTLB_DYNAMIC struct list_head dma_io_tlb_pools; spinlock_t dma_io_tlb_lock; @@ -781,9 +777,6 @@ struct device { #ifdef CONFIG_NUMA int numa_node; /* NUMA node this device is close to */ nodemask_t gi_node; /* GPU gi node the device is close to */ -#ifdef CONFIG_PSWIOTLB - int local_node; /* NUMA node this device is really belong to */ -#endif #endif dev_t devt; /* dev_t, creates the sysfs "dev" */ u32 id; /* device instance */ @@ -815,6 +808,12 @@ struct device { #ifdef CONFIG_DMA_OPS_BYPASS bool dma_ops_bypass : 1; #endif +#ifdef CONFIG_DMA_NEED_SYNC + bool dma_skip_sync:1; +#endif +#ifdef CONFIG_IOMMU_DMA + bool dma_iommu:1; +#endif CK_KABI_RESERVE(1) CK_KABI_RESERVE(2) diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index cc848e7380f7f35632f1d097a2feaa52b52cb440..780d54a089e807eebfdf4ae3265122c0c5b450c9 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -13,23 +13,13 @@ struct cma; struct iommu_ops; -/* - * Values for struct dma_map_ops.flags: - * - * DMA_F_PCI_P2PDMA_SUPPORTED: Indicates the dma_map_ops implementation can - * handle PCI P2PDMA pages in the map_sg/unmap_sg operation. - */ -#define DMA_F_PCI_P2PDMA_SUPPORTED (1 << 0) - struct dma_map_ops { - unsigned int flags; - void *(*alloc)(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void (*free)(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs); - struct page *(*alloc_pages)(struct device *dev, size_t size, + struct page *(*alloc_pages_op)(struct device *dev, size_t size, dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp); void (*free_pages)(struct device *dev, size_t size, struct page *vaddr, @@ -280,6 +270,15 @@ static inline bool dev_is_dma_coherent(struct device *dev) } #endif /* CONFIG_ARCH_HAS_DMA_COHERENCE_H */ +static inline void dma_reset_need_sync(struct device *dev) +{ +#ifdef CONFIG_DMA_NEED_SYNC + /* Reset it only once so that the function can be called on hotpath */ + if (unlikely(dev->dma_skip_sync)) + dev->dma_skip_sync = false; +#endif +} + /* * Check whether potential kmalloc() buffers are safe for non-coherent DMA. 
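[Editor's note on dma_reset_need_sync() above — usage sketch, not part of the patch. A mapping path that discovers a buffer does need CPU syncs (for example because it was bounced through swiotlb) must drop the per-device skip-sync shortcut, otherwise later dma_sync_*() calls would be skipped for that buffer:

	if (swiotlb_find_pool(dev, paddr))
		dma_reset_need_sync(dev);
]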
*/ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 608e8296ba206ad6291335a8b8c51a39cf2c1af4..337d9b50e8f12fbee39e3c737b42ca25086d7e5a 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -117,14 +117,6 @@ dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size, enum dma_data_direction dir, unsigned long attrs); void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs); -void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, - enum dma_data_direction dir); -void dma_sync_single_for_device(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir); -void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir); -void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir); void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs); void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, @@ -146,7 +138,6 @@ int dma_set_coherent_mask(struct device *dev, u64 mask); u64 dma_get_required_mask(struct device *dev); size_t dma_max_mapping_size(struct device *dev); size_t dma_opt_mapping_size(struct device *dev); -bool dma_need_sync(struct device *dev, dma_addr_t dma_addr); unsigned long dma_get_merge_boundary(struct device *dev); struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size, enum dma_data_direction dir, gfp_t gfp, unsigned long attrs); @@ -194,22 +185,6 @@ static inline void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { } -static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir) -{ -} -static inline void dma_sync_single_for_device(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ -} -static inline void dma_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sg, int nelems, enum dma_data_direction dir) -{ -} -static inline void dma_sync_sg_for_device(struct device *dev, - struct scatterlist *sg, int nelems, enum dma_data_direction dir) -{ -} static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return -ENOMEM; @@ -272,10 +247,6 @@ static inline size_t dma_opt_mapping_size(struct device *dev) { return 0; } -static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) -{ - return false; -} static inline unsigned long dma_get_merge_boundary(struct device *dev) { return 0; @@ -305,6 +276,82 @@ static inline int dma_mmap_noncontiguous(struct device *dev, } #endif /* CONFIG_HAS_DMA */ +#if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC) +void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir); +void __dma_sync_single_for_device(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir); +void __dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, + int nelems, enum dma_data_direction dir); +void __dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, + int nelems, enum dma_data_direction dir); +bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr); + +static inline bool dma_dev_need_sync(const struct device *dev) +{ + /* Always call DMA sync operations when debugging is enabled */ + return 
!dev->dma_skip_sync || IS_ENABLED(CONFIG_DMA_API_DEBUG); +} + +static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ + if (dma_dev_need_sync(dev)) + __dma_sync_single_for_cpu(dev, addr, size, dir); +} + +static inline void dma_sync_single_for_device(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ + if (dma_dev_need_sync(dev)) + __dma_sync_single_for_device(dev, addr, size, dir); +} + +static inline void dma_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sg, int nelems, enum dma_data_direction dir) +{ + if (dma_dev_need_sync(dev)) + __dma_sync_sg_for_cpu(dev, sg, nelems, dir); +} + +static inline void dma_sync_sg_for_device(struct device *dev, + struct scatterlist *sg, int nelems, enum dma_data_direction dir) +{ + if (dma_dev_need_sync(dev)) + __dma_sync_sg_for_device(dev, sg, nelems, dir); +} + +static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) +{ + return dma_dev_need_sync(dev) ? __dma_need_sync(dev, dma_addr) : false; +} +#else /* !CONFIG_HAS_DMA || !CONFIG_DMA_NEED_SYNC */ +static inline bool dma_dev_need_sync(const struct device *dev) +{ + return false; +} +static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ +} +static inline void dma_sync_single_for_device(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ +} +static inline void dma_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sg, int nelems, enum dma_data_direction dir) +{ +} +static inline void dma_sync_sg_for_device(struct device *dev, + struct scatterlist *sg, int nelems, enum dma_data_direction dir) +{ +} +static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) +{ + return false; +} +#endif /* !CONFIG_HAS_DMA || !CONFIG_DMA_NEED_SYNC */ + struct page *dma_alloc_pages(struct device *dev, size_t size, dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp); void dma_free_pages(struct device *dev, size_t size, struct page *page, diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index f9a81761bfceda1a3b5175c661d20e7de76b88ab..b1ecfc3cd5bcc07569b51271df91bc2610267ef5 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -171,6 +171,10 @@ struct io_pgtable_cfg { u64 ttbr[4]; u32 n_ttbrs; } apple_dart_cfg; + + struct { + int nid; + } amd; }; }; diff --git a/include/linux/iommu-dma.h b/include/linux/iommu-dma.h new file mode 100644 index 0000000000000000000000000000000000000000..d30a58bf00fdf0237606069bd91987cc20468ff6 --- /dev/null +++ b/include/linux/iommu-dma.h @@ -0,0 +1,147 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved + * + * DMA operations that map physical memory through IOMMU. 
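[Editor's note — sketch of the intended consumer, not part of this header. With the new dev->dma_iommu flag (see the include/linux/device.h hunk above), the DMA mapping core can call these functions directly instead of indirecting through a dma_map_ops table; the use_dma_iommu() wrapper here is an assumption for the example:

	#ifdef CONFIG_IOMMU_DMA
	static inline bool use_dma_iommu(struct device *dev)
	{
		return dev->dma_iommu;
	}
	#else
	static inline bool use_dma_iommu(struct device *dev)
	{
		return false;
	}
	#endif

	...
	if (use_dma_iommu(dev))
		addr = iommu_dma_map_page(dev, page, offset, size, dir, attrs);
]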
+ */ +#ifndef _LINUX_IOMMU_DMA_H +#define _LINUX_IOMMU_DMA_H + +#include + +#ifdef CONFIG_IOMMU_DMA +dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + unsigned long attrs); +void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir, unsigned long attrs); +int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs); +void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs); +void *iommu_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, + gfp_t gfp, unsigned long attrs); +int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); +int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); +unsigned long iommu_dma_get_merge_boundary(struct device *dev); +size_t iommu_dma_opt_mapping_size(void); +size_t iommu_dma_max_mapping_size(struct device *dev); +void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, unsigned long attrs); +dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, + size_t size, enum dma_data_direction dir, unsigned long attrs); +void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, unsigned long attrs); +struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev, size_t size, + enum dma_data_direction dir, gfp_t gfp, unsigned long attrs); +void iommu_dma_free_noncontiguous(struct device *dev, size_t size, + struct sg_table *sgt, enum dma_data_direction dir); +void iommu_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir); +void iommu_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir); +void iommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir); +void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir); +#else +static inline dma_addr_t iommu_dma_map_page(struct device *dev, + struct page *page, unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + return DMA_MAPPING_ERROR; +} +static inline void iommu_dma_unmap_page(struct device *dev, + dma_addr_t dma_handle, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ +} +static inline int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + return -EINVAL; +} +static inline void iommu_dma_unmap_sg(struct device *dev, + struct scatterlist *sg, int nents, enum dma_data_direction dir, + unsigned long attrs) +{ +} +static inline void *iommu_dma_alloc(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp, unsigned long attrs) +{ + return NULL; +} +static inline int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ + return -EINVAL; +} +static inline int iommu_dma_get_sgtable(struct device *dev, + struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, + size_t size, unsigned long attrs) +{ + 
return -EINVAL; +} +static inline unsigned long iommu_dma_get_merge_boundary(struct device *dev) +{ + return 0; +} +static inline size_t iommu_dma_opt_mapping_size(void) +{ + return 0; +} +static inline size_t iommu_dma_max_mapping_size(struct device *dev) +{ + return 0; +} +static inline void iommu_dma_free(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t handle, unsigned long attrs) +{ +} +static inline dma_addr_t iommu_dma_map_resource(struct device *dev, + phys_addr_t phys, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + return DMA_MAPPING_ERROR; +} +static inline void iommu_dma_unmap_resource(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ +} +static inline struct sg_table * +iommu_dma_alloc_noncontiguous(struct device *dev, size_t size, + enum dma_data_direction dir, gfp_t gfp, unsigned long attrs) +{ + return NULL; +} +static inline void iommu_dma_free_noncontiguous(struct device *dev, size_t size, + struct sg_table *sgt, enum dma_data_direction dir) +{ +} +static inline void iommu_dma_sync_single_for_cpu(struct device *dev, + dma_addr_t dma_handle, size_t size, + enum dma_data_direction dir) +{ +} +static inline void iommu_dma_sync_single_for_device(struct device *dev, + dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) +{ +} +static inline void iommu_dma_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ +} +static inline void iommu_dma_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ +} +#endif /* CONFIG_IOMMU_DMA */ +#endif /* _LINUX_IOMMU_DMA_H */ diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 32d0d7f7cb08e668376b648955a945469621bdcc..1a7fa47dfab0df4fd8bda50687f01facd9c61de6 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -326,6 +326,9 @@ enum iommu_dev_features { #define IOMMU_PASID_INVALID (-1U) typedef unsigned int ioasid_t; +/* Read but do not clear any dirty bits */ +#define IOMMU_DIRTY_NO_CLEAR (1 << 0) + #ifdef CONFIG_IOMMU_API /** @@ -365,9 +368,6 @@ struct iommu_dirty_bitmap { struct iommu_iotlb_gather *gather; }; -/* Read but do not clear any dirty bits */ -#define IOMMU_DIRTY_NO_CLEAR (1 << 0) - /** * struct iommu_dirty_ops - domain specific dirty tracking operations * @set_dirty_tracking: Enable or Disable dirty tracking on the iommu domain @@ -812,8 +812,6 @@ extern int iommu_attach_device(struct iommu_domain *domain, struct device *dev); extern void iommu_detach_device(struct iommu_domain *domain, struct device *dev); -extern int iommu_sva_unbind_gpasid(struct iommu_domain *domain, - struct device *dev, ioasid_t pasid); extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); extern struct iommu_domain *iommu_get_dma_domain(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, @@ -1597,7 +1595,7 @@ struct iopf_queue *iopf_queue_alloc(const char *name); void iopf_queue_free(struct iopf_queue *queue); int iopf_queue_discard_partial(struct iopf_queue *queue); void iopf_free_group(struct iopf_group *group); -void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt); +int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt); void iopf_group_response(struct iopf_group *group, enum iommu_page_response_code status); #else @@ -1635,9 +1633,10 @@ static inline void iopf_free_group(struct iopf_group *group) { } -static inline 
void +static inline int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) { + return -ENODEV; } static inline void iopf_group_response(struct iopf_group *group, diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index ffc3a949f8374f7610a0c9e0d3460b6f17ccdb73..30f832a60ccb33f286e80398bf75c5d20a3e38cb 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -6,17 +6,17 @@ #ifndef __LINUX_IOMMUFD_H #define __LINUX_IOMMUFD_H -#include -#include #include +#include +#include struct device; -struct iommufd_device; -struct page; -struct iommufd_ctx; -struct iommufd_access; struct file; struct iommu_group; +struct iommufd_access; +struct iommufd_ctx; +struct iommufd_device; +struct page; struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, struct device *dev, u32 *id); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index ce61f32fdacc19c3ce68dc3451d211bdbb606e40..5e60fa51787820e3b43b9e915ea947681d81cddc 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -203,13 +203,6 @@ enum pageflags { /* Reuse PG_dirty to indicate whether the duplicate page is a master or slave */ PG_dup_slave = PG_dirty, #endif - -#ifdef CONFIG_PSWIOTLB - /* check if pswiotlb is sync already */ - PG_pswiotlbsync = __NR_PAGEFLAGS + 1, - /* check if the page is used for pswiotlb */ - PG_pswiotlb, -#endif }; #define PAGEFLAGS_MASK ((1UL << NR_PAGEFLAGS) - 1) diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h index df54cd5b15db09c1c02163d5f7d5493d184addb2..0e8b74e63767a6f09d8f4110f0cd74cb417569c9 100644 --- a/include/linux/pci-ats.h +++ b/include/linux/pci-ats.h @@ -8,6 +8,7 @@ /* Address Translation Service */ bool pci_ats_supported(struct pci_dev *dev); int pci_enable_ats(struct pci_dev *dev, int ps); +int pci_prepare_ats(struct pci_dev *dev, int ps); void pci_disable_ats(struct pci_dev *dev); int pci_ats_queue_depth(struct pci_dev *dev); int pci_ats_page_aligned(struct pci_dev *dev); @@ -16,6 +17,8 @@ static inline bool pci_ats_supported(struct pci_dev *d) { return false; } static inline int pci_enable_ats(struct pci_dev *d, int ps) { return -ENODEV; } +static inline int pci_prepare_ats(struct pci_dev *dev, int ps) +{ return -ENODEV; } static inline void pci_disable_ats(struct pci_dev *d) { } static inline int pci_ats_queue_depth(struct pci_dev *d) { return -ENODEV; } diff --git a/include/linux/pswiotlb.h b/include/linux/pswiotlb.h deleted file mode 100644 index 548a54730fedabd547ce6994568f821181573cfe..0000000000000000000000000000000000000000 --- a/include/linux/pswiotlb.h +++ /dev/null @@ -1,333 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_PSWIOTLB_H -#define __LINUX_PSWIOTLB_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct device; -struct page; -struct scatterlist; -extern bool pswiotlb_force_disable; -struct p_io_tlb_pool; - -#define SOC_ID_PS23064 0x8 -#define SOC_ID_PS24080 0x6 -#define MIDR_PS 0x700F8620 -#define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7) -#define PSWIOTLB_VERBOSE (1 << 0) /* verbose initialization */ -#define PSWIOTLB_FORCEOFF (1 << 1) /* force phytium bounce buffering off*/ -#define PSWIOTLB_ANY (1 << 2) /* allow any memory for the buffer */ -#define PSWIOTLB_FREE_THRESHOLD 30 -static bool is_ps_socs; - -/* - * Maximum allowable number of contiguous slabs to map, - * must be a power of 2. What is the appropriate value ? - * The complexity of {map,unmap}_single is linearly dependent on this value. 
- */ -#define P_IO_TLB_SEGSIZE 1024 - -/* - * log of the size of each Phytium IO TLB slab. The number of slabs is command line - * controllable. - */ -#define P_IO_TLB_SHIFT 11 -#define P_IO_TLB_SIZE (1 << P_IO_TLB_SHIFT) - -/* default to 256MB */ -#define P_IO_TLB_DEFAULT_SIZE (256UL<<20) -#define P_IO_TLB_INC_THR (64UL<<20) -#define P_IO_TLB_EXT_WATERMARK (80) - -/* passthroughlist which incompatible with pswiotlb temporarily */ -#define BL_PCI_VENDOR_ID_NVIDIA 0x10de -#define BL_PCI_VENDOR_ID_ILUVATAR 0x1E3E -#define BL_PCI_VENDOR_ID_METAX 0x9999 - -unsigned long pswiotlb_size_or_default(void); -void __init pswiotlb_init_remap(bool addressing_limit, int nid, unsigned int flags, - int (*remap)(void *tlb, unsigned long nslabs)); - -phys_addr_t pswiotlb_tbl_map_single(struct device *hwdev, int nid, phys_addr_t phys, - size_t mapping_size, size_t alloc_size, unsigned int alloc_align_mask, - enum dma_data_direction dir, - unsigned long attrs); - -extern void pswiotlb_tbl_unmap_single(struct device *hwdev, - int nid, - phys_addr_t tlb_addr, - size_t offset, - size_t mapping_size, - enum dma_data_direction dir, - unsigned long attrs, - struct p_io_tlb_pool *pool); - -void pswiotlb_sync_single_for_device(struct device *dev, int nid, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir, struct p_io_tlb_pool *pool); -void pswiotlb_sync_single_for_cpu(struct device *dev, int nid, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir, struct p_io_tlb_pool *pool); -dma_addr_t pswiotlb_map(struct device *dev, int nid, phys_addr_t phys, - size_t size, enum dma_data_direction dir, unsigned long attrs); -void pswiotlb_store_local_node(struct pci_dev *dev, struct pci_bus *bus); -void iommu_dma_unmap_sg_pswiotlb(struct device *dev, struct scatterlist *sg, unsigned long iova, - size_t mapped, int nents, enum dma_data_direction dir, unsigned long attrs); -#ifdef CONFIG_PSWIOTLB -struct pswiotlb_passthroughlist { - struct list_head node; - unsigned short vendor; - unsigned short device; - bool from_grub; -}; -/** - * struct p_io_tlb_pool - Phytium IO TLB memory pool descriptor - * @start: The start address of the pswiotlb memory pool. Used to do a quick - * range check to see if the memory was in fact allocated by this - * API. - * @end: The end address of the pswiotlb memory pool. Used to do a quick - * range check to see if the memory was in fact allocated by this - * API. - * @nslabs: The number of Phytium IO TLB blocks (in groups of 64) between @start and - * @end. For default pswiotlb, this is command line adjustable via - * setup_io_tlb_npages. - * @used: The number of used Phytium IO TLB block. - * @list: The free list describing the number of free entries available - * from each index. - * @index: The index to start searching in the next round. - * @orig_addr: The original address corresponding to a mapped entry. - * @alloc_size: Size of the allocated buffer. - * @lock: The lock to protect the above data structures in the map and - * unmap calls. - * @vaddr: The vaddr of the pswiotlb memory pool. The pswiotlb memory pool - * may be remapped in the memory encrypted case and store virtual - * address for bounce buffer operation. - * @nslabs: The number of Phytium IO TLB slots between @start and @end. For the - * default pswiotlb, this can be adjusted with a boot parameter, - * see setup_io_tlb_npages(). - * @late_alloc: %true if allocated using the page allocator. - * @nareas: Number of areas in the pool. - * @area_nslabs: Number of slots in each area. 
- * @areas: Array of memory area descriptors. - * @slots: Array of slot descriptors. - * @node: Member of the Phytium IO TLB memory pool list. - * @rcu: RCU head for pswiotlb_dyn_free(). - * @transient: %true if transient memory pool. - * @busy_flag: %true if the pool is used by devices. - * @free_cnt: Counters every time the pool is free when checked by monitor. - * @free_th: Free threshold determine when to free the pool to memory. - * @busy_recode: Bitmap to record the busy status of the areas in the pool. - * @node_min_addr: Minimum physical address of the numa node. - * @numa_max_addr: Maximum physical address of the numa node. - * @numa_node_id: Numa node id the pool belong to. - */ -struct p_io_tlb_pool { - phys_addr_t start; - phys_addr_t end; - void *vaddr; - unsigned long nslabs; - bool late_alloc; - unsigned int nareas; - unsigned int area_nslabs; - struct p_io_tlb_area *areas; - struct p_io_tlb_slot *slots; - struct list_head node; - struct rcu_head rcu; - bool transient; - bool busy_flag; - unsigned int free_cnt; - unsigned int free_th; - unsigned long *busy_record; - phys_addr_t node_min_addr; - phys_addr_t node_max_addr; - int numa_node_id; -}; - -/** - * struct p_io_tlb_mem - Phytium Software IO TLB allocator - * @defpool: Default (initial) Phytium IO TLB memory pool descriptor. - * @pool: Phytium IO TLB memory pool descriptor (if not dynamic). - * @nslabs: Total number of Phytium IO TLB slabs in all pools. - * @debugfs: The dentry to debugfs. - * @force_bounce: %true if pswiotlb bouncing is forced - * @for_alloc: %true if the pool is used for memory allocation - * @can_grow: %true if more pools can be allocated dynamically. - * @phys_limit: Maximum allowed physical address. - * @pool_addr: Array where all the pools stored. - * @capacity: Number of pools which could be allocated. - * @whole_size: Number of pools which stored in the pool array. - * @lock: Lock to synchronize changes to the list. - * @pools: List of Phytium IO TLB memory pool descriptors (if dynamic). - * @dyn_alloc: Dynamic Phytium IO TLB pool allocation work. - * @total_used: The total number of slots in the pool that are currently used - * across all areas. Used only for calculating used_hiwater in - * debugfs. - * @used_hiwater: The high water mark for total_used. Used only for reporting - * in debugfs. - * @node_min_addr: Minimum physical address of the numa node. - * @numa_max_addr: Maximum physical address of the numa node. - * @numa_node_id: Numa node id the mem belong to. 
- */ -struct p_io_tlb_mem { - struct p_io_tlb_pool defpool; - unsigned long nslabs; - struct dentry *debugfs; - bool force_bounce; - bool for_alloc; - bool can_grow; - u64 phys_limit; - struct p_io_tlb_pool *pool_addr[64*1024/8]; - int capacity; - int whole_size; - spinlock_t lock; - struct list_head pools; - struct work_struct dyn_alloc; -#ifdef CONFIG_DEBUG_FS - atomic_long_t total_used; - atomic_long_t used_hiwater; -#endif - phys_addr_t node_min_addr; - phys_addr_t node_max_addr; - unsigned long node_total_mem; - int numa_node_id; -}; - -extern struct p_io_tlb_mem p_io_tlb_default_mem[MAX_NUMNODES]; - -struct p_io_tlb_pool *pswiotlb_find_pool(struct device *dev, int nid, phys_addr_t paddr); - -static inline bool is_phytium_ps_socs(void) -{ - unsigned int soc_id; - unsigned int midr; - - if (likely(is_ps_socs)) - return true; - - soc_id = read_sysreg_s(SYS_AIDR_EL1); - midr = read_cpuid_id(); - if ((soc_id == SOC_ID_PS23064 || soc_id == SOC_ID_PS24080) - && midr == MIDR_PS) { - is_ps_socs = true; - return true; - } else - return false; -} - -static inline bool is_pswiotlb_buffer(struct device *dev, int nid, phys_addr_t paddr, - struct p_io_tlb_pool **pool) -{ - struct p_io_tlb_mem *mem = &dev->dma_p_io_tlb_mem[nid]; - struct page *page; - - if (!paddr) - return false; - - page = pfn_to_page(PFN_DOWN(paddr)); - - if (test_bit(PG_pswiotlb, &page->flags) == false) - return false; - - if (!mem) - return false; - - /* - * All PSWIOTLB buffer addresses must have been returned by - * pswiotlb_tbl_map_single() and passed to a device driver. - * If a PSWIOTLB address is checked on another CPU, then it was - * presumably loaded by the device driver from an unspecified private - * data structure. Make sure that this load is ordered before reading - * dev->dma_uses_p_io_tlb here and mem->pools in pswiotlb_find_pool(). - * - * This barrier pairs with smp_mb() in pswiotlb_find_slots(). 
- */ - smp_rmb(); - - *pool = pswiotlb_find_pool(dev, nid, paddr); - if (READ_ONCE(dev->dma_uses_p_io_tlb) && *pool) - return true; - - return false; -} - -static inline bool dma_is_in_local_node(struct device *dev, int nid, dma_addr_t addr, size_t size) -{ - dma_addr_t end = addr + size - 1; - struct p_io_tlb_mem *mem = &p_io_tlb_default_mem[nid]; - - if (addr >= mem->node_min_addr && end <= mem->node_max_addr) - return true; - - return false; -} - -void pswiotlb_init(bool addressing_limited, unsigned int flags); -void pswiotlb_dev_init(struct device *dev); -size_t pswiotlb_max_mapping_size(struct device *dev); -bool is_pswiotlb_allocated(struct device *dev); -bool is_pswiotlb_active(struct device *dev); -void __init pswiotlb_adjust_size(unsigned long size); -phys_addr_t default_pswiotlb_base(struct device *dev); -phys_addr_t default_pswiotlb_limit(struct device *dev); -bool pswiotlb_is_dev_in_passthroughlist(struct pci_dev *dev); -#else -static inline void pswiotlb_init(bool addressing_limited, unsigned int flags) -{ -} - -static inline void pswiotlb_dev_init(struct device *dev) -{ -} -static inline bool is_pswiotlb_buffer(struct device *dev, int nid, phys_addr_t paddr, - struct p_io_tlb_pool **pool) -{ - return false; -} -static inline bool dma_is_in_local_node(struct device *dev, int nid, dma_addr_t addr, size_t size) -{ - return false; -} -static inline size_t pswiotlb_max_mapping_size(struct device *dev) -{ - return SIZE_MAX; -} - -static inline bool is_pswiotlb_allocated(struct device *dev) -{ - return false; -} -static inline bool is_pswiotlb_active(struct device *dev) -{ - return false; -} - -static inline void pswiotlb_adjust_size(unsigned long size) -{ -} - -static inline phys_addr_t default_pswiotlb_base(struct device *dev) -{ - return 0; -} - -static inline phys_addr_t default_pswiotlb_limit(struct device *dev) -{ - return 0; -} - -static inline bool pswiotlb_is_dev_in_passthroughlist(struct pci_dev *dev) -{ - return false; -} -#endif /* CONFIG_PSWIOTLB */ - -extern void pswiotlb_print_info(int); -extern bool pswiotlb_dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size); - -#endif /* __LINUX_PSWIOTLB_H */ diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index d45529cbd0bd5ea369ced2808e1de690ae8e5368..1bad36e3e4ef1f4c577b2f2912c967f5845e442a 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -332,7 +332,7 @@ static inline void sg_dma_unmark_bus_address(struct scatterlist *sg) * Description: * Returns true if the scatterlist was marked for SWIOTLB bouncing. Not all * elements may have been bounced, so the caller would have to check - * individual SG entries with is_swiotlb_buffer(). + * individual SG entries with swiotlb_find_pool(). 
*/ static inline bool sg_dma_is_swiotlb(struct scatterlist *sg) { diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 05e6f1b3474ee878e674e8c551f94fa4c122ed00..1752e8ae6bd4236aa346b97284b326fcdf33f513 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -42,24 +42,6 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask, int (*remap)(void *tlb, unsigned long nslabs)); extern void __init swiotlb_update_mem_attributes(void); -phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys, - size_t mapping_size, - unsigned int alloc_aligned_mask, enum dma_data_direction dir, - unsigned long attrs); - -extern void swiotlb_tbl_unmap_single(struct device *hwdev, - phys_addr_t tlb_addr, - size_t mapping_size, - enum dma_data_direction dir, - unsigned long attrs); - -void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir); -void swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir); -dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys, - size_t size, enum dma_data_direction dir, unsigned long attrs); - #ifdef CONFIG_SWIOTLB /** @@ -140,37 +122,27 @@ struct io_tlb_mem { #endif }; -#ifdef CONFIG_SWIOTLB_DYNAMIC - -struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr); - -#else - -static inline struct io_tlb_pool *swiotlb_find_pool(struct device *dev, - phys_addr_t paddr) -{ - return &dev->dma_io_tlb_mem->defpool; -} - -#endif +struct io_tlb_pool *__swiotlb_find_pool(struct device *dev, phys_addr_t paddr); /** - * is_swiotlb_buffer() - check if a physical address belongs to a swiotlb + * swiotlb_find_pool() - find swiotlb pool to which a physical address belongs * @dev: Device which has mapped the buffer. * @paddr: Physical address within the DMA buffer. * - * Check if @paddr points into a bounce buffer. + * Find the swiotlb pool that @paddr points into. * * Return: - * * %true if @paddr points into a bounce buffer - * * %false otherwise + * * pool address if @paddr points into a bounce buffer + * * NULL if @paddr does not point into a bounce buffer. As such, this function + * can be used to determine if @paddr denotes a swiotlb bounce buffer. */ -static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) +static inline struct io_tlb_pool *swiotlb_find_pool(struct device *dev, + phys_addr_t paddr) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; if (!mem) - return false; + return NULL; #ifdef CONFIG_SWIOTLB_DYNAMIC /* @@ -179,16 +151,19 @@ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) * If a SWIOTLB address is checked on another CPU, then it was * presumably loaded by the device driver from an unspecified private * data structure. Make sure that this load is ordered before reading - * dev->dma_uses_io_tlb here and mem->pools in swiotlb_find_pool(). + * dev->dma_uses_io_tlb here and mem->pools in __swiotlb_find_pool(). * * This barrier pairs with smp_mb() in swiotlb_find_slots(). 
*/ smp_rmb(); - return READ_ONCE(dev->dma_uses_io_tlb) && - swiotlb_find_pool(dev, paddr); + if (READ_ONCE(dev->dma_uses_io_tlb)) + return __swiotlb_find_pool(dev, paddr); #else - return paddr >= mem->defpool.start && paddr < mem->defpool.end; + if (paddr >= mem->defpool.start && paddr < mem->defpool.end) + return &mem->defpool; #endif + + return NULL; } static inline bool is_swiotlb_force_bounce(struct device *dev) @@ -216,9 +191,10 @@ static inline void swiotlb_dev_init(struct device *dev) { } -static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) +static inline struct io_tlb_pool *swiotlb_find_pool(struct device *dev, + phys_addr_t paddr) { - return false; + return NULL; } static inline bool is_swiotlb_force_bounce(struct device *dev) { @@ -257,6 +233,49 @@ static inline phys_addr_t default_swiotlb_limit(void) } #endif /* CONFIG_SWIOTLB */ +phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys, + size_t mapping_size, unsigned int alloc_aligned_mask, + enum dma_data_direction dir, unsigned long attrs); +dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys, + size_t size, enum dma_data_direction dir, unsigned long attrs); + +void __swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, + size_t mapping_size, enum dma_data_direction dir, + unsigned long attrs, struct io_tlb_pool *pool); +static inline void swiotlb_tbl_unmap_single(struct device *dev, + phys_addr_t addr, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + struct io_tlb_pool *pool = swiotlb_find_pool(dev, addr); + + if (unlikely(pool)) + __swiotlb_tbl_unmap_single(dev, addr, size, dir, attrs, pool); +} + +void __swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir, + struct io_tlb_pool *pool); +static inline void swiotlb_sync_single_for_device(struct device *dev, + phys_addr_t addr, size_t size, enum dma_data_direction dir) +{ + struct io_tlb_pool *pool = swiotlb_find_pool(dev, addr); + + if (unlikely(pool)) + __swiotlb_sync_single_for_device(dev, addr, size, dir, pool); +} + +void __swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir, + struct io_tlb_pool *pool); +static inline void swiotlb_sync_single_for_cpu(struct device *dev, + phys_addr_t addr, size_t size, enum dma_data_direction dir) +{ + struct io_tlb_pool *pool = swiotlb_find_pool(dev, addr); + + if (unlikely(pool)) + __swiotlb_sync_single_for_cpu(dev, addr, size, dir, pool); +} + extern void swiotlb_print_info(void); #ifdef CONFIG_DMA_RESTRICTED_POOL diff --git a/include/trace/events/pswiotlb.h b/include/trace/events/pswiotlb.h deleted file mode 100644 index ed26c41a4046200216eae0ba71712fb99027b0a4..0000000000000000000000000000000000000000 --- a/include/trace/events/pswiotlb.h +++ /dev/null @@ -1,44 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM pswiotlb - -#if !defined(_TRACE_PSWIOTLB_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_PSWIOTLB_H - -#include - -TRACE_EVENT(pswiotlb_bounced, - - TP_PROTO(struct device *dev, - dma_addr_t dev_addr, - size_t size), - - TP_ARGS(dev, dev_addr, size), - - TP_STRUCT__entry( - __string(dev_name, dev_name(dev)) - __field(u64, dma_mask) - __field(dma_addr_t, dev_addr) - __field(size_t, size) - __field(bool, force) - ), - - TP_fast_assign( - __assign_str(dev_name, dev_name(dev)); - __entry->dma_mask = (dev->dma_mask ? 
*dev->dma_mask : 0); - __entry->dev_addr = dev_addr; - __entry->size = size; - ), - - TP_printk("dev_name: %s dma_mask=%llx dev_addr=%llx size=%zu %s", - __get_str(dev_name), - __entry->dma_mask, - (unsigned long long)__entry->dev_addr, - __entry->size, - __entry->force ? "NORMAL" : "FORCEOFF") -); - -#endif /* _TRACE_PSWIOTLB_H */ - -/* This part must be outside protection */ -#include <trace/define_trace.h> diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index ede2b464a7619ad24d0cb06a9fe6f0c63f64285b..72010f71c5e479e26be02435fdc45e990832b687 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -4,8 +4,8 @@ #ifndef _UAPI_IOMMUFD_H #define _UAPI_IOMMUFD_H -#include <linux/types.h> #include <linux/ioctl.h> +#include <linux/types.h> #define IOMMUFD_TYPE (';') @@ -37,20 +37,20 @@ enum { IOMMUFD_CMD_BASE = 0x80, IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE, - IOMMUFD_CMD_IOAS_ALLOC, - IOMMUFD_CMD_IOAS_ALLOW_IOVAS, - IOMMUFD_CMD_IOAS_COPY, - IOMMUFD_CMD_IOAS_IOVA_RANGES, - IOMMUFD_CMD_IOAS_MAP, - IOMMUFD_CMD_IOAS_UNMAP, - IOMMUFD_CMD_OPTION, - IOMMUFD_CMD_VFIO_IOAS, - IOMMUFD_CMD_HWPT_ALLOC, - IOMMUFD_CMD_GET_HW_INFO, - IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING, - IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP, - IOMMUFD_CMD_HWPT_INVALIDATE, - IOMMUFD_CMD_FAULT_QUEUE_ALLOC, + IOMMUFD_CMD_IOAS_ALLOC = 0x81, + IOMMUFD_CMD_IOAS_ALLOW_IOVAS = 0x82, + IOMMUFD_CMD_IOAS_COPY = 0x83, + IOMMUFD_CMD_IOAS_IOVA_RANGES = 0x84, + IOMMUFD_CMD_IOAS_MAP = 0x85, + IOMMUFD_CMD_IOAS_UNMAP = 0x86, + IOMMUFD_CMD_OPTION = 0x87, + IOMMUFD_CMD_VFIO_IOAS = 0x88, + IOMMUFD_CMD_HWPT_ALLOC = 0x89, + IOMMUFD_CMD_GET_HW_INFO = 0x8a, + IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING = 0x8b, + IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP = 0x8c, + IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d, + IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e, }; /** @@ -400,8 +400,8 @@ struct iommu_hwpt_vtd_s1 { * @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table */ enum iommu_hwpt_data_type { - IOMMU_HWPT_DATA_NONE, - IOMMU_HWPT_DATA_VTD_S1, + IOMMU_HWPT_DATA_NONE = 0, + IOMMU_HWPT_DATA_VTD_S1 = 1, }; /** @@ -491,8 +491,8 @@ struct iommu_hw_info_vtd { * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type */ enum iommu_hw_info_type { - IOMMU_HW_INFO_TYPE_NONE, - IOMMU_HW_INFO_TYPE_INTEL_VTD, + IOMMU_HW_INFO_TYPE_NONE = 0, + IOMMU_HW_INFO_TYPE_INTEL_VTD = 1, }; /** @@ -629,7 +629,7 @@ struct iommu_hwpt_get_dirty_bitmap { * @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1 */ enum iommu_hwpt_invalidate_data_type { - IOMMU_HWPT_INVALIDATE_DATA_VTD_S1, + IOMMU_HWPT_INVALIDATE_DATA_VTD_S1 = 0, }; /** @@ -765,14 +765,10 @@ struct iommu_hwpt_pgfault { * @IOMMUFD_PAGE_RESP_INVALID: Could not handle this fault, don't retry the * access. This is the "Invalid Request" in PCI * 10.4.2.1. - * @IOMMUFD_PAGE_RESP_FAILURE: General error. Drop all subsequent faults from - * this device if possible. This is the "Response - * Failure" in PCI 10.4.2.1.
*/ enum iommufd_page_response_code { IOMMUFD_PAGE_RESP_SUCCESS = 0, - IOMMUFD_PAGE_RESP_INVALID, - IOMMUFD_PAGE_RESP_FAILURE, + IOMMUFD_PAGE_RESP_INVALID = 1, }; /** diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index f13515fc1384c6e583e4a8236c4866bdd4bf2557..43872a7588f984e6ce0f7bb2716467c89fc862eb 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -8,8 +8,12 @@ config HAS_DMA depends on !NO_DMA default y +config DMA_OPS_HELPERS + bool + config DMA_OPS depends on HAS_DMA + select DMA_OPS_HELPERS bool # @@ -107,6 +111,11 @@ config DMA_BOUNCE_UNALIGNED_KMALLOC bool depends on SWIOTLB +config DMA_NEED_SYNC + def_bool ARCH_HAS_SYNC_DMA_FOR_DEVICE || ARCH_HAS_SYNC_DMA_FOR_CPU || \ + ARCH_HAS_SYNC_DMA_FOR_CPU_ALL || DMA_API_DEBUG || DMA_OPS || \ + SWIOTLB + config DMA_RESTRICTED_POOL bool "DMA Restricted Pool" depends on OF && OF_RESERVED_MEM && SWIOTLB @@ -270,5 +279,3 @@ config DMA_MAP_BENCHMARK performance of dma_(un)map_page. See tools/testing/selftests/dma/dma_map_benchmark.c - -source "kernel/dma/phytium/Kconfig" diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile index c7c3cb4499e9c70de4b1fcb0132865035f8ccf38..2e6e933cf7f3fb042febe920069af7d0587cd29a 100644 --- a/kernel/dma/Makefile +++ b/kernel/dma/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_HAS_DMA) += mapping.o direct.o -obj-$(CONFIG_DMA_OPS) += ops_helpers.o +obj-$(CONFIG_DMA_OPS_HELPERS) += ops_helpers.o obj-$(CONFIG_DMA_OPS) += dummy.o obj-$(CONFIG_DMA_CMA) += contiguous.o obj-$(CONFIG_DMA_DECLARE_COHERENT) += coherent.o @@ -10,4 +10,3 @@ obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_DMA_COHERENT_POOL) += pool.o obj-$(CONFIG_MMU) += remap.o obj-$(CONFIG_DMA_MAP_BENCHMARK) += map_benchmark.o -obj-$(CONFIG_PSWIOTLB) += phytium/ diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index 5bc41e33fa5c78c2f5a46fd3486a22bd5d716374..a600819799637bd9f650cb0d334aba9d332a7094 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -52,10 +52,6 @@ #include #include -#ifdef CONFIG_PSWIOTLB -#include "./phytium/pswiotlb-dma.h" -#endif - #ifdef CONFIG_CMA_SIZE_MBYTES #define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES #else @@ -359,10 +355,6 @@ static struct page *cma_alloc_aligned(struct cma *cma, size_t size, gfp_t gfp) */ struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp) { -#ifdef CONFIG_PSWIOTLB - if (check_if_pswiotlb_is_applicable(dev)) - return NULL; -#endif #ifdef CONFIG_DMA_NUMA_CMA int nid = dev_to_node(dev); #endif @@ -415,10 +407,6 @@ void dma_free_contiguous(struct device *dev, struct page *page, size_t size) { unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; -#ifdef CONFIG_PSWIOTLB - if (check_if_pswiotlb_is_applicable(dev)) - __free_pages(page, get_order(size)); -#endif /* if dev has its own cma, free page from there */ if (dev->cma_area) { if (cma_release(dev->cma_area, page, count)) diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index fc2d10b2aca6fcd999c84fdb98df6b796bc26f4f..682babc5e5cc575583f71510d5a4116abfa048f5 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -415,9 +415,7 @@ void dma_direct_sync_sg_for_device(struct device *dev, for_each_sg(sgl, sg, nents, i) { phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg)); - if (unlikely(is_swiotlb_buffer(dev, paddr))) - swiotlb_sync_single_for_device(dev, paddr, sg->length, - dir); + swiotlb_sync_single_for_device(dev, paddr, sg->length, dir); if (!dev_is_dma_coherent(dev)) arch_sync_dma_for_device(paddr, sg->length, @@ -441,9 +439,7 @@ void 
dma_direct_sync_sg_for_cpu(struct device *dev, if (!dev_is_dma_coherent(dev)) arch_sync_dma_for_cpu(paddr, sg->length, dir); - if (unlikely(is_swiotlb_buffer(dev, paddr))) - swiotlb_sync_single_for_cpu(dev, paddr, sg->length, - dir); + swiotlb_sync_single_for_cpu(dev, paddr, sg->length, dir); if (dir == DMA_FROM_DEVICE) arch_dma_mark_clean(paddr, sg->length); @@ -611,7 +607,7 @@ size_t dma_direct_max_mapping_size(struct device *dev) bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr) { return !dev_is_dma_coherent(dev) || - is_swiotlb_buffer(dev, dma_to_phys(dev, dma_addr)); + swiotlb_find_pool(dev, dma_to_phys(dev, dma_addr)); } /** diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h index 97ec892ea0b5acec3adc7a7b947e6572ff9deb80..8aa7cb69fcbd5f6f5f217362d801588778fcf1bf 100644 --- a/kernel/dma/direct.h +++ b/kernel/dma/direct.h @@ -57,8 +57,7 @@ static inline void dma_direct_sync_single_for_device(struct device *dev, { phys_addr_t paddr = dma_to_phys(dev, addr); - if (unlikely(is_swiotlb_buffer(dev, paddr))) - swiotlb_sync_single_for_device(dev, paddr, size, dir); + swiotlb_sync_single_for_device(dev, paddr, size, dir); if (!dev_is_dma_coherent(dev)) arch_sync_dma_for_device(paddr, size, dir); @@ -74,8 +73,7 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev, arch_sync_dma_for_cpu_all(); } - if (unlikely(is_swiotlb_buffer(dev, paddr))) - swiotlb_sync_single_for_cpu(dev, paddr, size, dir); + swiotlb_sync_single_for_cpu(dev, paddr, size, dir); if (dir == DMA_FROM_DEVICE) arch_dma_mark_clean(paddr, size); @@ -120,8 +118,7 @@ static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) dma_direct_sync_single_for_cpu(dev, addr, size, dir); - if (unlikely(is_swiotlb_buffer(dev, phys))) - swiotlb_tbl_unmap_single(dev, phys, size, dir, + swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); } #endif /* _KERNEL_DMA_DIRECT_H */ diff --git a/kernel/dma/dummy.c b/kernel/dma/dummy.c index b492d59ac77ef0f4eda13cabc1b104c077c22754..92de80e5b057ed11a3caf4cba4d96a4e795bdaec 100644 --- a/kernel/dma/dummy.c +++ b/kernel/dma/dummy.c @@ -17,6 +17,15 @@ static dma_addr_t dma_dummy_map_page(struct device *dev, struct page *page, { return DMA_MAPPING_ERROR; } +static void dma_dummy_unmap_page(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + /* + * Dummy ops doesn't support map_page, so unmap_page should never be + * called. + */ + WARN_ON_ONCE(true); +} static int dma_dummy_map_sg(struct device *dev, struct scatterlist *sgl, int nelems, enum dma_data_direction dir, @@ -25,6 +34,16 @@ static int dma_dummy_map_sg(struct device *dev, struct scatterlist *sgl, return -EINVAL; } +static void dma_dummy_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir, + unsigned long attrs) +{ + /* + * Dummy ops doesn't support map_sg, so unmap_sg should never be called. 
+ */ + WARN_ON_ONCE(true); +} + static int dma_dummy_supported(struct device *hwdev, u64 mask) { return 0; @@ -33,6 +52,8 @@ static int dma_dummy_supported(struct device *hwdev, u64 mask) const struct dma_map_ops dma_dummy_ops = { .mmap = dma_dummy_mmap, .map_page = dma_dummy_map_page, + .unmap_page = dma_dummy_unmap_page, .map_sg = dma_dummy_map_sg, + .unmap_sg = dma_dummy_unmap_sg, .dma_supported = dma_dummy_supported, }; diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index f6af86aaa2526009b51c25ac92e0000c47953a15..ca43011b2412f19ab3359ca0e8de5b31568bb6a0 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -10,15 +10,13 @@ #include #include #include +#include <linux/iommu-dma.h> #include #include #include #include "debug.h" #include "direct.h" -#ifdef CONFIG_PSWIOTLB -#include "./phytium/pswiotlb-dma.h" -#endif #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ @@ -116,11 +114,27 @@ void *dmam_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, } EXPORT_SYMBOL(dmam_alloc_attrs); +#ifdef CONFIG_IOMMU_DMA +static bool use_dma_iommu(struct device *dev) +{ + return dev->dma_iommu; +} +#else +static bool use_dma_iommu(struct device *dev) +{ + return false; +} +#endif + static bool dma_go_direct(struct device *dev, dma_addr_t mask, const struct dma_map_ops *ops) { + if (use_dma_iommu(dev)) + return false; + if (likely(!ops)) return true; + #ifdef CONFIG_DMA_OPS_BYPASS if (dev->dma_ops_bypass) return min_not_zero(mask, dev->bus_dma_limit) >= @@ -146,6 +160,7 @@ static inline bool dma_map_direct(struct device *dev, { return dma_go_direct(dev, *dev->dma_mask, ops); } + dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, size_t offset, size_t size, enum dma_data_direction dir, unsigned long attrs) @@ -158,15 +173,11 @@ dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, if (WARN_ON_ONCE(!dev->dma_mask)) return DMA_MAPPING_ERROR; -#ifdef CONFIG_PSWIOTLB - if (check_if_pswiotlb_is_applicable(dev)) { - addr = pswiotlb_dma_map_page_distribute(dev, page, offset, size, dir, attrs); - return addr; - } -#endif if (dma_map_direct(dev, ops) || arch_dma_map_page_direct(dev, page_to_phys(page) + offset + size)) addr = dma_direct_map_page(dev, page, offset, size, dir, attrs); + else if (use_dma_iommu(dev)) + addr = iommu_dma_map_page(dev, page, offset, size, dir, attrs); else addr = ops->map_page(dev, page, offset, size, dir, attrs); kmsan_handle_dma(page, offset, size, dir); @@ -175,26 +186,24 @@ dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, return addr; } EXPORT_SYMBOL(dma_map_page_attrs); + void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); -#ifdef CONFIG_PSWIOTLB - if (check_if_pswiotlb_is_applicable(dev)) { - pswiotlb_dma_unmap_page_attrs_distribute(dev, addr, size, dir, attrs); - return; - } -#endif if (dma_map_direct(dev, ops) || arch_dma_unmap_page_direct(dev, addr + size)) dma_direct_unmap_page(dev, addr, size, dir, attrs); - else if (ops->unmap_page) + else if (use_dma_iommu(dev)) + iommu_dma_unmap_page(dev, addr, size, dir, attrs); + else ops->unmap_page(dev, addr, size, dir, attrs); debug_dma_unmap_page(dev, addr, size, dir); } EXPORT_SYMBOL(dma_unmap_page_attrs); + static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) { @@ 
-205,15 +214,12 @@ static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, if (WARN_ON_ONCE(!dev->dma_mask)) return 0; -#ifdef CONFIG_PSWIOTLB - if (check_if_pswiotlb_is_applicable(dev)) { - ents = pswiotlb_dma_map_sg_attrs_distribute(dev, sg, nents, dir, attrs); - return ents; - } -#endif + if (dma_map_direct(dev, ops) || arch_dma_map_sg_direct(dev, sg, nents)) ents = dma_direct_map_sg(dev, sg, nents, dir, attrs); + else if (use_dma_iommu(dev)) + ents = iommu_dma_map_sg(dev, sg, nents, dir, attrs); else ents = ops->map_sg(dev, sg, nents, dir, attrs); @@ -296,6 +302,7 @@ int dma_map_sgtable(struct device *dev, struct sg_table *sgt, return 0; } EXPORT_SYMBOL_GPL(dma_map_sgtable); + void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) @@ -304,15 +311,11 @@ void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, BUG_ON(!valid_dma_direction(dir)); debug_dma_unmap_sg(dev, sg, nents, dir); -#ifdef CONFIG_PSWIOTLB - if (check_if_pswiotlb_is_applicable(dev)) { - pswiotlb_dma_unmap_sg_attrs_distribute(dev, sg, nents, dir, attrs); - return; - } -#endif if (dma_map_direct(dev, ops) || arch_dma_unmap_sg_direct(dev, sg, nents)) dma_direct_unmap_sg(dev, sg, nents, dir, attrs); + else if (use_dma_iommu(dev)) + iommu_dma_unmap_sg(dev, sg, nents, dir, attrs); else if (ops->unmap_sg) ops->unmap_sg(dev, sg, nents, dir, attrs); } @@ -331,6 +334,8 @@ dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr, if (dma_map_direct(dev, ops)) addr = dma_direct_map_resource(dev, phys_addr, size, dir, attrs); + else if (use_dma_iommu(dev)) + addr = iommu_dma_map_resource(dev, phys_addr, size, dir, attrs); else if (ops->map_resource) addr = ops->map_resource(dev, phys_addr, size, dir, attrs); @@ -345,87 +350,120 @@ void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); - if (!dma_map_direct(dev, ops) && ops->unmap_resource) + if (dma_map_direct(dev, ops)) + ; /* nothing to do: uncached and no swiotlb */ + else if (use_dma_iommu(dev)) + iommu_dma_unmap_resource(dev, addr, size, dir, attrs); + else if (ops->unmap_resource) ops->unmap_resource(dev, addr, size, dir, attrs); debug_dma_unmap_resource(dev, addr, size, dir); } EXPORT_SYMBOL(dma_unmap_resource); -void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, + +#ifdef CONFIG_DMA_NEED_SYNC +void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); -#ifdef CONFIG_PSWIOTLB - if (check_if_pswiotlb_is_applicable(dev)) { - pswiotlb_dma_sync_single_for_cpu_distribute(dev, addr, size, dir); - return; - } -#endif if (dma_map_direct(dev, ops)) dma_direct_sync_single_for_cpu(dev, addr, size, dir); + else if (use_dma_iommu(dev)) + iommu_dma_sync_single_for_cpu(dev, addr, size, dir); else if (ops->sync_single_for_cpu) ops->sync_single_for_cpu(dev, addr, size, dir); debug_dma_sync_single_for_cpu(dev, addr, size, dir); } -EXPORT_SYMBOL(dma_sync_single_for_cpu); -void dma_sync_single_for_device(struct device *dev, dma_addr_t addr, +EXPORT_SYMBOL(__dma_sync_single_for_cpu); + +void __dma_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); -#ifdef CONFIG_PSWIOTLB - if 
(check_if_pswiotlb_is_applicable(dev)) { - pswiotlb_dma_sync_single_for_device_distribute(dev, addr, size, dir); - return; - } -#endif if (dma_map_direct(dev, ops)) dma_direct_sync_single_for_device(dev, addr, size, dir); + else if (use_dma_iommu(dev)) + iommu_dma_sync_single_for_device(dev, addr, size, dir); else if (ops->sync_single_for_device) ops->sync_single_for_device(dev, addr, size, dir); debug_dma_sync_single_for_device(dev, addr, size, dir); } -EXPORT_SYMBOL(dma_sync_single_for_device); -void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, +EXPORT_SYMBOL(__dma_sync_single_for_device); + +void __dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction dir) { const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); -#ifdef CONFIG_PSWIOTLB - if (check_if_pswiotlb_is_applicable(dev)) { - pswiotlb_dma_sync_sg_for_cpu_distribute(dev, sg, nelems, dir); - return; - } -#endif if (dma_map_direct(dev, ops)) dma_direct_sync_sg_for_cpu(dev, sg, nelems, dir); + else if (use_dma_iommu(dev)) + iommu_dma_sync_sg_for_cpu(dev, sg, nelems, dir); else if (ops->sync_sg_for_cpu) ops->sync_sg_for_cpu(dev, sg, nelems, dir); debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir); } -EXPORT_SYMBOL(dma_sync_sg_for_cpu); -void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, +EXPORT_SYMBOL(__dma_sync_sg_for_cpu); + +void __dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction dir) { const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); -#ifdef CONFIG_PSWIOTLB - if (check_if_pswiotlb_is_applicable(dev)) { - pswiotlb_dma_sync_sg_for_device_distribute(dev, sg, nelems, dir); - return; - } -#endif if (dma_map_direct(dev, ops)) dma_direct_sync_sg_for_device(dev, sg, nelems, dir); + else if (use_dma_iommu(dev)) + iommu_dma_sync_sg_for_device(dev, sg, nelems, dir); else if (ops->sync_sg_for_device) ops->sync_sg_for_device(dev, sg, nelems, dir); debug_dma_sync_sg_for_device(dev, sg, nelems, dir); } -EXPORT_SYMBOL(dma_sync_sg_for_device); +EXPORT_SYMBOL(__dma_sync_sg_for_device); + +bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (dma_map_direct(dev, ops)) + /* + * dma_skip_sync could've been reset on first SWIOTLB buffer + * mapping, but @dma_addr is not necessarily an SWIOTLB buffer. + * In this case, fall back to a more granular check. + */ + return dma_direct_need_sync(dev, dma_addr); + return true; +} +EXPORT_SYMBOL_GPL(__dma_need_sync); + +static void dma_setup_need_sync(struct device *dev) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (dma_map_direct(dev, ops) || use_dma_iommu(dev)) + /* + * dma_skip_sync will be reset to %false on first SWIOTLB buffer + * mapping, if any. During the device initialization, it's + * enough to check only for the DMA coherence. + */ + dev->dma_skip_sync = dev_is_dma_coherent(dev); + else if (!ops->sync_single_for_device && !ops->sync_single_for_cpu && + !ops->sync_sg_for_device && !ops->sync_sg_for_cpu) + /* + * Synchronization is not possible when none of the DMA sync ops + * is set.
+ */ + dev->dma_skip_sync = true; + else + dev->dma_skip_sync = false; +} +#else /* !CONFIG_DMA_NEED_SYNC */ +static inline void dma_setup_need_sync(struct device *dev) { } +#endif /* !CONFIG_DMA_NEED_SYNC */ /* * The whole dma_get_sgtable() idea is fundamentally unsafe - it seems @@ -447,6 +485,9 @@ int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, if (dma_alloc_direct(dev, ops)) return dma_direct_get_sgtable(dev, sgt, cpu_addr, dma_addr, size, attrs); + if (use_dma_iommu(dev)) + return iommu_dma_get_sgtable(dev, sgt, cpu_addr, dma_addr, + size, attrs); if (!ops->get_sgtable) return -ENXIO; return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size, attrs); @@ -483,6 +524,8 @@ bool dma_can_mmap(struct device *dev) if (dma_alloc_direct(dev, ops)) return dma_direct_can_mmap(dev); + if (use_dma_iommu(dev)) + return true; return ops->mmap != NULL; } EXPORT_SYMBOL_GPL(dma_can_mmap); @@ -509,6 +552,9 @@ int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, if (dma_alloc_direct(dev, ops)) return dma_direct_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); + if (use_dma_iommu(dev)) + return iommu_dma_mmap(dev, vma, cpu_addr, dma_addr, size, + attrs); if (!ops->mmap) return -ENXIO; return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); @@ -560,6 +606,8 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, if (dma_alloc_direct(dev, ops)) cpu_addr = dma_direct_alloc(dev, size, dma_handle, flag, attrs); + else if (use_dma_iommu(dev)) + cpu_addr = iommu_dma_alloc(dev, size, dma_handle, flag, attrs); else if (ops->alloc) cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs); else @@ -592,6 +640,8 @@ void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); if (dma_alloc_direct(dev, ops)) dma_direct_free(dev, size, cpu_addr, dma_handle, attrs); + else if (use_dma_iommu(dev)) + iommu_dma_free(dev, size, cpu_addr, dma_handle, attrs); else if (ops->free) ops->free(dev, size, cpu_addr, dma_handle, attrs); } @@ -612,9 +662,11 @@ static struct page *__dma_alloc_pages(struct device *dev, size_t size, size = PAGE_ALIGN(size); if (dma_alloc_direct(dev, ops)) return dma_direct_alloc_pages(dev, size, dma_handle, dir, gfp); - if (!ops->alloc_pages) + if (use_dma_iommu(dev)) + return dma_common_alloc_pages(dev, size, dma_handle, dir, gfp); + if (!ops->alloc_pages_op) return NULL; - return ops->alloc_pages(dev, size, dma_handle, dir, gfp); + return ops->alloc_pages_op(dev, size, dma_handle, dir, gfp); } struct page *dma_alloc_pages(struct device *dev, size_t size, @@ -636,6 +688,8 @@ static void __dma_free_pages(struct device *dev, size_t size, struct page *page, size = PAGE_ALIGN(size); if (dma_alloc_direct(dev, ops)) dma_direct_free_pages(dev, size, page, dma_handle, dir); + else if (use_dma_iommu(dev)) + dma_common_free_pages(dev, size, page, dma_handle, dir); else if (ops->free_pages) ops->free_pages(dev, size, page, dma_handle, dir); } @@ -698,6 +752,8 @@ struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size, if (ops && ops->alloc_noncontiguous) sgt = ops->alloc_noncontiguous(dev, size, dir, gfp, attrs); + else if (use_dma_iommu(dev)) + sgt = iommu_dma_alloc_noncontiguous(dev, size, dir, gfp, attrs); else sgt = alloc_single_sgt(dev, size, dir, gfp); @@ -726,6 +782,8 @@ void dma_free_noncontiguous(struct device *dev, size_t size, debug_dma_unmap_sg(dev, sgt->sgl, sgt->orig_nents, dir); if (ops && ops->free_noncontiguous) ops->free_noncontiguous(dev, size, sgt, 
dir); + else if (use_dma_iommu(dev)) + iommu_dma_free_noncontiguous(dev, size, sgt, dir); else free_single_sgt(dev, size, sgt, dir); } @@ -773,6 +831,8 @@ static int dma_supported(struct device *dev, u64 mask) { const struct dma_map_ops *ops = get_dma_ops(dev); + if (WARN_ON(ops && use_dma_iommu(dev))) + return false; /* * ->dma_supported sets the bypass flag, so we must always call * into the method here unless the device is truly direct mapped. @@ -788,17 +848,14 @@ bool dma_pci_p2pdma_supported(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); - /* if ops is not set, dma direct will be used which supports P2PDMA */ - if (!ops) - return true; - /* * Note: dma_ops_bypass is not checked here because P2PDMA should * not be used with dma mapping ops that do not have support even * if the specific device is bypassing them. */ - return ops->flags & DMA_F_PCI_P2PDMA_SUPPORTED; + /* if ops is not set, dma direct and default IOMMU support P2PDMA */ + return !ops; } EXPORT_SYMBOL_GPL(dma_pci_p2pdma_supported); @@ -815,6 +872,8 @@ int dma_set_mask(struct device *dev, u64 mask) arch_dma_set_mask(dev, mask); *dev->dma_mask = mask; + dma_setup_need_sync(dev); + return 0; } EXPORT_SYMBOL(dma_set_mask); @@ -842,6 +901,8 @@ size_t dma_max_mapping_size(struct device *dev) if (dma_map_direct(dev, ops)) size = dma_direct_max_mapping_size(dev); + else if (use_dma_iommu(dev)) + size = iommu_dma_max_mapping_size(dev); else if (ops && ops->max_mapping_size) size = ops->max_mapping_size(dev); @@ -854,27 +915,22 @@ size_t dma_opt_mapping_size(struct device *dev) const struct dma_map_ops *ops = get_dma_ops(dev); size_t size = SIZE_MAX; - if (ops && ops->opt_mapping_size) + if (use_dma_iommu(dev)) + size = iommu_dma_opt_mapping_size(); + else if (ops && ops->opt_mapping_size) size = ops->opt_mapping_size(); return min(dma_max_mapping_size(dev), size); } EXPORT_SYMBOL_GPL(dma_opt_mapping_size); -bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - if (dma_map_direct(dev, ops)) - return dma_direct_need_sync(dev, dma_addr); - return ops->sync_single_for_cpu || ops->sync_single_for_device; -} -EXPORT_SYMBOL_GPL(dma_need_sync); - unsigned long dma_get_merge_boundary(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); + if (use_dma_iommu(dev)) + return iommu_dma_get_merge_boundary(dev); + if (!ops || !ops->get_merge_boundary) return 0; /* can't merge */ diff --git a/kernel/dma/phytium/Kconfig b/kernel/dma/phytium/Kconfig deleted file mode 100644 index 8553a65027ee3baaecd5d289325b9e756c57b22a..0000000000000000000000000000000000000000 --- a/kernel/dma/phytium/Kconfig +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -config PSWIOTLB - bool "Phytium software IO TLB" - select NEED_DMA_MAP_STATE - depends on ARCH_PHYTIUM && NUMA - help - This enables phytium software IO TLB. You can disable phytium software - IO TLB using "pswiotlb=forceoff" on the kernel command line if you do - not need it when PSWIOTLB is Y. 
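The mapping.c hunks above also replace the always-out-of-line dma_need_sync() with __dma_need_sync() plus a per-device dma_skip_sync fast-path flag, computed once by dma_setup_need_sync() when the DMA mask is set. The public entry point lives in <linux/dma-mapping.h>, which this excerpt does not show; the sketch below is an assumption of how such a wrapper would consult the flag, not a line from this patch:

/*
 * Sketch only -- assumed <linux/dma-mapping.h> wrapper, not part of this
 * patch. With CONFIG_DMA_NEED_SYNC=n the dma_skip_sync field presumably
 * does not exist and every dma_need_sync() call folds to false at compile
 * time; otherwise dev->dma_skip_sync, set up by dma_setup_need_sync()
 * above, short-circuits the out-of-line __dma_need_sync() check.
 */
#ifdef CONFIG_DMA_NEED_SYNC
static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
{
	/* Device proven coherent at setup time: skip the per-address check. */
	if (dev->dma_skip_sync)
		return false;
	return __dma_need_sync(dev, dma_addr);
}
#else
static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
{
	return false;	/* no arch sync hooks, no swiotlb, nothing to flush */
}
#endif

The same look-up-once idea drives the swiotlb.h rework earlier in the patch: swiotlb_find_pool() resolves the pool (or NULL) a single time, and the __swiotlb_tbl_unmap_single() and __swiotlb_sync_single_for_{cpu,device}() helpers take that pool as a parameter instead of re-deriving it on every unmap and sync.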
diff --git a/kernel/dma/phytium/Makefile b/kernel/dma/phytium/Makefile deleted file mode 100644 index f94ea59e950f535ccbaf6653361aefa368faeae0..0000000000000000000000000000000000000000 --- a/kernel/dma/phytium/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 - -obj-$(CONFIG_PSWIOTLB) += pswiotlb.o -obj-$(CONFIG_PSWIOTLB) += pswiotlb-mapping.o -obj-$(CONFIG_PSWIOTLB) += pswiotlb-direct.o -obj-$(CONFIG_PSWIOTLB) += pswiotlb-iommu.o diff --git a/kernel/dma/phytium/pswiotlb-direct.c b/kernel/dma/phytium/pswiotlb-direct.c deleted file mode 100644 index f5e1b62c67c9d0f91fc05f2667d2ab1cbdc4fd66..0000000000000000000000000000000000000000 --- a/kernel/dma/phytium/pswiotlb-direct.c +++ /dev/null @@ -1,146 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * DMA operations based on Phytium software IO tlb that - * map physical memory directly without using an IOMMU. - * - * Copyright (c) 2024, Phytium Technology Co., Ltd. - */ -#include /* for max_pfn */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "pswiotlb-dma.h" - -/* - * The following functions are ported from - * ./drivers/dma/direct.c - * static inline dma_addr_t phys_to_dma_direct(struct device *dev, - * phys_addr_t phys); - */ - -static inline dma_addr_t phys_to_dma_direct(struct device *dev, - phys_addr_t phys) -{ - if (force_dma_unencrypted(dev)) - return phys_to_dma_unencrypted(dev, phys); - return phys_to_dma(dev, phys); -} - -bool pswiotlb_dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) -{ - dma_addr_t dma_addr = phys_to_dma_direct(dev, phys); - - if (dma_addr == DMA_MAPPING_ERROR) - return false; - return dma_addr + size - 1 <= - min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit); -} - -#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ - defined(CONFIG_PSWIOTLB) -void pswiotlb_dma_direct_sync_sg_for_device(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - int nid = dev->numa_node; - struct p_io_tlb_pool *pool; - - for_each_sg(sgl, sg, nents, i) { - phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg)); - - if (unlikely(is_swiotlb_buffer(dev, paddr))) - swiotlb_sync_single_for_device(dev, paddr, sg->length, - dir); - - if (is_pswiotlb_active(dev) && - unlikely(is_pswiotlb_buffer(dev, nid, paddr, &pool))) - pswiotlb_sync_single_for_device(dev, nid, paddr, - sg->length, dir, pool); - - if (!dev_is_dma_coherent(dev)) - arch_sync_dma_for_device(paddr, sg->length, - dir); - } -} -#endif - -#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \ - defined(CONFIG_PSWIOTLB) -void pswiotlb_dma_direct_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - int nid = dev->numa_node; - struct p_io_tlb_pool *pool; - - for_each_sg(sgl, sg, nents, i) { - phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg)); - - if (!dev_is_dma_coherent(dev)) - arch_sync_dma_for_cpu(paddr, sg->length, dir); - - if (unlikely(is_swiotlb_buffer(dev, paddr))) - swiotlb_sync_single_for_cpu(dev, paddr, sg->length, - dir); - - if (is_pswiotlb_active(dev) && - unlikely(is_pswiotlb_buffer(dev, nid, paddr, &pool))) - pswiotlb_sync_single_for_cpu(dev, nid, paddr, - sg->length, dir, pool); - - if (dir == DMA_FROM_DEVICE) - arch_dma_mark_clean(paddr, sg->length); - } - - if (!dev_is_dma_coherent(dev)) - arch_sync_dma_for_cpu_all(); -} - -/* - * Unmaps 
segments, except for ones marked as pci_p2pdma which do not - * require any further action as they contain a bus address. - */ -void pswiotlb_dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nents, i) - pswiotlb_dma_direct_unmap_page(dev, sg->dma_address, sg_dma_len(sg), dir, - attrs); -} -#endif - -int pswiotlb_dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, - enum dma_data_direction dir, unsigned long attrs) -{ - struct scatterlist *sg; - int i, ret; - - for_each_sg(sgl, sg, nents, i) { - sg->dma_address = pswiotlb_dma_direct_map_page(dev, sg_page(sg), - sg->offset, sg->length, dir, attrs); - if (sg->dma_address == DMA_MAPPING_ERROR) { - ret = -EIO; - goto out_unmap; - } - sg_dma_len(sg) = sg->length; - } - - return nents; - -out_unmap: - pswiotlb_dma_direct_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); - return ret; -} diff --git a/kernel/dma/phytium/pswiotlb-dma.h b/kernel/dma/phytium/pswiotlb-dma.h deleted file mode 100644 index 98302401febf08abfec1f6ebcc3c9243f85c773d..0000000000000000000000000000000000000000 --- a/kernel/dma/phytium/pswiotlb-dma.h +++ /dev/null @@ -1,334 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * DMA operations based on Phytium software IO tlb that - * map physical memory. - * - * Copyright (c) 2024, Phytium Technology Co., Ltd. - */ -#ifndef _KERNEL_PSWIOTLB_DMA_DIRECT_H -#define _KERNEL_PSWIOTLB_DMA_DIRECT_H - -#include -#include -#include - -extern bool pswiotlb_force_disable; -#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ - defined(CONFIG_PSWIOTLB) -void pswiotlb_dma_direct_sync_sg_for_device(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir); -#else -static inline void pswiotlb_dma_direct_sync_sg_for_device(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir) -{ -} -#endif - -#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \ - defined(CONFIG_PSWIOTLB) -void pswiotlb_dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, unsigned long attrs); -void pswiotlb_dma_direct_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir); -#else -static inline void pswiotlb_dma_direct_unmap_sg(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir, - unsigned long attrs) -{ -} -static inline void pswiotlb_dma_direct_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir) -{ -} -#endif - -#ifdef CONFIG_PSWIOTLB -int pswiotlb_dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, unsigned long attrs); -dma_addr_t pswiotlb_dma_map_page_distribute(struct device *dev, struct page *page, - size_t offset, size_t size, enum dma_data_direction dir, - unsigned long attrs); -void pswiotlb_dma_unmap_page_attrs_distribute(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir, unsigned long attrs); -int pswiotlb_dma_map_sg_attrs_distribute(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, unsigned long attrs); -void pswiotlb_dma_unmap_sg_attrs_distribute(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, unsigned long attrs); -void 
pswiotlb_dma_sync_single_for_cpu_distribute(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir); -void pswiotlb_dma_sync_single_for_device_distribute(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir); -void pswiotlb_dma_sync_sg_for_cpu_distribute(struct device *dev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir); -void pswiotlb_dma_sync_sg_for_device_distribute(struct device *dev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir); -dma_addr_t pswiotlb_iommu_dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir, - unsigned long attrs); -void pswiotlb_iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction dir, unsigned long attrs); -int pswiotlb_iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, unsigned long attrs); -void pswiotlb_iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, unsigned long attrs); -void pswiotlb_iommu_dma_sync_single_for_cpu(struct device *dev, - dma_addr_t dma_handle, size_t size, enum dma_data_direction dir); -void pswiotlb_iommu_dma_sync_single_for_device(struct device *dev, - dma_addr_t dma_handle, size_t size, enum dma_data_direction dir); -void pswiotlb_iommu_dma_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nelems, enum dma_data_direction dir); -void pswiotlb_iommu_dma_sync_sg_for_device(struct device *dev, - struct scatterlist *sgl, int nelems, enum dma_data_direction dir); - -static inline bool check_if_pswiotlb_is_applicable(struct device *dev) -{ - if (dev->can_use_pswiotlb && is_phytium_ps_socs() - && !pswiotlb_force_disable) { - if (dev->numa_node == NUMA_NO_NODE || - dev->numa_node != dev->local_node) - dev->numa_node = dev->local_node; - - if (dev_is_pci(dev) && (dev->numa_node != NUMA_NO_NODE)) - return true; - } - - return false; -} - -static inline void pswiotlb_dma_direct_sync_single_for_device(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ - phys_addr_t paddr = dma_to_phys(dev, addr); - int nid = dev->numa_node; - struct p_io_tlb_pool *pool; - - if (unlikely(is_swiotlb_buffer(dev, paddr))) - swiotlb_sync_single_for_device(dev, paddr, size, dir); - - if (is_pswiotlb_active(dev)) { - if (unlikely(is_pswiotlb_buffer(dev, nid, paddr, &pool))) - pswiotlb_sync_single_for_device(dev, nid, paddr, size, dir, pool); - } - - if (!dev_is_dma_coherent(dev)) - arch_sync_dma_for_device(paddr, size, dir); -} - -static inline void pswiotlb_dma_direct_sync_single_for_cpu(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ - phys_addr_t paddr = dma_to_phys(dev, addr); - int nid = dev->numa_node; - struct p_io_tlb_pool *pool; - - if (!dev_is_dma_coherent(dev)) { - arch_sync_dma_for_cpu(paddr, size, dir); - arch_sync_dma_for_cpu_all(); - } - - if (unlikely(is_swiotlb_buffer(dev, paddr))) - swiotlb_sync_single_for_cpu(dev, paddr, size, dir); - - if (is_pswiotlb_active(dev)) { - if (unlikely(is_pswiotlb_buffer(dev, nid, paddr, &pool))) - pswiotlb_sync_single_for_cpu(dev, nid, paddr, size, dir, pool); - } - - if (dir == DMA_FROM_DEVICE) - arch_dma_mark_clean(paddr, size); -} - -static inline dma_addr_t pswiotlb_dma_direct_map_page(struct device *dev, - struct page *page, unsigned long offset, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - phys_addr_t 
phys = page_to_phys(page) + offset; - dma_addr_t dma_addr = phys_to_dma(dev, phys); - int nid = dev->numa_node; - - if (is_swiotlb_force_bounce(dev)) - return swiotlb_map(dev, phys, size, dir, attrs); - - if (unlikely(!dma_capable(dev, dma_addr, size, true)) || - dma_kmalloc_needs_bounce(dev, size, dir)) { - if (is_swiotlb_active(dev)) - return swiotlb_map(dev, phys, size, dir, attrs); - - dev_WARN_ONCE(dev, 1, - "DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n", - &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit); - return DMA_MAPPING_ERROR; - } - - /* check whether dma addr is in local node */ - if (is_pswiotlb_active(dev)) { - if (dir != DMA_TO_DEVICE) { - if (unlikely(!dma_is_in_local_node(dev, nid, dma_addr, size))) { - dma_addr = pswiotlb_map(dev, nid, phys, size, dir, attrs); - if (dma_addr == DMA_MAPPING_ERROR) { - dma_addr = phys_to_dma(dev, phys); - dev_warn_once(dev, - "Failed to allocate memory from pswiotlb, fall back to non-local dma\n"); - } else - return dma_addr; - } - } - } - - if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - arch_sync_dma_for_device(phys, size, dir); - return dma_addr; -} - -static inline void pswiotlb_dma_direct_unmap_page(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir, unsigned long attrs) -{ - phys_addr_t phys = dma_to_phys(dev, addr); - int nid = dev->numa_node; - struct p_io_tlb_pool *pool; - - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && - !dev_is_dma_coherent(dev)) { - arch_sync_dma_for_cpu(phys, size, dir); - arch_sync_dma_for_cpu_all(); - } - - if (unlikely(is_swiotlb_buffer(dev, phys))) - swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); - - if (is_pswiotlb_active(dev)) { - if (unlikely(is_pswiotlb_buffer(dev, nid, phys, &pool))) - pswiotlb_tbl_unmap_single(dev, nid, phys, 0, size, dir, attrs, pool); - - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && (dir == DMA_FROM_DEVICE)) - arch_dma_mark_clean(phys, size); - } -} -#else -static inline int pswiotlb_dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - return 0; -} - -static inline dma_addr_t pswiotlb_dma_map_page_distribute(struct device *dev, - struct page *page, size_t offset, size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - return 0; -} - -static inline void pswiotlb_dma_unmap_page_attrs_distribute(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ -} - -static inline int pswiotlb_dma_map_sg_attrs_distribute(struct device *dev, - struct scatterlist *sg, int nents, enum dma_data_direction dir, - unsigned long attrs) -{ - return 0; -} - -static inline void pswiotlb_dma_unmap_sg_attrs_distribute(struct device *dev, - struct scatterlist *sg, int nents, enum dma_data_direction dir, - unsigned long attrs) -{ -} - -static inline void pswiotlb_dma_sync_single_for_cpu_distribute(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ -} - -static inline void pswiotlb_dma_sync_single_for_device_distribute(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ -} - -static inline void pswiotlb_dma_sync_sg_for_cpu_distribute(struct device *dev, - struct scatterlist *sg, int nelems, enum dma_data_direction dir) -{ -} - -static inline void pswiotlb_dma_sync_sg_for_device_distribute(struct device *dev, - struct scatterlist *sg, int nelems, enum dma_data_direction dir) -{ -} - -static inline dma_addr_t pswiotlb_iommu_dma_map_page(struct 
device *dev, - struct page *page, unsigned long offset, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - return 0; -} - -static inline void pswiotlb_iommu_dma_unmap_page(struct device *dev, - dma_addr_t dma_handle, size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ -} - -static inline int pswiotlb_iommu_dma_map_sg(struct device *dev, - struct scatterlist *sg, int nents, enum dma_data_direction dir, - unsigned long attrs) -{ - return 0; -} - -static inline void pswiotlb_iommu_dma_unmap_sg(struct device *dev, - struct scatterlist *sg, int nents, enum dma_data_direction dir, - unsigned long attrs) -{ -} - -static inline void pswiotlb_iommu_dma_sync_single_for_cpu(struct device *dev, - dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) -{ -} - -static inline void pswiotlb_iommu_dma_sync_single_for_device(struct device *dev, - dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) -{ -} - -static inline void pswiotlb_iommu_dma_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nelems, enum dma_data_direction dir) -{ -} - -static inline void pswiotlb_iommu_dma_sync_sg_for_device(struct device *dev, - struct scatterlist *sgl, int nelems, enum dma_data_direction dir) -{ -} - -static inline bool check_if_pswiotlb_is_applicable(struct device *dev) -{ - return false; -} - -static inline void pswiotlb_dma_direct_sync_single_for_device(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ -} - -static inline void pswiotlb_dma_direct_sync_single_for_cpu(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ -} - -static inline dma_addr_t pswiotlb_dma_direct_map_page(struct device *dev, - struct page *page, unsigned long offset, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - return 0; -} - -static inline void pswiotlb_dma_direct_unmap_page(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir, unsigned long attrs) -{ -} -#endif /* CONFIG_PSWIOTLB*/ -#endif /* _KERNEL_PSWIOTLB_DMA_DIRECT_H */ diff --git a/kernel/dma/phytium/pswiotlb-iommu.c b/kernel/dma/phytium/pswiotlb-iommu.c deleted file mode 100644 index 4c3f6b9ac9f1ef42fd522fdafdc06409f1a4f111..0000000000000000000000000000000000000000 --- a/kernel/dma/phytium/pswiotlb-iommu.c +++ /dev/null @@ -1,1214 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * DMA operations based on Phytium software IO tlb that - * map physical memory indirectly with an IOMMU. - * - * Copyright (c) 2024, Phytium Technology Co., Ltd. 
- */ - -#define pr_fmt(fmt) "pswiotlb iommu: " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#ifdef CONFIG_ARCH_PHYTIUM -#include -#endif - -#include "pswiotlb-dma.h" - -enum iommu_dma_cookie_type { - IOMMU_DMA_IOVA_COOKIE, - IOMMU_DMA_MSI_COOKIE, -}; - -struct iommu_dma_cookie { - enum iommu_dma_cookie_type type; - union { - /* Full allocator for IOMMU_DMA_IOVA_COOKIE */ - struct { - struct iova_domain iovad; - - struct iova_fq __percpu *fq; /* Flush queue */ - /* Number of TLB flushes that have been started */ - atomic64_t fq_flush_start_cnt; - /* Number of TLB flushes that have been finished */ - atomic64_t fq_flush_finish_cnt; - /* Timer to regularily empty the flush queues */ - struct timer_list fq_timer; - /* 1 when timer is active, 0 when not */ - atomic_t fq_timer_on; - }; - /* Trivial linear page allocator for IOMMU_DMA_MSI_COOKIE */ - dma_addr_t msi_iova; - }; - struct list_head msi_page_list; - - /* Domain for flush queue callback; NULL if flush queue not in use */ - struct iommu_domain *fq_domain; - struct mutex mutex; -}; - -static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled); - -/* Number of entries per flush queue */ -#define IOVA_FQ_SIZE 256 - -/* Timeout (in ms) after which entries are flushed from the queue */ -#define IOVA_FQ_TIMEOUT 10 - -/* Flush queue entry for deferred flushing */ -struct iova_fq_entry { - unsigned long iova_pfn; - unsigned long pages; - struct list_head freelist; - u64 counter; /* Flush counter when this entry was added */ -}; - -/* Per-CPU flush queue structure */ -struct iova_fq { - struct iova_fq_entry entries[IOVA_FQ_SIZE]; - unsigned int head, tail; - spinlock_t lock; -}; - -#define fq_ring_for_each(i, fq) \ - for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE) - -/* - * The following functions are ported from - * ./drivers/iommu/dma-iommu.c - * ./drivers/iommu/iommu.c - * static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova, - * phys_addr_t paddr, size_t size, size_t *count); - * static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova, - * phys_addr_t paddr, size_t size, int prot, - * gfp_t gfp, size_t *mapped); - * static int __iommu_map(struct iommu_domain *domain, unsigned long iova, - * phys_addr_t paddr, size_t size, int prot, gfp_t gfp); - * static bool dev_is_untrusted(struct device *dev); - * static int dma_info_to_prot(enum dma_data_direction dir, bool coherent, - * unsigned long attrs); - * static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, - * size_t size, u64 dma_limit, struct device *dev); - * static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie, - * dma_addr_t iova, size_t size, struct iommu_iotlb_gather *gather); - * static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr, - * size_t size); - * static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, - * size_t size, int prot, u64 dma_mask); - * static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents, - * dma_addr_t dma_addr); - * static void __invalidate_sg(struct scatterlist *sg, int nents); - */ - -static inline bool fq_full(struct iova_fq *fq) -{ - 
assert_spin_locked(&fq->lock); - return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head); -} - -static inline unsigned int fq_ring_add(struct iova_fq *fq) -{ - unsigned int idx = fq->tail; - - assert_spin_locked(&fq->lock); - - fq->tail = (idx + 1) % IOVA_FQ_SIZE; - - return idx; -} - -static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq) -{ - u64 counter = atomic64_read(&cookie->fq_flush_finish_cnt); - unsigned int idx; - - assert_spin_locked(&fq->lock); - - fq_ring_for_each(idx, fq) { - - if (fq->entries[idx].counter >= counter) - break; - - put_pages_list(&fq->entries[idx].freelist); - free_iova_fast(&cookie->iovad, - fq->entries[idx].iova_pfn, - fq->entries[idx].pages); - - fq->head = (fq->head + 1) % IOVA_FQ_SIZE; - } -} - -static void fq_flush_iotlb(struct iommu_dma_cookie *cookie) -{ - atomic64_inc(&cookie->fq_flush_start_cnt); - cookie->fq_domain->ops->flush_iotlb_all(cookie->fq_domain); - atomic64_inc(&cookie->fq_flush_finish_cnt); -} - -static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, size_t *count) -{ - unsigned int pgsize_idx, pgsize_idx_next; - unsigned long pgsizes; - size_t offset, pgsize, pgsize_next; - unsigned long addr_merge = paddr | iova; - - /* Page sizes supported by the hardware and small enough for @size */ - pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0); - - /* Constrain the page sizes further based on the maximum alignment */ - if (likely(addr_merge)) - pgsizes &= GENMASK(__ffs(addr_merge), 0); - - /* Make sure we have at least one suitable page size */ - BUG_ON(!pgsizes); - - /* Pick the biggest page size remaining */ - pgsize_idx = __fls(pgsizes); - pgsize = BIT(pgsize_idx); - if (!count) - return pgsize; - - /* Find the next biggest support page size, if it exists */ - pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0); - if (!pgsizes) - goto out_set_count; - - pgsize_idx_next = __ffs(pgsizes); - pgsize_next = BIT(pgsize_idx_next); - - /* - * There's no point trying a bigger page size unless the virtual - * and physical addresses are similarly offset within the larger page. - */ - if ((iova ^ paddr) & (pgsize_next - 1)) - goto out_set_count; - - /* Calculate the offset to the next page size alignment boundary */ - offset = pgsize_next - (addr_merge & (pgsize_next - 1)); - - /* - * If size is big enough to accommodate the larger page, reduce - * the number of smaller pages. 
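[Editor's note: the flush-queue helpers above (fq_full(), fq_ring_add(), fq_ring_free()) implement a single-ring deferred-free scheme in which an entry may only be reclaimed once the IOTLB flush counter has overtaken the value recorded at enqueue time. Below is a minimal userspace sketch of that discipline; the 8-entry ring size is illustrative and the locking is omitted.]

/* Standalone model of the per-CPU flush-queue ring: head chases tail,
 * the ring is full when advancing tail would collide with head, and
 * entries are reclaimed only once the flush counter has passed the
 * value recorded at enqueue. */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define FQ_SIZE 8                 /* illustrative; kernel uses 256 */

struct fq_entry { unsigned long pfn; uint64_t counter; };

struct fq {
	struct fq_entry entries[FQ_SIZE];
	unsigned int head, tail;  /* head == tail means empty */
};

static bool fq_full(const struct fq *fq)
{
	return (fq->tail + 1) % FQ_SIZE == fq->head;
}

static void fq_add(struct fq *fq, unsigned long pfn, uint64_t start_cnt)
{
	unsigned int idx = fq->tail;

	fq->entries[idx].pfn = pfn;
	fq->entries[idx].counter = start_cnt; /* flushes started so far */
	fq->tail = (idx + 1) % FQ_SIZE;
}

/* Reclaim every entry whose recorded counter is below finish_cnt. */
static void fq_ring_free(struct fq *fq, uint64_t finish_cnt)
{
	unsigned int i;

	for (i = fq->head; i != fq->tail; i = (i + 1) % FQ_SIZE) {
		if (fq->entries[i].counter >= finish_cnt)
			break;
		printf("reclaim pfn %lu\n", fq->entries[i].pfn);
		fq->head = (fq->head + 1) % FQ_SIZE;
	}
}

int main(void)
{
	struct fq fq = { .head = 0, .tail = 0 };
	uint64_t flushes_started = 0, flushes_finished = 0;

	for (unsigned long pfn = 0; pfn < 10; pfn++) {
		if (fq_full(&fq)) {       /* mimic fq_flush_iotlb() */
			flushes_started++;
			flushes_finished++;
			fq_ring_free(&fq, flushes_finished);
		}
		fq_add(&fq, pfn, flushes_started);
	}
	fq_ring_free(&fq, ++flushes_finished);
	return 0;
}

The (tail + 1) % FQ_SIZE == head full test deliberately sacrifices one slot so that head == tail can unambiguously mean empty.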
- */ - if (offset + pgsize_next <= size) - size = offset; - -out_set_count: - *count = size >> pgsize_idx; - return pgsize; -} - -static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, - gfp_t gfp, size_t *mapped) -{ - const struct iommu_domain_ops *ops = domain->ops; - size_t pgsize, count; - int ret = -EINVAL; - - pgsize = iommu_pgsize(domain, iova, paddr, size, &count); - - pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n", - iova, &paddr, pgsize, count); - - if (ops->map_pages) - ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot, - gfp, mapped); - - return ret; -} - -static int __iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) -{ - const struct iommu_domain_ops *ops = domain->ops; - unsigned long orig_iova = iova; - unsigned int min_pagesz; - size_t orig_size = size; - phys_addr_t orig_paddr = paddr; - int ret = 0; - - if (unlikely(!ops->map_pages || domain->pgsize_bitmap == 0UL)) - return -ENODEV; - - if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) - return -EINVAL; - - /* find out the minimum page size supported */ - min_pagesz = 1 << __ffs(domain->pgsize_bitmap); - - /* - * both the virtual address and the physical one, as well as - * the size of the mapping, must be aligned (at least) to the - * size of the smallest page supported by the hardware - */ - if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) { - pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n", - iova, &paddr, size, min_pagesz); - return -EINVAL; - } - - pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size); - - while (size) { - size_t mapped = 0; - - ret = __iommu_map_pages(domain, iova, paddr, size, prot, gfp, - &mapped); - /* - * Some pages may have been mapped, even if an error occurred, - * so we should account for those so they can be unmapped. 
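[Editor's note: iommu_pgsize() above is pure bit arithmetic: mask the bitmap of hardware-supported page sizes down to those no larger than the mapping and compatible with the common alignment of the IOVA and physical address, then take the highest remaining bit. A self-contained model of just that selection follows; the @count lookahead is omitted, and the x86-style size bitmap and the addresses are made-up examples.]

#include <stdio.h>
#include <stddef.h>

static unsigned long genmask(unsigned int h)  /* bits [h:0] set */
{
	return h >= sizeof(unsigned long) * 8 - 1 ? ~0UL
						  : (1UL << (h + 1)) - 1;
}

static unsigned int fls_ul(unsigned long v)   /* index of highest set bit */
{
	unsigned int i = 0;
	while (v >>= 1)
		i++;
	return i;
}

static unsigned int ffs_ul(unsigned long v)   /* index of lowest set bit; v != 0 */
{
	unsigned int i = 0;
	while (!(v & 1)) {
		v >>= 1;
		i++;
	}
	return i;
}

static size_t pick_pgsize(unsigned long pgsize_bitmap, unsigned long iova,
			  unsigned long paddr, size_t size)
{
	unsigned long addr_merge = iova | paddr;
	/* sizes small enough for @size ... */
	unsigned long pgsizes = pgsize_bitmap & genmask(fls_ul(size));

	/* ... and compatible with the iova/paddr alignment */
	if (addr_merge)
		pgsizes &= genmask(ffs_ul(addr_merge));

	return pgsizes ? 1UL << fls_ul(pgsizes) : 0;
}

int main(void)
{
	/* e.g. 4K | 2M | 1G, a hypothetical x86-style bitmap */
	unsigned long bitmap = (1UL << 12) | (1UL << 21) | (1UL << 30);

	printf("%zx\n", pick_pgsize(bitmap, 0x200000, 0x40200000, 1 << 22)); /* 2M */
	printf("%zx\n", pick_pgsize(bitmap, 0x201000, 0x40201000, 1 << 22)); /* 4K */
	return 0;
}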
- */ - size -= mapped; - - if (ret) - break; - - iova += mapped; - paddr += mapped; - } - - /* unroll mapping in case something went wrong */ - if (ret) - iommu_unmap(domain, orig_iova, orig_size - size); - else - trace_map(orig_iova, orig_paddr, orig_size); - - return ret; -} - -static ssize_t __iommu_map_sg_dma(struct device *dev, struct iommu_domain *domain, - unsigned long iova, struct scatterlist *sg, unsigned int nents, - int prot, gfp_t gfp, unsigned long attrs) -{ - const struct iommu_domain_ops *ops = domain->ops; - size_t mapped = 0; - int ret; - struct iommu_dma_cookie *cookie = domain->iova_cookie; - struct iova_domain *iovad = &cookie->iovad; - size_t aligned_size; - int nid = dev->numa_node; - enum dma_data_direction dir = prot & (DMA_TO_DEVICE | DMA_FROM_DEVICE | DMA_BIDIRECTIONAL); - struct scatterlist *sg_orig = sg; - struct scatterlist *s; - int i; - - might_sleep_if(gfpflags_allow_blocking(gfp)); - - /* Discourage passing strange GFP flags */ - if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | - __GFP_HIGHMEM))) - return -EINVAL; - - for_each_sg(sg, s, nents, i) { - phys_addr_t phys = page_to_phys(sg_page(s)) + s->offset; - - /* check whether dma addr is in local node */ - if (dir != DMA_TO_DEVICE) { - aligned_size = s->length; - if ((!dma_is_in_local_node(dev, nid, phys, - aligned_size)) && (pswiotlb_force_disable != true)) { - aligned_size = iova_align(iovad, s->length); - phys = pswiotlb_tbl_map_single(dev, nid, - phys, s->length, aligned_size, iova_mask(iovad), dir, attrs); - if (phys == DMA_MAPPING_ERROR) { - phys = page_to_phys(sg_page(s)) + s->offset; - dev_warn_once(dev, - "Failed to allocate memory from pswiotlb, fall back to non-local dma\n"); - } - } - } - if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - arch_sync_dma_for_device(phys, s->length, dir); - - ret = __iommu_map(domain, iova + mapped, phys, - s->length, prot, gfp); - if (ret) - goto out_err; - - mapped += s->length; - } - - if (ops->iotlb_sync_map) - ops->iotlb_sync_map(domain, iova, mapped); - return mapped; - -out_err: - /* undo mappings already done */ - iommu_dma_unmap_sg_pswiotlb(dev, sg_orig, iova, - mapped, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); - iommu_unmap(domain, iova, mapped); - - return ret; -} - -static ssize_t pswiotlb_iommu_map_sg_atomic_dma(struct device *dev, - struct iommu_domain *domain, unsigned long iova, - struct scatterlist *sg, unsigned int nents, int prot, - unsigned long attrs) -{ - return __iommu_map_sg_dma(dev, domain, iova, sg, nents, prot, GFP_ATOMIC, attrs); -} - -static bool dev_is_untrusted(struct device *dev) -{ - return dev_is_pci(dev) && to_pci_dev(dev)->untrusted; -} - -static bool dev_use_swiotlb(struct device *dev, size_t size, - enum dma_data_direction dir) -{ - return IS_ENABLED(CONFIG_SWIOTLB) && - (dev_is_untrusted(dev) || - dma_kmalloc_needs_bounce(dev, size, dir)); -} - -/** - * dma_info_to_prot - Translate DMA API directions and attributes to IOMMU API - * page flags. - * @dir: Direction of DMA transfer - * @coherent: Is the DMA master cache-coherent? - * @attrs: DMA attributes for the mapping - * - * Return: corresponding IOMMU API page protection flags - */ -static int dma_info_to_prot(enum dma_data_direction dir, bool coherent, - unsigned long attrs) -{ - int prot = coherent ? 
IOMMU_CACHE : 0; - - if (attrs & DMA_ATTR_PRIVILEGED) - prot |= IOMMU_PRIV; - - switch (dir) { - case DMA_BIDIRECTIONAL: - return prot | IOMMU_READ | IOMMU_WRITE; - case DMA_TO_DEVICE: - return prot | IOMMU_READ; - case DMA_FROM_DEVICE: - return prot | IOMMU_WRITE; - default: - return 0; - } -} - -static void queue_iova(struct iommu_dma_cookie *cookie, - unsigned long pfn, unsigned long pages, - struct list_head *freelist) -{ - struct iova_fq *fq; - unsigned long flags; - unsigned int idx; - - /* - * Order against the IOMMU driver's pagetable update from unmapping - * @pte, to guarantee that fq_flush_iotlb() observes that if called - * from a different CPU before we release the lock below. Full barrier - * so it also pairs with iommu_dma_init_fq() to avoid seeing partially - * written fq state here. - */ - smp_mb(); - - fq = raw_cpu_ptr(cookie->fq); - spin_lock_irqsave(&fq->lock, flags); - - /* - * First remove all entries from the flush queue that have already been - * flushed out on another CPU. This makes the fq_full() check below less - * likely to be true. - */ - fq_ring_free(cookie, fq); - - if (fq_full(fq)) { - fq_flush_iotlb(cookie); - fq_ring_free(cookie, fq); - } - - idx = fq_ring_add(fq); - - fq->entries[idx].iova_pfn = pfn; - fq->entries[idx].pages = pages; - fq->entries[idx].counter = atomic64_read(&cookie->fq_flush_start_cnt); - list_splice(freelist, &fq->entries[idx].freelist); - - spin_unlock_irqrestore(&fq->lock, flags); - - /* Avoid false sharing as much as possible. */ - if (!atomic_read(&cookie->fq_timer_on) && - !atomic_xchg(&cookie->fq_timer_on, 1)) - mod_timer(&cookie->fq_timer, - jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT)); -} - -static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, - size_t size, u64 dma_limit, struct device *dev) -{ - struct iommu_dma_cookie *cookie = domain->iova_cookie; - struct iova_domain *iovad = &cookie->iovad; - unsigned long shift, iova_len, iova; - - if (cookie->type == IOMMU_DMA_MSI_COOKIE) { - cookie->msi_iova += size; - return cookie->msi_iova - size; - } - - shift = iova_shift(iovad); - iova_len = size >> shift; - - dma_limit = min_not_zero(dma_limit, dev->bus_dma_limit); - - if (domain->geometry.force_aperture) - dma_limit = min_t(u64, dma_limit, (u64)domain->geometry.aperture_end); - - /* - * Try to use all the 32-bit PCI addresses first. The original SAC vs. - * DAC reasoning loses relevance with PCIe, but enough hardware and - * firmware bugs are still lurking out there that it's safest not to - * venture into the 64-bit space until necessary. - * - * If your device goes wrong after seeing the notice then likely either - * its driver is not setting DMA masks accurately, the hardware has - * some inherent bug in handling >32-bit addresses, or not all the - * expected address bits are wired up between the device and the IOMMU. 
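[Editor's note: the IOVA allocator above encodes a retry-and-latch policy: carve from the 32-bit window while it lasts, and on the first failure clear pci_32bit_workaround so the device permanently moves to its full DMA mask. Here is that shape in userspace; alloc_range() is a toy bump allocator standing in for alloc_iova_fast(), and the addresses are arbitrary.]

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t next_free = 0xF0000000ULL;  /* pretend allocator cursor */

static uint64_t alloc_range(uint64_t len, uint64_t limit)
{
	uint64_t iova;

	if (next_free + len - 1 > limit)
		return 0;          /* 0 doubles as "no space" in this toy */
	iova = next_free;
	next_free += len;
	return iova;
}

struct dev_state { bool pci_32bit_workaround; };

static uint64_t alloc_iova(struct dev_state *dev, uint64_t len, uint64_t dma_limit)
{
	uint64_t iova;

	if (dma_limit > 0xFFFFFFFFULL && dev->pci_32bit_workaround) {
		iova = alloc_range(len, 0xFFFFFFFFULL);
		if (iova)
			return iova;
		/* 32-bit space exhausted: stop trying it for this device */
		dev->pci_32bit_workaround = false;
		printf("switching to full-mask DMA addresses\n");
	}
	return alloc_range(len, dma_limit);
}

int main(void)
{
	struct dev_state dev = { .pci_32bit_workaround = true };

	printf("%llx\n", (unsigned long long)alloc_iova(&dev, 0x10000000, ~0ULL));
	printf("%llx\n", (unsigned long long)alloc_iova(&dev, 0x10000000, ~0ULL));
	return 0;
}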
- */ - if (dma_limit > DMA_BIT_MASK(32) && dev->iommu->pci_32bit_workaround) { - iova = alloc_iova_fast(iovad, iova_len, - DMA_BIT_MASK(32) >> shift, false); - if (iova) - goto done; - - dev->iommu->pci_32bit_workaround = false; - dev_notice(dev, "Using %d-bit DMA addresses\n", bits_per(dma_limit)); - } - - iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift, true); -done: - return (dma_addr_t)iova << shift; -} - -static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie, - dma_addr_t iova, size_t size, struct iommu_iotlb_gather *gather) -{ - struct iova_domain *iovad = &cookie->iovad; - - /* The MSI case is only ever cleaning up its most recent allocation */ - if (cookie->type == IOMMU_DMA_MSI_COOKIE) - cookie->msi_iova -= size; - else if (gather && gather->queued) - queue_iova(cookie, iova_pfn(iovad, iova), - size >> iova_shift(iovad), - &gather->freelist); - else - free_iova_fast(iovad, iova_pfn(iovad, iova), - size >> iova_shift(iovad)); -} - -static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr, - size_t size) -{ - struct iommu_domain *domain = iommu_get_dma_domain(dev); - struct iommu_dma_cookie *cookie = domain->iova_cookie; - struct iova_domain *iovad = &cookie->iovad; - size_t iova_off = iova_offset(iovad, dma_addr); - struct iommu_iotlb_gather iotlb_gather; - size_t unmapped; - - dma_addr -= iova_off; - size = iova_align(iovad, size + iova_off); - iommu_iotlb_gather_init(&iotlb_gather); - iotlb_gather.queued = READ_ONCE(cookie->fq_domain); - - unmapped = iommu_unmap_fast(domain, dma_addr, size, &iotlb_gather); - WARN_ON(unmapped != size); - - if (!iotlb_gather.queued) - iommu_iotlb_sync(domain, &iotlb_gather); - iommu_dma_free_iova(cookie, dma_addr, size, &iotlb_gather); -} - -static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, - size_t size, int prot, u64 dma_mask) -{ - struct iommu_domain *domain = iommu_get_dma_domain(dev); - struct iommu_dma_cookie *cookie = domain->iova_cookie; - struct iova_domain *iovad = &cookie->iovad; - size_t iova_off = iova_offset(iovad, phys); - dma_addr_t iova; - - if (static_branch_unlikely(&iommu_deferred_attach_enabled) && - iommu_deferred_attach(dev, domain)) - return DMA_MAPPING_ERROR; - - size = iova_align(iovad, size + iova_off); - - iova = iommu_dma_alloc_iova(domain, size, dma_mask, dev); - if (!iova) - return DMA_MAPPING_ERROR; - - if (iommu_map(domain, iova, phys - iova_off, size, prot, GFP_ATOMIC)) { - iommu_dma_free_iova(cookie, iova, size, NULL); - return DMA_MAPPING_ERROR; - } - return iova + iova_off; -} - -void pswiotlb_iommu_dma_sync_single_for_cpu(struct device *dev, - dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) -{ - phys_addr_t phys; - int nid = dev->numa_node; - struct p_io_tlb_pool *pool; - - if (is_pswiotlb_active(dev)) { - phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle); - if (!dev_is_dma_coherent(dev)) - arch_sync_dma_for_cpu(phys, size, dir); - - if (is_pswiotlb_buffer(dev, nid, phys, &pool)) - pswiotlb_sync_single_for_cpu(dev, nid, phys, size, dir, pool); - - if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev, size, dir)) - return; - - if (is_swiotlb_buffer(dev, phys)) - swiotlb_sync_single_for_cpu(dev, phys, size, dir); - } else { - if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev, size, dir)) - return; - phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle); - if (!dev_is_dma_coherent(dev)) - arch_sync_dma_for_cpu(phys, size, dir); - if (is_swiotlb_buffer(dev, phys)) - swiotlb_sync_single_for_cpu(dev, phys, size, 
dir); - } -} - -void pswiotlb_iommu_dma_sync_single_for_device(struct device *dev, - dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) -{ - phys_addr_t phys; - int nid = dev->numa_node; - struct p_io_tlb_pool *pool; - - if (is_pswiotlb_active(dev)) { - phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle); - if (is_pswiotlb_buffer(dev, nid, phys, &pool)) - pswiotlb_sync_single_for_device(dev, nid, phys, size, dir, pool); - - if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev, size, dir)) - return; - } else { - if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev, size, dir)) - return; - - phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle); - } - - if (is_swiotlb_buffer(dev, phys)) - swiotlb_sync_single_for_device(dev, phys, size, dir); - - if (!dev_is_dma_coherent(dev)) - arch_sync_dma_for_device(phys, size, dir); -} - -void pswiotlb_iommu_dma_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nelems, - enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - int nid = dev->numa_node; - dma_addr_t start_orig; - phys_addr_t phys; - struct iommu_domain *domain = iommu_get_dma_domain(dev); - struct iommu_dma_cookie *cookie = domain->iova_cookie; - struct iova_domain *iovad = &cookie->iovad; - struct p_io_tlb_pool *pool; - - if (is_pswiotlb_active(dev)) { - start_orig = sg_dma_address(sgl); - for_each_sg(sgl, sg, nelems, i) { - if (dir != DMA_TO_DEVICE) { - unsigned int s_iova_off = iova_offset(iovad, sg->offset); - - if (i > 0) - start_orig += s_iova_off; - phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), start_orig); - if (!dev_is_dma_coherent(dev)) - arch_sync_dma_for_cpu(phys, sg->length, dir); - - if (is_pswiotlb_buffer(dev, nid, phys, &pool)) - pswiotlb_sync_single_for_cpu(dev, nid, phys, - sg->length, dir, pool); - start_orig -= s_iova_off; - start_orig += iova_align(iovad, sg->length + s_iova_off); - } else { - if (!dev_is_dma_coherent(dev)) - arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir); - } - } - } else { - if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev)) - return; - - for_each_sg(sgl, sg, nelems, i) { - if (!dev_is_dma_coherent(dev)) - arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir); - - if (is_swiotlb_buffer(dev, sg_phys(sg))) - swiotlb_sync_single_for_cpu(dev, sg_phys(sg), - sg->length, dir); - } - } -} - -void pswiotlb_iommu_dma_sync_sg_for_device(struct device *dev, - struct scatterlist *sgl, int nelems, - enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - int nid = dev->numa_node; - struct p_io_tlb_pool *pool; - - if (is_pswiotlb_active(dev)) { - for_each_sg(sgl, sg, nelems, i) { - if (is_pswiotlb_buffer(dev, nid, sg_phys(sg), &pool)) - pswiotlb_sync_single_for_device(dev, nid, sg_phys(sg), - sg->length, dir, pool); - if (dev_is_dma_coherent(dev) && !sg_dma_is_swiotlb(sgl)) - continue; - - if (!dev_is_dma_coherent(dev)) - arch_sync_dma_for_device(sg_phys(sg), sg->length, dir); - } - } else { - if (dev_is_dma_coherent(dev) && !sg_dma_is_swiotlb(sgl)) - return; - - for_each_sg(sgl, sg, nelems, i) { - if (is_swiotlb_buffer(dev, sg_phys(sg))) - swiotlb_sync_single_for_device(dev, sg_phys(sg), - sg->length, dir); - - if (!dev_is_dma_coherent(dev)) - arch_sync_dma_for_device(sg_phys(sg), sg->length, dir); - } - } -} - -dma_addr_t pswiotlb_iommu_dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - phys_addr_t phys = page_to_phys(page) + offset; - bool coherent = 
dev_is_dma_coherent(dev); - - int prot = dma_info_to_prot(dir, coherent, attrs); - struct iommu_domain *domain = iommu_get_dma_domain(dev); - struct iommu_dma_cookie *cookie = domain->iova_cookie; - struct iova_domain *iovad = &cookie->iovad; - size_t aligned_size = size; - dma_addr_t iova, dma_mask = dma_get_mask(dev); - int nid = dev->numa_node; - struct p_io_tlb_pool *pool; - - /* - * If both the physical buffer start address and size are - * page aligned, we don't need to use a bounce page. - */ - if (dev_use_swiotlb(dev, size, dir) && - iova_offset(iovad, phys | size)) { - void *padding_start; - size_t padding_size; - - if (!is_swiotlb_active(dev)) { - dev_warn_once(dev, "DMA bounce buffers are inactive, unable to map unaligned transaction.\n"); - return DMA_MAPPING_ERROR; - } - - aligned_size = iova_align(iovad, size); - phys = swiotlb_tbl_map_single(dev, phys, size, - iova_mask(iovad), dir, attrs); - - if (phys == DMA_MAPPING_ERROR) - return DMA_MAPPING_ERROR; - - /* Cleanup the padding area. */ - padding_start = phys_to_virt(phys); - padding_size = aligned_size; - - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && - (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) { - padding_start += size; - padding_size -= size; - } - - memset(padding_start, 0, padding_size); - } - - /* check whether dma addr is in local node */ - if (is_pswiotlb_active(dev)) { - if (dir != DMA_TO_DEVICE) { - if (unlikely(!dma_is_in_local_node(dev, nid, phys, aligned_size))) { - aligned_size = iova_align(iovad, size); - phys = pswiotlb_tbl_map_single(dev, nid, phys, size, - aligned_size, iova_mask(iovad), - dir, attrs); - if (phys == DMA_MAPPING_ERROR) { - phys = page_to_phys(page) + offset; - dev_warn_once(dev, - "Failed to allocate memory from pswiotlb, fall back to non-local dma\n"); - } - } - } - } - - if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - arch_sync_dma_for_device(phys, size, dir); - - iova = __iommu_dma_map(dev, phys, size, prot, dma_mask); - if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(dev, phys)) - swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); - if (iova == DMA_MAPPING_ERROR && is_pswiotlb_buffer(dev, nid, phys, &pool)) - pswiotlb_tbl_unmap_single(dev, nid, phys, 0, size, dir, attrs, pool); - return iova; -} - -void pswiotlb_iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction dir, unsigned long attrs) -{ - struct iommu_domain *domain = iommu_get_dma_domain(dev); - phys_addr_t phys; - int nid = dev->numa_node; - struct p_io_tlb_pool *pool; - - phys = iommu_iova_to_phys(domain, dma_handle); - if (WARN_ON(!phys)) - return; - - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev)) - arch_sync_dma_for_cpu(phys, size, dir); - - __iommu_dma_unmap(dev, dma_handle, size); - - if (unlikely(is_swiotlb_buffer(dev, phys))) - swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); - - if (is_pswiotlb_active(dev) && - is_pswiotlb_buffer(dev, nid, phys, &pool)) - pswiotlb_tbl_unmap_single(dev, nid, phys, 0, size, dir, attrs, pool); -} - -static void iommu_dma_unmap_page_sg(struct device *dev, dma_addr_t dma_handle, - size_t offset, size_t size, enum dma_data_direction dir, unsigned long attrs) -{ - struct iommu_domain *domain = iommu_get_dma_domain(dev); - phys_addr_t phys; - int nid = dev->numa_node; - struct p_io_tlb_pool *pool; - - phys = iommu_iova_to_phys(domain, dma_handle); - - if (WARN_ON(!phys)) - return; - - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev)) - arch_sync_dma_for_cpu(phys, size, dir); - - if 
(is_pswiotlb_buffer(dev, nid, phys, &pool)) - pswiotlb_tbl_unmap_single(dev, nid, phys, offset, size, dir, attrs, pool); -} - -/* - * Prepare a successfully-mapped scatterlist to give back to the caller. - * - * At this point the segments are already laid out by pswiotlb_iommu_dma_map_sg() to - * avoid individually crossing any boundaries, so we merely need to check a - * segment's start address to avoid concatenating across one. - */ -static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents, - dma_addr_t dma_addr) -{ - struct scatterlist *s, *cur = sg; - unsigned long seg_mask = dma_get_seg_boundary(dev); - unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev); - int i, count = 0; - - for_each_sg(sg, s, nents, i) { - /* Restore this segment's original unaligned fields first */ - dma_addr_t s_dma_addr = sg_dma_address(s); - unsigned int s_iova_off = sg_dma_address(s); - unsigned int s_length = sg_dma_len(s); - unsigned int s_iova_len = s->length; - - sg_dma_address(s) = DMA_MAPPING_ERROR; - sg_dma_len(s) = 0; - - if (sg_dma_is_bus_address(s)) { - if (i > 0) - cur = sg_next(cur); - - sg_dma_unmark_bus_address(s); - sg_dma_address(cur) = s_dma_addr; - sg_dma_len(cur) = s_length; - sg_dma_mark_bus_address(cur); - count++; - cur_len = 0; - continue; - } - - s->offset += s_iova_off; - s->length = s_length; - - /* - * Now fill in the real DMA data. If... - * - there is a valid output segment to append to - * - and this segment starts on an IOVA page boundary - * - but doesn't fall at a segment boundary - * - and wouldn't make the resulting output segment too long - */ - if (cur_len && !s_iova_off && (dma_addr & seg_mask) && - (max_len - cur_len >= s_length)) { - /* ...then concatenate it with the previous one */ - cur_len += s_length; - } else { - /* Otherwise start the next output segment */ - if (i > 0) - cur = sg_next(cur); - cur_len = s_length; - count++; - - sg_dma_address(cur) = dma_addr + s_iova_off; - } - - sg_dma_len(cur) = cur_len; - dma_addr += s_iova_len; - - if (s_length + s_iova_off < s_iova_len) - cur_len = 0; - } - return count; -} - -/* - * If mapping failed, then just restore the original list, - * but making sure the DMA fields are invalidated. 
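[Editor's note: the merge rule in __finalise_sg() above can be exercised on its own: append to the current output segment only while the running DMA address does not sit on a segment-boundary address and the merged length stays within the device's maximum segment size. The sketch below drops the per-segment IOVA page-offset condition and the P2P bus-address case; the boundary and size limits are invented.]

#include <stdio.h>
#include <stddef.h>

#define SEG_MASK 0xFFFFFULL   /* 1 MiB segment boundary, minus one */
#define MAX_SEG  (1 << 18)    /* 256 KiB max merged segment */

struct seg { unsigned long long dma_addr; size_t len; };

/* chunks are laid out back to back from base; emit merged DMA segments
 * the way the IOMMU path hands them back to the caller. */
static int finalise(unsigned long long base, const size_t *lens, int n,
		    struct seg *out)
{
	int count = 0;
	size_t cur_len = 0;
	unsigned long long dma = base;

	for (int i = 0; i < n; i++) {
		if (cur_len && (dma & SEG_MASK) && MAX_SEG - cur_len >= lens[i]) {
			cur_len += lens[i];         /* concatenate */
		} else {
			out[count].dma_addr = dma;  /* start a new segment */
			cur_len = lens[i];
			count++;
		}
		out[count - 1].len = cur_len;
		dma += lens[i];
	}
	return count;
}

int main(void)
{
	size_t lens[] = { 0x10000, 0x10000, 0x40000 };
	struct seg out[3];
	int n = finalise(0x100000, lens, 3, out);

	for (int i = 0; i < n; i++)
		printf("seg %d: %#llx +%zx\n", i, out[i].dma_addr, out[i].len);
	return 0;
}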
- */ -static void __invalidate_sg(struct scatterlist *sg, int nents) -{ - struct scatterlist *s; - int i; - - for_each_sg(sg, s, nents, i) { - if (sg_dma_is_bus_address(s)) { - sg_dma_unmark_bus_address(s); - } else { - if (sg_dma_address(s) != DMA_MAPPING_ERROR) - s->offset += sg_dma_address(s); - if (sg_dma_len(s)) - s->length = sg_dma_len(s); - } - sg_dma_address(s) = DMA_MAPPING_ERROR; - sg_dma_len(s) = 0; - } -} - -static void iommu_dma_unmap_sg_pswiotlb_pagesize(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - struct scatterlist *s; - int i; - - for_each_sg(sg, s, nents, i) - pswiotlb_iommu_dma_unmap_page(dev, sg_dma_address(s), - sg_dma_len(s), dir, attrs); -} - -void iommu_dma_unmap_sg_pswiotlb(struct device *dev, struct scatterlist *sg, - unsigned long iova_start, size_t mapped, int nents, - enum dma_data_direction dir, unsigned long attrs) -{ - dma_addr_t start, start_orig; - struct scatterlist *s; - struct scatterlist *sg_orig = sg; - int i; - - start = iova_start; - start_orig = start; - for_each_sg(sg_orig, s, nents, i) { - if (!mapped || (start_orig > (start + mapped))) - break; - if (s->length == 0) - break; - iommu_dma_unmap_page_sg(dev, start_orig, 0, - s->length, dir, attrs); - start_orig += s->length; - } -} - -static int iommu_dma_map_sg_pswiotlb_pagesize(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - struct scatterlist *s; - int i; - - sg_dma_mark_swiotlb(sg); - - for_each_sg(sg, s, nents, i) { - sg_dma_address(s) = pswiotlb_iommu_dma_map_page(dev, sg_page(s), - s->offset, s->length, dir, attrs); - if (sg_dma_address(s) == DMA_MAPPING_ERROR) - goto out_unmap; - sg_dma_len(s) = s->length; - } - - return nents; - -out_unmap: - iommu_dma_unmap_sg_pswiotlb_pagesize(dev, sg, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); - return -EIO; -} - -/* - * The DMA API client is passing in a scatterlist which could describe - * any old buffer layout, but the IOMMU API requires everything to be - * aligned to IOMMU pages. Hence the need for this complicated bit of - * impedance-matching, to be able to hand off a suitably-aligned list, - * but still preserve the original offsets and sizes for the caller. - */ -int pswiotlb_iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - struct iommu_domain *domain = iommu_get_dma_domain(dev); - struct iommu_dma_cookie *cookie = domain->iova_cookie; - struct iova_domain *iovad = &cookie->iovad; - struct scatterlist *s, *prev = NULL; - int prot = dma_info_to_prot(dir, dev_is_dma_coherent(dev), attrs); - struct pci_p2pdma_map_state p2pdma_state = {}; - enum pci_p2pdma_map_type map; - dma_addr_t iova; - size_t iova_len = 0; - unsigned long mask = dma_get_seg_boundary(dev); - ssize_t ret; - int i; - - if (static_branch_unlikely(&iommu_deferred_attach_enabled)) { - ret = iommu_deferred_attach(dev, domain); - goto out; - } - - if (dir != DMA_TO_DEVICE && is_pswiotlb_active(dev) - && ((nents == 1) && (sg->length < PAGE_SIZE))) - return iommu_dma_map_sg_pswiotlb_pagesize(dev, sg, nents, dir, attrs); - - if ((dir == DMA_TO_DEVICE) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - pswiotlb_iommu_dma_sync_sg_for_device(dev, sg, nents, dir); - - /* - * Work out how much IOVA space we need, and align the segments to - * IOVA granules for the IOMMU driver to handle. 
With some clever - * trickery we can modify the list in-place, but reversibly, by - * stashing the unaligned parts in the as-yet-unused DMA fields. - */ - for_each_sg(sg, s, nents, i) { - size_t s_iova_off = iova_offset(iovad, s->offset); - size_t s_length = s->length; - size_t pad_len = (mask - iova_len + 1) & mask; - - if (is_pci_p2pdma_page(sg_page(s))) { - map = pci_p2pdma_map_segment(&p2pdma_state, dev, s); - switch (map) { - case PCI_P2PDMA_MAP_BUS_ADDR: - /* - * iommu_map_sg() will skip this segment as - * it is marked as a bus address, - * __finalise_sg() will copy the dma address - * into the output segment. - */ - continue; - case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: - /* - * Mapping through host bridge should be - * mapped with regular IOVAs, thus we - * do nothing here and continue below. - */ - break; - default: - ret = -EREMOTEIO; - goto out_restore_sg; - } - } - - sg_dma_address(s) = s_iova_off; - sg_dma_len(s) = s_length; - s->offset -= s_iova_off; - s_length = iova_align(iovad, s_length + s_iova_off); - s->length = s_length; - - /* - * Due to the alignment of our single IOVA allocation, we can - * depend on these assumptions about the segment boundary mask: - * - If mask size >= IOVA size, then the IOVA range cannot - * possibly fall across a boundary, so we don't care. - * - If mask size < IOVA size, then the IOVA range must start - * exactly on a boundary, therefore we can lay things out - * based purely on segment lengths without needing to know - * the actual addresses beforehand. - * - The mask must be a power of 2, so pad_len == 0 if - * iova_len == 0, thus we cannot dereference prev the first - * time through here (i.e. before it has a meaningful value). - */ - if (pad_len && pad_len < s_length - 1) { - prev->length += pad_len; - iova_len += pad_len; - } - - iova_len += s_length; - prev = s; - } - - if (!iova_len) - return __finalise_sg(dev, sg, nents, 0); - - iova = iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev), dev); - if (!iova) { - ret = -ENOMEM; - goto out_restore_sg; - } - - /* - * We'll leave any physical concatenation to the IOMMU driver's - * implementation - it knows better than we do. - */ - if (dir != DMA_TO_DEVICE && is_pswiotlb_active(dev)) - ret = pswiotlb_iommu_map_sg_atomic_dma(dev, domain, iova, sg, nents, prot, attrs); - else - ret = iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC); - - if (ret < 0 || ret < iova_len) - goto out_free_iova; - - return __finalise_sg(dev, sg, nents, iova); - -out_free_iova: - iommu_dma_free_iova(cookie, iova, iova_len, NULL); -out_restore_sg: - __invalidate_sg(sg, nents); -out: - if (ret != -ENOMEM && ret != -EREMOTEIO) - return -EINVAL; - return ret; -} - -void pswiotlb_iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - dma_addr_t start, end = 0, start_orig; - struct scatterlist *tmp, *s; - struct scatterlist *sg_orig = sg; - int i; - struct iommu_domain *domain = iommu_get_dma_domain(dev); - struct iommu_dma_cookie *cookie = domain->iova_cookie; - struct iova_domain *iovad = &cookie->iovad; - - if ((dir != DMA_TO_DEVICE) && ((nents == 1) && (sg->length < PAGE_SIZE))) { - iommu_dma_unmap_sg_pswiotlb_pagesize(dev, sg, nents, dir, attrs); - return; - } - - if ((dir == DMA_TO_DEVICE) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - pswiotlb_iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir); - - /* - * The scatterlist segments are mapped into a single - * contiguous IOVA allocation, the start and end points - * just have to be determined. 
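[Editor's note: the sizing pass of pswiotlb_iommu_dma_map_sg() above can be checked in isolation: every chunk grows to granule alignment with its page offset folded in, and pad_len keeps a chunk from straddling the segment-boundary mask. A toy run with two chunks, assuming a 4 KiB granule and a 32 KiB boundary.]

#include <stdio.h>
#include <stddef.h>

#define GRANULE 0x1000UL              /* assumed IOVA page size */

static size_t iova_align(size_t len)
{
	return (len + GRANULE - 1) & ~(GRANULE - 1);
}

int main(void)
{
	/* (offset-within-page, length) pairs, as a scatterlist carries */
	struct { size_t off, len; } sgs[] = { {0x200, 0x800}, {0, 0x3000} };
	unsigned long mask = 0x7FFF;  /* 32 KiB segment boundary, minus one */
	size_t iova_len = 0;

	for (size_t i = 0; i < sizeof(sgs) / sizeof(sgs[0]); i++) {
		size_t len = iova_align(sgs[i].len + sgs[i].off);
		size_t pad = (mask - iova_len + 1) & mask;

		if (pad && pad < len - 1) /* would straddle a boundary */
			iova_len += pad;
		iova_len += len;
	}
	printf("need %#zx bytes of IOVA space\n", iova_len);
	return 0;
}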
- */ - for_each_sg(sg, tmp, nents, i) { - if (sg_dma_is_bus_address(tmp)) { - sg_dma_unmark_bus_address(tmp); - continue; - } - - if (sg_dma_len(tmp) == 0) - break; - - start = sg_dma_address(tmp); - break; - } - - if (is_pswiotlb_active(dev)) { - /* check whether dma addr is in local node */ - start_orig = start; - if (dir != DMA_TO_DEVICE) { - for_each_sg(sg_orig, s, nents, i) { - unsigned int s_iova_off = iova_offset(iovad, s->offset); - - if (i > 0) - start_orig += s_iova_off; - iommu_dma_unmap_page_sg(dev, start_orig, - s_iova_off, s->length, - dir, attrs); - start_orig -= s_iova_off; - start_orig += iova_align(iovad, s->length + s_iova_off); - } - } - } - - nents -= i; - for_each_sg(tmp, tmp, nents, i) { - if (sg_dma_is_bus_address(tmp)) { - sg_dma_unmark_bus_address(tmp); - continue; - } - - if (sg_dma_len(tmp) == 0) - break; - - end = sg_dma_address(tmp) + sg_dma_len(tmp); - } - - if (end) - __iommu_dma_unmap(dev, start, end - start); -} diff --git a/kernel/dma/phytium/pswiotlb-mapping.c b/kernel/dma/phytium/pswiotlb-mapping.c deleted file mode 100644 index 65674b7bdeab7e3fcf53517ef86d08c164858492..0000000000000000000000000000000000000000 --- a/kernel/dma/phytium/pswiotlb-mapping.c +++ /dev/null @@ -1,157 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Auxiliary DMA operations used by arch-independent dma-mapping - * routines when Phytium software IO tlb is required. - * - * Copyright (c) 2024, Phytium Technology Co., Ltd. - */ -#include /* for max_pfn */ -#include -#include -#include -#include -#include -#include -#include -#include "../debug.h" -#include "../direct.h" -#include "pswiotlb-dma.h" - -/* - * The following functions are ported from - * ./drivers/dma/mapping.c - * static bool dma_go_direct(struct device *dev, dma_addr_t mask, - * const struct dma_map_ops *ops); - * static inline bool dma_map_direct(struct device *dev, - * const struct dma_map_ops *ops); - */ - -static bool dma_go_direct(struct device *dev, dma_addr_t mask, - const struct dma_map_ops *ops) -{ - if (likely(!ops)) - return true; -#ifdef CONFIG_DMA_OPS_BYPASS - if (dev->dma_ops_bypass) - return min_not_zero(mask, dev->bus_dma_limit) >= - dma_direct_get_required_mask(dev); -#endif - return false; -} - -static inline bool dma_map_direct(struct device *dev, - const struct dma_map_ops *ops) -{ - return dma_go_direct(dev, *dev->dma_mask, ops); -} -dma_addr_t pswiotlb_dma_map_page_distribute(struct device *dev, struct page *page, - size_t offset, size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - dma_addr_t addr; - - if (dma_map_direct(dev, ops) || - arch_dma_map_page_direct(dev, page_to_phys(page) + offset + size)) - addr = pswiotlb_dma_direct_map_page(dev, page, offset, size, dir, attrs); - else - addr = pswiotlb_iommu_dma_map_page(dev, page, offset, size, dir, attrs); - debug_dma_map_page(dev, page, offset, size, dir, addr, attrs); - - return addr; -} - -void pswiotlb_dma_unmap_page_attrs_distribute(struct device *dev, dma_addr_t addr, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - if (dma_map_direct(dev, ops) || - arch_dma_unmap_page_direct(dev, addr + size)) - pswiotlb_dma_direct_unmap_page(dev, addr, size, dir, attrs); - else if (ops->unmap_page) - pswiotlb_iommu_dma_unmap_page(dev, addr, size, dir, attrs); - debug_dma_unmap_page(dev, addr, size, dir); -} - -int pswiotlb_dma_map_sg_attrs_distribute(struct device *dev, struct scatterlist *sg, - int nents, 
enum dma_data_direction dir, unsigned long attrs) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - int ents; - - if (dma_map_direct(dev, ops) || - arch_dma_map_sg_direct(dev, sg, nents)) - ents = pswiotlb_dma_direct_map_sg(dev, sg, nents, dir, attrs); - else - ents = pswiotlb_iommu_dma_map_sg(dev, sg, nents, dir, attrs); - - if (ents > 0) - debug_dma_map_sg(dev, sg, nents, ents, dir, attrs); - else if (WARN_ON_ONCE(ents != -EINVAL && ents != -ENOMEM && - ents != -EIO)) - return -EIO; - - return ents; -} - -void pswiotlb_dma_unmap_sg_attrs_distribute(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, - unsigned long attrs) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - if (dma_map_direct(dev, ops) || - arch_dma_unmap_sg_direct(dev, sg, nents)) - pswiotlb_dma_direct_unmap_sg(dev, sg, nents, dir, attrs); - else if (ops->unmap_sg) - pswiotlb_iommu_dma_unmap_sg(dev, sg, nents, dir, attrs); -} - -void pswiotlb_dma_sync_single_for_cpu_distribute(struct device *dev, dma_addr_t addr, size_t size, - enum dma_data_direction dir) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - if (dma_map_direct(dev, ops)) - pswiotlb_dma_direct_sync_single_for_cpu(dev, addr, size, dir); - else if (ops->sync_single_for_cpu) - pswiotlb_iommu_dma_sync_single_for_cpu(dev, addr, size, dir); - debug_dma_sync_single_for_cpu(dev, addr, size, dir); -} - -void pswiotlb_dma_sync_single_for_device_distribute(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - if (dma_map_direct(dev, ops)) - pswiotlb_dma_direct_sync_single_for_device(dev, addr, size, dir); - else if (ops->sync_single_for_device) - pswiotlb_iommu_dma_sync_single_for_device(dev, addr, size, dir); - debug_dma_sync_single_for_device(dev, addr, size, dir); -} - -void pswiotlb_dma_sync_sg_for_cpu_distribute(struct device *dev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - if (dma_map_direct(dev, ops)) - pswiotlb_dma_direct_sync_sg_for_cpu(dev, sg, nelems, dir); - else if (ops->sync_sg_for_cpu) - pswiotlb_iommu_dma_sync_sg_for_cpu(dev, sg, nelems, dir); - debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir); -} - -void pswiotlb_dma_sync_sg_for_device_distribute(struct device *dev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - if (dma_map_direct(dev, ops)) - pswiotlb_dma_direct_sync_sg_for_device(dev, sg, nelems, dir); - else if (ops->sync_sg_for_device) - pswiotlb_iommu_dma_sync_sg_for_device(dev, sg, nelems, dir); - debug_dma_sync_sg_for_device(dev, sg, nelems, dir); -} diff --git a/kernel/dma/phytium/pswiotlb.c b/kernel/dma/phytium/pswiotlb.c deleted file mode 100644 index 31ee5ed734488fbcb20fc61b08c41b740423a8df..0000000000000000000000000000000000000000 --- a/kernel/dma/phytium/pswiotlb.c +++ /dev/null @@ -1,1736 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Phytium software IO tlb to improve DMA performance. - * - * Copyright (c) 2024, Phytium Technology Co., Ltd. 
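[Editor's note: the *_distribute helpers above all share one dispatch shape, mirroring dma_map_direct(): go straight to the direct path unless the device carries a dma_map_ops table (and bypass is not in force). In miniature; the types and names here are illustrative, not the kernel's.]

#include <stdio.h>
#include <stdbool.h>

struct dev;
struct map_ops { void (*sync_for_cpu)(struct dev *); };
struct dev { const struct map_ops *ops; bool ops_bypass; };

static void direct_sync_for_cpu(struct dev *d) { puts("direct path"); }
static void iommu_sync_for_cpu(struct dev *d)  { puts("iommu path"); }

static bool map_direct(const struct dev *d)
{
	return !d->ops || d->ops_bypass;  /* simplified dma_go_direct() */
}

static void sync_for_cpu(struct dev *d)
{
	if (map_direct(d))
		direct_sync_for_cpu(d);
	else if (d->ops->sync_for_cpu)
		d->ops->sync_for_cpu(d);
}

int main(void)
{
	const struct map_ops iommu_ops = { .sync_for_cpu = iommu_sync_for_cpu };
	struct dev plain = { .ops = NULL };
	struct dev behind_iommu = { .ops = &iommu_ops };

	sync_for_cpu(&plain);        /* direct path */
	sync_for_cpu(&behind_iommu); /* iommu path */
	return 0;
}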
- */ - -#define pr_fmt(fmt) "Phytium software IO TLB: " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef CONFIG_DEBUG_FS -#include -#endif -#ifdef CONFIG_DMA_RESTRICTED_POOL -#include -#include -#include -#include -#include -#endif - -#include - -#define CREATE_TRACE_POINTS -#include - -#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - P_IO_TLB_SHIFT)) - -/* - * Minimum Phytium IO TLB size to bother booting with. If we can't - * allocate a contiguous 1MB, we're probably in trouble anyway. - */ -#define P_IO_TLB_MIN_SLABS ((1<<20) >> P_IO_TLB_SHIFT) -#define PSWIOTLB_VERSION "1.0.0" -#define INVALID_PHYS_ADDR (~(phys_addr_t)0) - -int pswiotlb_node_num; -bool pswiotlb_mtimer_alive; - -/** - * struct p_io_tlb_slot - Phytium IO TLB slot descriptor - * @orig_addr: The original address corresponding to a mapped entry. - * @alloc_size: Size of the allocated buffer. - * @list: The free list describing the number of free entries available - * from each index. - */ -struct p_io_tlb_slot { - phys_addr_t orig_addr; - size_t alloc_size; - unsigned int list; -}; - -bool pswiotlb_force_disable; - -static struct page *alloc_dma_pages(int nid, gfp_t gfp, size_t bytes); - -struct p_io_tlb_mem p_io_tlb_default_mem[MAX_NUMNODES]; -static struct timer_list service_timer; - -static unsigned long default_npslabs = P_IO_TLB_DEFAULT_SIZE >> P_IO_TLB_SHIFT; -static unsigned long dynamic_inc_thr_npslabs = P_IO_TLB_INC_THR >> P_IO_TLB_SHIFT; -static unsigned long default_npareas; - -LIST_HEAD(passthroughlist); -static spinlock_t passthroughlist_lock; -static struct pswiotlb_passthroughlist passthroughlist_entry[1024]; -static struct dentry *passthroughlist_debugfs; -static struct dentry *pswiotlb_debugfs; -/** - * struct p_io_tlb_area - Phytium IO TLB memory area descriptor - * - * This is a single area with a single lock. - * - * @used: The number of used Phytium IO TLB block. - * @index: The slot index to start searching in this area for next round. - * @lock: The lock to protect the above data structures in the map and - * unmap calls. - */ -struct p_io_tlb_area { - unsigned long used; - unsigned int index; - spinlock_t lock; -}; - -static struct pswiotlb_passthroughlist_entry { - unsigned short vendor; - unsigned short device; -} ps_passthroughlist[] = { - {BL_PCI_VENDOR_ID_NVIDIA, 0xFFFF}, - {BL_PCI_VENDOR_ID_ILUVATAR, 0xFFFF}, - {BL_PCI_VENDOR_ID_METAX, 0xFFFF}, - {} -}; - -/* - * Round up number of slabs to the next power of 2. The last area is going - * be smaller than the rest if default_npslabs is not power of two. - * The number of slot in an area should be a multiple of P_IO_TLB_SEGSIZE, - * otherwise a segment may span two or more areas. It conflicts with free - * contiguous slots tracking: free slots are treated contiguous no matter - * whether they cross an area boundary. - * - * Return true if default_npslabs is rounded up. 
- */ -static bool round_up_default_npslabs(void) -{ - if (!default_npareas) - return false; - - if (default_npslabs < P_IO_TLB_SEGSIZE * default_npareas) - default_npslabs = P_IO_TLB_SEGSIZE * default_npareas; - else if (is_power_of_2(default_npslabs)) - return false; - default_npslabs = roundup_pow_of_two(default_npslabs); - return true; -} - -/** - * pswiotlb_adjust_nareas() - adjust the number of areas and slots - * @nareas: Desired number of areas. Zero is treated as 1. - * - * Adjust the default number of areas in a memory pool. - * The default size of the memory pool may also change to meet minimum area - * size requirements. - */ -static void pswiotlb_adjust_nareas(unsigned int nareas) -{ - if (!nareas) - nareas = 1; - else if (!is_power_of_2(nareas)) - nareas = roundup_pow_of_two(nareas); - - default_npareas = nareas; - - pr_info("area num %d.\n", nareas); - if (round_up_default_npslabs()) - pr_info("PSWIOTLB bounce buffer size roundup to %luMB", - (default_npslabs << P_IO_TLB_SHIFT) >> 20); -} - -/** - * limit_nareas() - get the maximum number of areas for a given memory pool size - * @nareas: Desired number of areas. - * @nslots: Total number of slots in the memory pool. - * - * Limit the number of areas to the maximum possible number of areas in - * a memory pool of the given size. - * - * Return: Maximum possible number of areas. - */ -static unsigned int limit_nareas(unsigned int nareas, unsigned long nslots) -{ - if (nslots < nareas * P_IO_TLB_SEGSIZE) - return nslots / P_IO_TLB_SEGSIZE; - return nareas; -} - -static int __init -setup_p_io_tlb_npages(char *str) -{ - unsigned long nareas; - - if (!strcmp(str, "forceoff")) { - pswiotlb_force_disable = true; - } else if (isdigit(*str) && !kstrtoul(str, 0, &default_npslabs)) { - default_npslabs = ALIGN(default_npslabs, P_IO_TLB_SEGSIZE); - str = strchr(str, ','); - if (str++ && isdigit(*str) && !kstrtoul(str, 0, &nareas)) - pswiotlb_adjust_nareas(nareas); - } - return 0; -} -early_param("pswiotlb", setup_p_io_tlb_npages); - -static int __init -setup_pswiotlb_passthroughlist(char *str) -{ - char tmp_str[5] = {'\0'}; - unsigned long flags; - int i, j, k; - int ret; - - for (i = 0, j = 0, k = 0; i < strlen(str) + 1; i++) { - if (*(str + i) != ',' && *(str + i) != '\0') { - tmp_str[j++] = *(str + i); - } else { - j = 0; - - ret = kstrtou16(tmp_str, 16, &passthroughlist_entry[k].vendor); - if (ret) - return ret; - - passthroughlist_entry[k].from_grub = true; - - spin_lock_irqsave(&passthroughlist_lock, flags); - list_add_rcu(&passthroughlist_entry[k].node, &passthroughlist); - spin_unlock_irqrestore(&passthroughlist_lock, flags); - - k++; - } - } - - return 0; -} -early_param("pswiotlb_passthroughlist", setup_pswiotlb_passthroughlist); - -unsigned long pswiotlb_size_or_default(void) -{ - return default_npslabs << P_IO_TLB_SHIFT; -} - -void __init pswiotlb_adjust_size(unsigned long size) -{ - if (default_npslabs != P_IO_TLB_DEFAULT_SIZE >> P_IO_TLB_SHIFT) - return; - size = ALIGN(size, P_IO_TLB_SIZE); - default_npslabs = ALIGN(size >> P_IO_TLB_SHIFT, P_IO_TLB_SEGSIZE); - if (round_up_default_npslabs()) - size = default_npslabs << P_IO_TLB_SHIFT; - pr_info("PSWIOTLB bounce buffer size adjusted to %luMB", size >> 20); -} - -void pswiotlb_print_info(int nid) -{ - struct p_io_tlb_pool *mem = &p_io_tlb_default_mem[nid].defpool; - - if (!mem->nslabs) { - pr_warn("No local mem of numa node %d\n", nid); - return; - } - - pr_info("numa %d mapped [mem %pa-%pa] (%luMB)\n", nid, &mem->start, &mem->end, - (mem->nslabs << P_IO_TLB_SHIFT) >> 20); -} 
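[Editor's note: round_up_default_npslabs(), pswiotlb_adjust_nareas() and limit_nareas() above interact through three constraints: the slab count stays a multiple of the segment size, it is rounded to a power of two once areas are configured, and the area count is clamped so every area still holds at least one full segment. The arithmetic, runnable standalone; SEGSIZE and the inputs are example values.]

#include <stdio.h>
#include <stdbool.h>

#define SEGSIZE 128UL   /* slots per segment, as in P_IO_TLB_SEGSIZE */

static bool is_pow2(unsigned long v) { return v && !(v & (v - 1)); }

static unsigned long roundup_pow2(unsigned long v)
{
	unsigned long r = 1;

	while (r < v)
		r <<= 1;
	return r;
}

static unsigned long limit_nareas(unsigned long nareas, unsigned long nslots)
{
	if (nslots < nareas * SEGSIZE)
		return nslots / SEGSIZE;
	return nareas;
}

int main(void)
{
	unsigned long nslabs = 3000, nareas = 16;

	if (nslabs < SEGSIZE * nareas)
		nslabs = SEGSIZE * nareas;
	else if (!is_pow2(nslabs))
		nslabs = roundup_pow2(nslabs);

	nareas = limit_nareas(nareas, nslabs);
	printf("nslabs=%lu nareas=%lu slots/area=%lu\n",
	       nslabs, nareas, nslabs / nareas);
	return 0;
}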
- -static inline unsigned long io_tlb_offset(unsigned long val) -{ - return val & (P_IO_TLB_SEGSIZE - 1); -} - -static inline unsigned long nr_slots(u64 val) -{ - return DIV_ROUND_UP(val, P_IO_TLB_SIZE); -} - -static void pswiotlb_record_mem_range(struct p_io_tlb_mem *mem) -{ - unsigned long start_pfn, end_pfn; - unsigned long min_pfn = (~(phys_addr_t)0 >> PAGE_SHIFT), max_pfn = 0; - int i, nid; - unsigned long total_pfn = 0; - - for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { - pr_info(" node %3d: [mem %#018Lx-%#018Lx]\n", nid, - (u64)start_pfn << PAGE_SHIFT, - ((u64)end_pfn << PAGE_SHIFT) - 1); - if (nid == mem->numa_node_id) { - if (min_pfn > start_pfn) - min_pfn = start_pfn; - if (max_pfn < end_pfn) - max_pfn = end_pfn; - total_pfn += end_pfn - start_pfn + 1; - } - } - - mem->node_min_addr = (u64)min_pfn << PAGE_SHIFT; - mem->node_max_addr = ((u64)max_pfn << PAGE_SHIFT) - 1; - mem->node_total_mem = (u64)total_pfn << PAGE_SHIFT; -} - -static void pswiotlb_init_io_tlb_pool(struct p_io_tlb_pool *mem, int nid, phys_addr_t start, - unsigned long npslabs, bool late_alloc, unsigned int nareas) -{ - void *vaddr = phys_to_virt(start); - unsigned long bytes = npslabs << P_IO_TLB_SHIFT, i; - - mem->nslabs = npslabs; - mem->start = start; - mem->end = mem->start + bytes; - mem->late_alloc = late_alloc; - mem->numa_node_id = nid; - mem->nareas = nareas; - mem->area_nslabs = npslabs / mem->nareas; - mem->free_th = PSWIOTLB_FREE_THRESHOLD; - - for (i = 0; i < mem->nareas; i++) { - spin_lock_init(&mem->areas[i].lock); - mem->areas[i].index = 0; - mem->areas[i].used = 0; - } - - for (i = 0; i < mem->nslabs; i++) { - mem->slots[i].list = P_IO_TLB_SEGSIZE - io_tlb_offset(i); - mem->slots[i].orig_addr = INVALID_PHYS_ADDR; - mem->slots[i].alloc_size = 0; - } - memset(vaddr, 0, bytes); - mem->vaddr = vaddr; -} - -/** - * add_mem_pool() - add a memory pool to the allocator - * @mem: Phytium software IO TLB allocator. - * @pool: Memory pool to be added. - */ -static void add_mem_pool(struct p_io_tlb_mem *mem, struct p_io_tlb_pool *pool) -{ - spin_lock(&mem->lock); - if (mem->capacity != mem->whole_size) { - mem->pool_addr[mem->whole_size] = mem->pool_addr[mem->capacity]; - mem->pool_addr[mem->capacity] = pool; - } else { - mem->pool_addr[mem->capacity] = pool; - } - /* prevent any other writes prior to this time */ - smp_wmb(); - mem->capacity++; - mem->whole_size++; - mem->nslabs += pool->nslabs; - spin_unlock(&mem->lock); -} - -static void __init *pswiotlb_memblock_alloc(unsigned long npslabs, - int nid, unsigned int flags, - int (*remap)(void *tlb, unsigned long npslabs)) -{ - size_t bytes = PAGE_ALIGN(npslabs << P_IO_TLB_SHIFT); - void *tlb; - - tlb = memblock_alloc_node(bytes, PAGE_SIZE, nid); - - if (!tlb) { - pr_warn("%s: Failed to allocate %zu bytes tlb structure\n", - __func__, bytes); - return NULL; - } - - if (remap && remap(tlb, npslabs) < 0) { - memblock_free(tlb, PAGE_ALIGN(bytes)); - pr_warn("%s: Failed to remap %zu bytes\n", __func__, bytes); - return NULL; - } - - return tlb; -} - -static void check_if_pswiotlb_in_local_node(struct p_io_tlb_mem *mem, - struct p_io_tlb_pool *pool) -{ - if ((pool->start < mem->node_min_addr) || - pool->end > mem->node_max_addr) { - mem->nslabs = 0; - pool->nslabs = 0; - } -} - -/* - * Statically reserve bounce buffer space and initialize bounce buffer data - * structures for the Phytium software IO TLB used to implement the DMA API. 
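[Editor's note: the slot initialisation in pswiotlb_init_io_tlb_pool() below stores, in each slot's list field, the count of contiguous free slots from that slot to the end of its P_IO_TLB_SEGSIZE segment; the slot-search path later consumes these counts. Demonstrated with a toy 8-slot segment.]

#include <stdio.h>

#define SEGSIZE 8UL   /* toy segment; the kernel value is larger */

static unsigned long io_tlb_offset(unsigned long i)
{
	return i & (SEGSIZE - 1);
}

int main(void)
{
	unsigned long nslabs = 16;

	for (unsigned long i = 0; i < nslabs; i++)
		printf("slot %2lu: %lu contiguous free slots to segment end\n",
		       i, SEGSIZE - io_tlb_offset(i));
	return 0;
}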
- */ -void __init pswiotlb_init_remap(bool addressing_limit, int nid, unsigned int flags, - int (*remap)(void *tlb, unsigned long npslabs)) -{ - struct p_io_tlb_pool *mem = &p_io_tlb_default_mem[nid].defpool; - unsigned long npslabs; - unsigned int nareas; - size_t alloc_size; - void *tlb; - - if (!addressing_limit) - return; - if (pswiotlb_force_disable) - return; - - if (!remap) - p_io_tlb_default_mem[nid].can_grow = true; - p_io_tlb_default_mem[nid].phys_limit = virt_to_phys(high_memory - 1); - - if (!default_npareas) - pswiotlb_adjust_nareas(num_possible_cpus()); - - npslabs = default_npslabs; - nareas = limit_nareas(default_npareas, npslabs); - while ((tlb = pswiotlb_memblock_alloc(npslabs, nid, flags, remap)) == NULL) { - if (npslabs <= P_IO_TLB_MIN_SLABS) - return; - npslabs = ALIGN(npslabs >> 1, P_IO_TLB_SEGSIZE); - nareas = limit_nareas(nareas, npslabs); - } - - if (default_npslabs != npslabs) { - pr_info("PSWIOTLB bounce buffer size adjusted %lu -> %lu slabs", - default_npslabs, npslabs); - default_npslabs = npslabs; - } - - alloc_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), npslabs)); - mem->slots = memblock_alloc(alloc_size, PAGE_SIZE); - if (!mem->slots) { - pr_warn("%s: Failed to allocate %zu bytes align=0x%lx\n", - __func__, alloc_size, PAGE_SIZE); - return; - } - - mem->areas = memblock_alloc(array_size(sizeof(struct p_io_tlb_area), - nareas), SMP_CACHE_BYTES); - if (!mem->areas) { - pr_warn("%s: Failed to allocate mem->areas.\n", __func__); - return; - } - - pswiotlb_init_io_tlb_pool(mem, nid, __pa(tlb), npslabs, false, nareas); - add_mem_pool(&p_io_tlb_default_mem[nid], mem); - check_if_pswiotlb_in_local_node(&p_io_tlb_default_mem[nid], mem); - - if (flags & PSWIOTLB_VERBOSE) - pswiotlb_print_info(nid); -} -/** - * pswiotlb_free_tlb() - free a dynamically allocated Phytium IO TLB buffer - * @vaddr: Virtual address of the buffer. - * @bytes: Size of the buffer. - */ -static void pswiotlb_free_tlb(void *vaddr, size_t bytes) -{ - if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) && - dma_free_from_pool(NULL, vaddr, bytes)) - return; - - /* Intentional leak if pages cannot be encrypted again. */ - if (!set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes))) - __free_pages(virt_to_page(vaddr), get_order(bytes)); -} -/** - * pswiotlb_alloc_tlb() - allocate a dynamic Phytium IO TLB buffer - * @dev: Device for which a memory pool is allocated. - * @bytes: Size of the buffer. - * @phys_limit: Maximum allowed physical address of the buffer. - * @gfp: GFP flags for the allocation. - * - * Return: Allocated pages, or %NULL on allocation failure. - */ -static struct page *pswiotlb_alloc_tlb(struct device *dev, int nid, size_t bytes, - u64 phys_limit, gfp_t gfp) -{ - struct page *page; - - /* - * Allocate from the atomic pools if memory is encrypted and - * the allocation is atomic, because decrypting may block. 
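[Editor's note: pswiotlb_init_remap() above shrinks on failure rather than giving up: halve the slab count, keeping it segment-aligned, and retry until the allocation fits or the floor is reached. The loop in isolation, with a fake allocator that only admits 4096 slabs; all constants are example values.]

#include <stdio.h>
#include <stdbool.h>

#define SEGSIZE   128UL
#define MIN_SLABS 512UL
#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

static bool try_alloc(unsigned long nslabs)
{
	return nslabs <= 4096;       /* pretend only <= 4096 slabs fit */
}

int main(void)
{
	unsigned long nslabs = 32768;

	while (!try_alloc(nslabs)) {
		if (nslabs <= MIN_SLABS)
			return 1;    /* give up, as the init path does */
		nslabs = ALIGN_UP(nslabs >> 1, SEGSIZE);
	}
	printf("settled on %lu slabs\n", nslabs);
	return 0;
}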
- */ - if (!gfpflags_allow_blocking(gfp) && dev && force_dma_unencrypted(dev)) { - void *vaddr; - - if (!IS_ENABLED(CONFIG_DMA_COHERENT_POOL)) - return NULL; - - return dma_alloc_from_pool(dev, bytes, &vaddr, gfp, - pswiotlb_dma_coherent_ok); - } - - gfp &= ~GFP_ZONEMASK; - if (phys_limit <= DMA_BIT_MASK(zone_dma_bits)) - gfp |= __GFP_DMA; - else if (phys_limit <= DMA_BIT_MASK(32)) - gfp |= __GFP_DMA32; - - while ((page = alloc_dma_pages(nid, gfp, bytes)) && - page_to_phys(page) + bytes - 1 > phys_limit) { - /* allocated, but too high */ - __free_pages(page, get_order(bytes)); - - if (IS_ENABLED(CONFIG_ZONE_DMA32) && - phys_limit < DMA_BIT_MASK(64) && - !(gfp & (__GFP_DMA32 | __GFP_DMA))) - gfp |= __GFP_DMA32; - else if (IS_ENABLED(CONFIG_ZONE_DMA) && - !(gfp & __GFP_DMA)) - gfp = (gfp & ~__GFP_DMA32) | __GFP_DMA; - else - return NULL; - } - - return page; -} -/** - * pswiotlb_alloc_pool() - allocate a new Phytium IO TLB memory pool - * @dev: Device for which a memory pool is allocated. - * @minslabs: Minimum number of slabs. - * @nslabs: Desired (maximum) number of slabs. - * @nareas: Number of areas. - * @phys_limit: Maximum DMA buffer physical address. - * @gfp: GFP flags for the allocations. - * - * Allocate and initialize a new Phytium IO TLB memory pool. The actual number of - * slabs may be reduced if allocation of @nslabs fails. If even - * @minslabs cannot be allocated, this function fails. - * - * Return: New memory pool, or %NULL on allocation failure. - */ -static struct p_io_tlb_pool *pswiotlb_alloc_pool(struct device *dev, - int nid, unsigned long minslabs, unsigned long nslabs, - unsigned int nareas, u64 phys_limit, bool transient, gfp_t gfp) -{ - struct p_io_tlb_pool *pool; - unsigned int slot_order; - struct page *tlb; - size_t pool_size; - size_t tlb_size; - - if (nslabs > SLABS_PER_PAGE << MAX_ORDER) { - nslabs = SLABS_PER_PAGE << MAX_ORDER; - nareas = limit_nareas(nareas, nslabs); - } - - pool_size = sizeof(*pool) + array_size(sizeof(*pool->areas), nareas); - pool = kzalloc(pool_size, gfp); - if (!pool) - goto error; - pool->areas = (void *)pool + sizeof(*pool); - - if (!transient) { - nslabs = ALIGN(nslabs >> 1, P_IO_TLB_SEGSIZE); - nareas = limit_nareas(nareas, nslabs); - } - tlb_size = nslabs << P_IO_TLB_SHIFT; - while (!(tlb = pswiotlb_alloc_tlb(dev, nid, tlb_size, phys_limit, gfp))) { - if (nslabs <= minslabs) - goto error_tlb; - nslabs = ALIGN(nslabs >> 1, P_IO_TLB_SEGSIZE); - nareas = limit_nareas(nareas, nslabs); - tlb_size = nslabs << P_IO_TLB_SHIFT; - } - if (page_to_nid(tlb) != nid) - goto error_slots; - - slot_order = get_order(array_size(sizeof(*pool->slots), nslabs)); - pool->slots = (struct p_io_tlb_slot *) - __get_free_pages(gfp, slot_order); - if (!pool->slots) - goto error_slots; - - pswiotlb_init_io_tlb_pool(pool, nid, page_to_phys(tlb), nslabs, true, nareas); - return pool; - -error_slots: - pswiotlb_free_tlb(page_address(tlb), tlb_size); -error_tlb: - kfree(pool); -error: - return NULL; -} -static void pswiotlb_prepare_release_pool(struct p_io_tlb_mem *mem, - struct p_io_tlb_pool *pool, int pool_idx) -{ - int capacity; - - spin_lock(&mem->lock); - capacity = mem->capacity; - mem->pool_addr[pool_idx] = mem->pool_addr[capacity - 1]; - mem->pool_addr[capacity - 1] = pool; - mem->capacity--; - mem->nslabs -= pool->nslabs; - spin_unlock(&mem->lock); -} -static void pswiotlb_release_pool(struct p_io_tlb_mem *mem, - struct p_io_tlb_pool *pool, int pool_idx) -{ - unsigned int bytes = pool->nslabs * P_IO_TLB_SIZE; - unsigned int order = get_order(bytes); - 
struct page *page_start; - size_t slots_size = array_size(sizeof(*pool->slots), pool->nslabs); - int pool_idx1; - - spin_lock(&mem->lock); - pool_idx1 = mem->whole_size - 1; - mem->pool_addr[pool_idx] = mem->pool_addr[pool_idx1]; - mem->whole_size--; - spin_unlock(&mem->lock); - - bitmap_free(pool->busy_record); - free_pages((unsigned long)pool->slots, get_order(slots_size)); - page_start = pfn_to_page(PFN_DOWN(pool->start)); - __free_pages(page_start, order); - kfree(pool); -} -static void pswiotlb_monitor_service(struct timer_list *timer) -{ - int i, j, pool_idx; - struct p_io_tlb_pool *pool; - struct p_io_tlb_mem *mem; - int capacity, whole_size; - - for (i = 0; i < pswiotlb_node_num; i++) { - mem = &p_io_tlb_default_mem[i]; - whole_size = mem->whole_size; - capacity = mem->capacity; - rcu_read_lock(); - for (pool_idx = 1; pool_idx < whole_size; pool_idx++) { - pool = mem->pool_addr[pool_idx]; - for (j = 0; j < DIV_ROUND_UP(pool->nareas, BITS_PER_LONG); j++) { - if (*(pool->busy_record + j) != 0) { - pool->busy_flag = true; - break; - } - pool->busy_flag = false; - } - if (!pool->busy_flag) - pool->free_cnt++; - else - pool->free_cnt = 0; - if (pool->free_cnt >= pool->free_th && pool_idx < capacity) { - pswiotlb_prepare_release_pool(mem, pool, pool_idx); - capacity--; - } - if (pool->free_cnt >= 2 * pool->free_th && !pool->busy_flag) { - pswiotlb_release_pool(mem, pool, pool_idx); - whole_size--; - } - } - rcu_read_unlock(); - } - - mod_timer(timer, jiffies + 2 * HZ); -} -static struct p_io_tlb_pool *pswiotlb_formal_alloc(struct device *dev, - struct p_io_tlb_mem *mem) -{ - struct p_io_tlb_pool *pool; - - pool = pswiotlb_alloc_pool(dev, mem->numa_node_id, - P_IO_TLB_MIN_SLABS, dynamic_inc_thr_npslabs, - dynamic_inc_thr_npslabs, mem->phys_limit, - 0, GFP_NOWAIT | __GFP_NOWARN); - if (!pool) { - pr_warn_once("Failed to allocate new formal pool"); - return NULL; - } - - pool->busy_record = bitmap_zalloc(pool->nareas, GFP_KERNEL); - if (!pool->busy_record) { - pr_warn_ratelimited("%s: Failed to allocate pool busy record.\n", __func__); - return NULL; - } - - add_mem_pool(mem, pool); - - return pool; -} - -/** - * pswiotlb_dyn_free() - RCU callback to free a memory pool - * @rcu: RCU head in the corresponding struct p_io_tlb_pool. 
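[Editor's note: pswiotlb_monitor_service() above applies a two-stage idle policy: after free_th quiet timer ticks a pool is retired from the allocatable range (capacity), and after twice that, still idle, its memory is actually released. The policy on plain integers; FREE_TH is an example value.]

#include <stdio.h>
#include <stdbool.h>

#define FREE_TH 5

struct pool { int free_cnt; bool busy, retired, freed; };

static void tick(struct pool *p)
{
	if (p->busy)
		p->free_cnt = 0;
	else
		p->free_cnt++;

	if (p->free_cnt >= FREE_TH && !p->retired) {
		p->retired = true;   /* no longer used for new mappings */
		printf("pool retired at tick count %d\n", p->free_cnt);
	}
	if (p->free_cnt >= 2 * FREE_TH && !p->busy && !p->freed) {
		p->freed = true;     /* memory handed back */
		printf("pool freed at tick count %d\n", p->free_cnt);
	}
}

int main(void)
{
	struct pool p = { 0 };

	for (int t = 0; t < 12 && !p.freed; t++)
		tick(&p);
	return 0;
}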
- */ -static void pswiotlb_dyn_free(struct rcu_head *rcu) -{ - struct p_io_tlb_pool *pool = container_of(rcu, struct p_io_tlb_pool, rcu); - size_t slots_size = array_size(sizeof(*pool->slots), pool->nslabs); - size_t tlb_size = pool->end - pool->start; - - free_pages((unsigned long)pool->slots, get_order(slots_size)); - pswiotlb_free_tlb(pool->vaddr, tlb_size); - kfree(pool); -} -static void pswiotlb_init_tlb_mem_dynamic(struct p_io_tlb_mem *mem, int nid) -{ - spin_lock_init(&mem->lock); - mem->capacity = 0; - mem->whole_size = 0; - mem->numa_node_id = nid; -} - -bool pswiotlb_is_dev_in_passthroughlist(struct pci_dev *dev) -{ - struct pswiotlb_passthroughlist *bl_entry; - - rcu_read_lock(); - list_for_each_entry_rcu(bl_entry, &passthroughlist, node) { - if (bl_entry->vendor == dev->vendor) { - rcu_read_unlock(); - goto out; - } - } - rcu_read_unlock(); - - return true; -out: - return false; -} - -static void pswiotlb_show_passthroughlist(void) -{ - struct pswiotlb_passthroughlist *bl_entry; - - pr_info("Devices from the following vendors are temporarily incompatible with pswiotlb and bypass it:\n"); - rcu_read_lock(); - list_for_each_entry_rcu(bl_entry, &passthroughlist, node) - printk(KERN_INFO "0x%06x", bl_entry->vendor); - rcu_read_unlock(); -} - -static void __init pswiotlb_passthroughlist_init(void) -{ - int dev_num = 0; - int i; - size_t alloc_size; - struct pswiotlb_passthroughlist *passthroughlist_array; - - spin_lock_init(&passthroughlist_lock); - - for (i = 0; ps_passthroughlist[i].vendor != 0; i++) - dev_num++; - - alloc_size = PAGE_ALIGN(array_size(sizeof(struct pswiotlb_passthroughlist), dev_num)); - passthroughlist_array = memblock_alloc(alloc_size, PAGE_SIZE); - if (!passthroughlist_array) { - pr_warn("%s: Failed to allocate memory for passthroughlist\n", - __func__); - return; - } - - for (i = 0; i < dev_num; i++) { - passthroughlist_array[i].vendor = ps_passthroughlist[i].vendor; - passthroughlist_array[i].device = ps_passthroughlist[i].device; - - spin_lock(&passthroughlist_lock); - list_add_rcu(&passthroughlist_array[i].node, &passthroughlist); - spin_unlock(&passthroughlist_lock); - } - - pswiotlb_show_passthroughlist(); -} - -/* - * Statically reserve bounce buffer space and initialize bounce buffer data - * structures for the software IO TLB used to implement the DMA API. - */ -void __init pswiotlb_init(bool addressing_limit, unsigned int flags) -{ - int i; - int nid; - unsigned long start_pfn, end_pfn; - - /* Get the number of NUMA nodes */ - for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid); - pswiotlb_node_num = nid + 1; - pr_info("Total number of numa nodes is %d\n", pswiotlb_node_num); - for (i = 0; i < pswiotlb_node_num; i++) { - struct p_io_tlb_mem *mem = &p_io_tlb_default_mem[i]; - - pswiotlb_init_tlb_mem_dynamic(mem, i); - pswiotlb_record_mem_range(mem); - pr_info(" node %3d memory range: [%#018Lx-%#018Lx], total memory: %ldMB\n", - i, mem->node_min_addr, mem->node_max_addr, - mem->node_total_mem >> 20); - } - /* Get P TLB memory according to numa node id */ - for (i = 0; i < pswiotlb_node_num; i++) - pswiotlb_init_remap(addressing_limit, i, flags, NULL); - - pswiotlb_passthroughlist_init(); -} - -/** - * alloc_dma_pages() - allocate pages to be used for DMA - * @nid: NUMA node from which to allocate. - * @gfp: GFP flags for the allocation. - * @bytes: Size of the buffer. - * - * Allocate pages from the buddy allocator. If successful, make the allocated - * pages decrypted so that they can be used for DMA. - * - * Return: Decrypted pages, or %NULL on failure. 
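[Editor's note: the pass-through list above reduces to a vendor-ID membership test, with listed devices skipping pswiotlb entirely; note that pswiotlb_is_dev_in_passthroughlist() returns false for a listed vendor. A compact model follows; apart from NVIDIA's 0x10de, the IDs are placeholders, not the values behind the BL_PCI_VENDOR_ID_* macros.]

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

/* zero-terminated; 0x10de is NVIDIA, the rest are placeholders */
static const uint16_t passthrough_vendors[] = { 0x10de, 0x1e3e, 0x9999, 0 };

static bool uses_pswiotlb(uint16_t vendor)
{
	for (int i = 0; passthrough_vendors[i]; i++)
		if (passthrough_vendors[i] == vendor)
			return false;  /* listed: bypass the bounce buffers */
	return true;
}

int main(void)
{
	printf("0x10de -> %s\n", uses_pswiotlb(0x10de) ? "pswiotlb" : "passthrough");
	printf("0x8086 -> %s\n", uses_pswiotlb(0x8086) ? "pswiotlb" : "passthrough");
	return 0;
}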
- */ -static struct page *alloc_dma_pages(int nid, gfp_t gfp, size_t bytes) -{ - unsigned int order = get_order(bytes); - struct page *page; - void *vaddr; - - page = alloc_pages_node(nid, gfp, order); - if (!page) - return NULL; - - vaddr = page_address(page); - if (set_memory_decrypted((unsigned long)vaddr, PFN_UP(bytes))) - goto error; - return page; - -error: - __free_pages(page, order); - return NULL; -} - -/** - * pswiotlb_find_pool() - find the Phytium IO TLB pool for a physical address - * @dev: Device which has mapped the DMA buffer. - * @paddr: Physical address within the DMA buffer. - * - * Find the Phytium IO TLB memory pool descriptor which contains the given physical - * address, if any. - * - * Return: Memory pool which contains @paddr, or %NULL if none. - */ -struct p_io_tlb_pool *pswiotlb_find_pool(struct device *dev, int nid, phys_addr_t paddr) -{ - struct p_io_tlb_mem *mem = &dev->dma_p_io_tlb_mem[nid]; - struct p_io_tlb_pool *pool; - int i; - int whole_size; - - /* prevent any other reads prior to this time */ - smp_rmb(); - whole_size = mem->whole_size; - rcu_read_lock(); - for (i = 0; i < whole_size; i++) { - pool = mem->pool_addr[i]; - if (paddr >= pool->start && paddr < pool->end) - goto out; - } - - pool = NULL; -out: - rcu_read_unlock(); - return pool; -} - -/** - * pswiotlb_dev_init() - initialize pswiotlb fields in &struct device - * @dev: Device to be initialized. - */ -void pswiotlb_dev_init(struct device *dev) -{ - dev->dma_uses_p_io_tlb = false; -} - -void pswiotlb_store_local_node(struct pci_dev *dev, struct pci_bus *bus) -{ - int nid; - struct p_io_tlb_pool *defpool; - struct p_io_tlb_mem *mem; - - dev->dev.local_node = pcibus_to_node(bus); - /* register pswiotlb resources */ - dev->dev.dma_p_io_tlb_mem = p_io_tlb_default_mem; - nid = dev->dev.local_node; - defpool = &dev->dev.dma_p_io_tlb_mem[nid].defpool; - mem = &dev->dev.dma_p_io_tlb_mem[nid]; - pci_info(dev, "numa node: %d, pswiotlb defpool range: [%#018Lx-%#018Lx]\n" - "local node range: [%#018Lx-%#018Lx]\n", nid, - defpool->start, defpool->end, mem->node_min_addr, mem->node_max_addr); -} -/* - * Return the offset into a pswiotlb slot required to keep the device happy. - */ -static unsigned int pswiotlb_align_offset(struct device *dev, u64 addr) -{ - return addr & dma_get_min_align_mask(dev) & (P_IO_TLB_SIZE - 1); -} -/* - * Bounce: copy the pswiotlb buffer from or back to the original dma location - */ -static void pswiotlb_bounce(struct device *dev, int nid, phys_addr_t tlb_addr, size_t size, - enum dma_data_direction dir, struct p_io_tlb_pool *mem) -{ - int index = (tlb_addr - mem->start) >> P_IO_TLB_SHIFT; - phys_addr_t orig_addr = mem->slots[index].orig_addr; - size_t alloc_size = mem->slots[index].alloc_size; - unsigned long pfn = PFN_DOWN(orig_addr); - unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start; - unsigned int tlb_offset, orig_addr_offset; - - if (orig_addr == INVALID_PHYS_ADDR) - return; - - tlb_offset = tlb_addr & (P_IO_TLB_SIZE - 1); - orig_addr_offset = pswiotlb_align_offset(dev, orig_addr); - if (tlb_offset < orig_addr_offset) { - dev_WARN_ONCE(dev, 1, - "Access before mapping start detected. orig offset %u, requested offset %u.\n", - orig_addr_offset, tlb_offset); - return; - } - - tlb_offset -= orig_addr_offset; - if (tlb_offset > alloc_size) { - dev_WARN_ONCE(dev, 1, - "Buffer overflow detected. Allocation size: %zu. 
Mapping size: %zu+%u.\n", - alloc_size, size, tlb_offset); - return; - } - - orig_addr += tlb_offset; - alloc_size -= tlb_offset; - - if (size > alloc_size) { - dev_WARN_ONCE(dev, 1, - "Buffer overflow detected. Allocation size: %zu. Mapping size: %zu.\n", - alloc_size, size); - size = alloc_size; - } - - if (PageHighMem(pfn_to_page(pfn))) { - unsigned int offset = orig_addr & ~PAGE_MASK; - struct page *page; - unsigned int sz = 0; - unsigned long flags; - - dev_info(dev, "%s line=%d !!!!!!HighMem!!!!!! dir: %d, tlb_addr: %#018Lx, size: %#lx\n", - __func__, __LINE__, dir, tlb_addr, size); - - while (size) { - sz = min_t(size_t, PAGE_SIZE - offset, size); - - local_irq_save(flags); - page = pfn_to_page(pfn); - if (dir == DMA_TO_DEVICE) - memcpy_from_page(vaddr, page, offset, sz); - else - memcpy_to_page(page, offset, vaddr, sz); - local_irq_restore(flags); - - size -= sz; - pfn++; - vaddr += sz; - offset = 0; - } - } else if (dir == DMA_TO_DEVICE) { - memcpy(vaddr, phys_to_virt(orig_addr), size); - } else { - memcpy(phys_to_virt(orig_addr), vaddr, size); - } -} -static inline phys_addr_t slot_addr(phys_addr_t start, phys_addr_t idx) -{ - return start + (idx << P_IO_TLB_SHIFT); -} -/* - * Carefully handle integer overflow which can occur when boundary_mask == ~0UL. - */ -static inline unsigned long get_max_slots(unsigned long boundary_mask) -{ - return (boundary_mask >> P_IO_TLB_SHIFT) + 1; -} - -static unsigned int wrap_area_index(struct p_io_tlb_pool *mem, unsigned int index) -{ - if (index >= mem->area_nslabs) - return 0; - return index; -} - -/** - * pswiotlb_area_find_slots() - search for slots in one Phytium IO TLB memory area - * @dev: Device which maps the buffer. - * @pool: Memory pool to be searched. - * @area_index: Index of the Phytium IO TLB memory area to be searched. - * @orig_addr: Original (non-bounced) Phytium IO buffer address. - * @alloc_size: Total requested size of the bounce buffer, - * including initial alignment padding. - * @alloc_align_mask: Required alignment of the allocated buffer. - * - * Find a suitable sequence of Phytium IO TLB entries for the request and allocate - * a buffer from the given Phytium IO TLB memory area. - * This function takes care of locking. - * - * Return: Index of the first allocated slot, or -1 on error. - */ -static int pswiotlb_area_find_slots(struct device *dev, int nid, struct p_io_tlb_pool *pool, - int area_index, phys_addr_t orig_addr, size_t alloc_size, - unsigned int alloc_align_mask) -{ - struct p_io_tlb_area *area = pool->areas + area_index; - unsigned long boundary_mask = dma_get_seg_boundary(dev); - dma_addr_t tbl_dma_addr = - phys_to_dma_unencrypted(dev, pool->start) & boundary_mask; - unsigned long max_slots = get_max_slots(boundary_mask); - unsigned int iotlb_align_mask = - dma_get_min_align_mask(dev) | alloc_align_mask; - unsigned int nslots = nr_slots(alloc_size), stride; - unsigned int offset = pswiotlb_align_offset(dev, orig_addr); - unsigned int index, slots_checked, count = 0, i; - unsigned long flags; - unsigned int slot_base; - unsigned int slot_index; - - WARN_ON(!nslots); - WARN_ON(area_index >= pool->nareas); - - /* - * For allocations of PAGE_SIZE or larger only look for page aligned - * allocations. - */ - if (alloc_size >= PAGE_SIZE) - iotlb_align_mask |= ~PAGE_MASK; - iotlb_align_mask &= ~(P_IO_TLB_SIZE - 1); - - /* - * For mappings with an alignment requirement don't bother looping to - * unaligned slots once we found an aligned one. 
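The offset and stride arithmetic used by the slot search can be checked in isolation. A small userspace sketch, assuming the usual 2 KiB slot size (shift of 11) and a hypothetical device with a 4 KiB min-align mask:

#include <stdio.h>

#define IO_TLB_SHIFT	11
#define IO_TLB_SIZE	(1UL << IO_TLB_SHIFT)	/* 2 KiB slots */

/* Offset into a slot that preserves the device's low address bits. */
static unsigned int align_offset(unsigned long min_align_mask,
				 unsigned long long addr)
{
	return addr & min_align_mask & (IO_TLB_SIZE - 1);
}

int main(void)
{
	unsigned long mask = 0xfffUL;	/* 4 KiB min-align mask */

	/* Buffer at offset 0x234 inside its 4 KiB page: the bounce
	 * copy starts at the same sub-slot offset, so the low bits
	 * the device cares about are unchanged. Prints 0x234. */
	printf("offset = %#x\n", align_offset(mask, 0x80001234ULL));

	/* Slots to skip between aligned candidates: prints 2, i.e.
	 * every second 2 KiB slot is 4 KiB aligned. */
	printf("stride = %lu\n", (mask >> IO_TLB_SHIFT) + 1);
	return 0;
}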
- */ - stride = (iotlb_align_mask >> P_IO_TLB_SHIFT) + 1; - - if (spin_trylock_irqsave(&area->lock, flags)) { - if (unlikely(nslots > pool->area_nslabs - area->used)) - goto not_found; - - slot_base = area_index * pool->area_nslabs; - index = area->index; - - for (slots_checked = 0; slots_checked < pool->area_nslabs;) { - slot_index = slot_base + index; - - if (orig_addr && - (slot_addr(tbl_dma_addr, slot_index) & - iotlb_align_mask) != (orig_addr & iotlb_align_mask)) { - index = wrap_area_index(pool, index + 1); - slots_checked++; - continue; - } - - if (!iommu_is_span_boundary(slot_index, nslots, - nr_slots(tbl_dma_addr), - max_slots)) { - if (pool->slots[slot_index].list >= nslots) - goto found; - } - index = wrap_area_index(pool, index + stride); - slots_checked += stride; - } - } else { - return -1; - } - -not_found: - spin_unlock_irqrestore(&area->lock, flags); - return -1; - -found: - /* - * If we find a slot that indicates we have 'nslots' number of - * contiguous buffers, we allocate the buffers from that slot onwards - * and set the list of free entries to '0' indicating unavailable. - */ - for (i = slot_index; i < slot_index + nslots; i++) { - pool->slots[i].list = 0; - pool->slots[i].alloc_size = alloc_size - (offset + - ((i - slot_index) << P_IO_TLB_SHIFT)); - } - for (i = slot_index - 1; - io_tlb_offset(i) != P_IO_TLB_SEGSIZE - 1 && - pool->slots[i].list; i--) - pool->slots[i].list = ++count; - - /* - * Update the indices to avoid searching in the next round. - */ - area->index = wrap_area_index(pool, index + nslots); - area->used += nslots; - spin_unlock_irqrestore(&area->lock, flags); - - return slot_index; -} - -/** - * pswiotlb_pool_find_slots() - search for slots in one memory pool - * @dev: Device which maps the buffer. - * @pool: Memory pool to be searched. - * @orig_addr: Original (non-bounced)Phytium IO buffer address. - * @alloc_size: Total requested size of the bounce buffer, - * including initial alignment padding. - * @alloc_align_mask: Required alignment of the allocated buffer. - * - * Search through one memory pool to find a sequence of slots that match the - * allocation constraints. - * - * Return: Index of the first allocated slot, or -1 on error. - */ -static int pswiotlb_pool_find_slots(struct device *dev, int nid, struct p_io_tlb_pool *pool, - phys_addr_t orig_addr, size_t alloc_size, - unsigned int alloc_align_mask) -{ - int start = raw_smp_processor_id() & (pool->nareas - 1); - int i = start, index; - - do { - index = pswiotlb_area_find_slots(dev, nid, pool, i, orig_addr, - alloc_size, alloc_align_mask); - if (index >= 0) { - if ((pool != &p_io_tlb_default_mem[nid].defpool) && - !pool->transient) { - bitmap_set(pool->busy_record, i, 1); - } - return index; - } - if (++i >= pool->nareas) - i = 0; - } while (i != start); - - return -1; -} - -/** - * pswiotlb_find_slots() - search for slots in the whole pswiotlb - * @dev: Device which maps the buffer. - * @orig_addr: Original (non-bounced) Phytium IO buffer address. - * @alloc_size: Total requested size of the bounce buffer, - * including initial alignment padding. - * @alloc_align_mask: Required alignment of the allocated buffer. - * @retpool: Used memory pool, updated on return. - * - * Search through the whole Phytium software IO TLB to find a sequence of slots that - * match the allocation constraints. - * - * Return: Index of the first allocated slot, or -1 on error. 
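The fit test above relies on slots[i].list encoding the length of the free run that starts at slot i, with 0 meaning the slot is in use. A toy userspace illustration of how the search reads that encoding:

#include <stdio.h>

#define SEGSIZE 8

int main(void)
{
	/* list[i] = free slots from i to the end of its run; 0 = busy. */
	int list[SEGSIZE] = { 3, 2, 1, 0, 0, 3, 2, 1 };
	int nslots = 3, i;

	for (i = 0; i < SEGSIZE; i++) {
		/* A single comparison answers "do nslots contiguous
		 * free slots start here?". Prints 0, then 5. */
		if (list[i] >= nslots)
			printf("fit at slot %d\n", i);
	}
	return 0;
}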
- */ -static int pswiotlb_find_slots(struct device *dev, int nid, phys_addr_t orig_addr, - size_t alloc_size, unsigned int alloc_align_mask, - struct p_io_tlb_pool **retpool) -{ - struct p_io_tlb_mem *mem = &dev->dma_p_io_tlb_mem[nid]; - struct p_io_tlb_pool *pool; - int index; - int try_pool_idx; - int i; - int cpuid; - int current_ratio; - unsigned long pswiotlb_mem; - unsigned long nslabs_per_pool = dynamic_inc_thr_npslabs; - - cpuid = raw_smp_processor_id(); - - rcu_read_lock(); -#ifndef CONFIG_ARM64_4K_PAGES - for (i = 0; i < 15; i++) { - if (i == 0) { - pool = mem->pool_addr[0]; - index = pswiotlb_pool_find_slots(dev, nid, pool, orig_addr, - alloc_size, alloc_align_mask); - } else if (i == 1 && mem->capacity > (cpuid + 1)) { - pool = mem->pool_addr[cpuid + 1]; - index = pswiotlb_pool_find_slots(dev, nid, pool, orig_addr, - alloc_size, alloc_align_mask); - } else { - try_pool_idx = get_random_u32() % mem->capacity; - pool = mem->pool_addr[try_pool_idx]; - index = pswiotlb_pool_find_slots(dev, nid, pool, orig_addr, - alloc_size, alloc_align_mask); - } - - if (index >= 0) { - rcu_read_unlock(); - goto found; - } - } -#else - for (i = 0; i < 15; i++) { - try_pool_idx = get_random_u32() % mem->capacity; - pool = mem->pool_addr[try_pool_idx]; - index = pswiotlb_pool_find_slots(dev, nid, pool, orig_addr, - alloc_size, alloc_align_mask); - - if (index >= 0) { - rcu_read_unlock(); - goto found; - } - } -#endif - rcu_read_unlock(); - if (nslabs_per_pool > SLABS_PER_PAGE << MAX_ORDER) - nslabs_per_pool = SLABS_PER_PAGE << MAX_ORDER; - - nslabs_per_pool = ALIGN(nslabs_per_pool >> 1, P_IO_TLB_SEGSIZE); - pswiotlb_mem = P_IO_TLB_DEFAULT_SIZE + - (nslabs_per_pool << P_IO_TLB_SHIFT) * (mem->whole_size - 1); - current_ratio = (pswiotlb_mem * 100 + mem->node_total_mem / 2) / mem->node_total_mem; - if (current_ratio >= P_IO_TLB_EXT_WATERMARK) { - dev_warn_once(dev, "Total pswiotlb (%ld MB) exceeds the watermark (%d%%)\n" - "of memory (%ld MB) in node %d, pswiotlb expansion is prohibited.\n", - pswiotlb_mem >> 20, P_IO_TLB_EXT_WATERMARK, - mem->node_total_mem >> 20, nid); - return -1; - } - - if (!mem->can_grow) - return -1; - - pool = pswiotlb_formal_alloc(dev, mem); - if (!pool) - return -1; - - /* retry */ - rcu_read_lock(); - index = pswiotlb_pool_find_slots(dev, nid, pool, orig_addr, - alloc_size, alloc_align_mask); - rcu_read_unlock(); - - if (index < 0) { - pswiotlb_dyn_free(&pool->rcu); - return -1; - } - -found: - WRITE_ONCE(dev->dma_uses_p_io_tlb, true); - - /* - * The general barrier orders reads and writes against a presumed store - * of the PSWIOTLB buffer address by a device driver (to a driver private - * data structure). It serves two purposes. - * - * First, the store to dev->dma_uses_p_io_tlb must be ordered before the - * presumed store. This guarantees that the returned buffer address - * cannot be passed to another CPU before updating dev->dma_uses_p_io_tlb. - * - * Second, the load from mem->pools must be ordered before the same - * presumed store. This guarantees that the returned buffer address - * cannot be observed by another CPU before an update of the RCU list - * that was made by pswiotlb_dyn_alloc() on a third CPU (cf. multicopy - * atomicity). - * - * See also the comment in is_pswiotlb_buffer(). - */ - smp_mb(); - - *retpool = pool; - return index; -} -#ifdef CONFIG_DEBUG_FS - -/** - * mem_used() - get number of used slots in an allocator - * @mem: Phytium software IO TLB allocator. 
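The expansion watermark above uses round-to-nearest integer percentages, via the usual (x * 100 + total / 2) / total trick. A quick check of that arithmetic with made-up node sizes:

#include <stdio.h>

int main(void)
{
	unsigned long used  = 3UL << 30;	/* 3 GiB of bounce space */
	unsigned long total = 64UL << 30;	/* 64 GiB in this node  */

	/* Adding total/2 before dividing rounds to the nearest
	 * percent instead of truncating. */
	unsigned long pct = (used * 100 + total / 2) / total;

	printf("%lu%%\n", pct);	/* 4.69% rounds to 5: prints "5%" */
	return 0;
}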
- * - * The result is accurate in this version of the function, because an atomic - * counter is available if CONFIG_DEBUG_FS is set. - * - * Return: Number of used slots. - */ -static unsigned long mem_used(struct p_io_tlb_mem *mem) -{ - return atomic_long_read(&mem->total_used); -} - -#else /* !CONFIG_DEBUG_FS */ - -/** - * mem_pool_used() - get number of used slots in a memory pool - * @pool: Phytium software IO TLB memory pool. - * - * The result is not accurate, see mem_used(). - * - * Return: Approximate number of used slots. - */ -static unsigned long mem_pool_used(struct p_io_tlb_pool *pool) -{ - int i; - unsigned long used = 0; - - for (i = 0; i < pool->nareas; i++) - used += pool->areas[i].used; - return used; -} - -/** - * mem_used() - get number of used slots in an allocator - * @mem: Phytium software IO TLB allocator. - * - * The result is not accurate, because there is no locking of individual - * areas. - * - * Return: Approximate number of used slots. - */ -static unsigned long mem_used(struct p_io_tlb_mem *mem) -{ - struct p_io_tlb_pool *pool; - unsigned long used = 0; - - rcu_read_lock(); - list_for_each_entry_rcu(pool, &mem->pools, node) - used += mem_pool_used(pool); - rcu_read_unlock(); - - return used; -} - -#endif /* CONFIG_DEBUG_FS */ - -phys_addr_t pswiotlb_tbl_map_single(struct device *dev, int nid, phys_addr_t orig_addr, - size_t mapping_size, size_t alloc_size, - unsigned int alloc_align_mask, enum dma_data_direction dir, - unsigned long attrs) -{ - struct p_io_tlb_mem *mem = &dev->dma_p_io_tlb_mem[nid]; - unsigned int offset = pswiotlb_align_offset(dev, orig_addr); - struct p_io_tlb_pool *pool; - unsigned int i; - unsigned long index; - phys_addr_t tlb_addr; - struct page *page; - - if (alloc_size > (P_IO_TLB_SEGSIZE << P_IO_TLB_SHIFT)) { - dev_warn_ratelimited(dev, "alloc size 0x%lx is larger than segment(0x%x) of pswiotlb\n", - alloc_size, P_IO_TLB_SEGSIZE << P_IO_TLB_SHIFT); - return (phys_addr_t)DMA_MAPPING_ERROR; - } - - if (!mem || !mem->nslabs) { - dev_warn_ratelimited(dev, - "Can not allocate PSWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); - return (phys_addr_t)DMA_MAPPING_ERROR; - } - - if (mapping_size > alloc_size) { - dev_warn_once(dev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)", - mapping_size, alloc_size); - return (phys_addr_t)DMA_MAPPING_ERROR; - } - - index = pswiotlb_find_slots(dev, nid, orig_addr, - alloc_size + offset, alloc_align_mask, &pool); - if (index == -1) { - if (!(attrs & DMA_ATTR_NO_WARN)) - dev_warn_once(dev, - "pswiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n", - alloc_size, mem->nslabs, mem_used(mem)); - return (phys_addr_t)DMA_MAPPING_ERROR; - } - - /* - * Save away the mapping from the original address to the DMA address. - * This is needed when we sync the memory. Then we sync the buffer if - * needed. - */ - for (i = 0; i < nr_slots(alloc_size + offset); i++) - pool->slots[index + i].orig_addr = slot_addr(orig_addr, i); - tlb_addr = slot_addr(pool->start, index) + offset; - page = pfn_to_page(PFN_DOWN(tlb_addr)); - set_bit(PG_pswiotlb, &page->flags); - - /* - * When dir == DMA_FROM_DEVICE we could omit the copy from the orig - * to the tlb buffer, if we knew for sure the device will - * overwrite the entire current content. But we don't. Thus - * unconditional bounce may prevent leaking pswiotlb content (i.e. - * kernel memory) to user-space. 
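The two mem_used() variants trade accuracy for overhead, mirroring the upstream swiotlb split: an exact atomic counter when CONFIG_DEBUG_FS is set, otherwise an unlocked, approximate sum over areas. A condensed sketch of the two strategies (types and names simplified for illustration):

#include <linux/atomic.h>

/* Exact: one atomic counter, updated on every map/unmap. */
static atomic_long_t total_used;

static unsigned long mem_used_exact(void)
{
	return atomic_long_read(&total_used);
}

/* Approximate: sum per-area counters without locking; concurrent
 * map/unmap may change areas while we read them. */
struct example_area { unsigned long used; };

static unsigned long mem_used_approx(struct example_area *areas, int nareas)
{
	unsigned long used = 0;
	int i;

	for (i = 0; i < nareas; i++)
		used += areas[i].used;
	return used;
}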
- */
-	pswiotlb_bounce(dev, nid, tlb_addr, mapping_size, DMA_TO_DEVICE, pool);
-	return tlb_addr;
-}
-
-static void pswiotlb_release_slots(struct device *dev, int nid, phys_addr_t tlb_addr,
-		struct p_io_tlb_pool *mem)
-{
-	unsigned long flags;
-	unsigned int offset = pswiotlb_align_offset(dev, tlb_addr);
-	int index = (tlb_addr - offset - mem->start) >> P_IO_TLB_SHIFT;
-	int nslots = nr_slots(mem->slots[index].alloc_size + offset);
-	int aindex = index / mem->area_nslabs;
-	struct p_io_tlb_area *area = &mem->areas[aindex];
-	int count, i;
-	struct page *page = pfn_to_page(PFN_DOWN(tlb_addr));
-
-	/*
-	 * Return the buffer to the free list by setting the corresponding
-	 * entries to indicate the number of contiguous entries available.
-	 * While returning the entries to the free list, we merge the entries
-	 * with slots below and above the range being returned.
-	 */
-	WARN_ON(aindex >= mem->nareas);
-
-	spin_lock_irqsave(&area->lock, flags);
-	if (index + nslots < ALIGN(index + 1, P_IO_TLB_SEGSIZE))
-		count = mem->slots[index + nslots].list;
-	else
-		count = 0;
-
-	/*
-	 * Step 1: return the slots to the free list, merging the slots with
-	 * succeeding slots
-	 */
-	for (i = index + nslots - 1; i >= index; i--) {
-		mem->slots[i].list = ++count;
-		mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
-		mem->slots[i].alloc_size = 0;
-	}
-
-	/*
-	 * Step 2: merge the returned slots with the preceding slots, if
-	 * available (non-zero)
-	 */
-	for (i = index - 1;
-	     io_tlb_offset(i) != P_IO_TLB_SEGSIZE - 1 && mem->slots[i].list;
-	     i--)
-		mem->slots[i].list = ++count;
-	area->used -= nslots;
-	if ((mem != &p_io_tlb_default_mem[nid].defpool) && (area->used == 0))
-		bitmap_clear(mem->busy_record, aindex, 1);
-	clear_bit(PG_pswiotlb, &page->flags);
-	spin_unlock_irqrestore(&area->lock, flags);
-}
-
-/*
- * tlb_addr is the physical address of the bounce buffer to unmap.
- */
-void pswiotlb_tbl_unmap_single(struct device *dev, int nid, phys_addr_t tlb_addr,
-		size_t offset, size_t mapping_size, enum dma_data_direction dir,
-		unsigned long attrs, struct p_io_tlb_pool *pool)
-{
-	struct page *page = pfn_to_page(PFN_DOWN(tlb_addr));
-
-	/*
-	 * First, sync the memory before unmapping the entry
-	 */
-	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
-	    (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) &&
-	    !test_bit(PG_pswiotlbsync, &page->flags))
-		pswiotlb_bounce(dev, nid, tlb_addr, mapping_size, DMA_FROM_DEVICE, pool);
-
-	tlb_addr -= offset;
-	pswiotlb_release_slots(dev, nid, tlb_addr, pool);
-
-	clear_bit(PG_pswiotlbsync, &page->flags);
-}
-
-void pswiotlb_sync_single_for_device(struct device *dev, int nid, phys_addr_t tlb_addr,
-		size_t size, enum dma_data_direction dir, struct p_io_tlb_pool *pool)
-{
-	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
-		pswiotlb_bounce(dev, nid, tlb_addr, size, DMA_TO_DEVICE, pool);
-	else
-		WARN_ON(dir != DMA_FROM_DEVICE);
-}
-
-void pswiotlb_sync_single_for_cpu(struct device *dev, int nid, phys_addr_t tlb_addr,
-		size_t size, enum dma_data_direction dir, struct p_io_tlb_pool *pool)
-{
-	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) {
-		struct page *page = pfn_to_page(PFN_DOWN(tlb_addr));
-
-		pswiotlb_bounce(dev, nid, tlb_addr, size, DMA_FROM_DEVICE, pool);
-		set_bit(PG_pswiotlbsync, &page->flags);
-	} else
-		WARN_ON(dir != DMA_TO_DEVICE);
-}
-
-/*
- * Create a pswiotlb mapping for the buffer at @paddr, and in case of DMAing
- * to the device copy the data into it as well. 
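The two merge loops in pswiotlb_release_slots() can be hard to follow. Here is a single-segment userspace walk-through of the same count propagation (segment-boundary handling simplified to one segment):

#include <stdio.h>

#define SEGSIZE 8

int main(void)
{
	/* Slots 3..4 are in use (list == 0); free that two-slot run. */
	int list[SEGSIZE] = { 3, 2, 1, 0, 0, 3, 2, 1 };
	int index = 3, nslots = 2, count, i;

	/* Seed the count from the free run just after the freed range. */
	count = (index + nslots < SEGSIZE) ? list[index + nslots] : 0;

	/* Step 1: walk the freed range backwards, rebuilding run lengths. */
	for (i = index + nslots - 1; i >= index; i--)
		list[i] = ++count;

	/* Step 2: extend into the preceding free slots. */
	for (i = index - 1; i >= 0 && list[i]; i--)
		list[i] = ++count;

	for (i = 0; i < SEGSIZE; i++)
		printf("%d ", list[i]);	/* prints: 8 7 6 5 4 3 2 1 */
	printf("\n");
	return 0;
}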
- */
-dma_addr_t pswiotlb_map(struct device *dev, int nid, phys_addr_t paddr, size_t size,
-		enum dma_data_direction dir, unsigned long attrs)
-{
-	phys_addr_t pswiotlb_addr;
-	dma_addr_t dma_addr;
-
-	trace_pswiotlb_bounced(dev, phys_to_dma(dev, paddr), size);
-
-	pswiotlb_addr = pswiotlb_tbl_map_single(dev, nid, paddr, size,
-			PAGE_ALIGN(size), PAGE_SIZE - 1, dir, attrs);
-	if (pswiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR)
-		return DMA_MAPPING_ERROR;
-
-	dma_addr = phys_to_dma_unencrypted(dev, pswiotlb_addr);
-
-	if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-		arch_sync_dma_for_device(pswiotlb_addr, size, dir);
-	return dma_addr;
-}
-
-size_t pswiotlb_max_mapping_size(struct device *dev)
-{
-	int min_align_mask = dma_get_min_align_mask(dev);
-	int min_align = 0;
-
-	/*
-	 * pswiotlb_find_slots() skips slots according to
-	 * the min align mask. This affects the max mapping size.
-	 * Take it into account here.
-	 */
-	if (min_align_mask)
-		min_align = roundup(min_align_mask, P_IO_TLB_SIZE);
-
-	return ((size_t)P_IO_TLB_SIZE) * P_IO_TLB_SEGSIZE - min_align;
-}
-
-/**
- * is_pswiotlb_allocated() - check if the default Phytium software IO TLB is initialized
- *
- * Return: %true if the default pswiotlb of the device's node has slabs allocated.
- */
-bool is_pswiotlb_allocated(struct device *dev)
-{
-	int nid = dev->local_node;
-
-	return p_io_tlb_default_mem[nid].nslabs;
-}
-
-bool is_pswiotlb_active(struct device *dev)
-{
-	int nid = dev->local_node;
-	struct p_io_tlb_mem *mem = &dev->dma_p_io_tlb_mem[nid];
-
-	return mem && mem->nslabs;
-}
-
-/**
- * default_pswiotlb_base() - get the base address of the default PSWIOTLB
- *
- * Get the lowest physical address used by the default Phytium software IO TLB pool.
- */
-phys_addr_t default_pswiotlb_base(struct device *dev)
-{
-	int nid = dev->local_node;
-
-	p_io_tlb_default_mem[nid].can_grow = false;
-
-	return p_io_tlb_default_mem[nid].defpool.start;
-}
-
-/**
- * default_pswiotlb_limit() - get the address limit of the default PSWIOTLB
- *
- * Get the highest physical address used by the default Phytium software IO TLB pool. 
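pswiotlb_max_mapping_size() subtracts the alignment slack that the slot search may skip. Recomputing it in userspace for a hypothetical device with a 4 KiB min-align mask, assuming 2 KiB slots and 128-slot segments:

#include <stdio.h>

#define IO_TLB_SHIFT	11
#define IO_TLB_SIZE	(1UL << IO_TLB_SHIFT)
#define IO_TLB_SEGSIZE	128

static unsigned long roundup_to(unsigned long x, unsigned long step)
{
	return ((x + step - 1) / step) * step;
}

int main(void)
{
	unsigned long min_align_mask = 0xfff;	/* 4 KiB-style mask */
	unsigned long min_align = min_align_mask ?
		roundup_to(min_align_mask, IO_TLB_SIZE) : 0;

	/* 128 slots * 2 KiB = 256 KiB per segment, minus 4 KiB of
	 * alignment slack: prints 258048. */
	printf("%lu\n", IO_TLB_SIZE * IO_TLB_SEGSIZE - min_align);
	return 0;
}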
- */ -phys_addr_t default_pswiotlb_limit(struct device *dev) -{ - int nid = dev->local_node; - - return p_io_tlb_default_mem[nid].phys_limit; -} -#ifdef CONFIG_DEBUG_FS - -static int p_io_tlb_used_get(void *data, u64 *val) -{ - struct p_io_tlb_mem *mem = data; - - *val = mem_used(mem); - return 0; -} - -static int p_io_tlb_hiwater_get(void *data, u64 *val) -{ - struct p_io_tlb_mem *mem = data; - - *val = atomic_long_read(&mem->used_hiwater); - return 0; -} - -static int p_io_tlb_hiwater_set(void *data, u64 val) -{ - struct p_io_tlb_mem *mem = data; - - /* Only allow setting to zero */ - if (val != 0) - return -EINVAL; - - atomic_long_set(&mem->used_hiwater, val); - return 0; -} - -DEFINE_DEBUGFS_ATTRIBUTE(fops_p_io_tlb_used, p_io_tlb_used_get, NULL, "%llu\n"); -DEFINE_DEBUGFS_ATTRIBUTE(fops_p_io_tlb_hiwater, p_io_tlb_hiwater_get, - p_io_tlb_hiwater_set, "%llu\n"); - -static void pswiotlb_create_debugfs_files(struct p_io_tlb_mem *mem, - int nid, const char *dirname) -{ - atomic_long_set(&mem->total_used, 0); - atomic_long_set(&mem->used_hiwater, 0); - - mem->debugfs = debugfs_create_dir(dirname, pswiotlb_debugfs); - if (!mem->nslabs) - return; - - debugfs_create_ulong("p_io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs); - debugfs_create_file("p_io_tlb_used", 0400, mem->debugfs, mem, - &fops_p_io_tlb_used); - debugfs_create_file("p_io_tlb_used_hiwater", 0600, mem->debugfs, mem, - &fops_p_io_tlb_hiwater); -} - -static int passthroughlist_display_show(struct seq_file *m, void *v) -{ - struct pswiotlb_passthroughlist *bl_entry; - - rcu_read_lock(); - list_for_each_entry_rcu(bl_entry, &passthroughlist, node) { - seq_printf(m, "0x%04x\n", bl_entry->vendor); - } - rcu_read_unlock(); - - return 0; -} - -static int version_display_show(struct seq_file *m, void *v) -{ - seq_puts(m, "pswiotlb version "); - seq_printf(m, "%s\n", PSWIOTLB_VERSION); - - return 0; -} - -static int passthroughlist_add(void *data, u64 val) -{ - struct pswiotlb_passthroughlist *bl_entry; - unsigned long flags; - - bl_entry = kzalloc(sizeof(*bl_entry), GFP_ATOMIC); - if (!bl_entry) - return -ENOMEM; - - bl_entry->vendor = val; - bl_entry->from_grub = false; - - spin_lock_irqsave(&passthroughlist_lock, flags); - list_add_rcu(&bl_entry->node, &passthroughlist); - spin_unlock_irqrestore(&passthroughlist_lock, flags); - - return 0; -} - -static int passthroughlist_del(void *data, u64 val) -{ - struct pswiotlb_passthroughlist *bl_entry; - unsigned long flags; - - rcu_read_lock(); - list_for_each_entry_rcu(bl_entry, &passthroughlist, node) { - if (bl_entry->vendor == val) - goto found; - } - rcu_read_unlock(); - - return 0; -found: - rcu_read_unlock(); - spin_lock_irqsave(&passthroughlist_lock, flags); - list_del_rcu(&bl_entry->node); - spin_unlock_irqrestore(&passthroughlist_lock, flags); - - if (bl_entry->from_grub == false) - kfree(bl_entry); - - return 0; -} - -DEFINE_SHOW_ATTRIBUTE(passthroughlist_display); -DEFINE_SHOW_ATTRIBUTE(version_display); -DEFINE_DEBUGFS_ATTRIBUTE(fops_passthroughlist_add, NULL, - passthroughlist_add, "%llu\n"); -DEFINE_DEBUGFS_ATTRIBUTE(fops_passthroughlist_del, NULL, - passthroughlist_del, "%llu\n"); - -static void pswiotlb_create_passthroughlist_debugfs_files(const char *dirname) -{ - passthroughlist_debugfs = debugfs_create_dir(dirname, pswiotlb_debugfs); - if (!passthroughlist_debugfs) - return; - - debugfs_create_file("show_devices", 0400, passthroughlist_debugfs, NULL, - &passthroughlist_display_fops); - debugfs_create_file("add_device", 0600, passthroughlist_debugfs, NULL, - 
&fops_passthroughlist_add); - debugfs_create_file("del_device", 0600, passthroughlist_debugfs, NULL, - &fops_passthroughlist_del); -} - -static void pswiotlb_create_pswiotlb_debugfs_files(const char *dirname) -{ - int i; - char name[20] = ""; - char passthroughlist_name[50] = ""; - - pswiotlb_debugfs = debugfs_create_dir(dirname, pswiotlb_debugfs); - if (!pswiotlb_debugfs) - return; - - debugfs_create_file("version", 0400, pswiotlb_debugfs, NULL, - &version_display_fops); - - for (i = 0; i < pswiotlb_node_num; i++) { - sprintf(name, "%s-%d", "pswiotlb", i); - pswiotlb_create_debugfs_files(&p_io_tlb_default_mem[i], i, name); - } - sprintf(passthroughlist_name, "%s", "pswiotlb-passthroughlist"); - pswiotlb_create_passthroughlist_debugfs_files(passthroughlist_name); -} - -static int __init pswiotlb_create_default_debugfs(void) -{ - char name[20] = ""; - - if (!pswiotlb_mtimer_alive && !pswiotlb_force_disable) { - pr_info("setup pswiotlb monitor timer service\n"); - timer_setup(&service_timer, pswiotlb_monitor_service, 0); - pswiotlb_mtimer_alive = true; - - /* check pswiotlb every 2 seconds*/ - mod_timer(&service_timer, jiffies + 2 * HZ); - } - - if (!pswiotlb_force_disable) { - sprintf(name, "%s", "pswiotlb"); - pswiotlb_create_pswiotlb_debugfs_files(name); - } - - return 0; -} - -late_initcall(pswiotlb_create_default_debugfs); - -#else /* !CONFIG_DEBUG_FS */ - -static inline void pswiotlb_create_debugfs_files(struct p_io_tlb_mem *mem, - const char *dirname) -{ -} - -#endif /* CONFIG_DEBUG_FS */ diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 2c6cbeab80213d51f99b781267d7443342a49f56..191b1bc8b9459b958a6aec497751f1014a87adda 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -763,16 +763,18 @@ static void swiotlb_dyn_free(struct rcu_head *rcu) } /** - * swiotlb_find_pool() - find the IO TLB pool for a physical address + * __swiotlb_find_pool() - find the IO TLB pool for a physical address * @dev: Device which has mapped the DMA buffer. * @paddr: Physical address within the DMA buffer. * * Find the IO TLB memory pool descriptor which contains the given physical - * address, if any. + * address, if any. This function is for use only when the dev is known to + * be using swiotlb. Use swiotlb_find_pool() for the more general case + * when this condition is not met. * * Return: Memory pool which contains @paddr, or %NULL if none. */ -struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr) +struct io_tlb_pool *__swiotlb_find_pool(struct device *dev, phys_addr_t paddr) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; struct io_tlb_pool *pool; @@ -855,9 +857,8 @@ static unsigned int swiotlb_align_offset(struct device *dev, * Bounce: copy the swiotlb buffer from or back to the original dma location */ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size, - enum dma_data_direction dir) + enum dma_data_direction dir, struct io_tlb_pool *mem) { - struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr); int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT; phys_addr_t orig_addr = mem->slots[index].orig_addr; size_t alloc_size = mem->slots[index].alloc_size; @@ -1213,7 +1214,7 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, * that was made by swiotlb_dyn_alloc() on a third CPU (cf. multicopy * atomicity). * - * See also the comment in is_swiotlb_buffer(). + * See also the comment in swiotlb_find_pool(). 
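The rename to __swiotlb_find_pool() implies a two-level lookup: a cheap wrapper for callers that do not know whether the device bounces at all, and the pool walk proper for callers that do. A hedged sketch of such a wrapper; this illustrates the stated intent rather than reproducing the kernel's exact inline, and it assumes a dev->dma_uses_io_tlb flag as in CONFIG_SWIOTLB_DYNAMIC builds:

static inline struct io_tlb_pool *swiotlb_find_pool_sketch(struct device *dev,
							    phys_addr_t paddr)
{
	/* Fast path: the device has never been bounced, so @paddr
	 * cannot be inside any IO TLB pool. */
	if (!READ_ONCE(dev->dma_uses_io_tlb))
		return NULL;

	/* Slow path: walk the pools as the function above does. */
	return __swiotlb_find_pool(dev, paddr);
}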
*/ smp_mb(); @@ -1366,6 +1367,12 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, return (phys_addr_t)DMA_MAPPING_ERROR; } + /* + * If dma_skip_sync was set, reset it on first SWIOTLB buffer + * mapping to always sync SWIOTLB buffers. + */ + dma_reset_need_sync(dev); + /* * Save away the mapping from the original address to the DMA address. * This is needed when we sync the memory. Then we sync the buffer if @@ -1385,13 +1392,13 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, * unconditional bounce may prevent leaking swiotlb content (i.e. * kernel memory) to user-space. */ - swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE); + swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE, pool); return tlb_addr; } -static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr) +static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr, + struct io_tlb_pool *mem) { - struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr); unsigned long flags; unsigned int offset = swiotlb_align_offset(dev, 0, tlb_addr); int index, nslots, aindex; @@ -1455,11 +1462,9 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr) * * Return: %true if @tlb_addr belonged to a transient pool that was released. */ -static bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr) +static bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr, + struct io_tlb_pool *pool) { - struct io_tlb_pool *pool; - - pool = swiotlb_find_pool(dev, tlb_addr); if (!pool->transient) return false; @@ -1471,7 +1476,7 @@ static bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr) #else /* !CONFIG_SWIOTLB_DYNAMIC */ static inline bool swiotlb_del_transient(struct device *dev, - phys_addr_t tlb_addr) + phys_addr_t tlb_addr, struct io_tlb_pool *pool) { return false; } @@ -1481,36 +1486,39 @@ static inline bool swiotlb_del_transient(struct device *dev, /* * tlb_addr is the physical address of the bounce buffer to unmap. 
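After this refactor, callers resolve the pool once and thread it through every per-buffer helper instead of re-walking the pool list inside each one. An illustrative caller (example_unmap is hypothetical; the helper names are those introduced by the hunks above):

static void example_unmap(struct device *dev, phys_addr_t tlb_addr,
			  size_t size, enum dma_data_direction dir)
{
	/* One lookup, reused for the sync and the release. */
	struct io_tlb_pool *pool = swiotlb_find_pool(dev, tlb_addr);

	if (!pool)
		return;		/* not a bounce buffer */

	__swiotlb_tbl_unmap_single(dev, tlb_addr, size, dir, 0, pool);
}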
*/ -void swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr, - size_t mapping_size, enum dma_data_direction dir, - unsigned long attrs) +void __swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr, + size_t mapping_size, enum dma_data_direction dir, + unsigned long attrs, struct io_tlb_pool *pool) { /* * First, sync the memory before unmapping the entry */ if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_FROM_DEVICE); + swiotlb_bounce(dev, tlb_addr, mapping_size, + DMA_FROM_DEVICE, pool); - if (swiotlb_del_transient(dev, tlb_addr)) + if (swiotlb_del_transient(dev, tlb_addr, pool)) return; - swiotlb_release_slots(dev, tlb_addr); + swiotlb_release_slots(dev, tlb_addr, pool); } -void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir) +void __swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir, + struct io_tlb_pool *pool) { if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) - swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE); + swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE, pool); else BUG_ON(dir != DMA_FROM_DEVICE); } -void swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir) +void __swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir, + struct io_tlb_pool *pool) { if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) - swiotlb_bounce(dev, tlb_addr, size, DMA_FROM_DEVICE); + swiotlb_bounce(dev, tlb_addr, size, DMA_FROM_DEVICE, pool); else BUG_ON(dir != DMA_TO_DEVICE); } @@ -1534,8 +1542,9 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size, /* Ensure that the address returned is DMA'ble */ dma_addr = phys_to_dma_unencrypted(dev, swiotlb_addr); if (unlikely(!dma_capable(dev, dma_addr, size, true))) { - swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, dir, - attrs | DMA_ATTR_SKIP_CPU_SYNC); + __swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, dir, + attrs | DMA_ATTR_SKIP_CPU_SYNC, + swiotlb_find_pool(dev, swiotlb_addr)); dev_WARN_ONCE(dev, 1, "swiotlb addr %pad+%zu overflow (mask %llx, bus limit %llx).\n", &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit); @@ -1695,7 +1704,7 @@ struct page *swiotlb_alloc(struct device *dev, size_t size) if (unlikely(!PAGE_ALIGNED(tlb_addr))) { dev_WARN_ONCE(dev, 1, "Cannot allocate pages from non page-aligned swiotlb addr 0x%pa.\n", &tlb_addr); - swiotlb_release_slots(dev, tlb_addr); + swiotlb_release_slots(dev, tlb_addr, pool); return NULL; } @@ -1705,11 +1714,13 @@ struct page *swiotlb_alloc(struct device *dev, size_t size) bool swiotlb_free(struct device *dev, struct page *page, size_t size) { phys_addr_t tlb_addr = page_to_phys(page); + struct io_tlb_pool *pool; - if (!is_swiotlb_buffer(dev, tlb_addr)) + pool = swiotlb_find_pool(dev, tlb_addr); + if (!pool) return false; - swiotlb_release_slots(dev, tlb_addr); + swiotlb_release_slots(dev, tlb_addr, pool); return true; }
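The swiotlb_free() hunk above is the template for the whole series: an is_swiotlb_buffer()-style predicate becomes a single swiotlb_find_pool() lookup whose result is reused, saving a second walk over the pool list. The same shape, spelled out as a sketch:

bool example_free(struct device *dev, struct page *page, size_t size)
{
	phys_addr_t tlb_addr = page_to_phys(page);
	struct io_tlb_pool *pool = swiotlb_find_pool(dev, tlb_addr);

	if (!pool)		/* was: !is_swiotlb_buffer(dev, tlb_addr) */
		return false;

	/* Reuse the lookup result instead of finding the pool again. */
	swiotlb_release_slots(dev, tlb_addr, pool);
	return true;
}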