From 9168b58df76a1eed12158603942b52fabb85341d Mon Sep 17 00:00:00 2001 From: Xiang Chen Date: Thu, 13 Nov 2025 11:23:18 +0800 Subject: [PATCH 1/4] iommu/arm-smmu-v3: Add a check to avoid invalid iotlb sync mainline inclusion from mainline-v5.13-rc1 commit 6cc7e5a9c6b02507b9be5a99b51e970afa91c85f category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID5X4C Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=6cc7e5a9c6b02507b9be5a99b51e970afa91c85f ---------------------------------------- It may send an invalid TLB sync for smmuv3 if iotlb_gather is not valid (iotlb_gather->pgsize = 0). So add a check to avoid an invalid iotlb sync in that case. Signed-off-by: Xiang Chen Link: https://lore.kernel.org/r/1617109106-121844-1-git-send-email-chenxiang66@hisilicon.com Signed-off-by: Will Deacon Conflicts: drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c [fix context conflicts] Signed-off-by: Lin Yujun --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 12b418ed65be..e67c1feeca5f 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2960,6 +2960,9 @@ static void arm_smmu_iotlb_sync(struct iommu_domain *domain, { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + if (!gather->pgsize) + return; + arm_smmu_tlb_inv_range_domain(gather->start, gather->end - gather->start + 1, gather->pgsize, true, smmu_domain); } -- Gitee From 25c108ec51a6644d17f6b6cc13a32530f2de05c0 Mon Sep 17 00:00:00 2001 From: Xiang Chen Date: Thu, 13 Nov 2025 11:23:19 +0800 Subject: [PATCH 2/4] dma-mapping: benchmark: Add support for multi-pages map/unmap mainline inclusion from mainline-v5.13-rc1 commit ca947482b0b30443e6da1f0f5ba7244e34a4f65a category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID5X4C Reference: 
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=ca947482b0b30443e6da1f0f5ba7244e34a4f65a ---------------------------------------- Currently the dma-map benchmark only supports mapping/unmapping one page at a time, but there are other scenarios which need support for multi-page map/unmap: for multi-page interfaces such as dma_alloc_coherent() and dma_map_sg(), the time spent on a multi-page map/unmap is not the time of a single page * npages (not linear), as it may use block description instead of page description when the size allows it, such as 2M/1G, and it can also send a single TLB invalidation command to invalidate multiple pages instead of multiple commands when RIL is enabled (which will shorten the time of unmap). So it is necessary to add support for multi-pages map/unmap. Add a parameter "-g" to support multi-pages map/unmap. Signed-off-by: Xiang Chen Acked-by: Barry Song Signed-off-by: Christoph Hellwig Conflicts: kernel/dma/map_benchmark.c [fix context conflicts] Signed-off-by: Lin Yujun --- kernel/dma/map_benchmark.c | 21 ++++++++++++------- .../testing/selftests/dma/dma_map_benchmark.c | 20 ++++++++++++++---- 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c index 5c13c9352bb5..a0e86a73daeb 100644 --- a/kernel/dma/map_benchmark.c +++ b/kernel/dma/map_benchmark.c @@ -38,7 +38,8 @@ struct map_benchmark { __u32 dma_bits; /* DMA addressing capability */ __u32 dma_dir; /* DMA data direction */ __u32 dma_trans_ns; /* time for DMA transmission in ns */ - __u8 expansion[80]; /* For future use */ + __u32 granule; /* how many PAGE_SIZE will do map/unmap once a time */ + __u8 expansion[76]; /* For future use */ }; struct map_benchmark_data { @@ -58,9 +59,11 @@ static int map_benchmark_thread(void *data) void *buf; dma_addr_t dma_addr; struct map_benchmark_data *map = data; + int npages = map->bparam.granule; + u64 size = npages * PAGE_SIZE; int ret = 0; - buf 
= (void *)__get_free_page(GFP_KERNEL); + buf = alloc_pages_exact(size, GFP_KERNEL); if (!buf) return -ENOMEM; @@ -76,10 +79,10 @@ static int map_benchmark_thread(void *data) * 66 means evertything goes well! 66 is lucky. */ if (map->dir != DMA_FROM_DEVICE) - memset(buf, 0x66, PAGE_SIZE); + memset(buf, 0x66, size); map_stime = ktime_get(); - dma_addr = dma_map_single(map->dev, buf, PAGE_SIZE, map->dir); + dma_addr = dma_map_single(map->dev, buf, size, map->dir); if (unlikely(dma_mapping_error(map->dev, dma_addr))) { pr_err("dma_map_single failed on %s\n", dev_name(map->dev)); @@ -93,7 +96,7 @@ static int map_benchmark_thread(void *data) ndelay(map->bparam.dma_trans_ns); unmap_stime = ktime_get(); - dma_unmap_single(map->dev, dma_addr, PAGE_SIZE, map->dir); + dma_unmap_single(map->dev, dma_addr, size, map->dir); unmap_etime = ktime_get(); unmap_delta = ktime_sub(unmap_etime, unmap_stime); @@ -112,7 +115,7 @@ static int map_benchmark_thread(void *data) } out: - free_page((unsigned long)buf); + free_pages_exact(buf, size); return ret; } @@ -202,7 +205,6 @@ static long map_benchmark_ioctl(struct file *file, unsigned int cmd, struct map_benchmark_data *map = file->private_data; void __user *argp = (void __user *)arg; u64 old_dma_mask; - int ret; if (copy_from_user(&map->bparam, argp, sizeof(map->bparam))) @@ -234,6 +236,11 @@ static long map_benchmark_ioctl(struct file *file, unsigned int cmd, return -EINVAL; } + if (map->bparam.granule < 1 || map->bparam.granule > 1024) { + pr_err("invalid granule size\n"); + return -EINVAL; + } + switch (map->bparam.dma_dir) { case DMA_MAP_BIDIRECTIONAL: map->dir = DMA_BIDIRECTIONAL; diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c index fb23ce9617ea..6f2caa6c50f7 100644 --- a/tools/testing/selftests/dma/dma_map_benchmark.c +++ b/tools/testing/selftests/dma/dma_map_benchmark.c @@ -40,7 +40,8 @@ struct map_benchmark { __u32 dma_bits; /* DMA addressing capability */ __u32 
dma_dir; /* DMA data direction */ __u32 dma_trans_ns; /* time for DMA transmission in ns */ - __u8 expansion[80]; /* For future use */ + __u32 granule; /* how many PAGE_SIZE will do map/unmap once a time */ + __u8 expansion[76]; /* For future use */ }; int main(int argc, char **argv) @@ -51,11 +52,13 @@ int main(int argc, char **argv) int threads = 1, seconds = 20, node = -1; /* default dma mask 32bit, bidirectional DMA */ int bits = 32, xdelay = 0, dir = DMA_MAP_BIDIRECTIONAL; + /* default granule 1 PAGESIZE */ + int granule = 1; int cmd = DMA_MAP_BENCHMARK; char *p; - while ((opt = getopt(argc, argv, "t:s:n:b:d:x:")) != -1) { + while ((opt = getopt(argc, argv, "t:s:n:b:d:x:g:")) != -1) { switch (opt) { case 't': threads = atoi(optarg); @@ -75,6 +78,9 @@ int main(int argc, char **argv) case 'x': xdelay = atoi(optarg); break; + case 'g': + granule = atoi(optarg); + break; default: return -1; } @@ -110,6 +116,11 @@ int main(int argc, char **argv) exit(1); } + if (granule < 1 || granule > 1024) { + fprintf(stderr, "invalid granule size\n"); + exit(1); + } + fd = open("/sys/kernel/debug/dma_map_benchmark", O_RDWR); if (fd == -1) { perror("open"); @@ -123,14 +134,15 @@ int main(int argc, char **argv) map.dma_bits = bits; map.dma_dir = dir; map.dma_trans_ns = xdelay; + map.granule = granule; if (ioctl(fd, cmd, &map)) { perror("ioctl"); exit(1); } - printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s\n", - threads, seconds, node, dir[directions]); + printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s granule: %d\n", + threads, seconds, node, dir[directions], granule); printf("average map latency(us):%.1f standard deviation:%.1f\n", map.avg_map_100ns/10.0, map.map_stddev/10.0); printf("average unmap latency(us):%.1f standard deviation:%.1f\n", -- Gitee From 49b83686152e334d6ea133a214201ee865bf7b68 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 13 Nov 2025 11:23:20 +0800 Subject: [PATCH 3/4] iommu: avoid taking iova_rbtree_lock twice 
mainline inclusion from mainline-v5.11-rc1 commit 3a651b3a27a1ee35879499ead3942dc854a20968 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID5X4C Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=3a651b3a27a1ee35879499ead3942dc854a20968 ---------------------------------------- Both find_iova() and __free_iova() take iova_rbtree_lock, so there is no reason to take and release it twice inside free_iova(). Fold them into one critical section by calling the unlocked versions instead. Signed-off-by: Cong Wang Reviewed-by: Robin Murphy Signed-off-by: John Garry Link: https://lore.kernel.org/r/1605608734-84416-5-git-send-email-john.garry@huawei.com Signed-off-by: Will Deacon Signed-off-by: Lin Yujun --- drivers/iommu/iova.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 99d5e942aff6..7e4a5cfe49ae 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -412,10 +412,14 @@ EXPORT_SYMBOL_GPL(__free_iova); void free_iova(struct iova_domain *iovad, unsigned long pfn) { - struct iova *iova = find_iova(iovad, pfn); + unsigned long flags; + struct iova *iova; + spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); + iova = private_find_iova(iovad, pfn); if (iova) - __free_iova(iovad, iova); + private_free_iova(iovad, iova); + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); } EXPORT_SYMBOL_GPL(free_iova); -- Gitee From 0ac6795cd040faa77852d7e13bc6c48fa57d167d Mon Sep 17 00:00:00 2001 From: Xiang Chen Date: Thu, 13 Nov 2025 11:23:21 +0800 Subject: [PATCH 4/4] iommu/iova: Put free_iova_mem() outside of spinlock iova_rbtree_lock mainline inclusion from mainline-v5.14-rc1 commit 7978724f399ae7eba5b6d36ae5a7224d5bf3859a category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID5X4C Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=7978724f399ae7eba5b6d36ae5a7224d5bf3859a 
---------------------------------------- It is not necessary to call free_iova_mem() while holding the iova_rbtree_lock spinlock, which only leads to more contention for the spinlock. There is a small performance improvement after the change. Also rename private_free_iova() to remove_iova() because the function will not free the iova after that change. Signed-off-by: Xiang Chen Reviewed-by: John Garry Acked-by: Robin Murphy Link: https://lore.kernel.org/r/1620647582-194621-1-git-send-email-chenxiang66@hisilicon.com Signed-off-by: Joerg Roedel Signed-off-by: Lin Yujun --- drivers/iommu/iova.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 7e4a5cfe49ae..5e4d536c01a6 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -357,12 +357,11 @@ private_find_iova(struct iova_domain *iovad, unsigned long pfn) return NULL; } -static void private_free_iova(struct iova_domain *iovad, struct iova *iova) +static void remove_iova(struct iova_domain *iovad, struct iova *iova) { assert_spin_locked(&iovad->iova_rbtree_lock); __cached_rbnode_delete_update(iovad, iova); rb_erase(&iova->node, &iovad->rbroot); - free_iova_mem(iova); } /** @@ -397,8 +396,9 @@ __free_iova(struct iova_domain *iovad, struct iova *iova) unsigned long flags; spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); - private_free_iova(iovad, iova); + remove_iova(iovad, iova); spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + free_iova_mem(iova); } EXPORT_SYMBOL_GPL(__free_iova); @@ -417,10 +417,13 @@ free_iova(struct iova_domain *iovad, unsigned long pfn) spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); iova = private_find_iova(iovad, pfn); - if (iova) - private_free_iova(iovad, iova); + if (!iova) { + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + return; + } + remove_iova(iovad, iova); spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); - + free_iova_mem(iova); } 
EXPORT_SYMBOL_GPL(free_iova); @@ -858,7 +861,8 @@ iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad) if (WARN_ON(!iova)) continue; - private_free_iova(iovad, iova); + remove_iova(iovad, iova); + free_iova_mem(iova); } spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); -- Gitee