diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index edcb7911b3ce87b5b1b7a17ac97ea5f7d643591a..36dda61d5a5add054aa379af557b94132c5bc941 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -5994,7 +5994,6 @@ CONFIG_ARM_SMMU_V3_PM=y # CONFIG_QCOM_IOMMU is not set # CONFIG_VIRTIO_IOMMU is not set CONFIG_SMMU_BYPASS_DEV=y -CONFIG_IOVA_MAX_GLOBAL_MAGS=128 # # Remoteproc drivers diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index b630e58c49b686e54d6744a4648f8e1d21c501a2..f04a2bde00184eeefedf0c53dce9c0f8bcb5b1ee 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -439,13 +439,3 @@ config SMMU_BYPASS_DEV upstreamed in mainline. endif # IOMMU_SUPPORT - -config IOVA_MAX_GLOBAL_MAGS - int "Set the max iova global magzines in iova rcache" - range 16 2048 - default "32" - help - Iova rcache global magizine is shared among every cpu. The size of - it can be a bottle neck when lots of cpus are contending to use it. - If you are suffering from the speed of allocing iova with more than - 128 cpus, try to tune this config larger. diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 203331b549708ca5810e06c88202bbe75fcdfdbe..dec7634f35fab66dceeb8788981ad4d8c229613e 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -11,6 +11,7 @@ #include #include #include +#include /* The anchor node sits above the top of the usable address space */ #define IOVA_ANCHOR ~0UL @@ -792,12 +793,24 @@ split_and_remove_iova(struct iova_domain *iovad, struct iova *iova, * dynamic size tuning described in the paper. */ -#define IOVA_MAG_SIZE 128 +/* + * As kmalloc's buffer size is fixed to power of 2, 127 is chosen to + * assure size of 'iova_magazine' to be 1024 bytes, so that no memory + * will be wasted. Since only full magazines are inserted into the depot, + * we don't need to waste PFN capacity on a separate list head either. + */ +#define IOVA_MAG_SIZE 127 + +#define IOVA_DEPOT_DELAY msecs_to_jiffies(100) struct iova_magazine { - unsigned long size; + union { + unsigned long size; + struct iova_magazine *next; + }; unsigned long pfns[IOVA_MAG_SIZE]; }; +static_assert(!(sizeof(struct iova_magazine) & (sizeof(struct iova_magazine) - 1))); struct iova_cpu_rcache { spinlock_t lock; @@ -877,6 +890,41 @@ static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn) mag->pfns[mag->size++] = pfn; } +static struct iova_magazine *iova_depot_pop(struct iova_rcache *rcache) +{ + struct iova_magazine *mag = rcache->depot; + + rcache->depot = mag->next; + mag->size = IOVA_MAG_SIZE; + rcache->depot_size--; + return mag; +} + +static void iova_depot_push(struct iova_rcache *rcache, struct iova_magazine *mag) +{ + mag->next = rcache->depot; + rcache->depot = mag; + rcache->depot_size++; +} + +static void iova_depot_work_func(struct work_struct *work) +{ + struct iova_rcache *rcache = container_of(work, typeof(*rcache), work.work); + struct iova_magazine *mag = NULL; + unsigned long flags; + + spin_lock_irqsave(&rcache->lock, flags); + if (rcache->depot_size > num_online_cpus()) + mag = iova_depot_pop(rcache); + spin_unlock_irqrestore(&rcache->lock, flags); + + if (mag) { + iova_magazine_free_pfns(mag, rcache->iovad); + iova_magazine_free(mag); + schedule_delayed_work(&rcache->work, IOVA_DEPOT_DELAY); + } +} + static void init_iova_rcaches(struct iova_domain *iovad) { struct iova_cpu_rcache *cpu_rcache; @@ -887,7 +935,8 @@ static void init_iova_rcaches(struct iova_domain *iovad) for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { rcache = &iovad->rcaches[i]; spin_lock_init(&rcache->lock); - rcache->depot_size = 0; + rcache->iovad = iovad; + INIT_DELAYED_WORK(&rcache->work, iova_depot_work_func); rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache), cache_line_size()); if (WARN_ON(!rcache->cpu_rcaches)) continue; @@ -910,7 +959,6 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, struct iova_rcache *rcache, unsigned long iova_pfn) { - struct iova_magazine *mag_to_free = NULL; struct iova_cpu_rcache *cpu_rcache; bool can_insert = false; unsigned long flags; @@ -928,13 +976,9 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, if (new_mag) { spin_lock(&rcache->lock); - if (rcache->depot_size < MAX_GLOBAL_MAGS) { - rcache->depot[rcache->depot_size++] = - cpu_rcache->loaded; - } else { - mag_to_free = cpu_rcache->loaded; - } + iova_depot_push(rcache, cpu_rcache->loaded); spin_unlock(&rcache->lock); + schedule_delayed_work(&rcache->work, IOVA_DEPOT_DELAY); cpu_rcache->loaded = new_mag; can_insert = true; @@ -946,11 +990,6 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, spin_unlock_irqrestore(&cpu_rcache->lock, flags); - if (mag_to_free) { - iova_magazine_free_pfns(mag_to_free, iovad); - iova_magazine_free(mag_to_free); - } - return can_insert; } @@ -988,9 +1027,9 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache, has_pfn = true; } else { spin_lock(&rcache->lock); - if (rcache->depot_size > 0) { + if (rcache->depot) { iova_magazine_free(cpu_rcache->loaded); - cpu_rcache->loaded = rcache->depot[--rcache->depot_size]; + cpu_rcache->loaded = iova_depot_pop(rcache); has_pfn = true; } spin_unlock(&rcache->lock); @@ -1029,7 +1068,7 @@ static void free_iova_rcaches(struct iova_domain *iovad) struct iova_rcache *rcache; struct iova_cpu_rcache *cpu_rcache; unsigned int cpu; - int i, j; + int i; for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { rcache = &iovad->rcaches[i]; @@ -1039,8 +1078,9 @@ static void free_iova_rcaches(struct iova_domain *iovad) iova_magazine_free(cpu_rcache->prev); } free_percpu(rcache->cpu_rcaches); - for (j = 0; j < rcache->depot_size; ++j) - iova_magazine_free(rcache->depot[j]); + cancel_delayed_work_sync(&rcache->work); + while (rcache->depot) + iova_magazine_free(iova_depot_pop(rcache)); } } diff --git a/include/linux/iova.h b/include/linux/iova.h index 3cb469b366d704483e921f4b9c0877b8326842d8..25c447124638072d2b9250a2c28e667c225865ce 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -26,13 +26,14 @@ struct iova_magazine; struct iova_cpu_rcache; #define IOVA_RANGE_CACHE_MAX_SIZE 6 /* log of max cached IOVA range size (in pages) */ -#define MAX_GLOBAL_MAGS CONFIG_IOVA_MAX_GLOBAL_MAGS /* magazines per bin */ struct iova_rcache { spinlock_t lock; - unsigned long depot_size; - struct iova_magazine *depot[MAX_GLOBAL_MAGS]; + unsigned int depot_size; + struct iova_magazine *depot; struct iova_cpu_rcache __percpu *cpu_rcaches; + struct iova_domain *iovad; + struct delayed_work work; }; struct iova_domain;