From cd49163cfa93a8aa1f6a916b5a0e06a9b69b097d Mon Sep 17 00:00:00 2001
From: Zhang Zekun
Date: Tue, 4 Jun 2024 09:07:25 +0800
Subject: [PATCH 1/2] iommu/iova: free iovas on each cpu in flush queue

hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I9SOOM

---------------------------------

The iova free logic in the flush queue always frees iovas on the CPU that
runs the timer callback, which does not fully utilize the per-CPU rcaches
in iova_rcache. Returning each iova to the rcache of the CPU that queued
it speeds up freeing and makes better use of the per-CPU rcaches.

Signed-off-by: Zhang Zekun
---
 drivers/iommu/iova.c | 40 ++++++++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index dec7634f35fa..f3c5b0e887ce 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -18,7 +18,7 @@
 
 static bool iova_rcache_insert(struct iova_domain *iovad,
 			       unsigned long pfn,
-			       unsigned long size);
+			       unsigned long size, int cpu);
 static unsigned long iova_rcache_get(struct iova_domain *iovad,
 				     unsigned long size,
 				     unsigned long limit_pfn);
@@ -77,7 +77,7 @@ static void free_iova_flush_queue(struct iova_domain *iovad)
 	iovad->entry_dtor = NULL;
 }
 
-static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq);
+static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq, int cpu);
 static void free_iova_work_func(struct work_struct *work)
 {
 	struct iova_domain *iovad;
@@ -90,7 +90,7 @@ static void free_iova_work_func(struct work_struct *work)
 		fq = per_cpu_ptr(iovad->fq, cpu);
 
 		spin_lock_irqsave(&fq->lock, flags);
-		fq_ring_free(iovad, fq);
+		fq_ring_free(iovad, fq, cpu);
 		spin_unlock_irqrestore(&fq->lock, flags);
 	}
 }
@@ -459,6 +459,15 @@ alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
 }
 EXPORT_SYMBOL_GPL(alloc_iova_fast);
 
+static void
+free_iova_fast_internal(struct iova_domain *iovad, unsigned long pfn, unsigned long size, int cpu)
+{
+	if (iova_rcache_insert(iovad, pfn, size, cpu))
+		return;
+
+	free_iova(iovad, pfn);
+}
+
 /**
  * free_iova_fast - free iova pfn range into rcache
  * @iovad: - iova domain in question.
@@ -470,13 +479,11 @@ EXPORT_SYMBOL_GPL(alloc_iova_fast);
 void free_iova_fast(struct iova_domain *iovad, unsigned long pfn,
 		    unsigned long size)
 {
-	if (iova_rcache_insert(iovad, pfn, size))
-		return;
-
-	free_iova(iovad, pfn);
+	free_iova_fast_internal(iovad, pfn, size, smp_processor_id());
 }
 EXPORT_SYMBOL_GPL(free_iova_fast);
 
+
 #define fq_ring_for_each(i, fq) \
 	for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE)
@@ -497,7 +504,7 @@ static inline unsigned fq_ring_add(struct iova_fq *fq)
 	return idx;
 }
 
-static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq)
+static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq, int cpu)
 {
 	u64 counter = atomic64_read(&iovad->fq_flush_finish_cnt);
 	unsigned idx;
@@ -512,9 +519,9 @@ static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq)
 		if (iovad->entry_dtor)
 			iovad->entry_dtor(fq->entries[idx].data);
 
-		free_iova_fast(iovad,
+		free_iova_fast_internal(iovad,
 			       fq->entries[idx].iova_pfn,
-			       fq->entries[idx].pages);
+			       fq->entries[idx].pages, cpu);
 
 		fq->head = (fq->head + 1) % IOVA_FQ_SIZE;
 	}
@@ -565,6 +572,7 @@ void queue_iova(struct iova_domain *iovad,
 	struct iova_fq *fq = raw_cpu_ptr(iovad->fq);
 	unsigned long flags;
 	unsigned idx;
+	int cpu = smp_processor_id();
 
 	/*
 	 * Order against the IOMMU driver's pagetable update from unmapping
@@ -580,11 +588,11 @@ void queue_iova(struct iova_domain *iovad,
 	 * flushed out on another CPU. This makes the fq_full() check below less
 	 * likely to be true.
 	 */
-	fq_ring_free(iovad, fq);
+	fq_ring_free(iovad, fq, cpu);
 
 	if (fq_full(fq)) {
 		iova_domain_flush(iovad);
-		fq_ring_free(iovad, fq);
+		fq_ring_free(iovad, fq, cpu);
 	}
 
 	idx = fq_ring_add(fq);
@@ -957,13 +965,13 @@ static void init_iova_rcaches(struct iova_domain *iovad)
  */
 static bool __iova_rcache_insert(struct iova_domain *iovad,
 				 struct iova_rcache *rcache,
-				 unsigned long iova_pfn)
+				 unsigned long iova_pfn, int cpu)
 {
 	struct iova_cpu_rcache *cpu_rcache;
 	bool can_insert = false;
 	unsigned long flags;
 
-	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
+	cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
 	spin_lock_irqsave(&cpu_rcache->lock, flags);
 
 	if (!iova_magazine_full(cpu_rcache->loaded)) {
@@ -994,14 +1002,14 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
 }
 
 static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn,
-			       unsigned long size)
+			       unsigned long size, int cpu)
 {
 	unsigned int log_size = order_base_2(size);
 
 	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
 		return false;
 
-	return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn);
+	return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn, cpu);
 }
 
 /*
-- 
Gitee

From 4a2759c9b3eab9e59977cb72380adbd7d060bf4d Mon Sep 17 00:00:00 2001
From: Zhang Zekun
Date: Tue, 4 Jun 2024 09:07:26 +0800
Subject: [PATCH 2/2] iommu/iova: Try to schedule out when freeing iovas takes too long

hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I9SOOM

-------------------------------

A soft lockup can occur in free_iova_work_func(). Add a scheduling point
to avoid the soft lockup.

Call trace:
 free_iova_work_func+0xdc/0x170
 process_one_work+0x1d0/0x4c0
 worker_thread+0x150/0x400
 kthread+0x104/0x130
 ret_from_fork+0x10/0x18
watchdog: BUG: soft lockup - CPU#172 stuck for 23s! [kworker/172:1:1154]
Signed-off-by: Zhang Zekun
---
 drivers/iommu/iova.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index f3c5b0e887ce..99d5e942aff6 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -92,6 +92,7 @@ static void free_iova_work_func(struct work_struct *work)
 		spin_lock_irqsave(&fq->lock, flags);
 		fq_ring_free(iovad, fq, cpu);
 		spin_unlock_irqrestore(&fq->lock, flags);
+		cond_resched();
 	}
 }
-- 
Gitee
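
Note (not part of the patches): below is a minimal userspace C sketch of the idea
both patches rely on — the drain loop returns each flush queue's entries to the
cache of the CPU that owns that queue, and yields between CPUs so a long drain
cannot monopolise the worker. Every name in it (flush_queue, cpu_cache,
fq_ring_free, free_work, NR_CPUS, FQ_SIZE) is a hypothetical stand-in, and
pthread mutexes plus sched_yield() only play the roles of the fq spinlocks and
cond_resched(); it is not the driver code above.

/*
 * Userspace model of the per-CPU flush-queue drain pattern.
 * Build with: cc -pthread model.c
 */
#include <pthread.h>
#include <sched.h>
#include <stdio.h>

#define NR_CPUS 4
#define FQ_SIZE 8

struct flush_queue {
	pthread_mutex_t lock;                 /* stands in for fq->lock */
	unsigned long entries[FQ_SIZE];
	unsigned int head, tail;
};

struct cpu_cache {
	unsigned long slots[FQ_SIZE];         /* stands in for the per-CPU rcache */
	unsigned int count;
};

static struct flush_queue fq[NR_CPUS];
static struct cpu_cache cache[NR_CPUS];

/* Return one freed range to the cache of the CPU that owns the queue. */
static void cache_insert(int cpu, unsigned long pfn)
{
	struct cpu_cache *c = &cache[cpu];

	if (c->count < FQ_SIZE)
		c->slots[c->count++] = pfn;
	/* else: the real driver would fall back to the slow free path */
}

/* Drain one CPU's queue into that same CPU's cache. */
static void fq_ring_free(int cpu)
{
	struct flush_queue *q = &fq[cpu];

	pthread_mutex_lock(&q->lock);
	while (q->head != q->tail) {
		cache_insert(cpu, q->entries[q->head]);
		q->head = (q->head + 1) % FQ_SIZE;
	}
	pthread_mutex_unlock(&q->lock);
}

/* Worker body: walk every CPU, drain its queue, yield between iterations. */
static void free_work(void)
{
	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		fq_ring_free(cpu);
		sched_yield();                /* plays the role of cond_resched() */
	}
}

int main(void)
{
	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		pthread_mutex_init(&fq[cpu].lock, NULL);
		fq[cpu].entries[fq[cpu].tail++] = 0x1000 * (cpu + 1);
	}
	free_work();
	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		printf("cpu%d cached %u entries\n", cpu, cache[cpu].count);
	return 0;
}

With one queued entry per CPU, the expected output is one cached entry per CPU,
mirroring how the patched fq_ring_free() keeps freed iovas on the owning CPU's
rcache instead of piling them onto the CPU running the work function.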