From 0f472b5ea90f2a21d5bb2f7b508f1ed1eaf5bb20 Mon Sep 17 00:00:00 2001 From: Tianrui Zhao Date: Tue, 8 Apr 2025 10:25:33 +0800 Subject: [PATCH 1/5] anolis: drm/amdgpu: Fix pcie order dislocation ANBZ: #20490 Signed-off-by: Tianrui Zhao Signed-off-by: Juxin Gao --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 5 + drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 163 ++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h | 6 + drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 4 + drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h | 3 + 5 files changed, 181 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 13c97ba7a820..9c1d396fabc1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1279,6 +1279,11 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, uint64_t seq; int r; +#ifdef CONFIG_LOONGARCH + while (amdgpu_ih_fix_is_busy(p->adev)) + msleep(20); +#endif + for (i = 0; i < p->gang_size; ++i) drm_sched_job_arm(&p->jobs[i]->base); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index f3b0aaf3ebc6..5d358656086c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -26,6 +26,10 @@ #include "amdgpu.h" #include "amdgpu_ih.h" +#ifdef CONFIG_LOONGARCH +static void amdgpu_ih_handle_fix_work(struct work_struct *work); +#endif + /** * amdgpu_ih_ring_init - initialize the IH state * @@ -71,6 +75,15 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, ih->wptr_cpu = &ih->ring[ih->ring_size / 4]; ih->rptr_addr = dma_addr + ih->ring_size + 4; ih->rptr_cpu = &ih->ring[(ih->ring_size / 4) + 1]; + + #ifdef CONFIG_LOONGARCH + INIT_WORK(&adev->irq.ih.fix_work, amdgpu_ih_handle_fix_work); + for (r = 0; r < (adev->irq.ih.ring_size >> 2); r++) + adev->irq.ih.ring[r] = 0xDEADBEFF; + /* Avoid Compiler Optimization */ + mb(); + #endif + } else { unsigned wptr_offs, rptr_offs; @@ 
-98,6 +111,15 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, ih->wptr_cpu = &adev->wb.wb[wptr_offs]; ih->rptr_addr = adev->wb.gpu_addr + rptr_offs * 4; ih->rptr_cpu = &adev->wb.wb[rptr_offs]; + + #ifdef CONFIG_LOONGARCH + INIT_WORK(&adev->irq.ih.fix_work, amdgpu_ih_handle_fix_work); + for (r = 0; r < (adev->irq.ih.ring_size >> 2); r++) + adev->irq.ih.ring[r] = 0xDEADBEFF; + /* Avoid Compiler Optimization */ + mb(); + #endif + } init_waitqueue_head(&ih->wait_process); @@ -119,6 +141,10 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) if (!ih->ring) return; +#ifdef CONFIG_LOONGARCH + cancel_work_sync(&adev->irq.ih.fix_work); +#endif + if (ih->use_bus_addr) { /* add 8 bytes for the rptr/wptr shadows and @@ -135,6 +161,122 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) } } +#ifdef CONFIG_LOONGARCH + +int amdgpu_ih_fix_is_busy(struct amdgpu_device *adev) +{ + return atomic_read(&adev->irq.cs_lock); +} + +static int amdgpu_ih_fix_loongarch_pcie_order_start( + struct amdgpu_ih_ring *ih, + u32 rptr, u32 wptr, + bool forever) +{ + int i; + int check_cnt = 0; + u32 ring_end = ih->ring_size >> 2; + + if (rptr == wptr) + return 0; + + rptr = rptr >> 2; + wptr = wptr >> 2; + + wptr = (rptr > wptr) ? ring_end : wptr; + +restart_check: + if (!forever && ++check_cnt > 1) + return -ENAVAIL; + + if (forever) + msleep(20); + + for (i = rptr; i < wptr; i += 1) { + if (le32_to_cpu(ih->ring[i]) == 0xDEADBEFF) + goto restart_check; + } + + if (rptr > wptr) { + for (i = 0; i < wptr; i += 1) { + if (le32_to_cpu(ih->ring[i]) == 0xDEADBEFF) + goto restart_check; + } + } + + return 0; +} + +static int amdgpu_ih_fix_loongarch_pcie_order_end( + struct amdgpu_ih_ring *ih, + u32 rptr, u32 wptr) +{ + int i; + u32 ring_end = ih->ring_size >> 2; + + if (rptr == wptr) + return 0; + + rptr = rptr >> 2; + wptr = wptr >> 2; + + wptr = (rptr > wptr) ? 
ring_end : wptr; + + for (i = rptr; i < wptr; i += 1) + ih->ring[i] = 0xDEADBEFF; + + if (rptr > wptr) { + for (i = 0; i < wptr; i += 1) + ih->ring[i] = 0xDEADBEFF; + } + + /* Avoid Compiler Optimization */ + mb(); + return 0; +} + +static void amdgpu_ih_handle_fix_work(struct work_struct *work) +{ + struct amdgpu_device *adev = + container_of(work, struct amdgpu_device, irq.ih.fix_work); + struct amdgpu_ih_ring *ih = &adev->irq.ih; + struct amdgpu_iv_entry entry; + + u32 wptr; + u32 old_rptr; + int restart_fg = 0; + +restart: + + wptr = amdgpu_ih_get_wptr(adev, ih); + /* Order reading of wptr vs. reading of IH ring data */ + rmb(); + + old_rptr = ih->rptr; + amdgpu_ih_fix_loongarch_pcie_order_start(&adev->irq.ih, old_rptr, wptr, true); + + while (adev->irq.ih.rptr != wptr) { + u32 ring_index = adev->irq.ih.rptr >> 2; + + amdgpu_irq_dispatch(adev, ih); + ih->rptr &= ih->ptr_mask; + } + + amdgpu_ih_fix_loongarch_pcie_order_end(&adev->irq.ih, old_rptr, adev->irq.ih.rptr); + + amdgpu_ih_set_rptr(adev, ih); + /* Avoid Compiler Optimization */ + mb(); + + if (ih->rptr != amdgpu_ih_get_wptr(adev, ih)) { + restart_fg = 1; + goto restart; + } + + atomic_set(&adev->irq.cs_lock, 0); +} +#endif + /** * amdgpu_ih_ring_write - write IV to the ring buffer * @@ -209,6 +351,10 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) { unsigned int count; u32 wptr; +#ifdef CONFIG_LOONGARCH + u32 old_rptr; + int r; +#endif if (!ih->enabled || adev->shutdown) return IRQ_NONE; @@ -222,11 +368,28 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) /* Order reading of wptr vs. 
reading of IH ring data */ rmb(); +#ifdef CONFIG_LOONGARCH + old_rptr = adev->irq.ih.rptr; + r = amdgpu_ih_fix_loongarch_pcie_order_start(&adev->irq.ih, old_rptr, wptr, false); + if (r) { + if (old_rptr == ((wptr + 16) & adev->irq.ih.ptr_mask)) + return IRQ_NONE; + + atomic_xchg(&adev->irq.cs_lock, 1); + schedule_work(&adev->irq.ih.fix_work); + return IRQ_NONE; + } +#endif + while (ih->rptr != wptr && --count) { amdgpu_irq_dispatch(adev, ih); ih->rptr &= ih->ptr_mask; } +#ifdef CONFIG_LOONGARCH + amdgpu_ih_fix_loongarch_pcie_order_end(&adev->irq.ih, old_rptr, adev->irq.ih.rptr); +#endif + amdgpu_ih_set_rptr(adev, ih); wake_up_all(&ih->wait_process); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index 508f02eb0cf8..375afd059cc5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -72,6 +72,9 @@ struct amdgpu_ih_ring { /* For waiting on IH processing at checkpoint. */ wait_queue_head_t wait_process; uint64_t processed_timestamp; +#ifdef CONFIG_LOONGARCH + struct work_struct fix_work; +#endif }; /* return true if time stamp t2 is after t1 with 48bit wrap around */ @@ -110,4 +113,7 @@ void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry); uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr, signed int offset); +#ifdef CONFIG_LOONGARCH +int amdgpu_ih_fix_is_busy(struct amdgpu_device *adev); +#endif #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 5978edf7ea71..1aaf3f91b148 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -275,6 +275,10 @@ int amdgpu_irq_init(struct amdgpu_device *adev) spin_lock_init(&adev->irq.lock); +#ifdef CONFIG_LOONGARCH + atomic_set(&adev->irq.cs_lock, 0); +#endif + /* Enable MSI if not disabled by module parameter */ adev->irq.msi_enabled = false; diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h index 04c0b4fa17a4..3cf74a4b4c35 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h @@ -100,6 +100,9 @@ struct amdgpu_irq { uint32_t srbm_soft_reset; u32 retry_cam_doorbell_index; bool retry_cam_enabled; +#ifdef CONFIG_LOONGARCH + atomic_t cs_lock; +#endif }; enum interrupt_node_id_per_aid { -- Gitee From 41853a7eadbd3d5afd90ac47a1ee1e30d6e0ff75 Mon Sep 17 00:00:00 2001 From: lvjianmin Date: Wed, 10 Jul 2024 11:41:15 +0800 Subject: [PATCH 2/5] anolis: drm/radeon: repeat the same EOP packet for EOP workaround on CIK ANBZ: #20490 The first EOP packet with a sequence number of seq-1 seems to confuse some PCIe hardware (e.g. Loongson 7A PCHs). Use the real sequence number instead. Signed-off-by: Icenowy Zheng Signed-off-by: lvjianmin Signed-off-by: Juxin Gao --- drivers/gpu/drm/radeon/cik.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index c2d6b723aea8..b36515c5cf5e 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -3554,7 +3554,11 @@ void cik_fence_gfx_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, addr & 0xfffffffc); radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(0)); +#ifdef CONFIG_LOONGARCH + radeon_ring_write(ring, fence->seq); +#else radeon_ring_write(ring, fence->seq - 1); +#endif radeon_ring_write(ring, 0); /* Then send the real EOP event down the pipe. */ -- Gitee From 29df4d54749d2f2d9816589836931d3bcbd47520 Mon Sep 17 00:00:00 2001 From: lvjianmin Date: Wed, 10 Jul 2024 12:03:13 +0800 Subject: [PATCH 3/5] anolis: drm/amdgpu: make duplicated EOP packet for GFX7/8 have real content ANBZ: #20490 The duplication of EOP packets for GFX7/8, with the former one having seq-1 written and the latter one having seq written, seems to confuse some hardware platforms (e.g.
Loongson 7A series PCIe controllers). Make the content of the duplicated EOP packet the same as the real one, only masking any possible interrupts. Signed-off-by: Icenowy Zheng Signed-off-by: lvjianmin Signed-off-by: Juxin Gao --- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 893d349aae07..c8750e60c8dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2127,9 +2127,15 @@ static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, EVENT_INDEX(5))); amdgpu_ring_write(ring, addr & 0xfffffffc); amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | +#ifdef CONFIG_LOONGARCH + DATA_SEL(write64bit ? 2 : 1) | INT_SEL(0)); + amdgpu_ring_write(ring, lower_32_bits(seq)); + amdgpu_ring_write(ring, upper_32_bits(seq)); +#else DATA_SEL(1) | INT_SEL(0)); amdgpu_ring_write(ring, lower_32_bits(seq - 1)); amdgpu_ring_write(ring, upper_32_bits(seq - 1)); +#endif /* Then send the real EOP event down the pipe. */ amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 1471eeffd428..c3fa6b04bad4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6194,9 +6194,15 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, EVENT_INDEX(5))); amdgpu_ring_write(ring, addr & 0xfffffffc); amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | +#ifdef CONFIG_LOONGARCH + DATA_SEL(write64bit ?
2 : 1) | INT_SEL(0)); + amdgpu_ring_write(ring, lower_32_bits(seq)); + amdgpu_ring_write(ring, upper_32_bits(seq)); +#else DATA_SEL(1) | INT_SEL(0)); amdgpu_ring_write(ring, lower_32_bits(seq - 1)); amdgpu_ring_write(ring, upper_32_bits(seq - 1)); +#endif /* Then send the real EOP event down the pipe: * EVENT_WRITE_EOP - flush caches, send int */ -- Gitee From 70ab5836b13e4675efb9801371ba7828cc619e63 Mon Sep 17 00:00:00 2001 From: Tianrui Zhao Date: Tue, 8 Apr 2025 09:38:11 +0800 Subject: [PATCH 4/5] anolis: drm/amdgpu: make duplicated EOP packet for GFX6 have real content ANBZ: #20490 Signed-off-by: Tianrui Zhao Signed-off-by: Juxin Gao --- drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 34f9211b2679..d5cf46c9a510 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -1818,6 +1818,17 @@ static void gfx_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, amdgpu_ring_write(ring, 0xFFFFFFFF); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, 10); /* poll interval */ +#ifdef CONFIG_LOONGARCH + /* EVENT_WRITE_EOP - flush caches, no send int */ + amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); + amdgpu_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5)); + amdgpu_ring_write(ring, addr & 0xfffffffc); + amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | + ((write64bit ? 
2 : 1) << CP_EOP_DONE_DATA_CNTL__DATA_SEL__SHIFT) | + (0 << CP_EOP_DONE_DATA_CNTL__INT_SEL__SHIFT)); + amdgpu_ring_write(ring, lower_32_bits(seq)); + amdgpu_ring_write(ring, upper_32_bits(seq)); +#endif /* EVENT_WRITE_EOP - flush caches, send int */ amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); amdgpu_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5)); @@ -3469,7 +3480,11 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = { .set_wptr = gfx_v6_0_ring_set_wptr_gfx, .emit_frame_size = 5 + 5 + /* hdp flush / invalidate */ +#ifdef CONFIG_LOONGARCH + 20 + 20 + 20 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */ +#else 14 + 14 + 14 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */ +#endif 7 + 4 + /* gfx_v6_0_ring_emit_pipeline_sync */ SI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v6_0_ring_emit_vm_flush */ 3 + 2 + /* gfx_v6_ring_emit_cntxcntl including vgt flush */ @@ -3498,7 +3513,11 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_compute = { 5 + 5 + /* hdp flush / invalidate */ 7 + /* gfx_v6_0_ring_emit_pipeline_sync */ SI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v6_0_ring_emit_vm_flush */ +#ifdef CONFIG_LOONGARCH + 20 + 20 + 20 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */ +#else 14 + 14 + 14 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */ +#endif 5, /* SURFACE_SYNC */ .emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */ .emit_ib = gfx_v6_0_ring_emit_ib, -- Gitee From 5e955d6d2de7707fe1cd06766df8af73de08c443 Mon Sep 17 00:00:00 2001 From: Juxin Gao Date: Fri, 18 Apr 2025 15:28:59 +0800 Subject: [PATCH 5/5] anolis: drm/amdgpu: Fix RX550 pcie order dislocation. 
ANBZ: #20490 When scanning the IH ring for the 0xDEADBEFF fill pattern, ignore a match in the last dword of each 4-dword group ((i % 4) == 3). In amdgpu_ih_process(), also return IRQ_NONE without scheduling the fix work when rptr equals (wptr + 32) masked by ptr_mask, in addition to the existing (wptr + 16) case. Signed-off-by: Tianrui Zhao Signed-off-by: Juxin Gao --- drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index 5d358656086c..d0b9075b4a82 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -193,13 +193,13 @@ static int amdgpu_ih_fix_loongarch_pcie_order_start( msleep(20); for (i = rptr; i < wptr; i += 1) { - if (le32_to_cpu(ih->ring[i]) == 0xDEADBEFF) + if (le32_to_cpu(ih->ring[i]) == 0xDEADBEFF && (i % 4) != 3) goto restart_check; } if (rptr > wptr) { for (i = 0; i < wptr; i += 1) { - if (le32_to_cpu(ih->ring[i]) == 0xDEADBEFF) + if (le32_to_cpu(ih->ring[i]) == 0xDEADBEFF && (i % 4) != 3) goto restart_check; } } @@ -372,7 +372,8 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) old_rptr = adev->irq.ih.rptr; r = amdgpu_ih_fix_loongarch_pcie_order_start(&adev->irq.ih, old_rptr, wptr, false); if (r) { - if (old_rptr == ((wptr + 16) & adev->irq.ih.ptr_mask)) + if (old_rptr == ((wptr + 16) & adev->irq.ih.ptr_mask) || + old_rptr == ((wptr + 32) & adev->irq.ih.ptr_mask)) return IRQ_NONE; atomic_xchg(&adev->irq.cs_lock, 1); -- Gitee