diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 13c97ba7a820b4f9d7e1bdfa1d2abe44baee4784..9c1d396fabc1fa0b1eae2a225f2ae6acef06b1c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1279,6 +1279,11 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	uint64_t seq;
 	int r;
 
+#ifdef CONFIG_LOONGARCH
+	while (amdgpu_ih_fix_is_busy(p->adev))
+		msleep(20);
+#endif
+
 	for (i = 0; i < p->gang_size; ++i)
 		drm_sched_job_arm(&p->jobs[i]->base);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index f3b0aaf3ebc69e7f90f8cf0c3f0e5d3417991669..0cad8a113c1c3ed5e0cbc66a42e33c582af2b645 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -26,6 +26,28 @@
 #include "amdgpu.h"
 #include "amdgpu_ih.h"
 
+#ifdef CONFIG_LOONGARCH
+/*
+ * Pattern written over unused and already-consumed IH ring dwords so that a
+ * stale dword can be told apart from one that has really been written.
+ */
+#define AMDGPU_IH_FIX_POISON	0xDEADBEFF
+
+static void amdgpu_ih_handle_fix_work(struct work_struct *work);
+
+/* Arm @ih's deferred worker and poison every dword of its ring. */
+static void amdgpu_ih_fix_ring_init(struct amdgpu_ih_ring *ih)
+{
+	u32 i;
+
+	INIT_WORK(&ih->fix_work, amdgpu_ih_handle_fix_work);
+	for (i = 0; i < (ih->ring_size >> 2); i++)
+		ih->ring[i] = cpu_to_le32(AMDGPU_IH_FIX_POISON);
+	/* memory barrier for writing into ih ring */
+	mb();
+}
+#endif
+
 /**
  * amdgpu_ih_ring_init - initialize the IH state
  *
@@ -71,6 +93,9 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
 		ih->wptr_cpu = &ih->ring[ih->ring_size / 4];
 		ih->rptr_addr = dma_addr + ih->ring_size + 4;
 		ih->rptr_cpu = &ih->ring[(ih->ring_size / 4) + 1];
+#ifdef CONFIG_LOONGARCH
+		amdgpu_ih_fix_ring_init(ih);
+#endif
 	} else {
 		unsigned wptr_offs, rptr_offs;
 
@@ -98,6 +123,9 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
 		ih->wptr_cpu = &adev->wb.wb[wptr_offs];
 		ih->rptr_addr = adev->wb.gpu_addr + rptr_offs * 4;
 		ih->rptr_cpu = &adev->wb.wb[rptr_offs];
+#ifdef CONFIG_LOONGARCH
+		amdgpu_ih_fix_ring_init(ih);
+#endif
 	}
 
 	init_waitqueue_head(&ih->wait_process);
@@ -119,6 +147,10 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
 	if (!ih->ring)
 		return;
 
+#ifdef CONFIG_LOONGARCH
+	cancel_work_sync(&ih->fix_work);
+#endif
+
 	if (ih->use_bus_addr) {
 
 		/* add 8 bytes for the rptr/wptr shadows and
@@ -135,6 +167,152 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
 	}
 }
 
+#ifdef CONFIG_LOONGARCH
+
+/**
+ * amdgpu_ih_fix_is_busy - report whether the deferred IH worker is pending
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns the cs_lock flag: set by amdgpu_ih_process() before it schedules
+ * the worker and cleared by the worker once the ring is drained.
+ */
+int amdgpu_ih_fix_is_busy(struct amdgpu_device *adev)
+{
+	return atomic_read(&adev->irq.cs_lock);
+}
+
+/* Return true if a checked dword in [start, end) still holds the poison. */
+static bool amdgpu_ih_fix_range_is_stale(struct amdgpu_ih_ring *ih,
+					 u32 start, u32 end)
+{
+	u32 i;
+
+	for (i = start; i < end; i++) {
+		/* the last dword of every group of four is not checked */
+		if ((i % 4) != 3 &&
+		    le32_to_cpu(ih->ring[i]) == AMDGPU_IH_FIX_POISON)
+			return true;
+	}
+
+	return false;
+}
+
+/* Overwrite every dword in [start, end) with the poison pattern. */
+static void amdgpu_ih_fix_range_poison(struct amdgpu_ih_ring *ih,
+				       u32 start, u32 end)
+{
+	u32 i;
+
+	for (i = start; i < end; i++)
+		ih->ring[i] = cpu_to_le32(AMDGPU_IH_FIX_POISON);
+}
+
+/*
+ * Check that the entries in [rptr, wptr) no longer contain poisoned dwords.
+ *
+ * @rptr and @wptr are byte offsets; they are converted to dword indices
+ * here.  With @forever the check is retried every 20 ms until it passes,
+ * otherwise it runs once and -ENAVAIL is returned on failure.
+ *
+ * NOTE(review): the @forever retry has no upper bound - if the data never
+ * shows up the worker (and cs_lock) stays stuck.  Consider a timeout.
+ */
+static int amdgpu_ih_fix_loongarch_pcie_order_start(struct amdgpu_ih_ring *ih,
+						    u32 rptr, u32 wptr,
+						    bool forever)
+{
+	u32 ring_end = ih->ring_size >> 2;
+	u32 first = rptr >> 2;
+	u32 last = wptr >> 2;
+	bool stale;
+
+	if (rptr == wptr)
+		return 0;
+
+	do {
+		if (forever)
+			msleep(20);
+
+		if (first <= last) {
+			stale = amdgpu_ih_fix_range_is_stale(ih, first, last);
+		} else {
+			/* range wraps: [first, ring_end) then [0, last) */
+			stale = amdgpu_ih_fix_range_is_stale(ih, first, ring_end) ||
+				amdgpu_ih_fix_range_is_stale(ih, 0, last);
+		}
+
+		if (!stale)
+			return 0;
+	} while (forever);
+
+	return -ENAVAIL;
+}
+
+/*
+ * Re-poison the entries in [rptr, wptr) after they have been dispatched, so
+ * a later pass over the same slots can again tell old data from new.
+ */
+static void amdgpu_ih_fix_loongarch_pcie_order_end(struct amdgpu_ih_ring *ih,
+						   u32 rptr, u32 wptr)
+{
+	u32 ring_end = ih->ring_size >> 2;
+	u32 first = rptr >> 2;
+	u32 last = wptr >> 2;
+
+	if (rptr == wptr)
+		return;
+
+	if (first <= last) {
+		amdgpu_ih_fix_range_poison(ih, first, last);
+	} else {
+		/* range wraps: [first, ring_end) then [0, last) */
+		amdgpu_ih_fix_range_poison(ih, first, ring_end);
+		amdgpu_ih_fix_range_poison(ih, 0, last);
+	}
+	/* memory barrier for writing into ih ring */
+	mb();
+}
+
+/*
+ * Deferred handler for the primary IH ring, scheduled by amdgpu_ih_process()
+ * when its one-shot check failed.  Waits for the entries, dispatches them,
+ * re-poisons the consumed slots and clears cs_lock when the ring is empty.
+ */
+static void amdgpu_ih_handle_fix_work(struct work_struct *work)
+{
+	struct amdgpu_device *adev =
+		container_of(work, struct amdgpu_device, irq.ih.fix_work);
+	struct amdgpu_ih_ring *ih = &adev->irq.ih;
+	u32 old_rptr;
+	u32 wptr;
+
+	do {
+		wptr = amdgpu_ih_get_wptr(adev, ih);
+		/* Order reading of wptr vs. reading of IH ring data */
+		rmb();
+
+		old_rptr = ih->rptr;
+		amdgpu_ih_fix_loongarch_pcie_order_start(ih, old_rptr, wptr, true);
+
+		while (ih->rptr != wptr) {
+			amdgpu_irq_dispatch(adev, ih);
+			ih->rptr &= ih->ptr_mask;
+		}
+
+		amdgpu_ih_fix_loongarch_pcie_order_end(ih, old_rptr, ih->rptr);
+
+		amdgpu_ih_set_rptr(adev, ih);
+		/* memory barrier for setting rptr */
+		mb();
+		/* wake checkpoint waiters, as amdgpu_ih_process() does */
+		wake_up_all(&ih->wait_process);
+	} while (ih->rptr != amdgpu_ih_get_wptr(adev, ih));
+
+	atomic_set(&adev->irq.cs_lock, 0);
+}
+#endif
+
 /**
  * amdgpu_ih_ring_write - write IV to the ring buffer
  *
@@ -209,6 +387,9 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
 {
 	unsigned int count;
 	u32 wptr;
+#ifdef CONFIG_LOONGARCH
+	u32 old_rptr;
+#endif
 
 	if (!ih->enabled || adev->shutdown)
 		return IRQ_NONE;
@@ -222,11 +403,32 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
 	/* Order reading of wptr vs. reading of IH ring data */
 	rmb();
 
+#ifdef CONFIG_LOONGARCH
+	/* Only the primary ring is drained by the deferred fix worker. */
+	old_rptr = ih->rptr;
+	if (ih == &adev->irq.ih &&
+	    amdgpu_ih_fix_loongarch_pcie_order_start(ih, old_rptr, wptr, false)) {
+		/* NOTE(review): looks like the post-overflow rptr layout - confirm */
+		if (old_rptr == ((wptr + 16) & ih->ptr_mask) ||
+		    old_rptr == ((wptr + 32) & ih->ptr_mask))
+			return IRQ_NONE;
+
+		atomic_set(&adev->irq.cs_lock, 1);
+		schedule_work(&ih->fix_work);
+		return IRQ_NONE;
+	}
+#endif
+
 	while (ih->rptr != wptr && --count) {
 		amdgpu_irq_dispatch(adev, ih);
 		ih->rptr &= ih->ptr_mask;
 	}
 
+#ifdef CONFIG_LOONGARCH
+	if (ih == &adev->irq.ih)
+		amdgpu_ih_fix_loongarch_pcie_order_end(ih, old_rptr, ih->rptr);
+#endif
+
 	amdgpu_ih_set_rptr(adev, ih);
 	wake_up_all(&ih->wait_process);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
index 508f02eb0cf8f958d26853cd7b4698091554d94e..375afd059cc5f90d83ab0103980ba05bf7a61750 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
@@ -72,6 +72,9 @@ struct amdgpu_ih_ring {
 	/* For waiting on IH processing at checkpoint. */
 	wait_queue_head_t wait_process;
 	uint64_t		processed_timestamp;
+#ifdef CONFIG_LOONGARCH
+	struct work_struct fix_work;
+#endif
 };
 
 /* return true if time stamp t2 is after t1 with 48bit wrap around */
@@ -110,4 +113,7 @@ void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
 				struct amdgpu_iv_entry *entry);
 uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
 				       signed int offset);
+#ifdef CONFIG_LOONGARCH
+int amdgpu_ih_fix_is_busy(struct amdgpu_device *adev);
+#endif
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 5978edf7ea71e49771383b54e2f5827d40859368..1aaf3f91b148ac05545415f51811f47715595b20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -275,6 +275,10 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
 
 	spin_lock_init(&adev->irq.lock);
 
+#ifdef CONFIG_LOONGARCH
+	atomic_set(&adev->irq.cs_lock, 0);
+#endif
+
 	/* Enable MSI if not disabled by module parameter */
 	adev->irq.msi_enabled = false;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
index 04c0b4fa17a4e7bf9628c52f9d03cbcd443f81ee..3cf74a4b4c357b93376fdfc70f89e3d85baf0f12 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
@@ -100,6 +100,9 @@ struct amdgpu_irq {
 	uint32_t                        srbm_soft_reset;
 	u32                             retry_cam_doorbell_index;
 	bool                            retry_cam_enabled;
+#ifdef CONFIG_LOONGARCH
+	atomic_t cs_lock;
+#endif
 };
 
 enum interrupt_node_id_per_aid {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index 34f9211b26793ff705aef4ab75e926dfba1f1dbb..d5cf46c9a510c62090c79209257e47d5945b5dd6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -1818,6 +1818,17 @@ static void gfx_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
 	amdgpu_ring_write(ring, 0xFFFFFFFF);
 	amdgpu_ring_write(ring, 0);
 	amdgpu_ring_write(ring, 10); /* poll interval */
+#ifdef CONFIG_LOONGARCH
+	/* EVENT_WRITE_EOP - flush caches, no send int */
+	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
+	amdgpu_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
+	amdgpu_ring_write(ring, addr & 0xfffffffc);
+	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
+			  ((write64bit ? 2 : 1) << CP_EOP_DONE_DATA_CNTL__DATA_SEL__SHIFT) |
+			  (0 << CP_EOP_DONE_DATA_CNTL__INT_SEL__SHIFT));
+	amdgpu_ring_write(ring, lower_32_bits(seq));
+	amdgpu_ring_write(ring, upper_32_bits(seq));
+#endif
 	/* EVENT_WRITE_EOP - flush caches, send int */
 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
 	amdgpu_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
@@ -3469,7 +3480,11 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = {
 	.set_wptr = gfx_v6_0_ring_set_wptr_gfx,
 	.emit_frame_size =
 		5 + 5 + /* hdp flush / invalidate */
+#ifdef CONFIG_LOONGARCH
+		20 + 20 + 20 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
+#else
 		14 + 14 + 14 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
+#endif
 		7 + 4 + /* gfx_v6_0_ring_emit_pipeline_sync */
 		SI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v6_0_ring_emit_vm_flush */
 		3 + 2 + /* gfx_v6_ring_emit_cntxcntl including vgt flush */
@@ -3498,7 +3513,11 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_compute = {
 		5 + 5 + /* hdp flush / invalidate */
 		7 + /* gfx_v6_0_ring_emit_pipeline_sync */
 		SI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v6_0_ring_emit_vm_flush */
+#ifdef CONFIG_LOONGARCH
+		20 + 20 + 20 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
+#else
 		14 + 14 + 14 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
+#endif
 		5, /* SURFACE_SYNC */
 	.emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */
 	.emit_ib = gfx_v6_0_ring_emit_ib,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 893d349aae078cedfe9baf78043fef7d25783a2b..7deef04e6adb65a89207cc665fc55bde279bda26 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2127,11 +2127,17 @@ static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
 				 EVENT_INDEX(5)));
 	amdgpu_ring_write(ring, addr & 0xfffffffc);
 	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
+#ifdef CONFIG_LOONGARCH
+				DATA_SEL(write64bit ? 2 : 1) | INT_SEL(0));
+	amdgpu_ring_write(ring, lower_32_bits(seq));
+	amdgpu_ring_write(ring, upper_32_bits(seq));
+#else
 				DATA_SEL(1) | INT_SEL(0));
 	amdgpu_ring_write(ring, lower_32_bits(seq - 1));
 	amdgpu_ring_write(ring, upper_32_bits(seq - 1));
 
 	/* Then send the real EOP event down the pipe. */
+#endif
 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
 	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
 				 EOP_TC_ACTION_EN |
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 1471eeffd428e732122bdc659892119274ff49c5..b54b8f3eea91ffeefc194d8de6765c79bb419d13 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6194,12 +6194,18 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
 				 EVENT_INDEX(5)));
 	amdgpu_ring_write(ring, addr & 0xfffffffc);
 	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
+#ifdef CONFIG_LOONGARCH
+			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(0));
+	amdgpu_ring_write(ring, lower_32_bits(seq));
+	amdgpu_ring_write(ring, upper_32_bits(seq));
+#else
 			  DATA_SEL(1) | INT_SEL(0));
 	amdgpu_ring_write(ring, lower_32_bits(seq - 1));
 	amdgpu_ring_write(ring, upper_32_bits(seq - 1));
 
 	/* Then send the real EOP event down the pipe:
 	 * EVENT_WRITE_EOP - flush caches, send int */
+#endif
 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
 	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
 				 EOP_TC_ACTION_EN |
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index c2d6b723aea8ac21dfb04c7fec8ede57f8970f85..b36515c5cf5ec191d9ed3559ce265e548f15d792 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -3554,7 +3554,11 @@ void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
 	radeon_ring_write(ring, addr & 0xfffffffc);
 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
 			  DATA_SEL(1) | INT_SEL(0));
+#ifdef CONFIG_LOONGARCH
+	radeon_ring_write(ring, fence->seq);
+#else
 	radeon_ring_write(ring, fence->seq - 1);
+#endif
 	radeon_ring_write(ring, 0);
 
 	/* Then send the real EOP event down the pipe. */