From 1607e88bbd158771562c6ab36641dd14e549c607 Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Fri, 21 Mar 2025 11:45:50 +0800 Subject: [PATCH 1/6] RDMA/hns: Fix soft lockup during bt pages loop maillist inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IBV0G0 CVE: NA Reference: https://web.git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git/commit/?h=wip/leon-for-rc&id=25655580136de59ec89f09089dd28008ea440fc9 ---------------------------------------------------------------------- Driver runs a for-loop when allocating bt pages and mapping them with buffer pages. When a large buffer (e.g. MR over 100GB) is being allocated, it may require a considerable loop count. This will lead to soft lockup: watchdog: BUG: soft lockup - CPU#27 stuck for 22s! ... Call trace: hem_list_alloc_mid_bt+0x124/0x394 [hns_roce_hw_v2] hns_roce_hem_list_request+0xf8/0x160 [hns_roce_hw_v2] hns_roce_mtr_create+0x2e4/0x360 [hns_roce_hw_v2] alloc_mr_pbl+0xd4/0x17c [hns_roce_hw_v2] hns_roce_reg_user_mr+0xf8/0x190 [hns_roce_hw_v2] ib_uverbs_reg_mr+0x118/0x290 watchdog: BUG: soft lockup - CPU#35 stuck for 23s! ... Call trace: hns_roce_hem_list_find_mtt+0x7c/0xb0 [hns_roce_hw_v2] mtr_map_bufs+0xc4/0x204 [hns_roce_hw_v2] hns_roce_mtr_create+0x31c/0x3c4 [hns_roce_hw_v2] alloc_mr_pbl+0xb0/0x160 [hns_roce_hw_v2] hns_roce_reg_user_mr+0x108/0x1c0 [hns_roce_hw_v2] ib_uverbs_reg_mr+0x120/0x2bc Add a cond_resched() to fix soft lockup during these loops. In order not to affect the allocation performance of normal-size buffer, set the loop count of a 100GB MR as the threshold to call cond_resched(). Fixes: 38389eaa4db1 ("RDMA/hns: Add mtr support for mixed multihop addressing") Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20250311084857.3803665-3-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky Signed-off-by: Xinghai Cen --- drivers/infiniband/hw/hns/hns_roce_hem.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 26e74fc8d6eb..22f94889f03c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -1362,6 +1362,11 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, return ret; } +/* This is the bottom bt pages number of a 100G MR on 4K OS, assuming + * the bt page size is not expanded by cal_best_bt_pg_sz() + */ +#define RESCHED_LOOP_CNT_THRESHOLD_ON_4K 12800 + /* construct the base address table and link them by address hop config */ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list, @@ -1370,6 +1375,7 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, { const struct hns_roce_buf_region *r; int ofs, end; + int loop; int unit; int ret; int i; @@ -1387,7 +1393,10 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, continue; end = r->offset + r->count; - for (ofs = r->offset; ofs < end; ofs += unit) { + for (ofs = r->offset, loop = 1; ofs < end; ofs += unit, loop++) { + if (!(loop % RESCHED_LOOP_CNT_THRESHOLD_ON_4K)) + cond_resched(); + ret = hem_list_alloc_mid_bt(hr_dev, r, unit, ofs, hem_list->mid_bt[i], &hem_list->btm_bt); @@ -1444,9 +1453,14 @@ void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev, struct list_head *head = &hem_list->btm_bt; struct hns_roce_hem_item *hem, *temp_hem; void *cpu_base = NULL; + int loop = 1; int nr = 0; list_for_each_entry_safe(hem, temp_hem, head, sibling) { + if (!(loop % RESCHED_LOOP_CNT_THRESHOLD_ON_4K)) + cond_resched(); + loop++; + if (hem_list_page_is_in_range(hem, offset)) { nr = offset - hem->start; cpu_base = hem->addr + nr * BA_BYTE_LEN; -- Gitee From 7ca6a3a75fdbd636e21443a3d4d16597ba624af6 Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Fri, 21 Mar 2025 11:45:51 +0800 Subject: [PATCH 2/6] RDMA/hns: Fix unmatched condition in error path of alloc_user_qp_db() maillist inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IBV0G0 CVE: NA Reference: https://web.git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git/commit/?h=wip/leon-for-rc&id=b9f59a24ba35a7d955a9f8e148dd9f85b7b40a01 ---------------------------------------------------------------------- Currently the condition of unmapping sdb in error path is not exactly the same as the condition of mapping in alloc_user_qp_db(). This may cause a problem of unmapping an unmapped db in some case, such as when the QP is XRC TGT. Unified the two conditions. Fixes: 90ae0b57e4a5 ("RDMA/hns: Combine enable flags of qp") Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20250311084857.3803665-4-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky Signed-off-by: Xinghai Cen --- drivers/infiniband/hw/hns/hns_roce_qp.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 9a3ac810be44..fbfbde36e2e4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -1017,12 +1017,14 @@ static int alloc_user_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_ib_create_qp *ucmd, struct hns_roce_ib_create_qp_resp *resp) { + bool has_sdb = user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd); struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata, struct hns_roce_ucontext, ibucontext); + bool has_rdb = user_qp_has_rdb(hr_dev, init_attr, udata, resp); struct ib_device *ibdev = &hr_dev->ib_dev; int ret; - if (user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd)) { + if (has_sdb) { ret = hns_roce_db_map_user(uctx, ucmd->sdb_addr, &hr_qp->sdb); if (ret) { ibdev_err(ibdev, @@ -1033,7 +1035,7 @@ static int alloc_user_qp_db(struct hns_roce_dev *hr_dev, hr_qp->en_flags |= HNS_ROCE_QP_CAP_SQ_RECORD_DB; } - if (user_qp_has_rdb(hr_dev, init_attr, udata, resp)) { + if (has_rdb) { ret = hns_roce_db_map_user(uctx, ucmd->db_addr, &hr_qp->rdb); if (ret) { ibdev_err(ibdev, @@ -1047,7 +1049,7 @@ static int alloc_user_qp_db(struct hns_roce_dev *hr_dev, return 0; err_sdb: - if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB) + if (has_sdb) hns_roce_db_unmap_user(uctx, &hr_qp->sdb, false); err_out: return ret; -- Gitee From 403f2fb17868c06d688baffeaea2f9d4eeebf281 Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Fri, 21 Mar 2025 11:45:52 +0800 Subject: [PATCH 3/6] RDMA/hns: Fix invalid sq params not being blocked maillist inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IBV0G0 CVE: NA Reference: https://web.git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git/commit/?h=wip/leon-for-rc&id=13c90c222049764bb7e6a1689bd785f424bd8bd5 ---------------------------------------------------------------------- SQ params from userspace are checked in by set_user_sq_size(). But when the check fails, the function doesn't return but instead keep running and overwrite 'ret'. As a result, the invalid params will not get blocked actually. Add a return right after the failed check. Besides, although the check result of kernel sq params will not be overwritten, to keep coding style unified, move default_congest_type() before set_kernel_sq_size(). Fixes: 2dc6891c562b ("RDMA/hns: Support userspace configuring congestion control algorithm with QP granularity") Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20250311084857.3803665-5-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky Signed-off-by: Xinghai Cen --- drivers/infiniband/hw/hns/hns_roce_qp.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index fbfbde36e2e4..9e40e2f1d140 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -1272,24 +1272,23 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, ibucontext); hr_qp->config = uctx->config; ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd); - if (ret) + if (ret) { ibdev_err(ibdev, "failed to set user SQ size, ret = %d.\n", ret); + return ret; + } ret = set_congest_param(hr_dev, hr_qp, ucmd); - if (ret) - return ret; } else { if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) hr_qp->config = HNS_ROCE_EXSGE_FLAGS; + default_congest_type(hr_dev, hr_qp); ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp); if (ret) ibdev_err(ibdev, "failed to set kernel SQ size, ret = %d.\n", ret); - - default_congest_type(hr_dev, hr_qp); } return ret; -- Gitee From 2112ebfb01bb64fb55f13a96c3dfef5ae27e65d1 Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Fri, 21 Mar 2025 11:45:53 +0800 Subject: [PATCH 4/6] RDMA/hns: Fix a missing rollback in error path of hns_roce_create_qp_common() maillist inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IBV0G0 CVE: NA Reference: https://web.git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git/commit/?h=wip/leon-for-rc&id=444907dd45cbe62fd69398805b6e2c626fab5b3a ---------------------------------------------------------------------- When ib_copy_to_udata() fails in hns_roce_create_qp_common(), hns_roce_qp_remove() should be called in the error path to clean up resources in hns_roce_qp_store(). Fixes: 0f00571f9433 ("RDMA/hns: Use new SQ doorbell register for HIP09") Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20250311084857.3803665-6-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky Signed-off-by: Xinghai Cen --- drivers/infiniband/hw/hns/hns_roce_qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 9e40e2f1d140..715ecf5c709b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -1368,7 +1368,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, min(udata->outlen, sizeof(resp))); if (ret) { ibdev_err(ibdev, "copy qp resp failed!\n"); - goto err_store; + goto err_flow_ctrl; } } -- Gitee From ecbb99fa2fcf50c6002dd011c0b746352656f2da Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Fri, 21 Mar 2025 11:45:54 +0800 Subject: [PATCH 5/6] RDMA/hns: Fix wrong value of max_sge_rd maillist inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IBV0G0 CVE: NA Reference: https://web.git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git/commit/?h=wip/leon-for-rc&id=6b5e41a8b51fce520bb09bd651a29ef495e990de ---------------------------------------------------------------------- There is no difference between the sge of READ and non-READ operations in hns RoCE. Set max_sge_rd to the same value as max_send_sge. Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver") Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20250311084857.3803665-8-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky Signed-off-by: Xinghai Cen --- drivers/infiniband/hw/hns/hns_roce_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index f211b2ebed28..8f110e64e601 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -239,7 +239,7 @@ static int hns_roce_query_device(struct ib_device *ib_dev, IB_DEVICE_RC_RNR_NAK_GEN; props->max_send_sge = hr_dev->caps.max_sq_sg; props->max_recv_sge = hr_dev->caps.max_rq_sg; - props->max_sge_rd = 1; + props->max_sge_rd = hr_dev->caps.max_sq_sg; props->max_cq = hr_dev->caps.num_cqs; props->max_cqe = hr_dev->caps.max_cqes; props->max_mr = hr_dev->caps.num_mtpts; -- Gitee From f78cf185e79fd2dc64a361505cf5604a12707d7e Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Fri, 21 Mar 2025 11:45:55 +0800 Subject: [PATCH 6/6] RDMA/hns: Fix missing xa_destroy() maillist inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IBV0G0 CVE: NA Reference: https://web.git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git/commit/?h=wip/leon-for-rc&id=eda0a2fdbc24c35cd8d61d9c9111cafd5f89b2dc ---------------------------------------------------------------------- Add xa_destroy() for xarray in driver. Fixes: 5c1f167af112 ("RDMA/hns: Init SRQ table for hip08") Fixes: 27e19f451089 ("RDMA/hns: Convert cq_table to XArray") Fixes: 736b5a70db98 ("RDMA/hns: Convert qp_table_tree to XArray") Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20250311084857.3803665-7-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky Signed-off-by: Xinghai Cen --- drivers/infiniband/hw/hns/hns_roce_alloc.c | 4 +++- drivers/infiniband/hw/hns/hns_roce_cq.c | 1 + drivers/infiniband/hw/hns/hns_roce_qp.c | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index 950c133d4220..6ee911f6885b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -175,8 +175,10 @@ void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev) if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC) ida_destroy(&hr_dev->xrcd_ida.ida); - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { ida_destroy(&hr_dev->srq_table.srq_ida.ida); + xa_destroy(&hr_dev->srq_table.xa); + } hns_roce_cleanup_qp_table(hr_dev); hns_roce_cleanup_cq_table(hr_dev); ida_destroy(&hr_dev->mr_table.mtpt_ida.ida); diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index c717c00c58e1..59f5abd868e5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -616,5 +616,6 @@ void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev) for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++) ida_destroy(&hr_dev->cq_table.bank[i].ida); + xa_destroy(&hr_dev->cq_table.array); mutex_destroy(&hr_dev->cq_table.bank_mutex); } diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 715ecf5c709b..e0ba0ab891dd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -1781,6 +1781,7 @@ void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev) for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) ida_destroy(&hr_dev->qp_table.bank[i].ida); xa_destroy(&hr_dev->qp_table.dip_xa); + xa_destroy(&hr_dev->qp_table_xa); mutex_destroy(&hr_dev->qp_table.bank_mutex); mutex_destroy(&hr_dev->qp_table.scc_mutex); } -- Gitee