From 271aee7434270553ef65c41cfaf6ee3edbd85abf Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Wed, 20 Nov 2024 21:07:49 +0800 Subject: [PATCH 01/13] RDMA/hns: Fix HW UAF when destroy context timeout driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB30V8 ---------------------------------------------------------------------- If mbox timeout during destroying some resource, the HW may still access the related resource which caused a UAF. To fix it, if resource destruction fails, the resource will be retained until driver uninit. Fixes: 04c5d76e4f15 ("RDMA/hns: Fix simultaneous reset and resource deregistration") Signed-off-by: Chengchang Tang Signed-off-by: Xinghai Cen --- drivers/infiniband/hw/hns/hns_roce_cq.c | 7 +++---- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 7 +++---- drivers/infiniband/hw/hns/hns_roce_mr.c | 6 +++--- drivers/infiniband/hw/hns/hns_roce_srq.c | 6 +++--- 4 files changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 6bb5f4b6c7c2..dd24f2d991ee 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -178,12 +178,11 @@ static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_CQC, hr_cq->cqn); - if (ret) + if (ret) { + hr_cq->delayed_destroy_flag = true; dev_err_ratelimited(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n", ret, hr_cq->cqn); - - if (ret == -EBUSY) - hr_cq->delayed_destroy_flag = true; + } xa_erase(&cq_table->array, hr_cq->cqn); xa_erase_irq(&cq_table->array, hr_cq->cqn); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 3f254ac48b42..75c6aacf724c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -5974,10 +5974,12 @@ static int hns_roce_v2_destroy_qp_common(struct 
hns_roce_dev *hr_dev, /* Modify qp to reset before destroying qp */ ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, IB_QPS_RESET, udata); - if (ret) + if (ret) { + hr_qp->delayed_destroy_flag = true; ibdev_err_ratelimited(ibdev, "failed to modify QP to RST, ret = %d.\n", ret); + } } send_cq = hr_qp->ibqp.send_cq ? to_hr_cq(hr_qp->ibqp.send_cq) : NULL; @@ -6024,9 +6026,6 @@ int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) "failed to destroy QP, QPN = 0x%06lx, ret = %d.\n", hr_qp->qpn, ret); - if (ret == -EBUSY) - hr_qp->delayed_destroy_flag = true; - hns_roce_qp_destroy(hr_dev, hr_qp, udata); return 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 1f29377b52b8..5c4b6c4f4ca7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -153,11 +153,11 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_MPT, key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1)); - if (ret) + if (ret) { + mr->delayed_destroy_flag = true; ibdev_warn_ratelimited(ibdev, "failed to destroy mpt, ret = %d.\n", ret); - if (ret == -EBUSY) - mr->delayed_destroy_flag = true; + } } free_mr_pbl(hr_dev, mr); diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 0a8e71431246..0ab99aa9f9d5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -149,12 +149,12 @@ static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_SRQ, srq->srqn); - if (ret) + if (ret) { + srq->delayed_destroy_flag = true; dev_err_ratelimited(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n", ret, srq->srqn); - if (ret == -EBUSY) - srq->delayed_destroy_flag = true; + } xa_erase_irq(&srq_table->xa, srq->srqn); -- Gitee From 
5a71b75e363e02cef53ad2987ca7fcba058baeb4 Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Wed, 20 Nov 2024 21:07:50 +0800 Subject: [PATCH 02/13] RDMA/hns: Fix wrong output of sysfs scc param when configuration failed driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB30V8 ---------------------------------------------------------------------- param[] in struct hns_roce_scc_param is used to both store and show scc params. But when the configuration to HW fails, the params stored in this array will become different from the ones in HW. Add a member latest_param[] to struct hns_roce_scc_param to store the latest configured value of scc params. It will be modified only after the configuration has succeeded to ensure the shown result from sysfs is always the correct param in HW even if the previous configuration failed. The original member param[] is only used to store the temporary value of sysfs input now. Fixes: 41da9cd8456d ("RDMA/hns: Support congestion control algorithm parameter configuration") Signed-off-by: Junxian Huang Signed-off-by: Xinghai Cen --- drivers/infiniband/hw/hns/hns_roce_device.h | 1 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 13 ++++++++++--- drivers/infiniband/hw/hns/hns_roce_sysfs.c | 6 +++++- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index fdc1fe5e6a81..a7c965d9ce20 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1081,6 +1081,7 @@ struct hns_roce_scc_param { enum hns_roce_scc_algo algo_type; struct delayed_work scc_cfg_dwork; struct hns_roce_dev *hr_dev; + __le32 latest_param[HNS_ROCE_SCC_PARAM_SIZE]; }; struct hns_roce_dev { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 75c6aacf724c..2db22c418866 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ 
b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -7261,11 +7261,16 @@ static int hns_roce_v2_config_scc_param(struct hns_roce_dev *hr_dev, memcpy(&desc.data, scc_param, sizeof(scc_param->param)); ret = hns_roce_cmq_send(hr_dev, &desc, 1); - if (ret) + if (ret) { ibdev_err_ratelimited(&hr_dev->ib_dev, "failed to configure scc param, opcode: 0x%x, ret = %d.\n", le16_to_cpu(desc.opcode), ret); - return ret; + return ret; + } + + memcpy(scc_param->latest_param, &desc.data, + sizeof(scc_param->latest_param)); + return 0; } static int hns_roce_v2_query_scc_param(struct hns_roce_dev *hr_dev, @@ -7293,7 +7298,9 @@ static int hns_roce_v2_query_scc_param(struct hns_roce_dev *hr_dev, } scc_param = &hr_dev->scc_param[algo]; - memcpy(scc_param, &desc.data, sizeof(scc_param->param)); + memcpy(scc_param->param, &desc.data, sizeof(scc_param->param)); + memcpy(scc_param->latest_param, &desc.data, + sizeof(scc_param->latest_param)); return 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_sysfs.c b/drivers/infiniband/hw/hns/hns_roce_sysfs.c index d36f05ac5f1e..4126a744f539 100644 --- a/drivers/infiniband/hw/hns/hns_roce_sysfs.c +++ b/drivers/infiniband/hw/hns/hns_roce_sysfs.c @@ -110,7 +110,11 @@ static ssize_t scc_attr_show(struct ib_device *ibdev, u32 port_num, scc_param = &hr_dev->scc_param[scc_attr->algo_type]; - memcpy(&val, (void *)scc_param + scc_attr->offset, scc_attr->size); + if (scc_attr->offset == offsetof(typeof(*scc_param), lifespan)) + val = scc_param->lifespan; + else + memcpy(&val, (void *)scc_param->latest_param + scc_attr->offset, + scc_attr->size); return sysfs_emit(buf, "%u\n", le32_to_cpu(val)); } -- Gitee From 3d57d0f4604a4d94d452c0fc39ef4bbdfdc326fb Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Wed, 20 Nov 2024 21:07:51 +0800 Subject: [PATCH 03/13] RDMA/hns: Fix concurrency between sysfs store and FW configuration of scc params driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB30V8 
---------------------------------------------------------------------- The FW configuration of scc param is delayed with a workqueue. This may lead to scc params being modified by sysfs store callback while they're being configured to FW. Use a mutex to solve this. Fixes: 41da9cd8456d ("RDMA/hns: Support congestion control algorithm parameter configuration") Signed-off-by: Junxian Huang Signed-off-by: Xinghai Cen --- drivers/infiniband/hw/hns/hns_roce_device.h | 1 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 6 ++++++ drivers/infiniband/hw/hns/hns_roce_sysfs.c | 9 ++++++++- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index a7c965d9ce20..1f8ec175d424 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1082,6 +1082,7 @@ struct hns_roce_scc_param { struct delayed_work scc_cfg_dwork; struct hns_roce_dev *hr_dev; __le32 latest_param[HNS_ROCE_SCC_PARAM_SIZE]; + struct mutex scc_mutex; /* protect @param and @lastest_param */ }; struct hns_roce_dev { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 2db22c418866..4f595e6a4443 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -7258,6 +7258,7 @@ static int hns_roce_v2_config_scc_param(struct hns_roce_dev *hr_dev, hns_roce_cmq_setup_basic_desc(&desc, scc_opcode[algo], false); scc_param = &hr_dev->scc_param[algo]; + mutex_lock(&scc_param->scc_mutex); memcpy(&desc.data, scc_param, sizeof(scc_param->param)); ret = hns_roce_cmq_send(hr_dev, &desc, 1); @@ -7265,11 +7266,14 @@ static int hns_roce_v2_config_scc_param(struct hns_roce_dev *hr_dev, ibdev_err_ratelimited(&hr_dev->ib_dev, "failed to configure scc param, opcode: 0x%x, ret = %d.\n", le16_to_cpu(desc.opcode), ret); + mutex_unlock(&scc_param->scc_mutex); return ret; } 
memcpy(scc_param->latest_param, &desc.data, sizeof(scc_param->latest_param)); + mutex_unlock(&scc_param->scc_mutex); + return 0; } @@ -7298,9 +7302,11 @@ static int hns_roce_v2_query_scc_param(struct hns_roce_dev *hr_dev, } scc_param = &hr_dev->scc_param[algo]; + mutex_lock(&scc_param->scc_mutex); memcpy(scc_param->param, &desc.data, sizeof(scc_param->param)); memcpy(scc_param->latest_param, &desc.data, sizeof(scc_param->latest_param)); + mutex_unlock(&scc_param->scc_mutex); return 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_sysfs.c b/drivers/infiniband/hw/hns/hns_roce_sysfs.c index 4126a744f539..3a8a98097042 100644 --- a/drivers/infiniband/hw/hns/hns_roce_sysfs.c +++ b/drivers/infiniband/hw/hns/hns_roce_sysfs.c @@ -46,6 +46,7 @@ int hns_roce_alloc_scc_param(struct hns_roce_dev *hr_dev) for (i = 0; i < HNS_ROCE_SCC_ALGO_TOTAL; i++) { scc_param[i].algo_type = i; scc_param[i].hr_dev = hr_dev; + mutex_init(&scc_param[i].scc_mutex); INIT_DELAYED_WORK(&scc_param[i].scc_cfg_dwork, scc_param_config_work); } @@ -63,8 +64,10 @@ void hns_roce_dealloc_scc_param(struct hns_roce_dev *hr_dev) if (!hr_dev->scc_param) return; - for (i = 0; i < HNS_ROCE_SCC_ALGO_TOTAL; i++) + for (i = 0; i < HNS_ROCE_SCC_ALGO_TOTAL; i++) { cancel_delayed_work_sync(&hr_dev->scc_param[i].scc_cfg_dwork); + mutex_destroy(&hr_dev->scc_param[i].scc_mutex); + } kvfree(hr_dev->scc_param); hr_dev->scc_param = NULL; @@ -110,11 +113,13 @@ static ssize_t scc_attr_show(struct ib_device *ibdev, u32 port_num, scc_param = &hr_dev->scc_param[scc_attr->algo_type]; + mutex_lock(&scc_param->scc_mutex); if (scc_attr->offset == offsetof(typeof(*scc_param), lifespan)) val = scc_param->lifespan; else memcpy(&val, (void *)scc_param->latest_param + scc_attr->offset, scc_attr->size); + mutex_unlock(&scc_param->scc_mutex); return sysfs_emit(buf, "%u\n", le32_to_cpu(val)); } @@ -145,8 +150,10 @@ static ssize_t scc_attr_store(struct ib_device *ibdev, u32 port_num, attr_val = cpu_to_le32(val); scc_param = 
&hr_dev->scc_param[scc_attr->algo_type]; + mutex_lock(&scc_param->scc_mutex); memcpy((void *)scc_param + scc_attr->offset, &attr_val, scc_attr->size); + mutex_unlock(&scc_param->scc_mutex); /* lifespan is only used for driver */ if (scc_attr->offset >= offsetof(typeof(*scc_param), lifespan)) -- Gitee From c78aee253fff8279cf1930b1a90dc3ae5afcdf94 Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Wed, 20 Nov 2024 21:07:52 +0800 Subject: [PATCH 04/13] RDMA/hns: Fix mixed use of u32 and __le32 in sysfs driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB30V8 ---------------------------------------------------------------------- lifespan is u32 but is operated with an __le32 in memcpy(). Change it to __le32 and add le32_to_cpu() where needed. Fixes: 41da9cd8456d ("RDMA/hns: Support congestion control algorithm parameter configuration") Signed-off-by: Junxian Huang Signed-off-by: Xinghai Cen --- drivers/infiniband/hw/hns/hns_roce_device.h | 2 +- drivers/infiniband/hw/hns/hns_roce_sysfs.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 1f8ec175d424..e22e485d7cdf 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1076,7 +1076,7 @@ struct hns_roce_hw { #define HNS_ROCE_SCC_PARAM_SIZE 4 struct hns_roce_scc_param { __le32 param[HNS_ROCE_SCC_PARAM_SIZE]; - u32 lifespan; + __le32 lifespan; unsigned long timestamp; enum hns_roce_scc_algo algo_type; struct delayed_work scc_cfg_dwork; diff --git a/drivers/infiniband/hw/hns/hns_roce_sysfs.c b/drivers/infiniband/hw/hns/hns_roce_sysfs.c index 3a8a98097042..e8161ea0001f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_sysfs.c +++ b/drivers/infiniband/hw/hns/hns_roce_sysfs.c @@ -159,7 +159,7 @@ static ssize_t scc_attr_store(struct ib_device *ibdev, u32 port_num, if (scc_attr->offset >= offsetof(typeof(*scc_param), lifespan)) 
return count; - lifespan_jiffies = msecs_to_jiffies(scc_param->lifespan); + lifespan_jiffies = msecs_to_jiffies(le32_to_cpu(scc_param->lifespan)); exp_time = scc_param->timestamp + lifespan_jiffies; if (time_is_before_eq_jiffies(exp_time)) { -- Gitee From c66cd73ad8d08ca2e14f52e3e4f8a3152bc91ebe Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Wed, 20 Nov 2024 21:07:53 +0800 Subject: [PATCH 05/13] RDMA/hns: Fix integer overflow in calc_loading_percent() driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB30V8 ---------------------------------------------------------------------- For calc_loading_percent(), if the values of two types of u32 are multiplied, the result can be an integer overflow. To fix it, convert all variable to u64. Since total and free are both size_t, alloc_pages and free_pages may overflow. In addition, because there is multiplication in the calculation of percent, it may also cause overflow of u32. In this patch all relevant variables are converted to u64. This patch also adds corresponding processing for the exception of calc_loading_percent() to avoid printing a wrong result. 
Fixes: 640cb0880216 ("RDMA/hns: Add debugfs support for DCA") Signed-off-by: Yuyu Li Signed-off-by: Chengchang Tang Signed-off-by: Xinghai Cen --- drivers/infiniband/hw/hns/hns_roce_debugfs.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_debugfs.c b/drivers/infiniband/hw/hns/hns_roce_debugfs.c index 3c2d7096fe13..7023c3cefaa7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_debugfs.c +++ b/drivers/infiniband/hw/hns/hns_roce_debugfs.c @@ -187,8 +187,8 @@ static void dca_setup_pool_name(pid_t pid, bool is_kdca, char *name, int size) static u64 calc_loading_percent(size_t total, size_t free, u32 *out_rem) { - u32 all_pages, used_pages, free_pages, scale; - u64 percent = 0; + u64 used_pages, scale, all_pages, free_pages; + u64 percent = U64_MAX; u32 rem = 0; all_pages = total >> HNS_HW_PAGE_SHIFT; @@ -214,6 +214,9 @@ static void dca_print_pool_stats(struct hns_roce_dca_ctx *ctx, pid_t pid, u32 rem = 0; percent = calc_loading_percent(ctx->total_size, ctx->free_size, &rem); + if (percent == U64_MAX) + return; + dca_setup_pool_name(pid, is_kdca, name, sizeof(name)); seq_printf(file, "%-10s %-16ld %-16ld %-16u %llu.%0*u\n", name, ctx->total_size / KB, ctx->free_size / KB, ctx->free_mems, @@ -366,6 +369,9 @@ static void dca_stats_ctx_mem_in_seqfile(struct hns_roce_dca_ctx *ctx, dca_ctx_stats_mem(ctx, &stats); percent = calc_loading_percent(stats.total_size, stats.free_size, &rem); + if (percent == U64_MAX) + return; + seq_printf(file, DCA_STAT_NAME_FMT "%llu.%0*u\n", "Loading:", percent, LOADING_PERCENT_SHIFT, rem); dca_ctx_print_mem_kb(file, "Total:", stats.total_size); -- Gitee From e519f15b907a17a7021860a65f49d8e677fb4e30 Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Wed, 20 Nov 2024 21:07:54 +0800 Subject: [PATCH 06/13] RDMA/hns: Fix possible RAS when DCA is not attached driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB30V8 
---------------------------------------------------------------------- RAS may occur if the DCA buffer is not attached and the DB is knocked out. This patch adds a safe page for DCA, which will be attached to QP if no DCA buffer is attached to avoid the HW accessing illegal addresses. Fixes: 10bb3b802412 ("RDMA/hns: Add method for attaching WQE buffer") Signed-off-by: Chengchang Tang Signed-off-by: Xinghai Cen --- drivers/infiniband/hw/hns/hns_roce_dca.c | 56 ++++++++++++++++++++- drivers/infiniband/hw/hns/hns_roce_dca.h | 2 + drivers/infiniband/hw/hns/hns_roce_device.h | 3 ++ drivers/infiniband/hw/hns/hns_roce_main.c | 13 +++++ drivers/infiniband/hw/hns/hns_roce_qp.c | 23 +++++++++ 5 files changed, 95 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.c b/drivers/infiniband/hw/hns/hns_roce_dca.c index 4cef41591795..f435f6f5d8a3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.c +++ b/drivers/infiniband/hw/hns/hns_roce_dca.c @@ -309,6 +309,33 @@ hr_qp_to_dca_ctx(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) return to_hr_dca_ctx(hr_dev, uctx); } +int hns_roce_map_dca_safe_page(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp) +{ + unsigned int page_count = hr_qp->dca_cfg.npages; + struct ib_device *ibdev = &hr_dev->ib_dev; + dma_addr_t *pages; + unsigned int i; + int ret; + + pages = kvcalloc(page_count, sizeof(dma_addr_t), GFP_KERNEL); + if (IS_ERR_OR_NULL(pages)) { + ibdev_err(ibdev, "failed to alloc DCA safe page array.\n"); + return -ENOMEM; + } + + for (i = 0; i < page_count; i++) + pages[i] = hr_dev->dca_safe_page; + + ret = hns_roce_mtr_map(hr_dev, &hr_qp->mtr, pages, page_count); + if (ret) + ibdev_err(ibdev, "failed to map safe page for DCA, ret = %d.\n", + ret); + + kvfree(pages); + return ret; +} + static int config_dca_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, dma_addr_t *pages, int page_count) @@ -335,6 +362,29 @@ static int config_dca_qpc(struct hns_roce_dev *hr_dev, return 0; } 
+static int config_dca_qpc_to_safe_page(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp) +{ + unsigned int page_count = hr_qp->dca_cfg.npages; + dma_addr_t *pages; + unsigned int i; + int ret; + + might_sleep(); + + pages = kvcalloc(page_count, sizeof(dma_addr_t), GFP_KERNEL); + if (IS_ERR_OR_NULL(pages)) + return -ENOMEM; + + for (i = 0; i < page_count; i++) + pages[i] = hr_dev->dca_safe_page; + + ret = config_dca_qpc(hr_dev, hr_qp, pages, page_count); + + kvfree(pages); + return ret; +} + static int setup_dca_buf_to_hw(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct hns_roce_dca_ctx *ctx, u32 buf_id, @@ -980,8 +1030,10 @@ static void process_aging_dca_mem(struct hns_roce_dev *hr_dev, spin_unlock(&ctx->aging_lock); if (start_free_dca_buf(ctx, cfg->dcan)) { - if (hr_dev->hw->chk_dca_buf_inactive(hr_dev, hr_qp)) - free_buf_from_dca_mem(ctx, cfg); + if (hr_dev->hw->chk_dca_buf_inactive(hr_dev, hr_qp)) { + if (!config_dca_qpc_to_safe_page(hr_dev, hr_qp)) + free_buf_from_dca_mem(ctx, cfg); + } stop_free_dca_buf(ctx, cfg->dcan); } diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.h b/drivers/infiniband/hw/hns/hns_roce_dca.h index 7733887ce5e1..36f03f5357d7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.h +++ b/drivers/infiniband/hw/hns/hns_roce_dca.h @@ -75,4 +75,6 @@ void hns_roce_modify_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, void hns_roce_enum_dca_pool(struct hns_roce_dca_ctx *dca_ctx, void *param, hns_dca_enum_callback cb); +int hns_roce_map_dca_safe_page(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp); #endif diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index e22e485d7cdf..1a42d7e726b4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1163,6 +1163,9 @@ struct hns_roce_dev { struct mutex mtr_unfree_list_mutex; /* protect mtr_unfree_list */ struct list_head umem_unfree_list; /* list of 
unfree umem on this dev */ struct mutex umem_unfree_list_mutex; /* protect umem_unfree_list */ + + void *dca_safe_buf; + dma_addr_t dca_safe_page; }; static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index d488c3d5986f..c88807bdfda0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -1370,6 +1370,17 @@ static void hns_roce_dealloc_dfx_cnt(struct hns_roce_dev *hr_dev) kvfree(hr_dev->dfx_cnt); } +static void hns_roce_free_dca_safe_buf(struct hns_roce_dev *hr_dev) +{ + if (!hr_dev->dca_safe_buf) + return; + + dma_free_coherent(hr_dev->dev, PAGE_SIZE, hr_dev->dca_safe_buf, + hr_dev->dca_safe_page); + hr_dev->dca_safe_page = 0; + hr_dev->dca_safe_buf = NULL; +} + int hns_roce_init(struct hns_roce_dev *hr_dev) { struct device *dev = hr_dev->dev; @@ -1483,6 +1494,8 @@ void hns_roce_exit(struct hns_roce_dev *hr_dev, bool bond_cleanup) hns_roce_dealloc_scc_param(hr_dev); hns_roce_unregister_debugfs(hr_dev); + hns_roce_free_dca_safe_buf(hr_dev); + if (hr_dev->hw->hw_exit) hr_dev->hw->hw_exit(hr_dev); hns_roce_free_unfree_umem(hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 5ed2647567aa..926543cfa40b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -844,6 +844,8 @@ static int alloc_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, hns_roce_disable_dca(hr_dev, hr_qp, udata); kvfree(hr_qp->mtr_node); hr_qp->mtr_node = NULL; + } else if (dca_en) { + ret = hns_roce_map_dca_safe_page(hr_dev, hr_qp); } return ret; @@ -864,6 +866,21 @@ static void free_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, hns_roce_disable_dca(hr_dev, hr_qp, udata); } +static int alloc_dca_safe_page(struct hns_roce_dev *hr_dev) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + + 
hr_dev->dca_safe_buf = dma_alloc_coherent(hr_dev->dev, PAGE_SIZE, + &hr_dev->dca_safe_page, + GFP_KERNEL); + if (!hr_dev->dca_safe_buf) { + ibdev_err(ibdev, "failed to alloc dca safe page.\n"); + return -ENOMEM; + } + + return 0; +} + static int alloc_qp_wqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, @@ -882,6 +899,12 @@ static int alloc_qp_wqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, dca_en = check_dca_is_enable(hr_dev, hr_qp, init_attr, !!udata, ucmd->buf_addr); + if (dca_en && !hr_dev->dca_safe_buf) { + ret = alloc_dca_safe_page(hr_dev); + if (ret) + return ret; + } + ret = set_wqe_buf_attr(hr_dev, hr_qp, dca_en, page_shift, &buf_attr); if (ret) { ibdev_err(ibdev, "failed to split WQE buf, ret = %d.\n", ret); -- Gitee From bf8042f8fadf6863603d3dee77f6125dff28bca0 Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Wed, 20 Nov 2024 21:07:55 +0800 Subject: [PATCH 07/13] RDMA/hns: Fix a meaningless loop in active_dca_pages_proc() driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB30V8 ---------------------------------------------------------------------- The iterated element does not change, making the loop in active_dca_pages_proc() meaningless. 
Fixes: ef35d79d91ed ("RDMA/hns: Add DCA support for kernel space") Signed-off-by: Chengchang Tang Signed-off-by: Xinghai Cen --- drivers/infiniband/hw/hns/hns_roce_dca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.c b/drivers/infiniband/hw/hns/hns_roce_dca.c index f435f6f5d8a3..e76a7e6e8ad4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.c +++ b/drivers/infiniband/hw/hns/hns_roce_dca.c @@ -617,7 +617,7 @@ static int active_dca_pages_proc(struct dca_mem *mem, int index, void *param) } for (; changed && i < mem->page_count; i++) - if (dca_page_is_free(state)) + if (dca_page_is_free(&mem->states[i])) free_pages++; /* Clean mem changed to dirty */ -- Gitee From 053bf134f23344c38a511eddeb1ff5c3fb098952 Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Wed, 20 Nov 2024 21:07:56 +0800 Subject: [PATCH 08/13] RDMA/hns: Fix list_*_careful() not being used in pairs driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB30V8 ---------------------------------------------------------------------- list_del_init_careful() is designed to be used together with list_empty_careful(). 
Fixes: 10bb3b802412 ("RDMA/hns: Add method for attaching WQE buffer") Signed-off-by: Chengchang Tang Signed-off-by: Xinghai Cen --- drivers/infiniband/hw/hns/hns_roce_dca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.c b/drivers/infiniband/hw/hns/hns_roce_dca.c index e76a7e6e8ad4..eb408130329b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.c +++ b/drivers/infiniband/hw/hns/hns_roce_dca.c @@ -1022,7 +1022,7 @@ static void process_aging_dca_mem(struct hns_roce_dev *hr_dev, list_for_each_entry_safe(cfg, tmp_cfg, &ctx->aging_new_list, aging_node) list_move(&cfg->aging_node, &ctx->aging_proc_list); - while (!ctx->exit_aging && !list_empty(&ctx->aging_proc_list)) { + while (!ctx->exit_aging && !list_empty_careful(&ctx->aging_proc_list)) { cfg = list_first_entry(&ctx->aging_proc_list, struct hns_roce_dca_cfg, aging_node); list_del_init_careful(&cfg->aging_node); -- Gitee From c6d6af3dcca36bb55f12800d0837686207e7fc8f Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Wed, 20 Nov 2024 21:07:57 +0800 Subject: [PATCH 09/13] RDMA/hns: Fix dereference of noderef expression driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB30V8 ---------------------------------------------------------------------- Fix sparse warnings: dereference of noderef expression. This is because curr_active_slave is defined as: struct bonding { ... struct slave __rcu *curr_active_slave; ... }; __rcu contains __attribute__((noderef)) inside, which disallows callers from dereferencing it directly.
Fixes: 2004b3f9092a ("RDMA/hns: Support RoCE bonding") Signed-off-by: Junxian Huang Signed-off-by: Xinghai Cen --- drivers/infiniband/hw/hns/hns_roce_bond.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.c b/drivers/infiniband/hw/hns/hns_roce_bond.c index 0fc026eb40e8..7adae8990acd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_bond.c +++ b/drivers/infiniband/hw/hns/hns_roce_bond.c @@ -93,10 +93,16 @@ bool hns_roce_bond_is_active(struct hns_roce_dev *hr_dev) static inline bool is_active_slave(struct net_device *net_dev, struct hns_roce_bond_group *bond_grp) { + struct net_device *slave_dev; + if (!bond_grp || !bond_grp->bond || !bond_grp->bond->curr_active_slave) return false; - return net_dev == bond_grp->bond->curr_active_slave->dev; + rcu_read_lock(); + slave_dev = bond_option_active_slave_get_rcu(bond_grp->bond); + rcu_read_unlock(); + + return net_dev == slave_dev; } struct net_device *hns_roce_get_bond_netdev(struct hns_roce_dev *hr_dev) -- Gitee From 8e668a71838e9daa29cc00ffcb929caee5e94ed8 Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Wed, 20 Nov 2024 21:07:58 +0800 Subject: [PATCH 10/13] RDMA/hns: Fix "Should it be static?" warnings driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB30V8 ---------------------------------------------------------------------- Fix sparse warnings: Should it be static? 
Fixes: 41da9cd8456d ("RDMA/hns: Support congestion control algorithm parameter configuration") Signed-off-by: Junxian Huang Signed-off-by: Xinghai Cen --- drivers/infiniband/hw/hns/hns_roce_sysfs.c | 24 +++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_sysfs.c b/drivers/infiniband/hw/hns/hns_roce_sysfs.c index e8161ea0001f..0ccc75ccb434 100644 --- a/drivers/infiniband/hw/hns/hns_roce_sysfs.c +++ b/drivers/infiniband/hw/hns/hns_roce_sysfs.c @@ -204,11 +204,11 @@ static umode_t scc_attr_is_visible(struct kobject *kobj, .max = _max, \ } -#define HNS_PORT_DCQCN_CC_ATTR_RW(_name, NAME) \ - struct hns_port_cc_attr hns_roce_port_attr_dcqcn_##_name = \ - __HNS_SCC_ATTR(_name, HNS_ROCE_SCC_ALGO_DCQCN, \ - HNS_ROCE_DCQCN_##NAME##_OFS, \ - HNS_ROCE_DCQCN_##NAME##_SZ, \ +#define HNS_PORT_DCQCN_CC_ATTR_RW(_name, NAME) \ + static struct hns_port_cc_attr hns_roce_port_attr_dcqcn_##_name = \ + __HNS_SCC_ATTR(_name, HNS_ROCE_SCC_ALGO_DCQCN, \ + HNS_ROCE_DCQCN_##NAME##_OFS, \ + HNS_ROCE_DCQCN_##NAME##_SZ, \ 0, HNS_ROCE_DCQCN_##NAME##_MAX) HNS_PORT_DCQCN_CC_ATTR_RW(ai, AI); @@ -244,11 +244,11 @@ static const struct attribute_group dcqcn_cc_param_group = { .is_visible = scc_attr_is_visible, }; -#define HNS_PORT_LDCP_CC_ATTR_RW(_name, NAME) \ - struct hns_port_cc_attr hns_roce_port_attr_ldcp_##_name = \ - __HNS_SCC_ATTR(_name, HNS_ROCE_SCC_ALGO_LDCP, \ - HNS_ROCE_LDCP_##NAME##_OFS, \ - HNS_ROCE_LDCP_##NAME##_SZ, \ +#define HNS_PORT_LDCP_CC_ATTR_RW(_name, NAME) \ + static struct hns_port_cc_attr hns_roce_port_attr_ldcp_##_name = \ + __HNS_SCC_ATTR(_name, HNS_ROCE_SCC_ALGO_LDCP, \ + HNS_ROCE_LDCP_##NAME##_OFS, \ + HNS_ROCE_LDCP_##NAME##_SZ, \ 0, HNS_ROCE_LDCP_##NAME##_MAX) HNS_PORT_LDCP_CC_ATTR_RW(cwd0, CWD0); @@ -275,7 +275,7 @@ static const struct attribute_group ldcp_cc_param_group = { }; #define HNS_PORT_HC3_CC_ATTR_RW(_name, NAME) \ - struct hns_port_cc_attr hns_roce_port_attr_hc3_##_name = \ + static struct 
hns_port_cc_attr hns_roce_port_attr_hc3_##_name = \ __HNS_SCC_ATTR(_name, HNS_ROCE_SCC_ALGO_HC3, \ HNS_ROCE_HC3_##NAME##_OFS, \ HNS_ROCE_HC3_##NAME##_SZ, \ @@ -309,7 +309,7 @@ static const struct attribute_group hc3_cc_param_group = { }; #define HNS_PORT_DIP_CC_ATTR_RW(_name, NAME) \ - struct hns_port_cc_attr hns_roce_port_attr_dip_##_name = \ + static struct hns_port_cc_attr hns_roce_port_attr_dip_##_name = \ __HNS_SCC_ATTR(_name, HNS_ROCE_SCC_ALGO_DIP, \ HNS_ROCE_DIP_##NAME##_OFS, \ HNS_ROCE_DIP_##NAME##_SZ, \ -- Gitee From e3c28592e3ea1f1bc55910a72eefca263c9233a6 Mon Sep 17 00:00:00 2001 From: wenglianfa Date: Wed, 20 Nov 2024 21:07:59 +0800 Subject: [PATCH 11/13] RDMA/hns: Fix the modification of max_send_sge driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB30V8 ---------------------------------------------------------------------- The attribute max_send_sge describes the requested max number of SGE in a WR in the SQ. It is a necessary attribute for creating a qp. It is returned directly without modifying its value. 
Fixes: 0c5e259b06a8 ("RDMA/hns: Fix incorrect sge nums calculation") Signed-off-by: wenglianfa Signed-off-by: Xinghai Cen --- drivers/infiniband/hw/hns/hns_roce_qp.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 926543cfa40b..b0877e23223e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -668,7 +668,6 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev, hr_qp->sq.wqe_shift = ucmd->log_sq_stride; hr_qp->sq.wqe_cnt = cnt; - cap->max_send_sge = hr_qp->sq.max_gs; return 0; } @@ -780,7 +779,6 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev, /* sync the parameters of kernel QP to user's configuration */ cap->max_send_wr = cnt; - cap->max_send_sge = hr_qp->sq.max_gs; return 0; } -- Gitee From 5b4694150feb80cebc63a6027711bcae9ba5ebc3 Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Wed, 20 Nov 2024 21:08:00 +0800 Subject: [PATCH 12/13] RDMA/hns: Use one CQ bank per context driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB30V8 ---------------------------------------------------------------------- By forcing each context to use the same CQ bank. Ensure that there is fixed mapping logic between all QP and CQ banks. Ensure that SQ, RQ, and CQ can share the QPC cache in QMM to avoid the timer deadlock. Currently, since the upload strategy for this issue(DTS2024032521959) has not yet been clarified and it involves 920B/C, the patch is currently marked as noup. 
Fixes: 9e03dbea2b06 ("RDMA/hns: Fix CQ and QP cache affinity") Signed-off-by: Chengchang Tang Signed-off-by: Xinghai Cen --- drivers/infiniband/hw/hns/hns_roce_cq.c | 57 +++++++++++++++++++-- drivers/infiniband/hw/hns/hns_roce_device.h | 4 ++ drivers/infiniband/hw/hns/hns_roce_main.c | 2 + 3 files changed, 60 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index dd24f2d991ee..d34fd7122b3d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -37,6 +37,43 @@ #include "hns_roce_hem.h" #include "hns_roce_common.h" +void hns_roce_put_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device); + struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; + + if (hr_dev->pci_dev->revision < PCI_REVISION_ID_HIP09) + return; + + mutex_lock(&cq_table->bank_mutex); + cq_table->ctx_num[uctx->cq_bank_id]--; + mutex_unlock(&cq_table->bank_mutex); +} + +void hns_roce_get_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device); + struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; + u32 least_load = cq_table->ctx_num[0]; + u8 bankid = 0; + u8 i; + + if (hr_dev->pci_dev->revision < PCI_REVISION_ID_HIP09) + return; + + mutex_lock(&cq_table->bank_mutex); + for (i = 1; i < HNS_ROCE_CQ_BANK_NUM; i++) { + if (cq_table->ctx_num[i] < least_load) { + least_load = cq_table->ctx_num[i]; + bankid = i; + } + } + cq_table->ctx_num[bankid]++; + mutex_unlock(&cq_table->bank_mutex); + + uctx->cq_bank_id = bankid; +} + static u8 get_least_load_bankid_for_cq(struct hns_roce_bank *bank) { u32 least_load = bank[0].inuse; @@ -55,7 +92,21 @@ static u8 get_least_load_bankid_for_cq(struct hns_roce_bank *bank) return bankid; } -static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) +static u8 select_cq_bankid(struct hns_roce_dev 
*hr_dev, struct hns_roce_bank *bank, + struct ib_udata *udata) +{ + struct hns_roce_ucontext *uctx = udata ? + rdma_udata_to_drv_context(udata, struct hns_roce_ucontext, + ibucontext) : NULL; + /* only apply for HIP09 and HIP10 now, and use bank 0 for kernel */ + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + return uctx ? uctx->cq_bank_id : 0; + + return get_least_load_bankid_for_cq(bank); +} + +static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, + struct ib_udata *udata) { struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; struct hns_roce_bank *bank; @@ -63,7 +114,7 @@ static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) int id; mutex_lock(&cq_table->bank_mutex); - bankid = get_least_load_bankid_for_cq(cq_table->bank); + bankid = select_cq_bankid(hr_dev, cq_table->bank, udata); bank = &cq_table->bank[bankid]; id = ida_alloc_range(&bank->ida, bank->min, bank->max, GFP_KERNEL); @@ -416,7 +467,7 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, goto err_cq_buf; } - ret = alloc_cqn(hr_dev, hr_cq); + ret = alloc_cqn(hr_dev, hr_cq, udata); if (ret) { ibdev_err(ibdev, "failed to alloc CQN, ret = %d.\n", ret); goto err_cq_db; diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 1a42d7e726b4..aea848070ffd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -265,6 +265,7 @@ struct hns_roce_ucontext { struct list_head list; /* link all uctx to uctx_list on hr_dev */ pid_t pid; /* process id to which the uctx belongs */ struct hns_dca_ctx_debugfs dca_dbgfs; + u8 cq_bank_id; }; struct hns_roce_pd { @@ -592,6 +593,7 @@ struct hns_roce_cq_table { struct hns_roce_hem_table table; struct hns_roce_bank bank[HNS_ROCE_CQ_BANK_NUM]; struct mutex bank_mutex; + u32 ctx_num[HNS_ROCE_CQ_BANK_NUM]; }; struct hns_roce_srq_table { @@ -1476,4 +1478,6 @@ void 
hns_roce_add_unfree_mtr(struct hns_roce_mtr_node *pos, void hns_roce_free_unfree_mtr(struct hns_roce_dev *hr_dev); int hns_roce_alloc_scc_param(struct hns_roce_dev *hr_dev); void hns_roce_dealloc_scc_param(struct hns_roce_dev *hr_dev); +void hns_roce_put_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx); +void hns_roce_get_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx); #endif /* _HNS_ROCE_DEVICE_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index c88807bdfda0..65e8b697d0b3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -598,6 +598,7 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, mutex_unlock(&hr_dev->uctx_list_mutex); hns_roce_register_uctx_debugfs(hr_dev, context); + hns_roce_get_cq_bankid_for_uctx(context); return 0; @@ -634,6 +635,7 @@ static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext) hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) mutex_destroy(&context->page_mutex); + hns_roce_put_cq_bankid_for_uctx(context); hns_roce_unregister_uctx_debugfs(context); hns_roce_unregister_udca(hr_dev, context); -- Gitee From 209f40403456bf445343fa00b481cbbb012117ea Mon Sep 17 00:00:00 2001 From: wenglianfa Date: Wed, 20 Nov 2024 21:08:01 +0800 Subject: [PATCH 13/13] RDMA/hns: Fix RoCEE hang when multiple QP banks use EXT_SGE driver inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IB30V8 ---------------------------------------------------------------------- When QPs of multiple banks are used, there is a possibility that the RoCEE may hang. This is because QPs of different banks may interfere with each other in certain cases when processing extended SGEs. To solve this problem, the QP-bank-limit mechanism is introduced. When this mechanism is enabled, the number of QP banks must be limited to ensure that extended SGEs can be used. 
If this mechanism is not applied, the FW will limit the maximum number of SGEs and make extended SGEs unavailable to avoid the HW hanging. Signed-off-by: wenglianfa Signed-off-by: Xinghai Cen --- drivers/infiniband/hw/hns/hns_roce_cq.c | 15 +++++-- drivers/infiniband/hw/hns/hns_roce_device.h | 6 +++ drivers/infiniband/hw/hns/hns_roce_main.c | 5 +++ drivers/infiniband/hw/hns/hns_roce_qp.c | 49 ++++++++++++++++----- 4 files changed, 62 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index d34fd7122b3d..7cda55debe62 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -52,9 +52,10 @@ void hns_roce_put_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx) void hns_roce_get_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx) { +#define INVALID_LOAD_CQNUM 0xFFFFFFFF struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device); struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; - u32 least_load = cq_table->ctx_num[0]; + u32 least_load = INVALID_LOAD_CQNUM; u8 bankid = 0; u8 i; @@ -62,7 +63,10 @@ void hns_roce_get_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx) return; mutex_lock(&cq_table->bank_mutex); - for (i = 1; i < HNS_ROCE_CQ_BANK_NUM; i++) { + for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++) { + if (!(cq_table->valid_cq_bank_mask & BIT(i))) + continue; + if (cq_table->ctx_num[i] < least_load) { least_load = cq_table->ctx_num[i]; bankid = i; @@ -98,7 +102,7 @@ static u8 select_cq_bankid(struct hns_roce_dev *hr_dev, struct hns_roce_bank *ba struct hns_roce_ucontext *uctx = udata ? rdma_udata_to_drv_context(udata, struct hns_roce_ucontext, ibucontext) : NULL; - /* only apply for HIP09 and HIP10 now, and use bank 0 for kernel */ + /* only HIP08 is not applied now, and use bank 0 for kernel */ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) return uctx ? 
uctx->cq_bank_id : 0; @@ -600,6 +604,11 @@ void hns_roce_init_cq_table(struct hns_roce_dev *hr_dev) cq_table->bank[i].max = hr_dev->caps.num_cqs / HNS_ROCE_CQ_BANK_NUM - 1; } + + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_LIMIT_BANK) + cq_table->valid_cq_bank_mask = VALID_CQ_BANK_MASK_LIMIT; + else + cq_table->valid_cq_bank_mask = VALID_CQ_BANK_MASK_DEFAULT; } void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index aea848070ffd..85e160132135 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -105,6 +105,10 @@ #define CQ_BANKID_SHIFT 2 #define CQ_BANKID_MASK GENMASK(1, 0) +#define VALID_CQ_BANK_MASK_DEFAULT 0xF +#define VALID_CQ_BANK_MASK_LIMIT 0x9 + +#define VALID_EXT_SGE_QP_BANK_MASK_LIMIT 0x41 #define HNS_ROCE_MAX_CQ_COUNT 0xFFFF #define HNS_ROCE_MAX_CQ_PERIOD 0xFFFF @@ -168,6 +172,7 @@ enum { HNS_ROCE_CAP_FLAG_CQE_INLINE = BIT(19), HNS_ROCE_CAP_FLAG_BOND = BIT(21), HNS_ROCE_CAP_FLAG_SRQ_RECORD_DB = BIT(22), + HNS_ROCE_CAP_FLAG_LIMIT_BANK = BIT(23), }; #define HNS_ROCE_DB_TYPE_COUNT 2 @@ -594,6 +599,7 @@ struct hns_roce_cq_table { struct hns_roce_bank bank[HNS_ROCE_CQ_BANK_NUM]; struct mutex bank_mutex; u32 ctx_num[HNS_ROCE_CQ_BANK_NUM]; + u8 valid_cq_bank_mask; }; struct hns_roce_srq_table { diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 65e8b697d0b3..9fa62d31d500 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -262,6 +262,11 @@ static int hns_roce_query_device(struct ib_device *ib_dev, props->max_srq_sge = hr_dev->caps.max_srq_sges; } + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_LIMIT_BANK) { + props->max_cq >>= 1; + props->max_qp >>= 1; + } + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR && hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) { props->device_cap_flags |= 
IB_DEVICE_MEM_MGT_EXTENSIONS; diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index b0877e23223e..c75706bc4212 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -198,22 +198,16 @@ static u8 get_affinity_cq_bank(u8 qp_bank) return (qp_bank >> 1) & CQ_BANKID_MASK; } -static u8 get_least_load_bankid_for_qp(struct ib_qp_init_attr *init_attr, - struct hns_roce_bank *bank) +static u8 get_least_load_bankid_for_qp(struct hns_roce_bank *bank, u8 valid_qp_bank_mask) { #define INVALID_LOAD_QPNUM 0xFFFFFFFF - struct ib_cq *scq = init_attr->send_cq; u32 least_load = INVALID_LOAD_QPNUM; - unsigned long cqn = 0; u8 bankid = 0; u32 bankcnt; u8 i; - if (scq) - cqn = to_hr_cq(scq)->cqn; - for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) { - if (scq && (get_affinity_cq_bank(i) != (cqn & CQ_BANKID_MASK))) + if (!(valid_qp_bank_mask & BIT(i))) continue; bankcnt = bank[i].inuse; @@ -247,6 +241,42 @@ static int alloc_qpn_with_bankid(struct hns_roce_bank *bank, u8 bankid, return 0; } + +static bool use_ext_sge(struct ib_qp_init_attr *init_attr) +{ + return init_attr->cap.max_send_sge > HNS_ROCE_SGE_IN_WQE || + init_attr->qp_type == IB_QPT_UD || + init_attr->qp_type == IB_QPT_GSI; +} + +static u8 select_qp_bankid(struct hns_roce_dev *hr_dev, + struct ib_qp_init_attr *init_attr) +{ + struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; + struct hns_roce_bank *bank = qp_table->bank; + struct ib_cq *scq = init_attr->send_cq; + u8 valid_qp_bank_mask = 0; + unsigned long cqn = 0; + u8 i; + + if (scq) + cqn = to_hr_cq(scq)->cqn; + + for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) { + if (scq && (get_affinity_cq_bank(i) != (cqn & CQ_BANKID_MASK))) + continue; + + if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_LIMIT_BANK) && + use_ext_sge(init_attr) && + !(VALID_EXT_SGE_QP_BANK_MASK_LIMIT & BIT(i))) + continue; + + valid_qp_bank_mask |= BIT(i); + } + + return get_least_load_bankid_for_qp(bank, 
valid_qp_bank_mask); +} + static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_qp_init_attr *init_attr) { @@ -259,8 +289,7 @@ static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, num = 1; } else { mutex_lock(&qp_table->bank_mutex); - bankid = get_least_load_bankid_for_qp(init_attr, qp_table->bank); - + bankid = select_qp_bankid(hr_dev, init_attr); ret = alloc_qpn_with_bankid(&qp_table->bank[bankid], bankid, &num); if (ret) { -- Gitee