From 948f5845e8a67c2246f06c50314220c62ed58b3d Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Tue, 8 Jul 2025 20:52:43 +0800 Subject: [PATCH 01/11] RDMA/hns: Add check between dca_min_size and dca_max_size driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ICJYA1 ---------------------------------------------------------------------- Add check between dca_min_size and dca_max_size to ensure dca_min_size <= dca_max_size. Fixes: 12aa71f83089 ("RDMA/hns: Add DCA support for kernel space") Signed-off-by: Junxian Huang Signed-off-by: Donghua Huang --- drivers/infiniband/hw/hns/hns_roce_dca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.c b/drivers/infiniband/hw/hns/hns_roce_dca.c index dcf560513205..6d9f1ac5754f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.c +++ b/drivers/infiniband/hw/hns/hns_roce_dca.c @@ -1142,7 +1142,7 @@ static void load_kdca_param(struct hns_roce_dca_ctx *ctx) else ctx->max_size = roundup(dca_max_size, unit_size); - if (dca_min_size == DCA_MAX_MEM_SIZE) + if (dca_min_size == DCA_MAX_MEM_SIZE || dca_min_size > dca_max_size) ctx->min_size = ctx->max_size; else ctx->min_size = roundup(dca_min_size, unit_size); -- Gitee From 6d9bf9477a1d91dc229b757751f0db1c9a4ed3aa Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Tue, 8 Jul 2025 20:52:44 +0800 Subject: [PATCH 02/11] RDMA/hns: Fix accessing uninitialized resources driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ICJYA1 ---------------------------------------------------------------------- hr_dev->pgdir_list and hr_dev->pgdir_mutex won't be initialized if CQ/QP record db are not enabled, but they are also needed when using SRQ with SRQ record db enabled. Simplify the logic by initializing these resources by default.
Fixes: 85e3185fc11e ("RDMA/hns: Support SRQ record doorbell") Signed-off-by: Junxian Huang Signed-off-by: Donghua Huang --- drivers/infiniband/hw/hns/hns_roce_main.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 720e463ccea4..6566d795da14 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -1299,10 +1299,7 @@ static void hns_roce_teardown_hca(struct hns_roce_dev *hr_dev) mutex_destroy(&hr_dev->db_unfree_list_mutex); mutex_destroy(&hr_dev->mtr_unfree_list_mutex); mutex_destroy(&hr_dev->uctx_list_mutex); - - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || - hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) - mutex_destroy(&hr_dev->pgdir_mutex); + mutex_destroy(&hr_dev->pgdir_mutex); } /** @@ -1329,11 +1326,8 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) INIT_LIST_HEAD(&hr_dev->db_unfree_list); mutex_init(&hr_dev->db_unfree_list_mutex); - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || - hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) { - INIT_LIST_HEAD(&hr_dev->pgdir_list); - mutex_init(&hr_dev->pgdir_mutex); - } + INIT_LIST_HEAD(&hr_dev->pgdir_list); + mutex_init(&hr_dev->pgdir_mutex); hns_roce_init_uar_table(hr_dev); @@ -1372,10 +1366,7 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) mutex_destroy(&hr_dev->db_unfree_list_mutex); mutex_destroy(&hr_dev->mtr_unfree_list_mutex); mutex_destroy(&hr_dev->uctx_list_mutex); - - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || - hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) - mutex_destroy(&hr_dev->pgdir_mutex); + mutex_destroy(&hr_dev->pgdir_mutex); return ret; } -- Gitee From 59de6d12eb1ef867e7c9ae0f467de0315af64d88 Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Tue, 8 Jul 2025 20:52:45 +0800 Subject: [PATCH 03/11] RDMA/hns: Fix poe memory leak in error flow 
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ICJYA1 ---------------------------------------------------------------------- Call hns_roce_unregister_poe_ch() to fix memory leak when hns_roce_register_device() failed. Fixes: 1bb63063234c ("RDMA/hns: Fix allocating POE channels after IB device registration") Signed-off-by: Junxian Huang Signed-off-by: Donghua Huang --- drivers/infiniband/hw/hns/hns_roce_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 6566d795da14..82dcc3e57598 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -1548,6 +1548,7 @@ int hns_roce_init(struct hns_roce_dev *hr_dev) return 0; error_failed_register_device: + hns_roce_unregister_poe_ch(hr_dev); if (hr_dev->hw->hw_exit) hr_dev->hw->hw_exit(hr_dev); -- Gitee From a67da0f9639f3c96678dcde9fade380df9ac419b Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Tue, 8 Jul 2025 20:52:46 +0800 Subject: [PATCH 04/11] RDMA/hns: Fix descriptions of stars api driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ICJYA1 ---------------------------------------------------------------------- Fix wrong function name in the description of rdma_register_poe_channel(). Add a description of the constraint on concurrent calls.
Fixes: 21cacb516f20 ("RDMA/hns: Support write with notify") Fixes: 96df01574e0c ("RDMA/hns: Support configuring POE channels and creating POE CQs") Signed-off-by: Junxian Huang Signed-off-by: Donghua Huang --- drivers/infiniband/hw/hns/hns_roce_ext.h | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_ext.h b/drivers/infiniband/hw/hns/hns_roce_ext.h index 603e51cbf29a..c5148e0116d1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ext.h +++ b/drivers/infiniband/hw/hns/hns_roce_ext.h @@ -8,15 +8,27 @@ #include /** - * rdma_register_notify_addr - Register an POE channel for this RDMA device. + * rdma_register_poe_channel - Register an POE channel for this RDMA device. * @channel - POE channel index. * @poe_addr - POE channel address. * * If the current POE device is not associated with CQ, then it will be * allowed to be re-registered. Otherwise, re-registration or * de-registration will report an EBUSY error. + * + * This function is not allowed to be called concurrently. */ int rdma_register_poe_channel(struct ib_device *ib_dev, u8 channel, u64 poe_addr); + +/** + * rdma_unregister_poe_channel - Unregister an POE channel for this RDMA device. + * @channel - POE channel index. + * + * If the current POE device is still associated with CQ, de-registration + * will report an EBUSY error. + * + * This function is not allowed to be called concurrently. + */ int rdma_unregister_poe_channel(struct ib_device *ib_dev, u8 channel); /** @@ -58,9 +70,17 @@ struct rdma_notify_mem { * * If notify_mem has already been registered, re-registration * will not be allowed. + * + * This function is not allowed to be called concurrently. */ int rdma_register_notify_addr(struct ib_device *ib_dev, size_t num, struct rdma_notify_mem *notify_mem); +/** + * rdma_unregister_notify_addr - Unregister the memory regions for write with + * notify operation. + * + * This function is not allowed to be called concurrently. 
+ */ int rdma_unregister_notify_addr(struct ib_device *ib_dev); #endif -- Gitee From d9f476a0697ee5073b34a2d7ae913da942e847a4 Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Tue, 8 Jul 2025 20:52:47 +0800 Subject: [PATCH 05/11] RDMA/hns: Fix divide-by-zero error in dca debugfs driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ICJYA1 ---------------------------------------------------------------------- When uctx is allocated while dca memory not registered, the total size of dca memory will be zero, leading to a divide-by-zero error in debugfs. Fixes: a21781182f77 ("RDMA/hns: Add debugfs support for DCA") Signed-off-by: Junxian Huang Signed-off-by: Donghua Huang --- drivers/infiniband/hw/hns/hns_roce_debugfs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_debugfs.c b/drivers/infiniband/hw/hns/hns_roce_debugfs.c index 270282521306..068de09c292d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_debugfs.c +++ b/drivers/infiniband/hw/hns/hns_roce_debugfs.c @@ -250,6 +250,12 @@ static u64 calc_loading_percent(size_t total, size_t free, u32 *out_rem) all_pages = total >> HNS_HW_PAGE_SHIFT; free_pages = free >> HNS_HW_PAGE_SHIFT; + + if (!all_pages) { + percent = 0; + goto out; + } + if (all_pages >= free_pages) { used_pages = all_pages - free_pages; scale = LOADING_PERCENT_SCALE * LOADING_PERCENT_SCALE; @@ -257,6 +263,7 @@ static u64 calc_loading_percent(size_t total, size_t free, u32 *out_rem) percent = div_u64_rem(percent, LOADING_PERCENT_SCALE, &rem); } +out: if (out_rem) *out_rem = rem; -- Gitee From fef40880c3066f203ce21d04031e38f127c7bd9f Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Tue, 8 Jul 2025 20:52:48 +0800 Subject: [PATCH 06/11] RDMA/hns: Use __free_page() to free pages allocated with alloc_page() driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ICJYA1 ---------------------------------------------------------------------- alloc_page() 
is a wrapper of alloc_pages() and should be matched with __free_pages(). For detailed explanation, look up __free_pages() entry in the following link: https://www.kernel.org/doc/html/next/core-api/mm-api.html Fixes: e8b1fec497a0 ("RDMA/hns: Kernel notify usr space to stop ring db") Signed-off-by: Junxian Huang Signed-off-by: Donghua Huang --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 962f6332bc7a..e8ca3778f23c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2967,7 +2967,7 @@ static int hns_roce_v2_get_reset_page(struct hns_roce_dev *hr_dev) return 0; err_with_vmap: - put_page(hr_dev->reset_page); + __free_page(hr_dev->reset_page); return -ENOMEM; } @@ -2975,7 +2975,7 @@ static void hns_roce_v2_put_reset_page(struct hns_roce_dev *hr_dev) { vunmap(hr_dev->reset_kaddr); hr_dev->reset_kaddr = NULL; - put_page(hr_dev->reset_page); + __free_page(hr_dev->reset_page); hr_dev->reset_page = NULL; } -- Gitee From 1749e4e480f59d1b31b0c825816f35bbd25080bb Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Tue, 8 Jul 2025 20:52:49 +0800 Subject: [PATCH 07/11] RDMA/hns: Fix accessing invalid dip_ctx during destroying QP mainline inclusion from mainline-v6.13-rc1 commit 176995ff48e47b415e767b7f48622f79076bda68 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ICJYA1 CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=176995ff48e47b415e767b7f48622f79076bda68 ---------------------------------------------------------------------- If it fails to modify QP to RTR, dip_ctx will not be attached. And while destroying the QP, the invalid dip_ctx pointer will be accessed.
Fixes: faa62440a577 ("RDMA/hns: Fix different dgids mapping to the same dip_idx") Signed-off-by: Chengchang Tang Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20241220055249.146943-3-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky Signed-off-by: Donghua Huang --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index e8ca3778f23c..d851f6bbd411 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -6364,6 +6364,9 @@ static void put_dip_ctx_idx(struct hns_roce_dev *hr_dev, { struct hns_roce_dip *hr_dip = hr_qp->dip; + if (!hr_dip) + return; + xa_lock(&hr_dev->qp_table.dip_xa); hr_dip->qp_cnt--; -- Gitee From 2a92584e7f00f6048de2c71f6f44ee524df29591 Mon Sep 17 00:00:00 2001 From: Yuyu Li Date: Tue, 8 Jul 2025 20:52:50 +0800 Subject: [PATCH 08/11] RDMA/hns: Fix congestion control algorithm parameter range driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ICJYA1 ---------------------------------------------------------------------- Previously, some individual parameters of the DCQCN, LDCP, and HC3 algorithms had incorrect ranges. This patch fixes them.
Fixes: 523f34d81ea7 ("RDMA/hns: Support congestion control algorithm parameter configuration") Signed-off-by: Yuyu Li Signed-off-by: Donghua Huang --- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 22 +++++++++++++++------- drivers/infiniband/hw/hns/hns_roce_sysfs.c | 3 ++- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index b98d2d11bb3f..e1fddb2f7db1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -1590,7 +1590,7 @@ struct hns_roce_wqe_atomic_seg { #define HNS_ROCE_DCQCN_F_MAX ((u8)(~0U)) #define HNS_ROCE_DCQCN_TKP_OFS (HNS_ROCE_DCQCN_F_OFS + HNS_ROCE_DCQCN_F_SZ) #define HNS_ROCE_DCQCN_TKP_SZ sizeof(u8) -#define HNS_ROCE_DCQCN_TKP_MAX 15 +#define HNS_ROCE_DCQCN_TKP_MAX 10 #define HNS_ROCE_DCQCN_TMP_OFS (HNS_ROCE_DCQCN_TKP_OFS + HNS_ROCE_DCQCN_TKP_SZ) #define HNS_ROCE_DCQCN_TMP_SZ sizeof(u16) #define HNS_ROCE_DCQCN_TMP_MAX 15 @@ -1630,47 +1630,55 @@ struct hns_roce_wqe_atomic_seg { #define HNS_ROCE_LDCP_GAMMA_OFS (HNS_ROCE_LDCP_ALPHA_OFS + \ HNS_ROCE_LDCP_ALPHA_SZ) #define HNS_ROCE_LDCP_GAMMA_SZ sizeof(u8) -#define HNS_ROCE_LDCP_GAMMA_MAX ((u8)(~0U)) +#define HNS_ROCE_LDCP_GAMMA_MAX 7 #define HNS_ROCE_LDCP_BETA_OFS (HNS_ROCE_LDCP_GAMMA_OFS + \ HNS_ROCE_LDCP_GAMMA_SZ) #define HNS_ROCE_LDCP_BETA_SZ sizeof(u8) -#define HNS_ROCE_LDCP_BETA_MAX ((u8)(~0U)) +#define HNS_ROCE_LDCP_BETA_MAX 7 #define HNS_ROCE_LDCP_ETA_OFS (HNS_ROCE_LDCP_BETA_OFS + HNS_ROCE_LDCP_BETA_SZ) #define HNS_ROCE_LDCP_ETA_SZ sizeof(u8) -#define HNS_ROCE_LDCP_ETA_MAX ((u8)(~0U)) +#define HNS_ROCE_LDCP_ETA_MAX 7 #define HNS_ROCE_LDCP_LIFESPAN_OFS (4 * sizeof(u32)) #define HNS_ROCE_LDCP_LIFESPAN_SZ sizeof(u32) #define HNS_ROCE_LDCP_LIFESPAN_MAX 1000 #define HNS_ROCE_HC3_INITIAL_WINDOW_OFS 0 #define HNS_ROCE_HC3_INITIAL_WINDOW_SZ sizeof(u32) +#define HNS_ROCE_HC3_INITIAL_WINDOW_MIN 0 #define HNS_ROCE_HC3_INITIAL_WINDOW_MAX ((u32)(~0U)) #define 
HNS_ROCE_HC3_BANDWIDTH_OFS (HNS_ROCE_HC3_INITIAL_WINDOW_OFS + \ HNS_ROCE_HC3_INITIAL_WINDOW_SZ) #define HNS_ROCE_HC3_BANDWIDTH_SZ sizeof(u32) +#define HNS_ROCE_HC3_BANDWIDTH_MIN 1000 #define HNS_ROCE_HC3_BANDWIDTH_MAX ((u32)(~0U)) #define HNS_ROCE_HC3_QLEN_SHIFT_OFS (HNS_ROCE_HC3_BANDWIDTH_OFS + \ HNS_ROCE_HC3_BANDWIDTH_SZ) #define HNS_ROCE_HC3_QLEN_SHIFT_SZ sizeof(u8) -#define HNS_ROCE_HC3_QLEN_SHIFT_MAX ((u8)(~0U)) +#define HNS_ROCE_HC3_QLEN_SHIFT_MIN 0 +#define HNS_ROCE_HC3_QLEN_SHIFT_MAX 31 #define HNS_ROCE_HC3_PORT_USAGE_SHIFT_OFS (HNS_ROCE_HC3_QLEN_SHIFT_OFS + \ HNS_ROCE_HC3_QLEN_SHIFT_SZ) #define HNS_ROCE_HC3_PORT_USAGE_SHIFT_SZ sizeof(u8) -#define HNS_ROCE_HC3_PORT_USAGE_SHIFT_MAX ((u8)(~0U)) +#define HNS_ROCE_HC3_PORT_USAGE_SHIFT_MIN 0 +#define HNS_ROCE_HC3_PORT_USAGE_SHIFT_MAX 100 #define HNS_ROCE_HC3_OVER_PERIOD_OFS (HNS_ROCE_HC3_PORT_USAGE_SHIFT_OFS + \ HNS_ROCE_HC3_PORT_USAGE_SHIFT_SZ) #define HNS_ROCE_HC3_OVER_PERIOD_SZ sizeof(u8) +#define HNS_ROCE_HC3_OVER_PERIOD_MIN 0 #define HNS_ROCE_HC3_OVER_PERIOD_MAX ((u8)(~0U)) #define HNS_ROCE_HC3_MAX_STAGE_OFS (HNS_ROCE_HC3_OVER_PERIOD_OFS + \ HNS_ROCE_HC3_OVER_PERIOD_SZ) #define HNS_ROCE_HC3_MAX_STAGE_SZ sizeof(u8) +#define HNS_ROCE_HC3_MAX_STAGE_MIN 0 #define HNS_ROCE_HC3_MAX_STAGE_MAX ((u8)(~0U)) #define HNS_ROCE_HC3_GAMMA_SHIFT_OFS (HNS_ROCE_HC3_MAX_STAGE_OFS + \ HNS_ROCE_HC3_MAX_STAGE_SZ) #define HNS_ROCE_HC3_GAMMA_SHIFT_SZ sizeof(u8) +#define HNS_ROCE_HC3_GAMMA_SHIFT_MIN 0 #define HNS_ROCE_HC3_GAMMA_SHIFT_MAX 15 #define HNS_ROCE_HC3_LIFESPAN_OFS (4 * sizeof(u32)) #define HNS_ROCE_HC3_LIFESPAN_SZ sizeof(u32) +#define HNS_ROCE_HC3_LIFESPAN_MIN 0 #define HNS_ROCE_HC3_LIFESPAN_MAX 1000 #define HNS_ROCE_DIP_AI_OFS 0 @@ -1681,7 +1689,7 @@ struct hns_roce_wqe_atomic_seg { #define HNS_ROCE_DIP_F_MAX ((u8)(~0U)) #define HNS_ROCE_DIP_TKP_OFS (HNS_ROCE_DIP_F_OFS + HNS_ROCE_DIP_F_SZ) #define HNS_ROCE_DIP_TKP_SZ sizeof(u8) -#define HNS_ROCE_DIP_TKP_MAX 15 +#define HNS_ROCE_DIP_TKP_MAX 10 #define HNS_ROCE_DIP_TMP_OFS 
(HNS_ROCE_DIP_TKP_OFS + HNS_ROCE_DIP_TKP_SZ) #define HNS_ROCE_DIP_TMP_SZ sizeof(u16) #define HNS_ROCE_DIP_TMP_MAX 15 diff --git a/drivers/infiniband/hw/hns/hns_roce_sysfs.c b/drivers/infiniband/hw/hns/hns_roce_sysfs.c index ec266136d038..ab6184809708 100644 --- a/drivers/infiniband/hw/hns/hns_roce_sysfs.c +++ b/drivers/infiniband/hw/hns/hns_roce_sysfs.c @@ -417,7 +417,8 @@ static const struct attribute_group ldcp_cc_param_group = { __HNS_SCC_ATTR(_name, HNS_ROCE_SCC_ALGO_HC3, \ HNS_ROCE_HC3_##NAME##_OFS, \ HNS_ROCE_HC3_##NAME##_SZ, \ - 0, HNS_ROCE_HC3_##NAME##_MAX) + HNS_ROCE_HC3_##NAME##_MIN, \ + HNS_ROCE_HC3_##NAME##_MAX) HNS_PORT_HC3_CC_ATTR_RW(initial_window, INITIAL_WINDOW); HNS_PORT_HC3_CC_ATTR_RW(bandwidth, BANDWIDTH); -- Gitee From 6e2adf63acbc01ae46711d28eb8fcbd42892d70a Mon Sep 17 00:00:00 2001 From: Yuyu Li Date: Tue, 8 Jul 2025 20:52:51 +0800 Subject: [PATCH 09/11] RDMA/hns: Fix scc_param failed logic judgments driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ICJYA1 ---------------------------------------------------------------------- Previously, when a firmware request failed, the driver did not restore the abnormal parameter value to the normal value. As a result, when the user configured other parameters, the firmware request would continue to fail unless the user manually set the abnormal parameter value back to the normal range. Now, restore the parameters after a firmware request failure.
Fixes: 523f34d81ea7 ("RDMA/hns: Support congestion control algorithm parameter configuration") Signed-off-by: Yuyu Li Signed-off-by: Donghua Huang --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index d851f6bbd411..e91fbc39d7b0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -7697,6 +7697,8 @@ static int hns_roce_v2_config_scc_param(struct hns_roce_dev *hr_dev, ibdev_err_ratelimited(&hr_dev->ib_dev, "failed to configure scc param, opcode: 0x%x, ret = %d.\n", le16_to_cpu(desc.opcode), ret); + memcpy(scc_param->param, scc_param->latest_param, + sizeof(scc_param->param)); mutex_unlock(&scc_param->scc_mutex); return ret; } -- Gitee From d2aefabd6de2b415c0ba3f3093e048000dcae614 Mon Sep 17 00:00:00 2001 From: wenglianfa Date: Tue, 8 Jul 2025 20:52:52 +0800 Subject: [PATCH 10/11] RDMA/hns: Fix address information leakage of DCA memory driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ICJYA1 ---------------------------------------------------------------------- The DCA memory address information is exposed to user mode. Use a keyed hash to obscure the address information.
Fixes: 12aa71f83089 ("RDMA/hns: Add DCA support for kernel space") Signed-off-by: wenglianfa Signed-off-by: Donghua Huang --- drivers/infiniband/hw/hns/hns_roce_dca.c | 3 ++- drivers/infiniband/hw/hns/hns_roce_device.h | 2 ++ drivers/infiniband/hw/hns/hns_roce_main.c | 2 ++ drivers/infiniband/hw/hns/hns_roce_qp.c | 2 ++ 4 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.c b/drivers/infiniband/hw/hns/hns_roce_dca.c index 6d9f1ac5754f..c3bb89811088 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.c +++ b/drivers/infiniband/hw/hns/hns_roce_dca.c @@ -1327,7 +1327,8 @@ static int add_dca_mem(struct hns_roce_dev *hr_dev, u32 new_size) if (!mem) return -ENOMEM; - attr.key = (u64)mem; + attr.key = siphash_1u64((u64)mem, &hr_dev->dca_safe_hash_key); + attr.size = roundup(new_size, ctx->unit_size); ret = register_dca_mem(hr_dev, NULL, mem, &attr); if (ret) { diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 35724e005575..48dcdbb08831 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -34,6 +34,7 @@ #define _HNS_ROCE_DEVICE_H #include +#include #include #include #include "hns_roce_bond.h" @@ -1233,6 +1234,7 @@ struct hns_roce_dev { void *dca_safe_buf; dma_addr_t dca_safe_page; + siphash_key_t dca_safe_hash_key; }; static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 82dcc3e57598..da912e2d8d2d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -1469,6 +1469,8 @@ static void hns_roce_free_dca_safe_buf(struct hns_roce_dev *hr_dev) hr_dev->dca_safe_page); hr_dev->dca_safe_page = 0; hr_dev->dca_safe_buf = NULL; + + memzero_explicit(&hr_dev->dca_safe_hash_key, sizeof(siphash_key_t)); } int hns_roce_init(struct hns_roce_dev *hr_dev) diff 
--git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 8f4ca0f53af2..8da03d8ddc56 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -900,6 +900,8 @@ static int alloc_dca_safe_page(struct hns_roce_dev *hr_dev) return -ENOMEM; } + get_random_bytes(&hr_dev->dca_safe_hash_key, sizeof(siphash_key_t)); + return 0; } -- Gitee From ae694c9e4011102a617e4aad9e89e8945261ac34 Mon Sep 17 00:00:00 2001 From: wenglianfa Date: Tue, 8 Jul 2025 20:52:53 +0800 Subject: [PATCH 11/11] RDMA/hns: Fix double destruction of rsv_qp driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ICJYA1 ---------------------------------------------------------------------- rsv_qp will be destroyed twice in the error flow, first in free_mr_init(), and then in hns_roce_exit(). Fix it by moving free_mr_init() into hns_roce_v2_init(). list_del corruption, ffff589732eb9b50->next is LIST_POISON1 ... Call trace: __list_del_entry_valid+0x148/0x240 hns_roce_qp_remove+0x4c/0x3f0 [hns_roce_hw_v2] hns_roce_v2_destroy_qp_common+0x1dc/0x5f4 [hns_roce_hw_v2] hns_roce_v2_destroy_qp+0x22c/0x46c [hns_roce_hw_v2] free_mr_exit+0x6c/0x120 [hns_roce_hw_v2] hns_roce_v2_exit+0x170/0x200 [hns_roce_hw_v2] hns_roce_exit+0x118/0x350 [hns_roce_hw_v2] __hns_roce_hw_v2_init_instance+0x1c8/0x304 [hns_roce_hw_v2] hns_roce_hw_v2_reset_notify_init+0x170/0x21c [hns_roce_hw_v2] hns_roce_hw_v2_reset_notify+0x6c/0x190 [hns_roce_hw_v2] hclge_notify_roce_client+0x6c/0x160 [hclge] hclge_reset_rebuild+0x150/0x5c0 [hclge] hclge_reset+0x10c/0x140 [hclge] hclge_reset_subtask+0x80/0x104 [hclge] hclge_reset_service_task+0x168/0x3ac [hclge] hclge_service_task+0x50/0x100 [hclge] process_one_work+0x250/0x9a0 worker_thread+0x324/0x990 kthread+0x190/0x210 ret_from_fork+0x10/0x18 Fixes: ec799ad0f9d3 ("RDMA/hns: Fix Use-After-Free of rsv_qp") Signed-off-by: wenglianfa Signed-off-by: Donghua Huang --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 25 +++++++++++----------- 1 file
changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index e91fbc39d7b0..b91400e2bd5a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -3357,11 +3357,20 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) { int ret; + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { + ret = free_mr_init(hr_dev); + if (ret) { + dev_err(hr_dev->dev, "failed to init free mr!\n"); + return ret; + } + } + + ret = hns_roce_v2_get_reset_page(hr_dev); if (ret) { dev_err(hr_dev->dev, "reset state init failed, ret = %d.\n", ret); - return ret; + goto error_get_reset_page_failed; } /* The hns ROCEE requires the extdb info to be cleared before using */ @@ -3390,6 +3399,9 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) put_hem_table(hr_dev); err_clear_extdb_failed: hns_roce_v2_put_reset_page(hr_dev); +error_get_reset_page_failed: + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) + free_mr_exit(hr_dev); return ret; } @@ -8013,21 +8025,10 @@ static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) goto error_failed_roce_init; } - if (pdev->revision == PCI_REVISION_ID_HIP08) { - ret = free_mr_init(hr_dev); - if (ret) { - dev_err(hr_dev->dev, "failed to init free mr!\n"); - goto error_failed_free_mr_init; - } - } - handle->priv = hr_dev; return 0; -error_failed_free_mr_init: - hns_roce_exit(hr_dev, true); - error_failed_roce_init: kfree(hr_dev->priv); -- Gitee