From f368acfcaf0b1ce31e0c6c373c7073362002a590 Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Thu, 7 Dec 2023 19:42:27 +0800 Subject: [PATCH 1/6] RDMA/hns: Rename the interrupts mainline inclusion from mainline-v6.8-rc1 commit 95f6b40082aaf37fd0553828982402af36f81685 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8W3IU CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=95f6b40082aaf37fd0553828982402af36f81685 ---------------------------------------------------------------------- Now, different devices may have the same interrupt name, which makes it difficult for users to distinguish between these interrupts. Modify the naming style to be consistent with our network devices. Before: "hns-aeq-0" "hns-ceq-0" ... Now: "hns-0000:35:00.0-aeq-0" "hns-0000:35:00.0-ceq-0" ... Signed-off-by: Chengchang Tang Signed-off-by: Junxian Huang Link: https://lore.kernel.org/r/20231207114231.2872104-2-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky Signed-off-by: Juan Zhou --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index b896a653f4cf..7f8da2ac0d8e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -6857,15 +6857,16 @@ static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num, /* irq contains: abnormal + AEQ + CEQ */ for (j = 0; j < other_num; j++) snprintf((char *)hr_dev->irq_names[j], HNS_ROCE_INT_NAME_LEN, - "hns-abn-%d", j); + "hns-%s-abn-%d", pci_name(hr_dev->pci_dev), j); for (j = other_num; j < (other_num + aeq_num); j++) snprintf((char *)hr_dev->irq_names[j], HNS_ROCE_INT_NAME_LEN, - "hns-aeq-%d", j - other_num); + "hns-%s-aeq-%d", pci_name(hr_dev->pci_dev), j - other_num); for (j = (other_num + aeq_num); j < irq_num; j++) snprintf((char 
*)hr_dev->irq_names[j], HNS_ROCE_INT_NAME_LEN, - "hns-ceq-%d", j - other_num - aeq_num); + "hns-%s-ceq-%d", pci_name(hr_dev->pci_dev), + j - other_num - aeq_num); for (j = 0; j < irq_num; j++) { if (j < other_num) -- Gitee From 722c43188a5d8a40a70fdce19d01e95162259889 Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Tue, 23 May 2023 20:16:39 +0800 Subject: [PATCH 2/6] RDMA/hns: Remove unnecessary QP type checks mainline inclusion from mainline-v6.5-rc1 commit b9989ab3f61ec459cbaf0a492fea3168bbfa4c7a category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8W3IU CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=b9989ab3f61ec459cbaf0a492fea3168bbfa4c7a ---------------------------------------------------------------------- It is not necessary to check the type of the queue on IO path because unsupported QP type cannot be created. Link: https://lore.kernel.org/r/20230523121641.3132102-2-huangjunxian6@hisilicon.com Signed-off-by: Chengchang Tang Signed-off-by: Junxian Huang Signed-off-by: Jason Gunthorpe Signed-off-by: Juan Zhou --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 23 ++++------------------ 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 7f8da2ac0d8e..df580be0fc80 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -447,18 +447,12 @@ static int check_send_valid(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { struct ib_device *ibdev = &hr_dev->ib_dev; - struct ib_qp *ibqp = &hr_qp->ibqp; int ret; - if (unlikely(ibqp->qp_type != IB_QPT_RC && - ibqp->qp_type != IB_QPT_GSI && - ibqp->qp_type != IB_QPT_UD)) { - ibdev_err(ibdev, "Not supported QP(0x%x)type!\n", - ibqp->qp_type); - return -EOPNOTSUPP; - } else if (unlikely(hr_qp->state == IB_QPS_RESET || - hr_qp->state == IB_QPS_INIT || - hr_qp->state == IB_QPS_RTR)) { + if 
(unlikely(hr_qp->state == IB_QPS_RESET || + hr_qp->state == IB_QPS_INIT || + hr_qp->state == IB_QPS_RTR)) { + ibdev_err(ibdev, "failed to post WQE, QP state %u!\n", hr_qp->state); return -EINVAL; @@ -879,17 +873,8 @@ static int check_recv_valid(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { struct ib_device *ibdev = &hr_dev->ib_dev; - struct ib_qp *ibqp = &hr_qp->ibqp; int ret; - if (unlikely(ibqp->qp_type != IB_QPT_RC && - ibqp->qp_type != IB_QPT_GSI && - ibqp->qp_type != IB_QPT_UD)) { - ibdev_err(ibdev, "unsupported qp type, qp_type = %d.\n", - ibqp->qp_type); - return -EOPNOTSUPP; - } - if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) return -EIO; -- Gitee From c079b3d82275c64abe884d505a80d837ac66637d Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Tue, 17 Oct 2023 20:52:33 +0800 Subject: [PATCH 3/6] RDMA/hns: Fix printing level of asynchronous events mainline inclusion from mainline-v6.7-rc1 commit 9faef73ef4f6666b97e04d99734ac09251098185 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8W3IU CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=9faef73ef4f6666b97e04d99734ac09251098185 ---------------------------------------------------------------------- The current driver will print all asynchronous events. Some of the print levels are set improperly, e.g. SRQ limit reach and SRQ last wqe reach, which may also occur during normal operation of the software. Currently, the information of these events is printed as a warning, which causes a large amount of printing even during normal use of the application. As a result, the service performance deteriorates. This patch fixes the printing storms by modifying the print level. 
Fixes: b00a92c8f2ca ("RDMA/hns: Move all prints out of irq handle") Signed-off-by: Chengchang Tang Signed-off-by: Junxian Huang Link: https://lore.kernel.org/r/20231017125239.164455-2-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky Signed-off-by: Juan Zhou --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index df580be0fc80..5fa8638d887f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -6235,7 +6235,7 @@ static void hns_roce_irq_work_handle(struct work_struct *work) case HNS_ROCE_EVENT_TYPE_COMM_EST: break; case HNS_ROCE_EVENT_TYPE_SQ_DRAINED: - ibdev_warn(ibdev, "Send queue drained.\n"); + ibdev_dbg(ibdev, "send queue drained.\n"); break; case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: ibdev_err(ibdev, "Local work queue 0x%x catast error, sub_event type is: %d\n", @@ -6250,10 +6250,10 @@ static void hns_roce_irq_work_handle(struct work_struct *work) irq_work->queue_num, irq_work->sub_type); break; case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: - ibdev_warn(ibdev, "SRQ limit reach.\n"); + ibdev_dbg(ibdev, "SRQ limit reach.\n"); break; case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: - ibdev_warn(ibdev, "SRQ last wqe reach.\n"); + ibdev_dbg(ibdev, "SRQ last wqe reach.\n"); break; case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR: ibdev_err(ibdev, "SRQ catas error.\n"); -- Gitee From f92a376462aa7f1094a7686f85d7599adcf655cc Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Thu, 7 Dec 2023 19:42:29 +0800 Subject: [PATCH 4/6] RDMA/hns: Add a max length of gid table mainline inclusion from mainline-v6.8-rc1 commit 7243396aaf12385ba514764b6401bcd15e1a52c7 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8W3IU CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=7243396aaf12385ba514764b6401bcd15e1a52c7 
---------------------------------------------------------------------- IB-core and rdma-core restrict the sgid_index specified by users, which is uint8_t/u8 data type, to only be within the range of 0-255, so it's meaningless to support excessively large gid_table_len. On the other hand, ib-core creates as many sysfs gid files as gid_table_len, most of which are not only useless because of the reason above, but also greatly increase the traversal time of the sysfs gid files for applications. This patch limits the maximum length of gid table to 256. Signed-off-by: Junxian Huang Link: https://lore.kernel.org/r/20231207114231.2872104-4-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky Signed-off-by: Juan Zhou --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 5fa8638d887f..0bd4547dc2f6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2230,6 +2230,7 @@ static void set_hem_page_size(struct hns_roce_dev *hr_dev) /* Apply all loaded caps before setting to hardware */ static void apply_func_caps(struct hns_roce_dev *hr_dev) { +#define MAX_GID_TBL_LEN 256 struct hns_roce_caps *caps = &hr_dev->caps; struct hns_roce_v2_priv *priv = hr_dev->priv; @@ -2264,8 +2265,14 @@ static void apply_func_caps(struct hns_roce_dev *hr_dev) caps->gmv_entry_sz = HNS_ROCE_V3_GMV_ENTRY_SZ; caps->gmv_hop_num = HNS_ROCE_HOP_NUM_0; - caps->gid_table_len[0] = caps->gmv_bt_num * - (HNS_HW_PAGE_SIZE / caps->gmv_entry_sz); + + /* It's meaningless to support excessively large gid_table_len, + * as the type of sgid_index in kernel struct ib_global_route + * and userspace struct ibv_global_route are u8/uint8_t (0-255). 
+ */ + caps->gid_table_len[0] = min_t(u32, MAX_GID_TBL_LEN, + caps->gmv_bt_num * + (HNS_HW_PAGE_SIZE / caps->gmv_entry_sz)); caps->gmv_entry_num = caps->gmv_bt_num * (PAGE_SIZE / caps->gmv_entry_sz); -- Gitee From 93e51f0d51ba05f869694138de30bbcc2584a9d8 Mon Sep 17 00:00:00 2001 From: Luoyouming Date: Wed, 22 Nov 2023 16:06:23 +0800 Subject: [PATCH 5/6] RDMA/hns: Fix a missing validation check for sl driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8W3IU -------------------------------------------------------------------------- After the user sets the sl value, the driver does not verify the sl value, which may result in the user's configuration value exceeding the system configuration specifications. This patch adds validation for the validity of the user-configured sl value. Fixes: 11ef2ec6aa7c ("RDMA/hns: Support DSCP of userspace") Signed-off-by: Luoyouming --- drivers/infiniband/hw/hns/hns_roce_ah.c | 9 +++++++++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 10 ++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 9e223aa927f1..abe11f4027a9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -62,6 +62,7 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct hns_roce_ah *ah = to_hr_ah(ibah); u8 priority = 0; u8 tc_mode = 0; + u32 sl_num; int ret; if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 && udata) @@ -92,6 +93,14 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, else ah->av.sl = rdma_ah_get_sl(ah_attr); + sl_num = min_t(u32, MAX_SERVICE_LEVEL, hr_dev->caps.sl_num - 1); + if (unlikely(ah->av.sl > sl_num)) { + ibdev_err_ratelimited(&hr_dev->ib_dev, + "failed to set sl, sl (%u) shouldn't be larger than %u.\n", + ah->av.sl, sl_num); + return -EINVAL; + } + memcpy(ah->av.dgid, grh->dgid.raw, 
HNS_ROCE_GID_SIZE); memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 0bd4547dc2f6..b2959b0f31ea 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -5147,6 +5147,7 @@ static int hns_roce_set_sl(struct ib_qp *ibqp, struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct ib_device *ibdev = &hr_dev->ib_dev; + u32 sl_num; int ret; ret = hns_roce_hw_v2_get_dscp(hr_dev, get_tclass(&attr->ah_attr.grh), @@ -5163,10 +5164,11 @@ static int hns_roce_set_sl(struct ib_qp *ibqp, else hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); - if (unlikely(hr_qp->sl > MAX_SERVICE_LEVEL)) { - ibdev_err(ibdev, - "failed to fill QPC, sl (%u) shouldn't be larger than %d.\n", - hr_qp->sl, MAX_SERVICE_LEVEL); + sl_num = min_t(u32, MAX_SERVICE_LEVEL, hr_dev->caps.sl_num - 1); + if (unlikely(hr_qp->sl > sl_num)) { + ibdev_err_ratelimited(ibdev, + "failed to fill QPC, sl (%u) shouldn't be larger than %u.\n", + hr_qp->sl, sl_num); return -EINVAL; } -- Gitee From 8c8000cee84acf18a75018fcda70c656bf2beea1 Mon Sep 17 00:00:00 2001 From: Luoyouming Date: Wed, 22 Nov 2023 16:06:22 +0800 Subject: [PATCH 6/6] RDMA/hns: Fix congestion control algorithm type for UD driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8W3IU -------------------------------------------------------------------------- Due to hardware limitations, UD mode can only configure DCQCN. Therefore, the default configuration in this mode is set to DCQCN, and additional validation is applied to user configuration. 
Fixes: 09f1b7cb29b2 ("RDMA/hns: Support congestion control algorithm configuration at QP granularity") Signed-off-by: Luoyouming --- drivers/infiniband/hw/hns/hns_roce_qp.c | 33 ++++++++++++++++++------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index b3ee159f1c1f..50771d454d67 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -1096,7 +1096,10 @@ static inline void default_congest_type(struct hns_roce_dev *hr_dev, { struct hns_roce_caps *caps = &hr_dev->caps; - hr_qp->congest_type = 1 << caps->default_congest_type; + if (hr_qp->ibqp.qp_type == IB_QPT_UD) + hr_qp->congest_type = HNS_ROCE_CREATE_QP_FLAGS_DCQCN; + else + hr_qp->congest_type = 1 << caps->default_congest_type; } static int set_congest_type(struct hns_roce_qp *hr_qp, @@ -1104,6 +1107,17 @@ static int set_congest_type(struct hns_roce_qp *hr_qp, { int ret = 0; + if (hr_qp->ibqp.qp_type == IB_QPT_UD && + !(ucmd->congest_type_flags & HNS_ROCE_CREATE_QP_FLAGS_DCQCN)) { + struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device); + + ibdev_err_ratelimited(&hr_dev->ib_dev, + "UD just support DCQCN. 
unsupported congest type 0x%llx.\n", + ucmd->congest_type_flags); + + return -EINVAL; + } + if (ucmd->congest_type_flags & HNS_ROCE_CREATE_QP_FLAGS_DCQCN) hr_qp->congest_type = HNS_ROCE_CREATE_QP_FLAGS_DCQCN; else if (ucmd->congest_type_flags & HNS_ROCE_CREATE_QP_FLAGS_LDCP) @@ -1118,19 +1132,17 @@ static int set_congest_type(struct hns_roce_qp *hr_qp, return ret; } -static void set_congest_param(struct hns_roce_dev *hr_dev, +static int set_congest_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct hns_roce_ib_create_qp *ucmd) { - int ret; + if (ucmd->comp_mask & HNS_ROCE_CREATE_QP_MASK_CONGEST_TYPE) + return set_congest_type(hr_qp, ucmd); - if (ucmd->comp_mask & HNS_ROCE_CREATE_QP_MASK_CONGEST_TYPE) { - ret = set_congest_type(hr_qp, ucmd); - if (ret == 0) - return; - } default_congest_type(hr_dev, hr_qp); + + return 0; } static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, @@ -1177,7 +1189,10 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, if (ret) ibdev_err(ibdev, "Failed to set user SQ size, ret = %d\n", ret); - set_congest_param(hr_dev, hr_qp, ucmd); + + ret = set_congest_param(hr_dev, hr_qp, ucmd); + if (ret) + return ret; } else { if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { -- Gitee