From 17b6c19797cb58cfe0b5a90708bd7c16651146f9 Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Mon, 24 Oct 2022 16:38:13 +0800 Subject: [PATCH 1/3] RDMA/hns: Disable local invalidate operation mainline inclusion from mainline-v6.1-rc4 commit 9e272ed69ad6f6952fafd0599d6993575512408e category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I6ROBG CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=9e272ed69ad6f6952fafd0599d6993575512408e ---------------------------------------------------------------------- When function reset and local invalidate are mixed, HNS RoCEE may hang. Before introducing the cause of the problem, two hardware internal concepts need to be introduced: 1. Execution queue: The queue of hardware execution instructions, function reset and local invalidate are queued for execution in this queue. 2.Local queue: A queue that stores local operation instructions. The instructions in the local queue will be sent to the execution queue for execution. The instructions in the local queue will not be removed until the execution is completed. The reason for the problem is as follows: 1. There is a function reset instruction in the execution queue, which is currently being executed. A necessary condition for the successful execution of function reset is: the hardware pipeline needs to empty the instructions that were not completed before; 2. A local invalidate instruction at the head of the local queue is sent to the execution queue. Now there are two instructions in the execution queue, the first is the function reset instruction, and the second is the local invalidate instruction, which will be executed in se quence; 3. The user has issued many local invalidate operations, causing the local queue to be filled up. 4. The user still has a new local operation command and is queuing to enter the local queue. But the local queue is full and cannot receive new instructions, this instruction is temporarily stored at the hardware pipeline. 5. The function reset has been waiting for the instruction before the hardware pipeline stage is drained. The hardware pipeline stage also caches a local invalidate instruction, so the function reset cannot be completed, and the instructions after it cannot be executed. These factors together cause the execution logic deadlock of the hardware, and the consequence is that RoCEE will not have any response. Considering that the local operation command may potentially cause RoCEE to hang, this feature is no longer supported. Fixes: e93df0108579 ("RDMA/hns: Support local invalidate for hip08 in kernel space") Signed-off-by: Yangyang Li Signed-off-by: Wenpeng Liang Signed-off-by: Haoyue Xu Link: https://lore.kernel.org/r/20221024083814.1089722-2-xuhaoyue1@hisilicon.com Signed-off-by: Leon Romanovsky Signed-off-by: Zhou Juan --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 11 ----------- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 2 -- 2 files changed, 13 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index cba78f0eac14..7ed35503ec93 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -121,7 +121,6 @@ static const u32 hns_roce_op_code[] = { HR_OPC_MAP(ATOMIC_CMP_AND_SWP, ATOM_CMP_AND_SWAP), HR_OPC_MAP(ATOMIC_FETCH_AND_ADD, ATOM_FETCH_AND_ADD), HR_OPC_MAP(SEND_WITH_INV, SEND_WITH_INV), - HR_OPC_MAP(LOCAL_INV, LOCAL_INV), HR_OPC_MAP(MASKED_ATOMIC_CMP_AND_SWP, ATOM_MSK_CMP_AND_SWAP), HR_OPC_MAP(MASKED_ATOMIC_FETCH_AND_ADD, ATOM_MSK_FETCH_AND_ADD), HR_OPC_MAP(REG_MR, FAST_REG_PMR), @@ -638,9 +637,6 @@ static int set_rc_opcode(struct hns_roce_dev *hr_dev, else ret = -EOPNOTSUPP; break; - case IB_WR_LOCAL_INV: - hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_SO); - fallthrough; case IB_WR_SEND_WITH_INV: rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey); break; @@ -3533,7 +3529,6 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev, hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_VALID); hr_reg_write(mpt_entry, MPT_PD, mr->pd); - hr_reg_enable(mpt_entry, MPT_L_INV_EN); hr_reg_write_bool(mpt_entry, MPT_BIND_EN, mr->access & IB_ACCESS_MW_BIND); @@ -3624,7 +3619,6 @@ static int hns_roce_v2_frmr_write_mtpt(struct hns_roce_dev *hr_dev, hr_reg_enable(mpt_entry, MPT_RA_EN); hr_reg_enable(mpt_entry, MPT_R_INV_EN); - hr_reg_enable(mpt_entry, MPT_L_INV_EN); hr_reg_enable(mpt_entry, MPT_FRE); hr_reg_clear(mpt_entry, MPT_MR_MW); @@ -3656,7 +3650,6 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw) hr_reg_write(mpt_entry, MPT_PD, mw->pdn); hr_reg_enable(mpt_entry, MPT_R_INV_EN); - hr_reg_enable(mpt_entry, MPT_L_INV_EN); hr_reg_enable(mpt_entry, MPT_LW_EN); hr_reg_enable(mpt_entry, MPT_MR_MW); @@ -4072,7 +4065,6 @@ static const u32 wc_send_op_map[] = { HR_WC_OP_MAP(RDMA_READ, RDMA_READ), HR_WC_OP_MAP(RDMA_WRITE, RDMA_WRITE), HR_WC_OP_MAP(RDMA_WRITE_WITH_IMM, RDMA_WRITE), - HR_WC_OP_MAP(LOCAL_INV, LOCAL_INV), HR_WC_OP_MAP(ATOM_CMP_AND_SWAP, COMP_SWAP), HR_WC_OP_MAP(ATOM_FETCH_AND_ADD, FETCH_ADD), HR_WC_OP_MAP(ATOM_MSK_CMP_AND_SWAP, MASKED_COMP_SWAP), @@ -4122,9 +4114,6 @@ static void fill_send_wc(struct ib_wc *wc, struct hns_roce_v2_cqe *cqe) case HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM: wc->wc_flags |= IB_WC_WITH_IMM; break; - case HNS_ROCE_V2_WQE_OP_LOCAL_INV: - wc->wc_flags |= IB_WC_WITH_INVALIDATE; - break; case HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP: case HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD: case HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP: diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index e5f3a4639bf3..dc7e7217324c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -152,7 +152,6 @@ enum { HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP = 0x8, HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD = 0x9, HNS_ROCE_V2_WQE_OP_FAST_REG_PMR = 0xa, - HNS_ROCE_V2_WQE_OP_LOCAL_INV = 0xb, HNS_ROCE_V2_WQE_OP_BIND_MW = 0xc, HNS_ROCE_V2_WQE_OP_MASK = 0x1f, }; @@ -901,7 +900,6 @@ struct hns_roce_v2_rc_send_wqe { #define RC_SEND_WQE_OWNER RC_SEND_WQE_FIELD_LOC(7, 7) #define RC_SEND_WQE_CQE RC_SEND_WQE_FIELD_LOC(8, 8) #define RC_SEND_WQE_FENCE RC_SEND_WQE_FIELD_LOC(9, 9) -#define RC_SEND_WQE_SO RC_SEND_WQE_FIELD_LOC(10, 10) #define RC_SEND_WQE_SE RC_SEND_WQE_FIELD_LOC(11, 11) #define RC_SEND_WQE_INLINE RC_SEND_WQE_FIELD_LOC(12, 12) #define RC_SEND_WQE_WQE_INDEX RC_SEND_WQE_FIELD_LOC(30, 15) -- Gitee From 064d39fb9c2c2930f582da953a09a983886bae5f Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Sat, 19 Nov 2022 15:08:34 +0800 Subject: [PATCH 2/3] RDMA/hns: fix memory leak in hns_roce_alloc_mr() mainline inclusion from mainline-v6.2-rc1 commit a115aa00b18f7b8982b8f458149632caf64a862a category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I6RP11 CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a115aa00b18f7b8982b8f458149632caf64a862a ---------------------------------------------------------------------- When hns_roce_mr_enable() failed in hns_roce_alloc_mr(), mr_key is not released. Compiled test only. Fixes: 9b2cf76c9f05 ("RDMA/hns: Optimize PBL buffer allocation process") Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20221119070834.48502-1-shaozhengchao@huawei.com Signed-off-by: Leon Romanovsky Signed-off-by: Zhou Juan --- drivers/infiniband/hw/hns/hns_roce_mr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 81aa3239a54a..eacbb938323b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -406,10 +406,10 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, return &mr->ibmr; -err_key: - free_mr_key(hr_dev, mr); err_pbl: free_mr_pbl(hr_dev, mr); +err_key: + free_mr_key(hr_dev, mr); err_free: kfree(mr); return ERR_PTR(ret); -- Gitee From be89d155ca03bd9114c55dde538a7d9f2ba5bb31 Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Sat, 26 Nov 2022 18:29:10 +0800 Subject: [PATCH 3/3] RDMA/hns: Fix error code of CMD mainline inclusion from mainline-v6.2-rc1 commit 667d6164b84884c64de3fc18670cd5a98b0b10cf category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I6RO6S CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=667d6164b84884c64de3fc18670cd5a98b0b10cf ---------------------------------------------------------------------- The error code is fixed to EIO when CMD fails to excute. This patch converts the error status reported by firmware to linux errno. Fixes: a04ff739f2a9 ("RDMA/hns: Add command queue support for hip08 RoCE driver") Link: https://lore.kernel.org/r/20221126102911.2921820-6-xuhaoyue1@hisilicon.com Signed-off-by: Chengchang Tang Signed-off-by: Haoyue Xu Signed-off-by: Jason Gunthorpe Signed-off-by: Zhou Juan --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 26 +++++++++++++++++++++- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 5 +++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 7ed35503ec93..bd95a58dec0b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1368,6 +1368,30 @@ static void update_cmdq_status(struct hns_roce_dev *hr_dev) hr_dev->cmd.state = HNS_ROCE_CMDQ_STATE_FATAL_ERR; } +static int hns_roce_cmd_err_convert_errno(u16 desc_ret) +{ + struct hns_roce_cmd_errcode errcode_table[] = { + {CMD_EXEC_SUCCESS, 0}, + {CMD_NO_AUTH, -EPERM}, + {CMD_NOT_EXIST, -EOPNOTSUPP}, + {CMD_CRQ_FULL, -EXFULL}, + {CMD_NEXT_ERR, -ENOSR}, + {CMD_NOT_EXEC, -ENOTBLK}, + {CMD_PARA_ERR, -EINVAL}, + {CMD_RESULT_ERR, -ERANGE}, + {CMD_TIMEOUT, -ETIME}, + {CMD_HILINK_ERR, -ENOLINK}, + {CMD_INFO_ILLEGAL, -ENXIO}, + {CMD_INVALID, -EBADR}, + }; + u16 i; + + for (i = 0; i < ARRAY_SIZE(errcode_table); i++) + if (desc_ret == errcode_table[i].return_status) + return errcode_table[i].errno; + return -EIO; +} + static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, struct hns_roce_cmq_desc *desc, int num) { @@ -1415,7 +1439,7 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, dev_err_ratelimited(hr_dev->dev, "Cmdq IO error, opcode = 0x%x, return = 0x%x.\n", desc->opcode, desc_ret); - ret = -EIO; + ret = hns_roce_cmd_err_convert_errno(desc_ret); } } else { /* FW/HW reset or incorrect number of desc */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index dc7e7217324c..f2dca8a2c054 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -256,6 +256,11 @@ enum hns_roce_cmd_return_status { CMD_OTHER_ERR = 0xff }; +struct hns_roce_cmd_errcode { + enum hns_roce_cmd_return_status return_status; + int errno; +}; + enum hns_roce_sgid_type { GID_TYPE_FLAG_ROCE_V1 = 0, GID_TYPE_FLAG_ROCE_V2_IPV4, -- Gitee