diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 947fdef4e40977fdded97b1ea9e7537a2dbcea44..b4fd2227cae9d6c80b1bf5c8a8ca1308457300ac 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -233,9 +233,12 @@ static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_CQC, hr_cq->cqn); - if (ret) - dev_err_ratelimited(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n", + if (ret) { + hr_cq->delayed_destroy_flag = true; + dev_err_ratelimited(dev, + "DESTROY_CQ failed (%d) for CQN %06lx\n", ret, hr_cq->cqn); + } xa_erase_irq(&cq_table->array, hr_cq->cqn); diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 9cc9fa069b8fac0aac1537a7776925cbc0a72c73..a6c4ef631bb7fc428e196a8fd220664144c00540 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1538,7 +1538,6 @@ void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn); void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type); void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp); void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type); -void hns_roce_flush_cqe(struct hns_roce_dev *hr_dev, u32 qpn); void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type); u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index); void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index d5d6bb04054b8269ac7a15c083e077fa574b794d..a6be620c2b221de92026f7fb2ea0061b15fdc2ee 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -713,8 +713,9 @@ void hns_roce_table_put(struct hns_roce_dev *hr_dev, ret = hr_dev->hw->clear_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT); if (ret) - dev_warn_ratelimited(dev, "failed to clear HEM base address, ret = %d.\n", - ret); + dev_warn_ratelimited(dev, + "failed to clear HEM base address, ret = %d.\n", + ret); hns_roce_free_hem(hr_dev, table->hem[i]); table->hem[i] = NULL; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index b6fb8276b37bd7ecba473940e2d12309521e9460..28feac784e492a4869fdbf62d354d26be04c93f7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -451,10 +451,17 @@ static int check_send_valid(struct hns_roce_dev *hr_dev, if (unlikely(hr_qp->state == IB_QPS_RESET || hr_qp->state == IB_QPS_INIT || - hr_qp->state == IB_QPS_RTR)) + hr_qp->state == IB_QPS_RTR)) { + ibdev_err_ratelimited(ibdev, + "failed to post WQE, QP state %u!\n", + hr_qp->state); return -EINVAL; - else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) + } else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) { + ibdev_err_ratelimited(ibdev, + "failed to post WQE, dev state %d!\n", + hr_dev->state); return -EIO; + } if (check_dca_attach_enable(hr_qp)) { ret = dca_attach_qp_buf(hr_dev, hr_qp); @@ -3151,8 +3158,8 @@ static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev, ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_INIT, IB_QPS_INIT, NULL); if (ret) { - ibdev_err_ratelimited(ibdev, "failed to modify qp to init, ret = %d.\n", - ret); + ibdev_err(ibdev, "failed to modify qp to init, ret = %d.\n", + ret); return ret; } @@ -3841,8 +3848,9 @@ static int free_mr_post_send_lp_wqe(struct hns_roce_qp *hr_qp) ret = hns_roce_v2_post_send(&hr_qp->ibqp, send_wr, &bad_wr); if (ret) { - ibdev_err_ratelimited(ibdev, "failed to post wqe for free mr, ret = %d.\n", - ret); + ibdev_err_ratelimited(ibdev, + "failed to post wqe for free mr, ret = %d.\n", + ret); return ret; } @@ -3882,8 +3890,8 @@ static void free_mr_send_cmd_to_hw(struct hns_roce_dev *hr_dev) ret = free_mr_post_send_lp_wqe(hr_qp); if (ret) { ibdev_err_ratelimited(ibdev, - "failed to send wqe (qp:0x%lx) for free mr, ret = %d.\n", - hr_qp->qpn, ret); + "failed to send wqe (qp:0x%lx) for free mr, ret = %d.\n", + hr_qp->qpn, ret); break; } @@ -3895,15 +3903,15 @@ static void free_mr_send_cmd_to_hw(struct hns_roce_dev *hr_dev) npolled = hns_roce_v2_poll_cq(&free_mr->rsv_cq->ib_cq, cqe_cnt, wc); if (npolled < 0) { ibdev_err_ratelimited(ibdev, - "failed to poll cqe for free mr, remain %d cqe.\n", - cqe_cnt); + "failed to poll cqe for free mr, remain %d cqe.\n", + cqe_cnt); goto out; } if (time_after(jiffies, end)) { ibdev_err_ratelimited(ibdev, - "failed to poll cqe for free mr and timeout, remain %d cqe.\n", - cqe_cnt); + "failed to poll cqe for free mr and timeout, remain %d cqe.\n", + cqe_cnt); goto out; } cqe_cnt -= npolled; @@ -5675,8 +5683,11 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp, struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); int ret = 0; - if (!check_qp_state(cur_state, new_state)) + if (!check_qp_state(cur_state, new_state)) { + ibdev_err_ratelimited(&hr_dev->ib_dev, + "Illegal state for QP!\n"); return -EINVAL; + } if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { memset(qpc_mask, 0, hr_dev->caps.qpc_sz); @@ -6256,10 +6267,12 @@ int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, /* Modify qp to reset before destroying qp */ ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, IB_QPS_RESET, udata); - if (ret) + if (ret) { + hr_qp->delayed_destroy_flag = true; ibdev_err_ratelimited(ibdev, - "failed to modify QP to RST, ret = %d.\n", - ret); + "failed to modify QP to RST, ret = %d.\n", + ret); + } } send_cq = hr_qp->ibqp.send_cq ? to_hr_cq(hr_qp->ibqp.send_cq) : NULL; @@ -6325,8 +6338,8 @@ int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata); if (ret) ibdev_err_ratelimited(&hr_dev->ib_dev, - "failed to destroy QP, QPN = 0x%06lx, ret = %d.\n", - hr_qp->qpn, ret); + "failed to destroy QP, QPN = 0x%06lx, ret = %d.\n", + hr_qp->qpn, ret); hns_roce_qp_destroy(hr_dev, hr_qp, udata); @@ -6621,8 +6634,8 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) ibdev_err_ratelimited(&hr_dev->ib_dev, - "failed to process cmd when modifying CQ, ret = %d.\n", - ret); + "failed to process cmd when modifying CQ, ret = %d.\n", + ret); err_out: if (ret) @@ -6647,8 +6660,8 @@ static int hns_roce_v2_query_cqc(struct hns_roce_dev *hr_dev, u32 cqn, HNS_ROCE_CMD_QUERY_CQC, cqn); if (ret) { ibdev_err_ratelimited(&hr_dev->ib_dev, - "failed to process cmd when querying CQ, ret = %d.\n", - ret); + "failed to process cmd when querying CQ, ret = %d.\n", + ret); goto err_mailbox; } @@ -6767,12 +6780,7 @@ static void hns_roce_irq_work_handle(struct work_struct *work) case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED: case HNS_ROCE_EVENT_TYPE_COMM_EST: case HNS_ROCE_EVENT_TYPE_SQ_DRAINED: - case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: - case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: - case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: - case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION: - case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH: hns_roce_qp_event(hr_dev, queue_num, event_type); break; case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: @@ -6871,7 +6879,7 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION: case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH: - hns_roce_flush_cqe(hr_dev, queue_num); + hns_roce_qp_event(hr_dev, queue_num, event_type); break; case HNS_ROCE_EVENT_TYPE_MB: hns_roce_cmd_event(hr_dev, diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 44f9c4d82b003cb52adf48013c03722826782bb0..1264bacb82b9fca479ef312a360356cbe5a3b9fb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -152,9 +152,11 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_MPT, key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1)); - if (ret) + if (ret) { + mr->delayed_destroy_flag = true; ibdev_warn_ratelimited(ibdev, "failed to destroy mpt, ret = %d.\n", - ret); + ret); + } } free_mr_pbl(hr_dev, mr); diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index bcf24f26a8397148ee2c2be4b8ec0de41bee5fdb..98d9f3a779976a304f53cb7221a2281eb1992e5b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -39,25 +39,6 @@ #include "hns_roce_hem.h" #include "hns_roce_dca.h" -static struct hns_roce_qp *hns_roce_qp_lookup(struct hns_roce_dev *hr_dev, - u32 qpn) -{ - struct device *dev = hr_dev->dev; - struct hns_roce_qp *qp; - unsigned long flags; - - xa_lock_irqsave(&hr_dev->qp_table_xa, flags); - qp = __hns_roce_qp_lookup(hr_dev, qpn); - if (qp) - refcount_inc(&qp->refcount); - xa_unlock_irqrestore(&hr_dev->qp_table_xa, flags); - - if (!qp) - dev_warn(dev, "async event for bogus QP %08x\n", qpn); - - return qp; -} - static void flush_work_handle(struct work_struct *work) { struct hns_roce_work *flush_work = container_of(work, @@ -114,28 +95,31 @@ void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp) void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type) { + struct device *dev = hr_dev->dev; struct hns_roce_qp *qp; - qp = hns_roce_qp_lookup(hr_dev, qpn); - if (!qp) - return; - - qp->event(qp, (enum hns_roce_event)event_type); + xa_lock(&hr_dev->qp_table_xa); + qp = __hns_roce_qp_lookup(hr_dev, qpn); + if (qp) + refcount_inc(&qp->refcount); + xa_unlock(&hr_dev->qp_table_xa); - if (refcount_dec_and_test(&qp->refcount)) - complete(&qp->free); -} + if (!qp) { + dev_warn(dev, "Async event for bogus QP %08x\n", qpn); + return; + } -void hns_roce_flush_cqe(struct hns_roce_dev *hr_dev, u32 qpn) -{ - struct hns_roce_qp *qp; + if (event_type == HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR || + event_type == HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR || + event_type == HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR || + event_type == HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION || + event_type == HNS_ROCE_EVENT_TYPE_INVALID_XRCETH) { + qp->state = IB_QPS_ERR; - qp = hns_roce_qp_lookup(hr_dev, qpn); - if (!qp) - return; + flush_cqe(hr_dev, qp); + } - qp->state = IB_QPS_ERR; - flush_cqe(hr_dev, qp); + qp->event(qp, (enum hns_roce_event)event_type); if (refcount_dec_and_test(&qp->refcount)) complete(&qp->free); diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index f6a0244cbf3e3e4ce2a4d11f2dd4ec8da3441774..454a7370a8b7a25250a8222144bfd61f4abc2505 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -161,9 +161,12 @@ static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_SRQ, srq->srqn); - if (ret) - dev_err_ratelimited(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n", + if (ret) { + srq->delayed_destroy_flag = true; + dev_err_ratelimited(hr_dev->dev, + "DESTROY_SRQ failed (%d) for SRQN %06lx\n", ret, srq->srqn); + } xa_erase_irq(&srq_table->xa, srq->srqn);