diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index f4ceaeba5601b9081d667a93848882a4c20ab25d..abe11f4027a9f0a0b1167b51b0d690e2d87b7178 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -62,6 +62,7 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
 	struct hns_roce_ah *ah = to_hr_ah(ibah);
 	u8 priority = 0;
 	u8 tc_mode = 0;
+	u32 sl_num;
 	int ret;
 
 	if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 && udata)
@@ -92,6 +93,14 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
 	else
 		ah->av.sl = rdma_ah_get_sl(ah_attr);
 
+	sl_num = min_t(u32, MAX_SERVICE_LEVEL, hr_dev->caps.sl_num - 1);
+	if (unlikely(ah->av.sl > sl_num)) {
+		ibdev_err_ratelimited(&hr_dev->ib_dev,
+				      "failed to set sl, sl (%u) shouldn't be larger than %u.\n",
+				      ah->av.sl, sl_num);
+		return -EINVAL;
+	}
+
 	memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE);
 	memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN);
 
@@ -108,6 +117,7 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
 	if (udata) {
 		resp.priority = ah->av.sl;
 		resp.tc_mode = tc_mode;
+		memcpy(resp.dmac, ah_attr->roce.dmac, ETH_ALEN);
 		ret = ib_copy_to_udata(udata, &resp,
 				       min(udata->outlen, sizeof(resp)));
 	}
diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.c b/drivers/infiniband/hw/hns/hns_roce_bond.c
index 06ffcf64c73ac2a7ca4755530d5cdb818c8c4adf..f9f57f10135fc04ea45ac4aec2a2b55b85a86910 100644
--- a/drivers/infiniband/hw/hns/hns_roce_bond.c
+++ b/drivers/infiniband/hw/hns/hns_roce_bond.c
@@ -256,7 +256,7 @@ static void hns_roce_set_bond(struct hns_roce_bond_group *bond_grp)
 static void hns_roce_clear_bond(struct hns_roce_bond_group *bond_grp)
 {
 	u8 main_func_idx = PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn);
-	struct hns_roce_dev *hr_dev;
+	struct hns_roce_dev *hr_dev = NULL;
 	struct net_device *net_dev;
 	int i, ret;
 
@@ -676,8 +676,8 @@ static void hns_roce_bond_info_update(struct hns_roce_bond_group *bond_grp,
 			net_dev = bond_grp->bond_func_info[i].net_dev;
 			if (net_dev && upper_dev != get_upper_dev_from_ndev(net_dev)) {
-				bond_grp->slave_map_diff |= (1 << i);
-				bond_grp->slave_map &= ~(1 << i);
+				bond_grp->slave_map_diff |= (1U << i);
+				bond_grp->slave_map &= ~(1U << i);
 			}
 		}
 		return;
@@ -689,8 +689,8 @@ static void hns_roce_bond_info_update(struct hns_roce_bond_group *bond_grp,
 		if (hr_dev) {
 			func_idx = PCI_FUNC(hr_dev->pci_dev->devfn);
 			if (!bond_grp->bond_func_info[func_idx].net_dev) {
-				bond_grp->slave_map_diff |= (1 << func_idx);
-				bond_grp->slave_map |= (1 << func_idx);
+				bond_grp->slave_map_diff |= (1U << func_idx);
+				bond_grp->slave_map |= (1U << func_idx);
 				priv = hr_dev->priv;
 				bond_grp->bond_func_info[func_idx].net_dev =
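
[Review note] The (1 << i) -> (1U << i) changes in hns_roce_bond_info_update() are more than style: if the bit index can reach 31, shifting a signed 1 into the sign bit of a 32-bit int is undefined behavior in C, and the result would also sign-extend when widened. A standalone illustration (not part of the patch; the function name is invented for the example):

    #include <stdio.h>

    /* "1 << 31" shifts a set bit into the sign position of a signed
     * int, which is undefined behavior; the unsigned form is well
     * defined for any bit index 0..31.
     */
    static unsigned int set_slave_bit(unsigned int map, unsigned int i)
    {
        return map | (1U << i); /* well defined for i <= 31 */
    }

    int main(void)
    {
        printf("0x%x\n", set_slave_bit(0, 31)); /* prints 0x80000000 */
        return 0;
    }
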
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index cd058acfa10fd9c41b3f900fd4452bd044ce8b8b..3110b4fd00eda0760f9d9237d487a8fda7896e83 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -182,6 +182,8 @@ static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
 	if (ret)
 		dev_err(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n", ret,
 			hr_cq->cqn);
+	if (ret == -EBUSY)
+		hr_cq->delayed_destroy_flag = true;
 
 	xa_erase(&cq_table->array, hr_cq->cqn);
 
@@ -193,7 +195,11 @@ static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
 		complete(&hr_cq->free);
 	wait_for_completion(&hr_cq->free);
 
-	hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn);
+	/* this resource will be freed when the driver is uninstalled, so
+	 * no memory leak will occur.
+	 */
+	if (!hr_cq->delayed_destroy_flag)
+		hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn);
 }
 
 static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
@@ -203,6 +209,10 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
 	struct hns_roce_buf_attr buf_attr = {};
 	int ret;
 
+	hr_cq->mtr_node = kvmalloc(sizeof(*hr_cq->mtr_node), GFP_KERNEL);
+	if (!hr_cq->mtr_node)
+		return -ENOMEM;
+
 	buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + PAGE_SHIFT;
 	buf_attr.region[0].size = hr_cq->cq_depth * hr_cq->cqe_size;
 	buf_attr.region[0].hopnum = hr_dev->caps.cqe_hop_num;
@@ -211,15 +221,22 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
 	ret = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr,
 				  hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT,
 				  udata, addr);
-	if (ret)
+	if (ret) {
 		ibdev_err(ibdev, "Failed to alloc CQ mtr, ret = %d\n", ret);
+		kvfree(hr_cq->mtr_node);
+	}
 
 	return ret;
 }
 
 static void free_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
 {
-	hns_roce_mtr_destroy(hr_dev, &hr_cq->mtr);
+	if (hr_cq->delayed_destroy_flag) {
+		hns_roce_add_unfree_mtr(hr_cq->mtr_node, hr_dev, &hr_cq->mtr);
+	} else {
+		hns_roce_mtr_destroy(hr_dev, &hr_cq->mtr);
+		kvfree(hr_cq->mtr_node);
+	}
 }
 
 static int alloc_cq_db(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
@@ -270,7 +287,8 @@ static void free_cq_db(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
 		uctx = rdma_udata_to_drv_context(udata,
 						 struct hns_roce_ucontext,
 						 ibucontext);
-		hns_roce_db_unmap_user(uctx, &hr_cq->db);
+		hns_roce_db_unmap_user(uctx, &hr_cq->db,
+				       hr_cq->delayed_destroy_flag);
 	} else {
 		hns_roce_free_db(hr_dev, &hr_cq->db);
 	}
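
[Review note] The CQ hunks above are one instance of the delayed-destroy scheme this patch applies uniformly to CQ, QP, SRQ, and MR: when the DESTROY mailbox command returns -EBUSY, hardware may still reference the buffer, so the HEM slot is left allocated and the mtr is parked on a per-device list instead of being freed. A condensed sketch of the pattern, assuming a hypothetical object type with the fields the patch adds (destroy_hw_ctx() stands in for the DESTROY_CQ mailbox call):

    /* Hypothetical object mirroring the fields added to hns_roce_cq. */
    struct hr_object {
        struct hns_roce_mtr mtr;
        struct hns_roce_mtr_node *mtr_node; /* preallocated at create */
        bool delayed_destroy_flag;
    };

    static void hr_object_free(struct hns_roce_dev *hr_dev,
                               struct hr_object *obj)
    {
        int ret = destroy_hw_ctx(hr_dev, obj); /* may return -EBUSY */

        if (ret == -EBUSY)
            obj->delayed_destroy_flag = true;

        if (obj->delayed_destroy_flag) {
            /* hardware may still DMA into the buffer: park it until
             * hns_roce_exit() runs hns_roce_free_unfree_mtr()
             */
            hns_roce_add_unfree_mtr(obj->mtr_node, hr_dev, &obj->mtr);
        } else {
            hns_roce_mtr_destroy(hr_dev, &obj->mtr);
            kvfree(obj->mtr_node);
        }
    }
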
diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c
index 0f5fe892e897db025991b7b2e06289608473a37e..78121524a794820f72dc19d22f74a3464b9f3294 100644
--- a/drivers/infiniband/hw/hns/hns_roce_db.c
+++ b/drivers/infiniband/hw/hns/hns_roce_db.c
@@ -24,7 +24,7 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
 	page = kmalloc(sizeof(*page), GFP_KERNEL);
 	if (!page) {
 		ret = -ENOMEM;
-		goto out;
+		goto err_out;
 	}
 
 	refcount_set(&page->refcount, 1);
@@ -33,8 +33,12 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
 			       PAGE_SIZE, 0);
 	if (IS_ERR(page->umem)) {
 		ret = PTR_ERR(page->umem);
-		kfree(page);
-		goto out;
+		goto err_page;
+	}
+	page->umem_node = kvmalloc(sizeof(*page->umem_node), GFP_KERNEL);
+	if (!page->umem_node) {
+		ret = -ENOMEM;
+		goto err_umem;
 	}
 
 	list_add(&page->list, &context->page_list);
@@ -45,22 +49,36 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
 	db->virt_addr = sg_virt(page->umem->sg_head.sgl) + offset;
 	db->u.user_page = page;
 	refcount_inc(&page->refcount);
+	mutex_unlock(&context->page_mutex);
+	return 0;
 
-out:
+err_umem:
+	ib_umem_release(page->umem);
+err_page:
+	kvfree(page);
+err_out:
 	mutex_unlock(&context->page_mutex);
 
 	return ret;
 }
 
 void hns_roce_db_unmap_user(struct hns_roce_ucontext *context,
-			    struct hns_roce_db *db)
+			    struct hns_roce_db *db,
+			    bool delayed_unmap_flag)
 {
+	struct hns_roce_dev *hr_dev = to_hr_dev(context->ibucontext.device);
+
 	mutex_lock(&context->page_mutex);
 
 	refcount_dec(&db->u.user_page->refcount);
 	if (refcount_dec_if_one(&db->u.user_page->refcount)) {
 		list_del(&db->u.user_page->list);
-		ib_umem_release(db->u.user_page->umem);
+		if (delayed_unmap_flag) {
+			hns_roce_add_unfree_umem(db->u.user_page, hr_dev);
+		} else {
+			ib_umem_release(db->u.user_page->umem);
+			kvfree(db->u.user_page->umem_node);
+		}
 		kfree(db->u.user_page);
 	}
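
[Review note] For reference, the double-decrement idiom that hns_roce_db_unmap_user() keeps: refcount_dec() drops this mapping's reference, and refcount_dec_if_one() succeeds only when the creation reference is the last one left, so the page is torn down exactly once. A userspace demonstration of the same idiom with C11 atomics (names invented, not driver code):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Mimics the kernel's refcount_dec_if_one(): drop the reference
     * only if it is the last one, returning true on success.
     */
    static bool dec_if_one(atomic_int *ref)
    {
        int one = 1;

        return atomic_compare_exchange_strong(ref, &one, 0);
    }

    int main(void)
    {
        atomic_int ref = 2; /* creation ref + one mapping */

        atomic_fetch_sub(&ref, 1); /* unmap: drop the mapping ref */
        if (dec_if_one(&ref))      /* creation ref was the last one */
            printf("last user: release the umem\n");
        return 0;
    }
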
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 743453accfd75192de224bcb03f5e07767903024..a9ff1eb815067a9cb74424b43f203608802b2cd0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -409,6 +409,8 @@ struct hns_roce_mr {
 	struct hns_roce_mtr pbl_mtr;
 	u32 npages;
 	dma_addr_t *page_list;
+	bool delayed_destroy_flag;
+	struct hns_roce_mtr_node *mtr_node;
 };
 
 struct hns_roce_mr_table {
@@ -475,11 +477,17 @@ struct hns_roce_db_pgdir {
 	dma_addr_t db_dma;
 };
 
+struct hns_roce_umem_node {
+	struct ib_umem *umem;
+	struct list_head list;
+};
+
 struct hns_roce_user_db_page {
 	struct list_head list;
 	struct ib_umem *umem;
 	unsigned long user_virt;
 	refcount_t refcount;
+	struct hns_roce_umem_node *umem_node;
 };
 
 struct hns_roce_db {
@@ -531,7 +539,9 @@ struct hns_roce_cq {
 	int is_armed; /* cq is armed */
 	struct list_head node; /* all armed cqs are on a list */
 	u8 poe_channel;
+	bool delayed_destroy_flag;
 	struct hns_roce_notify_conf write_notify;
+	struct hns_roce_mtr_node *mtr_node;
 };
 
 struct hns_roce_idx_que {
@@ -540,6 +550,7 @@ struct hns_roce_idx_que {
 	unsigned long *bitmap;
 	u32 head;
 	u32 tail;
+	struct hns_roce_mtr_node *mtr_node;
 };
 
 struct hns_roce_srq {
@@ -565,6 +576,8 @@ struct hns_roce_srq {
 	void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event);
 	struct hns_roce_db rdb;
 	u32 cap_flags;
+	bool delayed_destroy_flag;
+	struct hns_roce_mtr_node *mtr_node;
 };
 
 struct hns_roce_uar_table {
@@ -740,6 +753,8 @@ struct hns_roce_qp {
 	u32 config;
 	u8 tc_mode;
 	u8 priority;
+	bool delayed_destroy_flag;
+	struct hns_roce_mtr_node *mtr_node;
 };
 
 struct hns_roce_ib_iboe {
@@ -1100,6 +1115,11 @@ struct hns_roce_port {
 	struct hns_roce_scc_param *scc_param;
 };
 
+struct hns_roce_mtr_node {
+	struct hns_roce_mtr mtr;
+	struct list_head list;
+};
+
 struct hns_roce_dev {
 	struct ib_device ib_dev;
 	struct pci_dev *pci_dev;
@@ -1183,6 +1203,10 @@ struct hns_roce_dev {
 	struct rdma_notify_mem *notify_tbl;
 	size_t notify_num;
+	struct list_head mtr_unfree_list; /* list of unfree mtr on this dev */
+	spinlock_t mtr_unfree_list_lock; /* protect mtr_unfree_list */
+	struct list_head umem_unfree_list; /* list of unfree umem on this dev */
+	spinlock_t umem_unfree_list_lock; /* protect umem_unfree_list */
 };
 
 static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev)
@@ -1464,7 +1488,8 @@ int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
 int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
 			 struct hns_roce_db *db);
 void hns_roce_db_unmap_user(struct hns_roce_ucontext *context,
-			    struct hns_roce_db *db);
+			    struct hns_roce_db *db,
+			    bool delayed_unmap_flag);
 int hns_roce_alloc_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db,
 		      int order);
 void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db);
@@ -1484,6 +1509,13 @@ int hns_roce_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp);
 int hns_roce_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ib_qp);
 int hns_roce_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr);
 int hns_roce_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr);
+void hns_roce_add_unfree_umem(struct hns_roce_user_db_page *user_page,
+			      struct hns_roce_dev *hr_dev);
+void hns_roce_free_unfree_umem(struct hns_roce_dev *hr_dev);
+void hns_roce_add_unfree_mtr(struct hns_roce_mtr_node *pos,
+			     struct hns_roce_dev *hr_dev,
+			     struct hns_roce_mtr *mtr);
+void hns_roce_free_unfree_mtr(struct hns_roce_dev *hr_dev);
 struct hns_user_mmap_entry *
 hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address,
 				size_t length,
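
[Review note] Two design points are baked into the new types: hns_roce_mtr_node embeds a full struct hns_roce_mtr rather than a pointer, because the owning CQ/QP/SRQ/MR object is freed at destroy time and the mtr must be copied out to outlive it; and every node is kvmalloc'ed on the create path, so the destroy path never performs an allocation that could fail. In miniature (illustrative only; struct hr_obj is invented for the sketch):

    /* create: allocate the tracking node while failure is still cheap,
     * i.e. while the verb can simply return -ENOMEM to the caller
     */
    static int hr_obj_create(struct hr_obj *obj)
    {
        obj->mtr_node = kvmalloc(sizeof(*obj->mtr_node), GFP_KERNEL);
        if (!obj->mtr_node)
            return -ENOMEM;
        return 0;
    }

    /* destroy: parking a busy mtr is only a copy plus a list splice,
     * neither of which can fail
     */
    static void hr_obj_park(struct hns_roce_dev *hr_dev, struct hr_obj *obj)
    {
        hns_roce_add_unfree_mtr(obj->mtr_node, hr_dev, &obj->mtr);
    }
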
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index bd2d8e447c199f3a03177f13daaea852897def3c..69f969b7773bd32b15a3baa53d8eccba57ec620c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -2268,6 +2268,7 @@ static void set_hem_page_size(struct hns_roce_dev *hr_dev)
 /* Apply all loaded caps before setting to hardware */
 static void apply_func_caps(struct hns_roce_dev *hr_dev)
 {
+#define MAX_GID_TBL_LEN 256
 	struct hns_roce_caps *caps = &hr_dev->caps;
 	struct hns_roce_v2_priv *priv = hr_dev->priv;
 
@@ -2303,9 +2304,14 @@ static void apply_func_caps(struct hns_roce_dev *hr_dev)
 		caps->gmv_entry_sz = HNS_ROCE_V3_GMV_ENTRY_SZ;
 
 		caps->gmv_hop_num = HNS_ROCE_HOP_NUM_0;
-		caps->gid_table_len[0] = caps->gmv_bt_num *
-					 (HNS_HW_PAGE_SIZE / caps->gmv_entry_sz);
+		/* It's meaningless to support excessively large gid_table_len,
+		 * as the type of sgid_index in kernel struct ib_global_route
+		 * and userspace struct ibv_global_route is u8/uint8_t (0-255).
+		 */
+		caps->gid_table_len[0] = min_t(u32, MAX_GID_TBL_LEN,
+					       caps->gmv_bt_num *
+					       (HNS_HW_PAGE_SIZE / caps->gmv_entry_sz));
 
 		caps->gmv_entry_num = caps->gmv_bt_num * (PAGE_SIZE /
 							  caps->gmv_entry_sz);
 	} else {
@@ -5153,6 +5159,7 @@ static int check_congest_type(struct ib_qp *ibqp,
 		congest_alg->wnd_mode_sel = WND_LIMIT;
 		break;
 	default:
+		hr_qp->congest_type = HNS_ROCE_CONGEST_TYPE_DCQCN;
 		congest_alg->alg_sel = CONGEST_DCQCN;
 		congest_alg->alg_sub_sel = UNSUPPORT_CONGEST_LEVEL;
 		congest_alg->dip_vld = DIP_INVALID;
@@ -5171,6 +5178,7 @@ static int fill_congest_field(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
 	struct hns_roce_congestion_algorithm congest_field;
 	struct ib_device *ibdev = ibqp->device;
 	struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
+	struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
 	u32 dip_idx = 0;
 	int ret;
 
@@ -5184,7 +5192,7 @@ static int fill_congest_field(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
 	hr_reg_write(context, QPC_CONGEST_ALGO_TMPL_ID,
 		     hr_dev->congest_algo_tmpl_id +
-		     hr_dev->caps.congest_type * HNS_ROCE_CONGEST_SIZE);
+		     ilog2(hr_qp->congest_type) * HNS_ROCE_CONGEST_SIZE);
 	hr_reg_clear(qpc_mask, QPC_CONGEST_ALGO_TMPL_ID);
 
 	hr_reg_write(&context->ext, QPCEX_CONGEST_ALG_SEL, congest_field.alg_sel);
@@ -5243,6 +5251,7 @@ static int hns_roce_set_sl(struct ib_qp *ibqp,
 	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
 	struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
 	struct ib_device *ibdev = &hr_dev->ib_dev;
+	u32 sl_num;
 	int ret;
 
 	ret = hns_roce_hw_v2_get_dscp(hr_dev, get_tclass(&attr->ah_attr.grh),
@@ -5259,10 +5268,11 @@ static int hns_roce_set_sl(struct ib_qp *ibqp,
 	else
 		hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
 
-	if (unlikely(hr_qp->sl > MAX_SERVICE_LEVEL)) {
-		ibdev_err(ibdev,
-			  "failed to fill QPC, sl (%u) shouldn't be larger than %d.\n",
-			  hr_qp->sl, MAX_SERVICE_LEVEL);
+	sl_num = min_t(u32, MAX_SERVICE_LEVEL, hr_dev->caps.sl_num - 1);
+	if (unlikely(hr_qp->sl > sl_num)) {
+		ibdev_err_ratelimited(ibdev,
+				      "failed to fill QPC, sl (%u) shouldn't be larger than %u.\n",
+				      hr_qp->sl, sl_num);
 		return -EINVAL;
 	}
 
@@ -6034,6 +6044,9 @@ int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
 			  "failed to destroy QP, QPN = 0x%06lx, ret = %d.\n",
 			  hr_qp->qpn, ret);
 
+	if (ret == -EBUSY)
+		hr_qp->delayed_destroy_flag = true;
+
 	hns_roce_qp_destroy(hr_dev, hr_qp, udata);
 
 	return 0;
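
[Review note] The QPC_CONGEST_ALGO_TMPL_ID change relies on hr_qp->congest_type being a one-hot flag (as the "1 << caps->default_congest_type" expression in hns_roce_qp.c suggests), so ilog2() recovers the algorithm index that the old code read from caps. A quick userspace sanity check of that mapping (the concrete flag values are assumed from the one-hot convention, not quoted from the uapi header):

    #include <stdio.h>

    /* ilog2 of a one-hot value == the position of its single set bit */
    static unsigned int ilog2_u32(unsigned int v)
    {
        unsigned int r = 0;

        while (v >>= 1)
            r++;
        return r;
    }

    int main(void)
    {
        /* assumed one-hot flags: DCQCN=1, LDCP=2, HC3=4, DIP=8 */
        for (unsigned int flag = 1; flag <= 8; flag <<= 1)
            printf("flag 0x%x -> template index %u\n",
                   flag, ilog2_u32(flag));
        return 0;
    }
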
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 861c63449519be0f722785c0a996f9018921d6fb..4eb18fb31726095e96703ce714601c5e265803dc 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -1309,6 +1309,12 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
 	INIT_LIST_HEAD(&hr_dev->uctx_list);
 	spin_lock_init(&hr_dev->uctx_list_lock);
 
+	INIT_LIST_HEAD(&hr_dev->mtr_unfree_list);
+	spin_lock_init(&hr_dev->mtr_unfree_list_lock);
+
+	INIT_LIST_HEAD(&hr_dev->umem_unfree_list);
+	spin_lock_init(&hr_dev->umem_unfree_list_lock);
+
 	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
 	    hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) {
 		INIT_LIST_HEAD(&hr_dev->pgdir_list);
@@ -1555,6 +1561,8 @@ void hns_roce_exit(struct hns_roce_dev *hr_dev, bool bond_cleanup)
 	if (hr_dev->hw->hw_exit)
 		hr_dev->hw->hw_exit(hr_dev);
 	hns_roce_teardown_hca(hr_dev);
+	hns_roce_free_unfree_umem(hr_dev);
+	hns_roce_free_unfree_mtr(hr_dev);
 	hns_roce_cleanup_hem(hr_dev);
 
 	if (hr_dev->cmd_mod)
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 0b5108d792191d31de4dd5701152ba1e353f787b..9c0281744c484f980c860fd9aa2f05bf65b19916 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -83,7 +83,11 @@ static void free_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
 {
 	unsigned long obj = key_to_hw_index(mr->key);
 
-	hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table, obj);
+	/* this resource will be freed when the driver is uninstalled, so
+	 * no memory leak will occur.
+	 */
+	if (!mr->delayed_destroy_flag)
+		hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table, obj);
 	ida_free(&hr_dev->mr_table.mtpt_ida.ida, (int)obj);
 }
 
@@ -95,6 +99,10 @@ static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
 	struct hns_roce_buf_attr buf_attr = {};
 	int err;
 
+	mr->mtr_node = kvmalloc(sizeof(*mr->mtr_node), GFP_KERNEL);
+	if (!mr->mtr_node)
+		return -ENOMEM;
+
 	mr->pbl_hop_num = is_fast ? 1 : hr_dev->caps.pbl_hop_num;
 	buf_attr.page_shift = is_fast ? PAGE_SHIFT :
 			      hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT;
@@ -113,6 +121,7 @@ static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
 			       udata, start);
 	if (err) {
 		ibdev_err(ibdev, "failed to alloc pbl mtr, ret = %d.\n", err);
+		kvfree(mr->mtr_node);
 		return err;
 	}
 
@@ -124,7 +133,12 @@ static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
 
 static void free_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
 {
-	hns_roce_mtr_destroy(hr_dev, &mr->pbl_mtr);
+	if (mr->delayed_destroy_flag && mr->type != MR_TYPE_DMA) {
+		hns_roce_add_unfree_mtr(mr->mtr_node, hr_dev, &mr->pbl_mtr);
+	} else {
+		hns_roce_mtr_destroy(hr_dev, &mr->pbl_mtr);
+		kvfree(mr->mtr_node);
+	}
 }
 
 static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
@@ -139,6 +153,8 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
 		if (ret)
 			ibdev_warn(ibdev, "failed to destroy mpt, ret = %d.\n", ret);
+		if (ret == -EBUSY)
+			mr->delayed_destroy_flag = true;
 	}
 
 	free_mr_pbl(hr_dev, mr);
@@ -1217,3 +1233,75 @@ void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
 	/* free buffers */
 	mtr_free_bufs(hr_dev, mtr);
 }
+
+static void hns_roce_copy_mtr(struct hns_roce_mtr *new_mtr, struct hns_roce_mtr *old_mtr)
+{
+	struct list_head *new_head, *old_head;
+	int i, j;
+
+	memcpy(new_mtr, old_mtr, sizeof(*old_mtr));
+
+	for (i = 0; i < HNS_ROCE_MAX_BT_REGION; i++)
+		for (j = 0; j < HNS_ROCE_MAX_BT_LEVEL; j++) {
+			new_head = &new_mtr->hem_list.mid_bt[i][j];
+			old_head = &old_mtr->hem_list.mid_bt[i][j];
+			list_replace(old_head, new_head);
+		}
+
+	new_head = &new_mtr->hem_list.root_bt;
+	old_head = &old_mtr->hem_list.root_bt;
+	list_replace(old_head, new_head);
+
+	new_head = &new_mtr->hem_list.btm_bt;
+	old_head = &old_mtr->hem_list.btm_bt;
+	list_replace(old_head, new_head);
+}
+
+void hns_roce_add_unfree_mtr(struct hns_roce_mtr_node *pos,
+			     struct hns_roce_dev *hr_dev,
+			     struct hns_roce_mtr *mtr)
+{
+	hns_roce_copy_mtr(&pos->mtr, mtr);
+
+	spin_lock(&hr_dev->mtr_unfree_list_lock);
+	list_add_tail(&pos->list, &hr_dev->mtr_unfree_list);
+	spin_unlock(&hr_dev->mtr_unfree_list_lock);
+}
+
+void hns_roce_free_unfree_mtr(struct hns_roce_dev *hr_dev)
+{
+	struct hns_roce_mtr_node *pos, *next;
+
+	spin_lock(&hr_dev->mtr_unfree_list_lock);
+	list_for_each_entry_safe(pos, next, &hr_dev->mtr_unfree_list, list) {
+		list_del(&pos->list);
+		hns_roce_mtr_destroy(hr_dev, &pos->mtr);
+		kvfree(pos);
+	}
+	spin_unlock(&hr_dev->mtr_unfree_list_lock);
+}
+
+void hns_roce_add_unfree_umem(struct hns_roce_user_db_page *user_page,
+			      struct hns_roce_dev *hr_dev)
+{
+	struct hns_roce_umem_node *pos = user_page->umem_node;
+
+	pos->umem = user_page->umem;
+
+	spin_lock(&hr_dev->umem_unfree_list_lock);
+	list_add_tail(&pos->list, &hr_dev->umem_unfree_list);
+	spin_unlock(&hr_dev->umem_unfree_list_lock);
+}
+
+void hns_roce_free_unfree_umem(struct hns_roce_dev *hr_dev)
+{
+	struct hns_roce_umem_node *pos, *next;
+
+	spin_lock(&hr_dev->umem_unfree_list_lock);
+	list_for_each_entry_safe(pos, next, &hr_dev->umem_unfree_list, list) {
+		list_del(&pos->list);
+		ib_umem_release(pos->umem);
+		kvfree(pos);
+	}
+	spin_unlock(&hr_dev->umem_unfree_list_lock);
+}
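
[Review note] hns_roce_copy_mtr() needs list_replace() because a plain memcpy() of a structure containing list_heads copies the pointers but leaves the old node's neighbors pointing at the old memory; list_replace() splices the copy into the original's position so the list stays consistent. A minimal runnable demonstration (userspace re-implementation of the kernel helper, for illustration only):

    #include <stdio.h>
    #include <stddef.h>
    #include <string.h>

    struct list_head { struct list_head *next, *prev; };

    /* the same splice as the kernel's list_replace() in include/linux/list.h */
    static void list_replace(struct list_head *old, struct list_head *new)
    {
        new->next = old->next;
        new->next->prev = new;
        new->prev = old->prev;
        new->prev->next = new;
    }

    struct holder {
        int data;
        struct list_head link;
    };

    int main(void)
    {
        struct list_head head = { &head, &head };
        struct holder a = { .data = 42 };
        struct holder b;

        /* list: head <-> a.link */
        a.link.next = a.link.prev = &head;
        head.next = head.prev = &a.link;

        /* memcpy alone leaves head still pointing at a.link ... */
        memcpy(&b, &a, sizeof(a));
        /* ... so splice b.link into a.link's place, as hns_roce_copy_mtr() does */
        list_replace(&a.link, &b.link);

        struct holder *h = (struct holder *)((char *)head.next -
                                             offsetof(struct holder, link));
        printf("%d (read through the copy)\n", h->data); /* prints 42 */
        return 0;
    }

Note also that the final spin_unlock() in hns_roce_free_unfree_umem() must release umem_unfree_list_lock, the lock taken at the top of the function, as fixed above.
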
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index e8e5641ec6927e5be132476329a641e4ad6e6157..d3251fa82242cad7437e9dda6183dc7e42abb3f8 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -387,6 +387,12 @@ static void free_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
 {
 	struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
 
+	/* this resource will be freed when the driver is uninstalled, so
+	 * no memory leak will occur.
+	 */
+	if (hr_qp->delayed_destroy_flag)
+		return;
+
 	if (hr_dev->caps.trrl_entry_sz)
 		hns_roce_table_put(hr_dev, &qp_table->trrl_table, hr_qp->qpn);
 	hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn);
@@ -782,12 +788,17 @@ static int alloc_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 	struct ib_device *ibdev = &hr_dev->ib_dev;
 	int ret;
 
+	hr_qp->mtr_node = kvmalloc(sizeof(*hr_qp->mtr_node), GFP_KERNEL);
+	if (!hr_qp->mtr_node)
+		return -ENOMEM;
+
 	if (dca_en) {
 		/* DCA must be enabled after the buffer attr is configured. */
 		ret = hns_roce_enable_dca(hr_dev, hr_qp, udata);
 		if (ret) {
 			ibdev_err(ibdev, "failed to enable DCA, ret = %d.\n",
 				  ret);
+			kvfree(hr_qp->mtr_node);
 			return ret;
 		}
 
@@ -811,6 +822,7 @@ static int alloc_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 		ibdev_err(ibdev, "failed to create WQE mtr, ret = %d.\n", ret);
 		if (dca_en)
 			hns_roce_disable_dca(hr_dev, hr_qp, udata);
+		kvfree(hr_qp->mtr_node);
 	}
 
 	return ret;
@@ -819,7 +831,12 @@ static int alloc_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 static void free_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 			 struct ib_udata *udata)
 {
-	hns_roce_mtr_destroy(hr_dev, &hr_qp->mtr);
+	if (hr_qp->delayed_destroy_flag) {
+		hns_roce_add_unfree_mtr(hr_qp->mtr_node, hr_dev, &hr_qp->mtr);
+	} else {
+		hns_roce_mtr_destroy(hr_dev, &hr_qp->mtr);
+		kvfree(hr_qp->mtr_node);
+	}
 
 	if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH)
 		hns_roce_disable_dca(hr_dev, hr_qp, udata);
@@ -959,7 +976,7 @@ static int alloc_user_qp_db(struct hns_roce_dev *hr_dev,
 
 err_sdb:
 	if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB)
-		hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
+		hns_roce_db_unmap_user(uctx, &hr_qp->sdb, false);
 err_out:
 	return ret;
 }
@@ -1041,9 +1058,11 @@ static void free_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 	if (udata) {
 		if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
-			hns_roce_db_unmap_user(uctx, &hr_qp->rdb);
+			hns_roce_db_unmap_user(uctx, &hr_qp->rdb,
+					       hr_qp->delayed_destroy_flag);
 		if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB)
-			hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
+			hns_roce_db_unmap_user(uctx, &hr_qp->sdb,
+					       hr_qp->delayed_destroy_flag);
 		if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE)
 			qp_user_mmap_entry_remove(hr_qp);
 	} else {
@@ -1095,7 +1114,10 @@ static inline void default_congest_type(struct hns_roce_dev *hr_dev,
 					struct hns_roce_qp *hr_qp)
 {
 	struct hns_roce_caps *caps = &hr_dev->caps;
 
-	hr_qp->congest_type = 1 << caps->default_congest_type;
+	if (hr_qp->ibqp.qp_type == IB_QPT_UD)
+		hr_qp->congest_type = HNS_ROCE_CREATE_QP_FLAGS_DCQCN;
+	else
+		hr_qp->congest_type = 1 << caps->default_congest_type;
 }
 
 static int set_congest_type(struct hns_roce_qp *hr_qp,
@@ -1103,6 +1125,17 @@ static int set_congest_type(struct hns_roce_qp *hr_qp,
 {
 	int ret = 0;
 
+	if (hr_qp->ibqp.qp_type == IB_QPT_UD &&
+	    !(ucmd->congest_type_flags & HNS_ROCE_CREATE_QP_FLAGS_DCQCN)) {
+		struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device);
+
+		ibdev_err_ratelimited(&hr_dev->ib_dev,
+				      "UD only supports DCQCN, unsupported congest type 0x%llx.\n",
+				      ucmd->congest_type_flags);
+
+		return -EINVAL;
+	}
+
 	if (ucmd->congest_type_flags & HNS_ROCE_CREATE_QP_FLAGS_DCQCN)
 		hr_qp->congest_type = HNS_ROCE_CREATE_QP_FLAGS_DCQCN;
 	else if (ucmd->congest_type_flags & HNS_ROCE_CREATE_QP_FLAGS_LDCP)
@@ -1117,19 +1150,16 @@ static int set_congest_type(struct hns_roce_qp *hr_qp,
 	return ret;
 }
 
-static void set_congest_param(struct hns_roce_dev *hr_dev,
+static int set_congest_param(struct hns_roce_dev *hr_dev,
 			      struct hns_roce_qp *hr_qp,
 			      struct hns_roce_ib_create_qp *ucmd)
 {
-	int ret;
-
-	if (ucmd->comp_mask & HNS_ROCE_CREATE_QP_MASK_CONGEST_TYPE) {
-		ret = set_congest_type(hr_qp, ucmd);
-		if (ret == 0)
-			return;
-	}
+	if (ucmd->comp_mask & HNS_ROCE_CREATE_QP_MASK_CONGEST_TYPE)
+		return set_congest_type(hr_qp, ucmd);
 
 	default_congest_type(hr_dev, hr_qp);
+
+	return 0;
 }
 
 static void set_qp_notify_param(struct hns_roce_qp *hr_qp,
@@ -1234,7 +1264,10 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 		ret = set_uqp_create_flag_param(hr_dev, hr_qp, init_attr, ucmd);
 		if (ret)
 			return ret;
-		set_congest_param(hr_dev, hr_qp, ucmd);
+
+		ret = set_congest_param(hr_dev, hr_qp, ucmd);
+		if (ret)
+			return ret;
 	} else {
 		if (init_attr->create_flags &
 		    IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
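
[Review note] Beyond the void-to-int signature change, set_congest_param() fixes an error-swallowing bug: the old version fell back to the default congest type whenever set_congest_type() rejected the user's flags, so invalid input was silently accepted. Paraphrased before/after, condensed from the hunks above:

    /* before: a set_congest_type() failure fell through to the default */
    if (ucmd->comp_mask & HNS_ROCE_CREATE_QP_MASK_CONGEST_TYPE) {
        ret = set_congest_type(hr_qp, ucmd);
        if (ret == 0)
            return;
    }
    default_congest_type(hr_dev, hr_qp); /* also ran on error */

    /* after: the error propagates to the create_qp caller */
    if (ucmd->comp_mask & HNS_ROCE_CREATE_QP_MASK_CONGEST_TYPE)
        return set_congest_type(hr_qp, ucmd);
    default_congest_type(hr_dev, hr_qp);
    return 0;
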
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index 1e8b7a65519fb1e9ac482cb86f5a44d630af63dc..0f2f4e897738158f636b360f612cc1e54cdf1a34 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -164,14 +164,16 @@ static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
 	if (ret)
 		dev_err(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n",
 			ret, srq->srqn);
+	if (ret == -EBUSY)
+		srq->delayed_destroy_flag = true;
 
 	xa_erase(&srq_table->xa, srq->srqn);
 
 	if (refcount_dec_and_test(&srq->refcount))
 		complete(&srq->free);
 	wait_for_completion(&srq->free);
-
-	hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn);
+	if (!srq->delayed_destroy_flag)
+		hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn);
 }
 
 static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
@@ -182,6 +184,10 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
 	struct hns_roce_buf_attr buf_attr = {};
 	int ret;
 
+	idx_que->mtr_node = kvmalloc(sizeof(*idx_que->mtr_node), GFP_KERNEL);
+	if (!idx_que->mtr_node)
+		return -ENOMEM;
+
 	srq->idx_que.entry_shift = ilog2(HNS_ROCE_IDX_QUE_ENTRY_SZ);
 
 	buf_attr.page_shift = hr_dev->caps.idx_buf_pg_sz + PAGE_SHIFT;
@@ -195,7 +201,7 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
 				  udata, addr);
 	if (ret) {
 		ibdev_err(ibdev, "Failed to alloc SRQ idx mtr, ret = %d.\n", ret);
-		return ret;
+		goto err_kvmalloc;
 	}
 
 	if (!udata) {
@@ -213,6 +219,8 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
 	return 0;
 err_idx_mtr:
 	hns_roce_mtr_destroy(hr_dev, &idx_que->mtr);
+err_kvmalloc:
+	kvfree(idx_que->mtr_node);
 
 	return ret;
 }
@@ -223,7 +231,12 @@ static void free_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
 	bitmap_free(idx_que->bitmap);
 	idx_que->bitmap = NULL;
 
-	hns_roce_mtr_destroy(hr_dev, &idx_que->mtr);
+	if (srq->delayed_destroy_flag) {
+		hns_roce_add_unfree_mtr(idx_que->mtr_node, hr_dev, &idx_que->mtr);
+	} else {
+		hns_roce_mtr_destroy(hr_dev, &idx_que->mtr);
+		kvfree(idx_que->mtr_node);
+	}
 }
 
 static int alloc_srq_wqe_buf(struct hns_roce_dev *hr_dev,
@@ -234,6 +247,10 @@ static int alloc_srq_wqe_buf(struct hns_roce_dev *hr_dev,
 	struct hns_roce_buf_attr buf_attr = {};
 	int ret;
 
+	srq->mtr_node = kvmalloc(sizeof(*srq->mtr_node), GFP_KERNEL);
+	if (!srq->mtr_node)
+		return -ENOMEM;
+
 	srq->wqe_shift = ilog2(roundup_pow_of_two(max(HNS_ROCE_SGE_SIZE,
						      HNS_ROCE_SGE_SIZE *
						      srq->max_gs)));
@@ -247,9 +264,11 @@ static int alloc_srq_wqe_buf(struct hns_roce_dev *hr_dev,
 	ret = hns_roce_mtr_create(hr_dev, &srq->buf_mtr, &buf_attr,
 				  hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT,
 				  udata, addr);
-	if (ret)
+	if (ret) {
 		ibdev_err(ibdev, "failed to alloc SRQ buf mtr, ret = %d.\n",
 			  ret);
+		kvfree(srq->mtr_node);
+	}
 
 	return ret;
 }
@@ -257,7 +276,12 @@ static int alloc_srq_wqe_buf(struct hns_roce_dev *hr_dev,
 static void free_srq_wqe_buf(struct hns_roce_dev *hr_dev,
 			     struct hns_roce_srq *srq)
 {
-	hns_roce_mtr_destroy(hr_dev, &srq->buf_mtr);
+	if (srq->delayed_destroy_flag) {
+		hns_roce_add_unfree_mtr(srq->mtr_node, hr_dev, &srq->buf_mtr);
+	} else {
+		hns_roce_mtr_destroy(hr_dev, &srq->buf_mtr);
+		kvfree(srq->mtr_node);
+	}
 }
 
 static int alloc_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
@@ -427,7 +451,8 @@ static void free_srq_db(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
 		uctx = rdma_udata_to_drv_context(udata,
						 struct hns_roce_ucontext,
						 ibucontext);
-		hns_roce_db_unmap_user(uctx, &srq->rdb);
+		hns_roce_db_unmap_user(uctx, &srq->rdb,
+				       srq->delayed_destroy_flag);
 	} else {
 		hns_roce_free_db(hr_dev, &srq->rdb);
 	}
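
[Review note] The new err_kvmalloc label in alloc_srq_idx() keeps the usual kernel unwind shape: labels are ordered so that a jump releases exactly what was acquired before the failure, in reverse order, and "goto err_kvmalloc" skips the mtr teardown when the mtr was never created. A toy userspace model of that discipline (names invented for the example):

    #include <stdlib.h>

    /* Each error path undoes only the steps that completed before the
     * failure, in reverse order of acquisition.
     */
    static int acquire_two(void **node, void **mtr)
    {
        *node = malloc(32);   /* step 1: tracking node */
        if (!*node)
            return -1;

        *mtr = malloc(32);    /* step 2: the resource itself */
        if (!*mtr)
            goto err_node;    /* undo step 1 only */

        return 0;

    err_node:
        free(*node);
        return -1;
    }
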
diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h
index 722313599d87af05a661cc7e2f8de5d92de273cc..d49d3a569b21f429235cd2b1ece0988c205b958d 100644
--- a/include/uapi/rdma/hns-abi.h
+++ b/include/uapi/rdma/hns-abi.h
@@ -135,7 +135,7 @@ struct hns_roce_ib_create_qp_resp {
 struct hns_roce_ib_create_ah_resp {
 	__u8 priority;
 	__u8 tc_mode;
-	__u8 reserved[6];
+	__u8 dmac[6];
 };
 
 struct hns_roce_ib_modify_qp_resp {
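
[Review note] Renaming reserved[6] to dmac[6] is ABI-compatible: no field moves or changes size, and providers built against the old header simply ignore the trailing bytes they treated as reserved. A build-time guard one could add in a userspace consumer (illustrative, not part of the patch; assumes the header's usual <rdma/hns-abi.h> install path and C11 static_assert):

    #include <assert.h>
    #include <rdma/hns-abi.h>

    /* priority (1) + tc_mode (1) + dmac (6) = 8 bytes, no padding */
    static_assert(sizeof(struct hns_roce_ib_create_ah_resp) == 8,
                  "hns_roce_ib_create_ah_resp must keep its 8-byte layout");
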