diff --git a/drivers/infiniband/hw/erdma/erdma.h b/drivers/infiniband/hw/erdma/erdma.h index 326bba9555189f494daac7fded06d20816d97c49..e111541486cbea1bd5a1611d7b1cac4c0ad3be96 100644 --- a/drivers/infiniband/hw/erdma/erdma.h +++ b/drivers/infiniband/hw/erdma/erdma.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* Authors: Cheng Xu */ /* Kai Shen */ @@ -9,10 +9,13 @@ #include #include +#include #include #include +#include "erdma_debug.h" #include "erdma_hw.h" +#include "erdma_ioctl.h" #include "erdma_stats.h" #ifndef RDMA_DRIVER_ERDMA @@ -21,7 +24,7 @@ #define ERDMA_MAJOR_VER 0 #define ERDMA_MEDIUM_VER 2 -#define ERDMA_MINOR_VER 14 +#define ERDMA_MINOR_VER 35 #define DRV_MODULE_NAME "erdma" #define ERDMA_NODE_DESC "Elastic RDMA(iWARP) stack" @@ -46,7 +49,7 @@ struct erdma_eq { atomic64_t event_num; atomic64_t notify_num; - u64 __iomem *db_addr; + void __iomem *db; u64 *db_record; }; @@ -143,6 +146,7 @@ struct erdma_devattr { int numa_node; enum erdma_cc_alg cc; + u8 flags; u32 grp_num; int irq_num; @@ -205,6 +209,7 @@ struct erdma_dev { struct net_device *netdev; struct pci_dev *pdev; struct notifier_block netdev_nb; + struct workqueue_struct *reflush_wq; resource_size_t func_bar_addr; resource_size_t func_bar_len; @@ -213,6 +218,7 @@ struct erdma_dev { struct erdma_devattr attrs; /* physical port state (only one port per device) */ enum ib_port_state state; + u32 mtu; /* cmdq and aeq use the same msix vector */ struct erdma_irq comm_irq; @@ -234,14 +240,16 @@ struct erdma_dev { DECLARE_BITMAP(sdb_page, ERDMA_DWQE_TYPE0_CNT); /* * We provide max 496 uContexts that each has one SQ normal Db, - * and one directWQE db。 + * and one directWQE db. */ DECLARE_BITMAP(sdb_entry, ERDMA_DWQE_TYPE1_CNT); atomic_t num_ctx; atomic_t num_cep; struct list_head cep_list; - bool is_registered; + + struct dma_pool *db_pool; + struct dma_pool *resp_pool; }; static inline void *get_queue_entry(void *qbuf, u32 idx, u32 depth, u32 shift) @@ -289,7 +297,7 @@ void erdma_finish_cmdq_init(struct erdma_dev *dev); void erdma_cmdq_destroy(struct erdma_dev *dev); void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op); -int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, u64 *req, u32 req_size, +int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size, u64 *resp0, u64 *resp1); void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq); diff --git a/drivers/infiniband/hw/erdma/erdma_cm.c b/drivers/infiniband/hw/erdma/erdma_cm.c index 1a62189430773f1faf7f1ebdabb9b2192a123ef7..1b76506c80769c914543da1b8fd78e6df8c5437d 100644 --- a/drivers/infiniband/hw/erdma/erdma_cm.c +++ b/drivers/infiniband/hw/erdma/erdma_cm.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* Authors: Cheng Xu */ /* Kai Shen */ @@ -10,23 +10,9 @@ /* Copyright (c) 2008-2019, IBM Corporation */ /* Copyright (c) 2017, Open Grid Computing, Inc. 
*/ -#include -#include -#include -#include -#include -#include -#include #include -#include - -#include -#include -#include -#include #include "erdma.h" -#include "erdma_debug.h" #include "erdma_cm.h" #include "erdma_verbs.h" @@ -320,11 +306,9 @@ void erdma_qp_cm_drop(struct erdma_qp *qp) erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -EINVAL); break; - case ERDMA_EPSTATE_RDMA_MODE: erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0); break; - case ERDMA_EPSTATE_IDLE: case ERDMA_EPSTATE_LISTENING: case ERDMA_EPSTATE_CONNECTING: @@ -360,7 +344,6 @@ void erdma_cep_put(struct erdma_cep *cep) kref_read(&cep->ref) - 1); WARN_ON(kref_read(&cep->ref) < 1); - kref_put(&cep->ref, __erdma_cep_dealloc); } diff --git a/drivers/infiniband/hw/erdma/erdma_cm.h b/drivers/infiniband/hw/erdma/erdma_cm.h index b87c53b83e10ed24dd5e092e2a4944b46421be0a..6d5db98e1b88b7bb39f80fb547774d64069a5449 100644 --- a/drivers/infiniband/hw/erdma/erdma_cm.h +++ b/drivers/infiniband/hw/erdma/erdma_cm.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* Authors: Cheng Xu */ /* Kai Shen */ @@ -12,9 +12,8 @@ #ifndef __ERDMA_CM_H__ #define __ERDMA_CM_H__ -#include #include - +#include #include /* iWarp MPA protocol defs */ diff --git a/drivers/infiniband/hw/erdma/erdma_cmdq.c b/drivers/infiniband/hw/erdma/erdma_cmdq.c index cd8b8071e5fd33f1f6c03afe9e39a734c51d4023..dcb185f5465ea3fc7f3e5999e43d47a56db8cd30 100644 --- a/drivers/infiniband/hw/erdma/erdma_cmdq.c +++ b/drivers/infiniband/hw/erdma/erdma_cmdq.c @@ -1,16 +1,10 @@ -// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* Authors: Cheng Xu */ /* Kai Shen */ /* Copyright (c) 2020-2022, Alibaba Group. */ -#include -#include -#include - #include "erdma.h" -#include "erdma_hw.h" -#include "erdma_verbs.h" static void arm_cmdq_cq(struct erdma_cmdq *cmdq) { @@ -47,7 +41,7 @@ static struct erdma_comp_wait *get_comp_wait(struct erdma_cmdq *cmdq) return ERR_PTR(-ENOMEM); } - set_bit(comp_idx, cmdq->comp_wait_bitmap); + __set_bit(comp_idx, cmdq->comp_wait_bitmap); spin_unlock(&cmdq->lock); return &cmdq->wait_pool[comp_idx]; @@ -60,7 +54,7 @@ static void put_comp_wait(struct erdma_cmdq *cmdq, cmdq->wait_pool[comp_wait->ctx_id].cmd_status = ERDMA_CMD_STATUS_INIT; spin_lock(&cmdq->lock); - used = test_and_clear_bit(comp_wait->ctx_id, cmdq->comp_wait_bitmap); + used = __test_and_clear_bit(comp_wait->ctx_id, cmdq->comp_wait_bitmap); spin_unlock(&cmdq->lock); WARN_ON(!used); @@ -127,6 +121,16 @@ static int erdma_cmdq_sq_init(struct erdma_dev *dev) return 0; } +static void erdma_cmdq_sq_destroy(struct erdma_dev *dev) +{ + struct erdma_cmdq *cmdq = &dev->cmdq; + + dma_free_coherent(&dev->pdev->dev, + (cmdq->sq.depth << SQEBB_SHIFT) + + ERDMA_EXTRA_BUFFER_SIZE, + cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr); +} + static int erdma_cmdq_cq_init(struct erdma_dev *dev) { struct erdma_cmdq *cmdq = &dev->cmdq; @@ -158,6 +162,16 @@ static int erdma_cmdq_cq_init(struct erdma_dev *dev) return 0; } +static void erdma_cmdq_cq_destroy(struct erdma_dev *dev) +{ + struct erdma_cmdq *cmdq = &dev->cmdq; + + dma_free_coherent(&dev->pdev->dev, + (cmdq->cq.depth << CQE_SHIFT) + + ERDMA_EXTRA_BUFFER_SIZE, + cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr); +} + static int erdma_cmdq_eq_init(struct erdma_dev *dev) { struct erdma_cmdq *cmdq = &dev->cmdq; @@ -176,8 +190,7 @@ static int erdma_cmdq_eq_init(struct erdma_dev *dev) spin_lock_init(&eq->lock); atomic64_set(&eq->event_num, 0); - eq->db_addr = - (u64 
__iomem *)(dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG); + eq->db = dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG; eq->db_record = (u64 *)(eq->qbuf + buf_size); erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_H_REG, @@ -191,11 +204,20 @@ static int erdma_cmdq_eq_init(struct erdma_dev *dev) return 0; } +static void erdma_cmdq_eq_destroy(struct erdma_dev *dev) +{ + struct erdma_cmdq *cmdq = &dev->cmdq; + + dma_free_coherent(&dev->pdev->dev, + (cmdq->eq.depth << EQE_SHIFT) + + ERDMA_EXTRA_BUFFER_SIZE, + cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr); +} + int erdma_cmdq_init(struct erdma_dev *dev) { - int err, i; struct erdma_cmdq *cmdq = &dev->cmdq; - u32 status, ctrl; + int err; cmdq->max_outstandings = ERDMA_CMDQ_MAX_OUTSTANDING; cmdq->use_event = false; @@ -218,46 +240,14 @@ int erdma_cmdq_init(struct erdma_dev *dev) if (err) goto err_destroy_cq; - ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_INIT_MASK, 1); - erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl); - - for (i = 0; i < ERDMA_WAIT_DEV_DONE_CNT; i++) { - status = - erdma_reg_read32_filed(dev, ERDMA_REGS_DEV_ST_REG, - ERDMA_REG_DEV_ST_INIT_DONE_MASK); - if (status) - break; - - msleep(ERDMA_REG_ACCESS_WAIT_MS); - } - - if (i == ERDMA_WAIT_DEV_DONE_CNT) { - dev_err(&dev->pdev->dev, "wait init done failed.\n"); - err = -ETIMEDOUT; - goto err_destroy_eq; - } - set_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state); return 0; -err_destroy_eq: - dma_free_coherent(&dev->pdev->dev, - (cmdq->eq.depth << EQE_SHIFT) + - ERDMA_EXTRA_BUFFER_SIZE, - cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr); - err_destroy_cq: - dma_free_coherent(&dev->pdev->dev, - (cmdq->cq.depth << CQE_SHIFT) + - ERDMA_EXTRA_BUFFER_SIZE, - cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr); - + erdma_cmdq_cq_destroy(dev); err_destroy_sq: - dma_free_coherent(&dev->pdev->dev, - (cmdq->sq.depth << SQEBB_SHIFT) + - ERDMA_EXTRA_BUFFER_SIZE, - cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr); + erdma_cmdq_sq_destroy(dev); return err; } @@ -275,18 +265,9 @@ void erdma_cmdq_destroy(struct erdma_dev *dev) clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state); - dma_free_coherent(&dev->pdev->dev, - (cmdq->eq.depth << EQE_SHIFT) + - ERDMA_EXTRA_BUFFER_SIZE, - cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr); - dma_free_coherent(&dev->pdev->dev, - (cmdq->sq.depth << SQEBB_SHIFT) + - ERDMA_EXTRA_BUFFER_SIZE, - cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr); - dma_free_coherent(&dev->pdev->dev, - (cmdq->cq.depth << CQE_SHIFT) + - ERDMA_EXTRA_BUFFER_SIZE, - cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr); + erdma_cmdq_eq_destroy(dev); + erdma_cmdq_cq_destroy(dev); + erdma_cmdq_sq_destroy(dev); } static void *get_next_valid_cmdq_cqe(struct erdma_cmdq *cmdq) @@ -449,7 +430,7 @@ void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op) FIELD_PREP(ERDMA_CMD_HDR_OPCODE_MASK, op); } -int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, u64 *req, u32 req_size, +int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size, u64 *resp0, u64 *resp1) { struct erdma_comp_wait *comp_wait; diff --git a/drivers/infiniband/hw/erdma/erdma_cq.c b/drivers/infiniband/hw/erdma/erdma_cq.c index d29886324592eae0d73e0ed63fda911f99f2bb96..d9eae90e94cf7417caee4d3c76ab29c261425f81 100644 --- a/drivers/infiniband/hw/erdma/erdma_cq.c +++ b/drivers/infiniband/hw/erdma/erdma_cq.c @@ -1,12 +1,9 @@ -// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* Authors: Cheng Xu */ /* Kai Shen */ /* Copyright (c) 2020-2022, Alibaba Group. 
*/ -#include - -#include "erdma_hw.h" #include "erdma_verbs.h" static void *get_next_valid_cqe(struct erdma_cq *cq) @@ -36,18 +33,26 @@ static void notify_cq(struct erdma_cq *cq, u8 solcitied) int erdma_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { struct erdma_cq *cq = to_ecq(ibcq); + u16 dim_timeout = cq->dim.timeout; unsigned long irq_flags; int ret = 0; spin_lock_irqsave(&cq->kern_cq.lock, irq_flags); - notify_cq(cq, (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED); - - if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && get_next_valid_cqe(cq)) + if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && get_next_valid_cqe(cq)) { ret = 1; + goto unlock; + } - cq->kern_cq.notify_cnt++; - + if (!dim_timeout) { + notify_cq(cq, (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED); + cq->kern_cq.notify_cnt++; + } else { + cq->dim.flags |= flags; + hrtimer_start(&cq->dim.timer, ns_to_ktime(dim_timeout * NSEC_PER_USEC), + HRTIMER_MODE_REL_PINNED); + } +unlock: spin_unlock_irqrestore(&cq->kern_cq.lock, irq_flags); return ret; @@ -62,7 +67,6 @@ static const enum ib_wc_opcode wc_mapping_table[ERDMA_NUM_OPCODES] = { [ERDMA_OP_RECV_IMM] = IB_WC_RECV_RDMA_WITH_IMM, [ERDMA_OP_RECV_INV] = IB_WC_RECV, [ERDMA_OP_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, - [ERDMA_OP_INVALIDATE] = IB_WC_LOCAL_INV, [ERDMA_OP_RSP_SEND_IMM] = IB_WC_RECV, [ERDMA_OP_SEND_WITH_INV] = IB_WC_SEND, [ERDMA_OP_REG_MR] = IB_WC_REG_MR, @@ -203,3 +207,24 @@ int erdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) return npolled; } + +enum hrtimer_restart cq_timer_fn(struct hrtimer *t) +{ + struct erdma_cq *cq = container_of(t, struct erdma_cq, dim.timer); + + notify_cq(cq, (cq->dim.flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED); + cq->kern_cq.notify_cnt++; + cq->dim.flags = 0; + + return HRTIMER_NORESTART; +} + +#define DIM_OFF_THRESHOLD 3 +int erdma_modify_cq(struct ib_cq *ibcq, u16 cq_count, u16 cq_period) +{ + struct erdma_cq *cq = to_ecq(ibcq); + + cq->dim.timeout = cq_period >= DIM_OFF_THRESHOLD ? cq_period : 0; + + return 0; +} diff --git a/drivers/infiniband/hw/erdma/erdma_eq.c b/drivers/infiniband/hw/erdma/erdma_eq.c index 51ce06bc3909278fe41ccb7d1b234dc3b4f8072a..0ed611bfb6e38d6a63762ebb62aa1cdfa04f73d3 100644 --- a/drivers/infiniband/hw/erdma/erdma_eq.c +++ b/drivers/infiniband/hw/erdma/erdma_eq.c @@ -1,20 +1,9 @@ -// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* Authors: Cheng Xu */ /* Kai Shen */ /* Copyright (c) 2020-2022, Alibaba Group. 
*/ -#include -#include -#include - -#include -#include -#include - -#include "erdma.h" -#include "erdma_cm.h" -#include "erdma_hw.h" #include "erdma_verbs.h" #define MAX_POLL_CHUNK_SIZE 16 @@ -25,7 +14,7 @@ void notify_eq(struct erdma_eq *eq) FIELD_PREP(ERDMA_EQDB_ARM_MASK, 1); *eq->db_record = db_data; - writeq(db_data, eq->db_addr); + writeq(db_data, eq->db); atomic64_inc(&eq->notify_num); } @@ -109,7 +98,7 @@ int erdma_aeq_init(struct erdma_dev *dev) atomic64_set(&eq->event_num, 0); atomic64_set(&eq->notify_num, 0); - eq->db_addr = (u64 __iomem *)(dev->func_bar + ERDMA_REGS_AEQ_DB_REG); + eq->db = dev->func_bar + ERDMA_REGS_AEQ_DB_REG; eq->db_record = (u64 *)(eq->qbuf + buf_size); erdma_reg_write32(dev, ERDMA_REGS_AEQ_ADDR_H_REG, @@ -234,7 +223,7 @@ static int create_eq_cmd(struct erdma_dev *dev, u32 eqn, struct erdma_eq *eq) req.db_dma_addr_l = lower_32_bits(db_info_dma_addr); req.db_dma_addr_h = upper_32_bits(db_info_dma_addr); - return erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(struct erdma_cmdq_create_eq_req), NULL, NULL); } @@ -256,9 +245,8 @@ static int erdma_ceq_init_one(struct erdma_dev *dev, u16 ceqn) atomic64_set(&eq->notify_num, 0); eq->depth = ERDMA_DEFAULT_EQ_DEPTH; - eq->db_addr = - (u64 __iomem *)(dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG + - (ceqn + 1) * ERDMA_DB_SIZE); + eq->db = dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG + + (ceqn + 1) * ERDMA_DB_SIZE; eq->db_record = (u64 *)(eq->qbuf + buf_size); eq->ci = 0; dev->ceqs[ceqn].dev = dev; @@ -286,8 +274,7 @@ static void erdma_ceq_uninit_one(struct erdma_dev *dev, u16 ceqn) req.qtype = ERDMA_EQ_TYPE_CEQ; req.vector_idx = ceqn + 1; - err = erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL, - NULL); + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); if (err) return; diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h index bb028712eb961d8985594bd6b8ee596055b1a2ee..087aae76dd95151d8c4c23e43c7e5cad4eb97ed8 100644 --- a/drivers/infiniband/hw/erdma/erdma_hw.h +++ b/drivers/infiniband/hw/erdma/erdma_hw.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* Authors: Cheng Xu */ /* Kai Shen */ @@ -10,6 +10,9 @@ #include #include +#define ERDMA_HW_PAGE_SHIFT 12 +#define ERDMA_HW_PAGE_SIZE 4096 + /* PCIe device related definition. */ #define PCI_VENDOR_ID_ALIBABA 0x1ded @@ -75,11 +78,11 @@ #define ERDMA_BAR_SQDB_SPACE_OFFSET ERDMA_BAR_DB_SPACE_BASE #define ERDMA_BAR_SQDB_SPACE_SIZE (384 * 1024) -#define ERDMA_BAR_RQDB_SPACE_OFFSET \ +#define ERDMA_BAR_RQDB_SPACE_OFFSET \ (ERDMA_BAR_SQDB_SPACE_OFFSET + ERDMA_BAR_SQDB_SPACE_SIZE) #define ERDMA_BAR_RQDB_SPACE_SIZE (96 * 1024) -#define ERDMA_BAR_CQDB_SPACE_OFFSET \ +#define ERDMA_BAR_CQDB_SPACE_OFFSET \ (ERDMA_BAR_RQDB_SPACE_OFFSET + ERDMA_BAR_RQDB_SPACE_SIZE) /* Doorbell page resources related. 
*/ @@ -145,14 +148,20 @@ enum CMDQ_RDMA_OPCODE { CMDQ_OPCODE_MODIFY_QP = 3, CMDQ_OPCODE_CREATE_CQ = 4, CMDQ_OPCODE_DESTROY_CQ = 5, + CMDQ_OPCODE_REFLUSH = 6, CMDQ_OPCODE_REG_MR = 8, - CMDQ_OPCODE_DEREG_MR = 9 + CMDQ_OPCODE_DEREG_MR = 9, + CMDQ_OPCODE_QUERY_QPC = 11, + CMDQ_OPCODE_QUERY_CQC = 12, }; enum CMDQ_COMMON_OPCODE { CMDQ_OPCODE_CREATE_EQ = 0, CMDQ_OPCODE_DESTROY_EQ = 1, CMDQ_OPCODE_QUERY_FW_INFO = 2, + CMDQ_OPCODE_CONF_MTU = 3, + CMDQ_OPCODE_GET_STATS = 4, + CMDQ_OPCODE_QUERY_EQC = 6, }; /* cmdq-SQE HDR */ @@ -190,6 +199,11 @@ struct erdma_cmdq_destroy_eq_req { u8 qtype; }; +struct erdma_cmdq_config_mtu_req { + u64 hdr; + u32 mtu; +}; + /* create_cq cfg0 */ #define ERDMA_CMD_CREATE_CQ_DEPTH_MASK GENMASK(31, 24) #define ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK GENMASK(23, 20) @@ -218,8 +232,8 @@ struct erdma_cmdq_create_cq_req { /* regmr cfg1 */ #define ERDMA_CMD_REGMR_PD_MASK GENMASK(31, 12) #define ERDMA_CMD_REGMR_TYPE_MASK GENMASK(7, 6) -#define ERDMA_CMD_REGMR_RIGHT_MASK GENMASK(5, 2) -#define ERDMA_CMD_REGMR_ACC_MODE_MASK GENMASK(1, 0) +#define ERDMA_CMD_REGMR_RIGHT_MASK GENMASK(5, 1) +#define ERDMA_CMD_REGMR_ACC_MODE_MASK BIT(0) /* regmr cfg2 */ #define ERDMA_CMD_REGMR_PAGESIZE_MASK GENMASK(31, 27) @@ -299,8 +313,16 @@ struct erdma_cmdq_destroy_qp_req { u32 qpn; }; +struct erdma_cmdq_reflush_req { + u64 hdr; + u32 qpn; + u32 sq_pi; + u32 rq_pi; +}; + /* cap qword 0 definition */ #define ERDMA_CMD_DEV_CAP_MAX_CQE_MASK GENMASK_ULL(47, 40) +#define ERDMA_CMD_DEV_CAP_FLAGS_MASK GENMASK_ULL(31, 24) #define ERDMA_CMD_DEV_CAP_MAX_RECV_WR_MASK GENMASK_ULL(23, 16) #define ERDMA_CMD_DEV_CAP_MAX_MR_SIZE_MASK GENMASK_ULL(7, 0) @@ -312,6 +334,11 @@ struct erdma_cmdq_destroy_qp_req { #define ERDMA_NQP_PER_QBLOCK 1024 +enum { + ERDMA_DEV_CAP_FLAGS_ATOMIC = 1 << 7, + ERDMA_DEV_CAP_FLAGS_QUERY_QC = 1 << 6, +}; + #define ERDMA_CMD_INFO0_FW_VER_MASK GENMASK_ULL(31, 0) /* CQE hdr */ @@ -367,8 +394,8 @@ struct erdma_rqe { #define ERDMA_SQE_HDR_WQEBB_INDEX_MASK GENMASK_ULL(15, 0) /* REG MR attrs */ -#define ERDMA_SQE_MR_MODE_MASK GENMASK(1, 0) -#define ERDMA_SQE_MR_ACCESS_MASK GENMASK(5, 2) +#define ERDMA_SQE_MR_MODE_MASK BIT(0) +#define ERDMA_SQE_MR_ACCESS_MASK GENMASK(5, 1) #define ERDMA_SQE_MR_MTT_TYPE_MASK GENMASK(7, 6) #define ERDMA_SQE_MR_MTT_CNT_MASK GENMASK(31, 12) @@ -417,7 +444,7 @@ struct erdma_reg_mr_sqe { }; /* EQ related. 
*/ -#define ERDMA_DEFAULT_EQ_DEPTH 256 +#define ERDMA_DEFAULT_EQ_DEPTH 4096 /* ceqe */ #define ERDMA_CEQE_HDR_DB_MASK BIT_ULL(63) @@ -453,13 +480,13 @@ enum erdma_opcode { ERDMA_OP_RECV_IMM = 5, ERDMA_OP_RECV_INV = 6, - ERDMA_OP_REQ_ERR = 7, - ERDMA_OP_READ_RESPONSE = 8, + ERDMA_OP_RSVD0 = 7, + ERDMA_OP_RSVD1 = 8, ERDMA_OP_WRITE_WITH_IMM = 9, - ERDMA_OP_RECV_ERR = 10, + ERDMA_OP_RSVD2 = 10, + ERDMA_OP_RSVD3 = 11, - ERDMA_OP_INVALIDATE = 11, ERDMA_OP_RSP_SEND_IMM = 12, ERDMA_OP_SEND_WITH_INV = 13, @@ -508,4 +535,139 @@ enum erdma_vendor_err { ERDMA_WC_VENDOR_SQE_WARP_ERR = 0x34 }; +/* Response Definitions for Query Command Category */ +#define ERDMA_HW_RESP_SIZE 256 + +struct erdma_cmdq_query_req { + u64 hdr; + u32 rsvd; + u32 index; + + u64 target_addr; + u32 target_length; +}; + +struct erdma_cmdq_query_resp_hdr { + u16 magic; + u8 ver; + u8 length; + + u32 index; + u32 rsvd[2]; +}; + +struct erdma_cmdq_query_stats_resp { + struct erdma_cmdq_query_resp_hdr hdr; + + u64 tx_req_cnt; + u64 tx_packets_cnt; + u64 tx_bytes_cnt; + u64 tx_drop_packets_cnt; + u64 tx_bps_meter_drop_packets_cnt; + u64 tx_pps_meter_drop_packets_cnt; + u64 rx_packets_cnt; + u64 rx_bytes_cnt; + u64 rx_drop_packets_cnt; + u64 rx_bps_meter_drop_packets_cnt; + u64 rx_pps_meter_drop_packets_cnt; +}; + +struct erdma_cmdq_query_qpc_resp { + struct erdma_cmdq_query_resp_hdr hdr; + + struct{ + u8 status; /* 0 - disabled, 1 - enabled. */ + u8 qbuf_page_offset; + u8 qbuf_page_size; + u8 qbuf_depth; + + u16 hw_pi; + u16 hw_ci; + } qpc[2]; + + /* hardware io stat */ + u16 last_comp_sqe_idx; + u16 last_comp_rqe_idx; + u16 scqe_counter; + u16 rcqe_counter; + + u16 tx_pkts_cnt; + u16 rx_pkts_cnt; + u16 rx_error_drop_cnt; + u16 rx_invalid_drop_cnt; + + u32 rto_retrans_cnt; + //qp sw info + u32 rqpn; + + u32 pd; + u16 fw_sq_pi; + u16 fw_sq_ci; + + u16 fw_rq_ci; + u8 sq_in_flush; + u8 rq_in_flush; + u16 sq_flushed_pi; + u16 rq_flushed_pi; + + u32 scqn; + u32 rcqn; + + u64 sqbuf_addr; + u64 rqbuf_addr; + u64 sdbrec_addr; + u64 rdbrec_addr; + + u64 sdbrec_cur; + u64 rdbrec_cur; + + u32 ip_src; + u32 ip_dst; + u16 srcport; + u16 dstport; +}; + +struct erdma_cmdq_query_cqc_resp { + struct erdma_cmdq_query_resp_hdr hdr; + + u32 pi; + u8 q_en; + u8 log_depth; + u8 cq_cur_ownership; + u8 last_errdb_type; /* 0,dup db;1,out-order db */ + + u32 last_errdb_ci; + u8 out_order_db_cnt; + u8 dup_db_cnt; + u16 rsvd; + + u64 cn_cq_db_addr; + u64 cq_db_record; +}; + +struct erdma_cmdq_query_eqc_resp { + struct erdma_cmdq_query_resp_hdr hdr; + u16 depth; + u16 vector; + + u8 int_suppression; + u8 tail_owner; + u8 head_owner; + u8 overflow; + + u32 head; + u32 tail; + + u64 cn_addr; + u64 cn_db_addr; + u64 eq_db_record; +}; + +struct erdma_cmdq_dump_addr_req { + u64 hdr; + u64 dump_addr; + u64 target_addr; + u32 target_length; +}; + #endif diff --git a/drivers/infiniband/hw/erdma/erdma_ioctl.c b/drivers/infiniband/hw/erdma/erdma_ioctl.c index 47d375ec8b2c869c171dedcefd022ded466043e7..6352a00c92b9c464764efe37f42043c6e6849a85 100644 --- a/drivers/infiniband/hw/erdma/erdma_ioctl.c +++ b/drivers/infiniband/hw/erdma/erdma_ioctl.c @@ -5,14 +5,14 @@ #include #include +#include +#include +#include #include "erdma.h" +#include "erdma_cm.h" #include "erdma_ioctl.h" #include "erdma_verbs.h" -#include "erdma_debug.h" -#include -#include -#include static struct class *erdma_chrdev_class; static struct cdev erdma_cdev; @@ -21,7 +21,69 @@ static dev_t erdma_char_dev; #define ERDMA_CHRDEV_NAME "erdma" -static int erdma_ioctl_conf_cmd(struct erdma_dev *edev, struct 
erdma_ioctl_msg *msg) +static int erdma_query_resource(struct erdma_dev *dev, u32 mod, u32 op, + u32 index, void *out, u32 len) +{ + struct erdma_cmdq_query_req req; + dma_addr_t dma_addr; + void *resp; + int err; + + erdma_cmdq_build_reqhdr(&req.hdr, mod, op); + + resp = dma_pool_alloc(dev->resp_pool, GFP_KERNEL, &dma_addr); + if (!resp) + return -ENOMEM; + + req.index = index; + req.target_addr = dma_addr; + req.target_length = ERDMA_HW_RESP_SIZE; + + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + if (err) + goto out; + + if (out) + memcpy(out, resp, len); + +out: + dma_pool_free(dev->resp_pool, resp, dma_addr); + + return err; +} + +static int erdma_query_qpc(struct erdma_dev *dev, u32 qpn, void *out) +{ + BUILD_BUG_ON(sizeof(struct erdma_cmdq_query_qpc_resp) > + ERDMA_HW_RESP_SIZE); + + return erdma_query_resource(dev, CMDQ_SUBMOD_RDMA, + CMDQ_OPCODE_QUERY_QPC, qpn, out, + sizeof(struct erdma_cmdq_query_qpc_resp)); +} + +static int erdma_query_cqc(struct erdma_dev *dev, u32 cqn, void *out) +{ + BUILD_BUG_ON(sizeof(struct erdma_cmdq_query_cqc_resp) > + ERDMA_HW_RESP_SIZE); + + return erdma_query_resource(dev, CMDQ_SUBMOD_RDMA, + CMDQ_OPCODE_QUERY_CQC, cqn, out, + sizeof(struct erdma_cmdq_query_cqc_resp)); +} + +static int erdma_query_eqc(struct erdma_dev *dev, u32 eqn, void *out) +{ + BUILD_BUG_ON(sizeof(struct erdma_cmdq_query_eqc_resp) > + ERDMA_HW_RESP_SIZE); + + return erdma_query_resource(dev, CMDQ_SUBMOD_COMMON, + CMDQ_OPCODE_QUERY_EQC, eqn, out, + sizeof(struct erdma_cmdq_query_eqc_resp)); +} + +static int erdma_ioctl_conf_cmd(struct erdma_dev *edev, + struct erdma_ioctl_msg *msg) { int ret = 0; @@ -41,14 +103,36 @@ static int erdma_ioctl_conf_cmd(struct erdma_dev *edev, struct erdma_ioctl_msg * return ret; } -static void fill_eq_info(struct erdma_eq_info *info, struct erdma_eq *eq) +static void fill_eq_info(struct erdma_dev *dev, struct erdma_eq_info *info, + struct erdma_eq *eq) { + struct erdma_cmdq_query_eqc_resp resp; + int ret; + info->event_cnt = atomic64_read(&eq->event_num); info->notify_cnt = atomic64_read(&eq->notify_num); info->depth = eq->depth; info->ci = eq->ci; info->qbuf_dma = eq->qbuf_dma_addr; info->qbuf_va = (u64)eq->qbuf; + info->hw_info_valid = 0; + + ret = erdma_query_eqc(dev, info->eqn, &resp); + if (ret) + return; + + info->hw_info_valid = 1; + info->hw_depth = resp.depth; + info->vector = resp.vector; + info->int_suppression = resp.int_suppression; + info->tail_owner = resp.tail_owner; + info->head_owner = resp.head_owner; + info->overflow = resp.overflow; + info->head = resp.head; + info->tail = resp.tail; + info->cn_addr = resp.cn_addr; + info->cn_db_addr = resp.cn_db_addr; + info->eq_db_record = resp.eq_db_record; } static void show_cep_info(struct erdma_dev *edev) @@ -61,115 +145,308 @@ static void show_cep_info(struct erdma_dev *edev) if (!num_cep) return; - pr_info("%-20s%-6s%-6s%-7s%-3s%-3s%-4s%-21s%-9s\n", - "CEP", "State", "Ref's", "QP-ID", "LQ", "LC", "U", "Sock", "CM-ID"); + pr_info("%-20s%-6s%-6s%-7s%-3s%-3s%-4s%-21s%-9s\n", "CEP", "State", + "Ref's", "QP-ID", "LQ", "LC", "U", "Sock", "CM-ID"); list_for_each_safe(pos, tmp, &edev->cep_list) { struct erdma_cep *cep = list_entry(pos, struct erdma_cep, devq); - pr_info("0x%-18p%-6d%-6d%-7d%-3s%-3s%-4d0x%-18p 0x%-16p\n", - cep, cep->state, kref_read(&cep->ref), + pr_info("0x%-18p%-6d%-6d%-7d%-3s%-3s%-4d0x%-18p 0x%-16p\n", cep, + cep->state, kref_read(&cep->ref), cep->qp ? QP_ID(cep->qp) : -1, list_empty(&cep->listenq) ? "n" : "y", - cep->listen_cep ? 
"y" : "n", cep->in_use, - cep->sock, cep->cm_id); + cep->listen_cep ? "y" : "n", cep->in_use, cep->sock, + cep->cm_id); + } +} + +static int fill_cq_info(struct erdma_dev *dev, u32 cqn, + struct erdma_ioctl_msg *msg) +{ + struct erdma_cq_info *info = &msg->out.cq_info; + struct erdma_cmdq_query_cqc_resp resp; + struct rdma_restrack_entry *res; + struct erdma_cq *cq; + int ret; + + if (cqn == 0) { + info->cqn = 0; + info->depth = dev->cmdq.cq.depth; + info->assoc_eqn = 0; + info->qbuf_dma_addr = dev->cmdq.cq.qbuf_dma_addr; + info->ci = dev->cmdq.cq.ci; + info->cmdsn = dev->cmdq.cq.cmdsn; + info->notify_cnt = atomic64_read(&dev->cmdq.cq.armed_num); + + goto query_hw_cqc; + } + + cq = find_cq_by_cqn(dev, cqn); + if (!cq) + return -EINVAL; + + info->cqn = cq->cqn; + info->depth = cq->depth; + info->assoc_eqn = cq->assoc_eqn; + + res = &cq->ibcq.res; + info->is_user = !rdma_is_kernel_res(res); + + if (info->is_user) { + info->mtt.page_size = cq->user_cq.qbuf_mtt.page_size; + info->mtt.page_offset = cq->user_cq.qbuf_mtt.page_offset; + info->mtt.page_cnt = cq->user_cq.qbuf_mtt.page_cnt; + info->mtt.mtt_nents = cq->user_cq.qbuf_mtt.mtt_nents; + memcpy(info->mtt.mtt_entry, cq->user_cq.qbuf_mtt.mtt_entry, + ERDMA_MAX_INLINE_MTT_ENTRIES * sizeof(__u64)); + info->mtt.va = cq->user_cq.qbuf_mtt.va; + info->mtt.len = cq->user_cq.qbuf_mtt.len; + info->mtt_type = cq->user_cq.qbuf_mtt.mtt_type; + } else { + info->qbuf_dma_addr = cq->kern_cq.qbuf_dma_addr; + info->ci = cq->kern_cq.ci; + info->cmdsn = cq->kern_cq.cmdsn; + info->notify_cnt = cq->kern_cq.notify_cnt; } + + info->hw_info_valid = 0; + +query_hw_cqc: + ret = erdma_query_cqc(dev, cqn, &resp); + if (ret) + return 0; + + info->hw_info_valid = 1; + info->hw_pi = resp.pi; + info->enable = resp.q_en; + info->log_depth = resp.log_depth; + info->cq_cur_ownership = resp.cq_cur_ownership; + info->last_errdb_type = resp.last_errdb_type; + info->last_errdb_ci = resp.last_errdb_ci; + info->out_order_db_cnt = resp.out_order_db_cnt; + info->dup_db_cnt = resp.dup_db_cnt; + info->cn_cq_db_addr = resp.cn_cq_db_addr; + info->cq_db_record = resp.cq_db_record; + + return 0; } -static int erdma_ioctl_ver_cmd(struct erdma_dev *edev, struct erdma_ioctl_msg *msg) +static int erdma_ioctl_ver_cmd(struct erdma_dev *edev, + struct erdma_ioctl_msg *msg) { - msg->out.version = ERDMA_MAJOR_VER << 16 | - ERDMA_MEDIUM_VER << 8 | - ERDMA_MINOR_VER; + msg->out.version = + ERDMA_MAJOR_VER << 16 | ERDMA_MEDIUM_VER << 8 | ERDMA_MINOR_VER; return 0; } -static int erdma_ioctl_info_cmd(struct erdma_dev *edev, struct erdma_ioctl_msg *msg) +static int erdma_fill_qp_info(struct erdma_dev *dev, u32 qpn, + struct erdma_qp_info *qp_info) { - int ret = 0; - struct erdma_qp *qp; - struct erdma_qp_info *qp_info; + struct erdma_cmdq_query_qpc_resp resp; struct rdma_restrack_entry *res; - int count = 0; struct erdma_mem *mtt; - int i; + struct erdma_qp *qp; + int i, ret; + + if (qpn == 0) + goto query_hw_qpc; + + qp = find_qp_by_qpn(dev, qpn); + if (!qp) + return -EINVAL; + erdma_qp_get(qp); + + qp_info->hw_info_valid = 0; + qp_info->qpn = qp->ibqp.qp_num; + qp_info->qp_state = qp->attrs.state; + qp_info->ref_cnt = kref_read(&qp->ref); + qp_info->qtype = qp->attrs.qp_type; + qp_info->sq_depth = qp->attrs.sq_size; + qp_info->rq_depth = qp->attrs.rq_size; + qp_info->cookie = qp->attrs.remote_cookie; + qp_info->cc = qp->attrs.cc; + qp_info->assoc_scqn = qp->scq->cqn; + qp_info->assoc_rcqn = qp->rcq->cqn; + + if (qp->cep && qp->cep->cm_id) { + struct erdma_cep *cep = qp->cep; + struct iw_cm_id *id = 
cep->cm_id; + struct sockaddr_storage remote_addr; + struct sockaddr_storage local_addr; + + qp_info->sip = + ntohl(to_sockaddr_in(id->local_addr).sin_addr.s_addr); + qp_info->dip = + ntohl(to_sockaddr_in(id->remote_addr).sin_addr.s_addr); + qp_info->sport = ntohs(to_sockaddr_in(id->local_addr).sin_port); + qp_info->dport = + ntohs(to_sockaddr_in(id->remote_addr).sin_port); + + if (cep->sock) { + getname_local(cep->sock, &local_addr); + getname_peer(cep->sock, &remote_addr); + qp_info->origin_sport = + ntohs(to_sockaddr_in(local_addr).sin_port); + qp_info->sip = ntohl( + to_sockaddr_in(local_addr).sin_addr.s_addr); + } + } + + res = &qp->ibqp.res; + qp_info->is_user = !rdma_is_kernel_res(res); + if (qp_info->is_user) { + qp_info->pid = res->task->pid; + get_task_comm(qp_info->buf, res->task); + mtt = &qp->user_qp.sq_mtt; + qp_info->sq_mtt_type = mtt->mtt_type; + qp_info->sq_mtt.page_size = mtt->page_size; + qp_info->sq_mtt.page_offset = mtt->page_offset; + qp_info->sq_mtt.page_cnt = mtt->page_cnt; + qp_info->sq_mtt.mtt_nents = mtt->mtt_nents; + qp_info->sq_mtt.va = mtt->va; + qp_info->sq_mtt.len = mtt->len; + for (i = 0; i < ERDMA_MAX_INLINE_MTT_ENTRIES; i++) + qp_info->sq_mtt.mtt_entry[i] = mtt->mtt_entry[i]; + + mtt = &qp->user_qp.rq_mtt; + qp_info->rq_mtt_type = mtt->mtt_type; + qp_info->rq_mtt.page_size = mtt->page_size; + qp_info->rq_mtt.page_offset = mtt->page_offset; + qp_info->rq_mtt.page_cnt = mtt->page_cnt; + qp_info->rq_mtt.mtt_nents = mtt->mtt_nents; + qp_info->rq_mtt.va = mtt->va; + qp_info->rq_mtt.len = mtt->len; + for (i = 0; i < ERDMA_MAX_INLINE_MTT_ENTRIES; i++) + qp_info->rq_mtt.mtt_entry[i] = mtt->mtt_entry[i]; + } else { + qp_info->sqci = qp->kern_qp.sq_ci; + qp_info->sqpi = qp->kern_qp.sq_pi; + qp_info->rqci = qp->kern_qp.rq_ci; + qp_info->rqpi = qp->kern_qp.rq_pi; + + qp_info->sqbuf_dma = qp->kern_qp.sq_buf_dma_addr; + qp_info->rqbuf_dma = qp->kern_qp.rq_buf_dma_addr; + qp_info->sqdbrec_dma = qp->kern_qp.sq_db_info_dma_addr; + qp_info->rqdbrec_dma = qp->kern_qp.rq_db_info_dma_addr; + } + + erdma_qp_put(qp); + +query_hw_qpc: + ret = erdma_query_qpc(dev, qpn, &resp); + if (ret) + return 0; + + qp_info->hw_info_valid = 1; + qp_info->sq_enable = resp.qpc[0].status; + qp_info->sqbuf_page_offset = resp.qpc[0].qbuf_page_offset; + qp_info->sqbuf_page_size = resp.qpc[0].qbuf_page_size; + qp_info->sqbuf_depth = resp.qpc[0].qbuf_depth; + qp_info->hw_sq_ci = resp.qpc[0].hw_ci; + qp_info->hw_sq_pi = resp.qpc[0].hw_pi; + + qp_info->rq_enable = resp.qpc[1].status; + qp_info->rqbuf_page_offset = resp.qpc[1].qbuf_page_offset; + qp_info->rqbuf_page_size = resp.qpc[1].qbuf_page_size; + qp_info->rqbuf_depth = resp.qpc[1].qbuf_depth; + qp_info->hw_rq_ci = resp.qpc[1].hw_ci; + qp_info->hw_rq_pi = resp.qpc[1].hw_pi; + qp_info->last_comp_sqe_idx = resp.last_comp_sqe_idx; + qp_info->last_comp_rqe_idx = resp.last_comp_rqe_idx; + qp_info->scqe_counter = resp.scqe_counter; + qp_info->rcqe_counter = resp.rcqe_counter; + qp_info->tx_pkts_cnt = resp.tx_pkts_cnt; + qp_info->rx_pkts_cnt = resp.rx_pkts_cnt; + qp_info->rx_error_drop_cnt = resp.rx_error_drop_cnt; + qp_info->rx_invalid_drop_cnt = resp.rx_invalid_drop_cnt; + qp_info->rto_retrans_cnt = resp.rto_retrans_cnt; + qp_info->pd = resp.pd; + qp_info->fw_sq_pi = resp.fw_sq_pi; + qp_info->fw_sq_ci = resp.fw_sq_ci; + qp_info->fw_rq_ci = resp.fw_rq_ci; + qp_info->sq_in_flush = resp.sq_in_flush; + qp_info->rq_in_flush = resp.rq_in_flush; + qp_info->sq_flushed_pi = resp.sq_flushed_pi; + qp_info->rq_flushed_pi = resp.rq_flushed_pi; + qp_info->sqbuf_addr 
= resp.sqbuf_addr; + qp_info->rqbuf_addr = resp.rqbuf_addr; + qp_info->sdbrec_addr = resp.sdbrec_addr; + qp_info->rdbrec_addr = resp.rdbrec_addr; + qp_info->ip_src = resp.ip_src; + qp_info->ip_dst = resp.ip_dst; + qp_info->srcport = resp.srcport; + qp_info->dstport = resp.dstport; + qp_info->sdbrec_val = resp.sdbrec_cur; + qp_info->rdbrec_val = resp.rdbrec_cur; + + if (qpn != 0 && resp.scqn != qp_info->assoc_scqn) + ibdev_info(&dev->ibdev, "hw scqn(%u) != drv scqn(%u)\n", + resp.scqn, qp_info->assoc_scqn); + + if (qpn != 0 && resp.rcqn != qp_info->assoc_rcqn) + ibdev_info(&dev->ibdev, "hw rcqn(%u) != drv rcqn(%u)\n", + resp.rcqn, qp_info->assoc_rcqn); + + return 0; +} + +static int erdma_ioctl_info_cmd(struct erdma_dev *edev, + struct erdma_ioctl_msg *msg) +{ + struct erdma_qp_info *qp_info; + int ret = 0, count = 0, i; + struct erdma_qp *qp; + struct erdma_cq *cq; unsigned long index; switch (msg->in.opcode) { case ERDMA_INFO_TYPE_QP: - qp = find_qp_by_qpn(edev, msg->in.info_req.qn); - if (!qp) - return -EINVAL; - erdma_qp_get(qp); - qp_info = &msg->out.qp_info; - - qp_info->qpn = qp->ibqp.qp_num; - qp_info->qp_state = qp->attrs.state; - qp_info->ref_cnt = kref_read(&qp->ref); - qp_info->qtype = qp->attrs.qp_type; - qp_info->sq_depth = qp->attrs.sq_size; - qp_info->rq_depth = qp->attrs.rq_size; - qp_info->cookie = qp->attrs.cookie; - qp_info->cc = qp->attrs.cc; - res = &qp->ibqp.res; - qp_info->is_user = !rdma_is_kernel_res(res); - if (qp_info->is_user) { - qp_info->pid = res->task->pid; - get_task_comm(qp_info->buf, res->task); - mtt = &qp->user_qp.sq_mtt; - qp_info->sq_mtt_type = mtt->mtt_type; - qp_info->sq_mtt.page_size = mtt->page_size; - qp_info->sq_mtt.page_offset = mtt->page_offset; - qp_info->sq_mtt.page_cnt = mtt->page_cnt; - qp_info->sq_mtt.mtt_nents = mtt->mtt_nents; - qp_info->sq_mtt.va = mtt->va; - qp_info->sq_mtt.len = mtt->len; - for (i = 0; i < ERDMA_MAX_INLINE_MTT_ENTRIES; i++) - qp_info->sq_mtt.mtt_entry[i] = mtt->mtt_entry[i]; - - mtt = &qp->user_qp.rq_mtt; - qp_info->rq_mtt_type = mtt->mtt_type; - qp_info->rq_mtt.page_size = mtt->page_size; - qp_info->rq_mtt.page_offset = mtt->page_offset; - qp_info->rq_mtt.page_cnt = mtt->page_cnt; - qp_info->rq_mtt.mtt_nents = mtt->mtt_nents; - qp_info->rq_mtt.va = mtt->va; - qp_info->rq_mtt.len = mtt->len; - for (i = 0; i < ERDMA_MAX_INLINE_MTT_ENTRIES; i++) - qp_info->rq_mtt.mtt_entry[i] = mtt->mtt_entry[i]; - } - - erdma_qp_put(qp); + ret = erdma_fill_qp_info(edev, msg->in.info_req.qn, qp_info); break; case ERDMA_INFO_TYPE_ALLOCED_QP: - xa_for_each_start(&edev->qp_xa, index, qp, msg->in.info_req.qn) { + xa_for_each_start(&edev->qp_xa, index, qp, + msg->in.info_req.qn) { msg->out.allocted_qpn[count++] = index; if (count == msg->in.info_req.max_result_cnt) break; } msg->out.length = count * 4; + break; + case ERDMA_INFO_TYPE_ALLOCED_CQ: + xa_for_each_start(&edev->cq_xa, index, cq, + msg->in.info_req.qn) { + msg->out.allocted_cqn[count++] = index; + if (count == msg->in.info_req.max_result_cnt) + break; + } + msg->out.length = count * 4; + break; case ERDMA_INFO_TYPE_EQ: msg->out.eq_info[0].ready = 1; msg->out.eq_info[0].eqn = 0; - fill_eq_info(&msg->out.eq_info[0], &edev->aeq); + fill_eq_info(edev, &msg->out.eq_info[0], &edev->aeq); msg->out.eq_info[1].ready = 1; msg->out.eq_info[1].eqn = 1; - fill_eq_info(&msg->out.eq_info[1], &edev->cmdq.eq); + fill_eq_info(edev, &msg->out.eq_info[1], &edev->cmdq.eq); for (i = 0; i < 31; i++) { msg->out.eq_info[i + 2].ready = edev->ceqs[i].ready; msg->out.eq_info[i + 2].eqn = i + 2; - 
fill_eq_info(&msg->out.eq_info[i + 2], &edev->ceqs[i].eq); + fill_eq_info(edev, &msg->out.eq_info[i + 2], + &edev->ceqs[i].eq); } break; case ERDMA_INFO_TYPE_CEP: show_cep_info(edev); break; + case ERDMA_INFO_TYPE_CQ: + ret = fill_cq_info(edev, msg->in.info_req.qn, msg); + break; default: pr_info("unknown opcode:%u\n", msg->in.opcode); return -EINVAL; @@ -180,29 +457,24 @@ static int erdma_ioctl_info_cmd(struct erdma_dev *edev, struct erdma_ioctl_msg * int erdma_ioctl_stat_cmd(struct erdma_dev *edev, struct erdma_ioctl_msg *msg) { - __u64 *stats_data; + int ret; switch (msg->in.opcode) { case ERDMA_STAT_TYPE_QP: case ERDMA_STAT_TYPE_CQ: break; case ERDMA_STAT_TYPE_DEV: - stats_data = (__u64 *)msg->out.data; - stats_data[0] = erdma_reg_read64(edev, ERDMA_REGS_STATS_TSO_IN_PKTS_REG); - stats_data[1] = erdma_reg_read64(edev, ERDMA_REGS_STATS_TSO_OUT_PKTS_REG); - stats_data[2] = erdma_reg_read64(edev, ERDMA_REGS_STATS_TSO_OUT_BYTES_REG); - stats_data[3] = erdma_reg_read64(edev, ERDMA_REGS_STATS_TX_DROP_PKTS_REG); - stats_data[4] = erdma_reg_read64(edev, ERDMA_REGS_STATS_TX_BPS_METER_DROP_PKTS_REG); - stats_data[5] = erdma_reg_read64(edev, ERDMA_REGS_STATS_TX_PPS_METER_DROP_PKTS_REG); - - stats_data[6] = erdma_reg_read64(edev, ERDMA_REGS_STATS_RX_PKTS_REG); - stats_data[7] = erdma_reg_read64(edev, ERDMA_REGS_STATS_RX_BYTES_REG); - stats_data[8] = erdma_reg_read64(edev, ERDMA_REGS_STATS_RX_DROP_PKTS_REG); - stats_data[9] = erdma_reg_read64(edev, ERDMA_REGS_STATS_RX_BPS_METER_DROP_PKTS_REG); - stats_data[10] = - erdma_reg_read64(edev, ERDMA_REGS_STATS_RX_PPS_METER_DROP_PKTS_REG); - - msg->out.length = 256; + ret = erdma_query_hw_stats(edev); + if (ret) + return ret; + + /* Make sure that no overflow happens. */ + BUILD_BUG_ON(ERDMA_STATS_MAX > 512); + + memcpy(msg->out.stats, &edev->stats, + sizeof(__u64) * ERDMA_STATS_MAX); + + msg->out.length = ERDMA_STATS_MAX * sizeof(__u64); break; default: pr_err("unknown stat opcode %d.\n", msg->in.opcode); @@ -214,39 +486,138 @@ int erdma_ioctl_stat_cmd(struct erdma_dev *edev, struct erdma_ioctl_msg *msg) int erdma_ioctl_dump_cmd(struct erdma_dev *edev, struct erdma_ioctl_msg *msg) { - u32 qe_idx = msg->in.dump_req.qe_idx; - u32 qn = msg->in.dump_req.qn; + u32 qe_idx = msg->in.dump_req.qe_idx; + u32 qn = msg->in.dump_req.qn; struct erdma_qp *qp; + struct erdma_cq *cq; + struct erdma_eq *eq; int ret = 0; u64 address; u32 wqe_idx; switch (msg->in.opcode) { case ERDMA_DUMP_TYPE_SQE: + + /* CMDQ-SQ */ + if (qn == 0) { + wqe_idx = qe_idx & (edev->cmdq.sq.depth - 1); + memcpy(msg->out.data, + edev->cmdq.sq.qbuf + (wqe_idx << SQEBB_SHIFT), + SQEBB_SIZE); + } else { + qp = find_qp_by_qpn(edev, qn); + if (!qp) + return -EINVAL; + erdma_qp_get(qp); + + if (!rdma_is_kernel_res(&qp->ibqp.res)) { + address = qp->user_qp.sq_mtt.umem->address; + wqe_idx = qe_idx & (qp->attrs.sq_size - 1); + address += wqe_idx << SQEBB_SHIFT; + ret = access_process_vm(qp->ibqp.res.task, + address, msg->out.data, + SQEBB_SIZE, FOLL_FORCE); + if (ret != SQEBB_SIZE) { + pr_info("access address with error (%d)\n", + ret); + erdma_qp_put(qp); + return -EIO; + } + ret = 0; + } else { + wqe_idx = qe_idx & (qp->attrs.sq_size - 1); + memcpy(msg->out.data, + qp->kern_qp.sq_buf + + (wqe_idx << SQEBB_SHIFT), + SQEBB_SIZE); + } + erdma_qp_put(qp); + } + msg->out.length = SQEBB_SIZE; + break; + case ERDMA_DUMP_TYPE_RQE: qp = find_qp_by_qpn(edev, qn); if (!qp) return -EINVAL; erdma_qp_get(qp); if (!rdma_is_kernel_res(&qp->ibqp.res)) { - - address = qp->user_qp.sq_mtt.umem->address; - wqe_idx = qe_idx & 
(qp->attrs.sq_size - 1); - address += wqe_idx << SQEBB_SHIFT; - ret = access_process_vm(qp->ibqp.res.task, - address, msg->out.data, SQEBB_SIZE, FOLL_FORCE); - if (ret != SQEBB_SIZE) { - pr_info("access address with error (%d)\n", ret); + address = qp->user_qp.rq_mtt.umem->address; + wqe_idx = qe_idx & (qp->attrs.rq_size - 1); + address += wqe_idx << RQE_SHIFT; + ret = access_process_vm(qp->ibqp.res.task, address, + msg->out.data, RQE_SIZE, + FOLL_FORCE); + if (ret != RQE_SIZE) { + pr_info("access address with error (%d)\n", + ret); erdma_qp_put(qp); return -EIO; } + ret = 0; } else { - + wqe_idx = qe_idx & (qp->attrs.rq_size - 1); + memcpy(msg->out.data, + qp->kern_qp.rq_buf + (wqe_idx << RQE_SHIFT), + RQE_SIZE); } erdma_qp_put(qp); - msg->out.length = 256; + msg->out.length = RQE_SIZE; break; - case ERDMA_DUMP_TYPE_RQE: + case ERDMA_DUMP_TYPE_CQE: + if (qn == 0) { + /* CMDQ-CQ */ + wqe_idx = qe_idx & (edev->cmdq.cq.depth - 1); + memcpy(msg->out.data, + edev->cmdq.cq.qbuf + (wqe_idx << CQE_SHIFT), + CQE_SIZE); + } else { + cq = find_cq_by_cqn(edev, qn); + if (!cq) + return -EINVAL; + + if (!rdma_is_kernel_res(&cq->ibcq.res)) { + address = cq->user_cq.qbuf_mtt.umem->address; + wqe_idx = qe_idx & (cq->depth - 1); + address += wqe_idx << CQE_SHIFT; + ret = access_process_vm(cq->ibcq.res.task, + address, msg->out.data, + CQE_SIZE, FOLL_FORCE); + if (ret != CQE_SIZE) { + pr_info("access address with error (%d)\n", + ret); + return -EIO; + } + ret = 0; + } else { + wqe_idx = qe_idx & (cq->depth - 1); + memcpy(msg->out.data, + cq->kern_cq.qbuf + + (wqe_idx << CQE_SHIFT), + CQE_SIZE); + } + } + msg->out.length = CQE_SIZE; + break; + + case ERDMA_DUMP_TYPE_EQE: + /* 0: AEQ, 1: CMD-EQ, 2 - 33: CEQ */ + if (qn == 0) { /* AEQ */ + eq = &edev->aeq; + } else if (qn == 1) { + eq = &edev->cmdq.eq; + } else if (qn > 1 && qn <= 33) { + if (edev->ceqs[qn - 2].ready == 0) + return -EINVAL; + eq = &edev->ceqs[qn - 2].eq; + } else { + return -EINVAL; + } + + wqe_idx = qe_idx & (eq->depth - 1); + memcpy(msg->out.data, eq->qbuf + (wqe_idx << EQE_SHIFT), + EQE_SIZE); + msg->out.length = EQE_SIZE; break; default: break; @@ -320,7 +691,9 @@ long do_ioctl(unsigned int cmd, unsigned long arg) out: if (!bypass_dev) ib_device_put(ibdev); - return -EOPNOTSUPP; + + kfree(msg); + return ret; } long chardev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) @@ -351,6 +724,7 @@ static int chardev_close(struct inode *inode, struct file *filp) return 0; } +/* clang-format off */ static const struct file_operations chardev_fops = { .owner = THIS_MODULE, .open = chardev_open, @@ -358,6 +732,7 @@ static const struct file_operations chardev_fops = { .read = chardev_read, .unlocked_ioctl = chardev_ioctl }; +/* clang-format on */ void erdma_chrdev_destroy(void) { @@ -395,8 +770,8 @@ int erdma_chrdev_init(void) goto destroy_class; } - erdma_chrdev = device_create(erdma_chrdev_class, - NULL, erdma_char_dev, NULL, ERDMA_CHRDEV_NAME); + erdma_chrdev = device_create(erdma_chrdev_class, NULL, erdma_char_dev, + NULL, ERDMA_CHRDEV_NAME); if (IS_ERR(erdma_chrdev)) { pr_err("create_device failed.\n"); goto delete_cdev; diff --git a/drivers/infiniband/hw/erdma/erdma_ioctl.h b/drivers/infiniband/hw/erdma/erdma_ioctl.h index 57e7d29eef4eedbe6e6835a894da8515d217499e..c0f4a2cb0789cac12e50f14f44c9249ba47292f9 100644 --- a/drivers/infiniband/hw/erdma/erdma_ioctl.h +++ b/drivers/infiniband/hw/erdma/erdma_ioctl.h @@ -49,9 +49,10 @@ enum erdma_stat_type { enum erdma_info_type { ERDMA_INFO_TYPE_DEV = 0, + ERDMA_INFO_TYPE_ALLOCED_QP, 
ERDMA_INFO_TYPE_QP, + ERDMA_INFO_TYPE_ALLOCED_CQ, ERDMA_INFO_TYPE_CQ, - ERDMA_INFO_TYPE_ALLOCED_QP, ERDMA_INFO_TYPE_EQ, ERDMA_INFO_TYPE_CEP, ERDMA_INFO_TYPE_MAX, @@ -67,12 +68,8 @@ enum erdma_dump_type { ERDMA_DUMP_TYPE_SQE = 0, ERDMA_DUMP_TYPE_RQE, ERDMA_DUMP_TYPE_CQE, - ERDMA_DUMP_MAX = ERDMA_DUMP_TYPE_CQE + 1, -}; - -struct erdma_dev_info { - __u32 devid; - __u64 node_guid; + ERDMA_DUMP_TYPE_EQE, + ERDMA_DUMP_MAX = ERDMA_DUMP_TYPE_EQE + 1, }; struct erdma_qp_info { @@ -96,8 +93,22 @@ struct erdma_qp_info { __u8 sq_mtt_type; __u8 rq_mtt_type; + __u32 assoc_scqn; + __u32 assoc_rcqn; + + __u16 sqci; + __u16 sqpi; + __u16 rqci; + __u16 rqpi; + __u64 sqbuf_dma; + __u64 rqbuf_dma; + __u64 sqdbrec_dma; + __u64 rqdbrec_dma; + __u32 pid; char buf[TASK_COMM_LEN]; + __u8 rsvd0[15]; + __u8 hw_info_valid; struct { __u32 page_size; @@ -108,12 +119,100 @@ struct erdma_qp_info { __u64 va; __u64 len; } sq_mtt, rq_mtt; + + __u8 sq_enable; + __u8 sqbuf_page_offset; + __u8 sqbuf_page_size; + __u8 sqbuf_depth; + __u16 hw_sq_ci; + __u16 hw_sq_pi; + + __u8 rq_enable; + __u8 rqbuf_page_offset; + __u8 rqbuf_page_size; + __u8 rqbuf_depth; + __u16 hw_rq_ci; + __u16 hw_rq_pi; + + __u16 last_comp_sqe_idx; + __u16 last_comp_rqe_idx; + __u16 scqe_counter; + __u16 rcqe_counter; + __u16 tx_pkts_cnt; + __u16 rx_pkts_cnt; + __u16 rx_error_drop_cnt; + __u16 rx_invalid_drop_cnt; + __u32 rto_retrans_cnt; + + __u32 pd; + __u16 fw_sq_pi; + __u16 fw_sq_ci; + __u16 fw_rq_ci; + __u8 sq_in_flush; + __u8 rq_in_flush; + + __u16 sq_flushed_pi; + __u16 rq_flushed_pi; + + __u64 sqbuf_addr; + __u64 rqbuf_addr; + __u64 sdbrec_addr; + __u64 rdbrec_addr; + __u64 sdbrec_val; + __u64 rdbrec_val; + + __u32 ip_src; + __u32 ip_dst; + __u16 srcport; + __u16 dstport; +}; + +struct erdma_cq_info { + __u32 cqn; + __u32 depth; + + __u32 assoc_eqn; + __u8 is_user; + __u8 rsvd0; + __u8 mtt_type; + __u8 hw_info_valid; + + __u64 qbuf_dma_addr; + __u32 ci; + __u32 cmdsn; + __u32 notify_cnt; + __u32 rsvd1; + + struct { + __u32 page_size; + __u32 page_offset; + __u32 page_cnt; + __u32 mtt_nents; + __u64 mtt_entry[4]; + __u64 va; + __u64 len; + } mtt; + + __u32 hw_pi; + __u8 enable; + __u8 log_depth; + __u8 cq_cur_ownership; + __u8 last_errdb_type; /* 0,dup db;1,out-order db */ + + __u32 last_errdb_ci; + __u8 out_order_db_cnt; + __u8 dup_db_cnt; + __u16 rsvd; + + __u64 cn_cq_db_addr; + __u64 cq_db_record; }; struct erdma_eq_info { __u32 eqn; __u8 ready; - __u8 rsvd[3]; + __u8 rsvd[2]; + __u8 hw_info_valid; __u64 event_cnt; __u64 notify_cnt; @@ -122,6 +221,22 @@ struct erdma_eq_info { __u32 ci; __u64 qbuf_dma; __u64 qbuf_va; + + __u16 hw_depth; + __u16 vector; + + __u8 int_suppression; + __u8 tail_owner; + __u8 head_owner; + __u8 overflow; + + __u32 head; + __u32 tail; + + __u64 cn_addr; + __u64 cn_db_addr; + __u64 eq_db_record; + }; struct erdma_ioctl_inbuf { @@ -157,12 +272,15 @@ struct erdma_ioctl_outbuf { } config_resp; __u32 allocted_qpn[1024]; + __u32 allocted_cqn[1024]; struct erdma_qp_info qp_info; /* 0: AEQ, 1: Cmd-EQ, 2-32: Completion-EQ */ struct erdma_eq_info eq_info[33]; + struct erdma_cq_info cq_info; __u32 version; + __u64 stats[512]; }; }; diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c index 4650fbc956240345aa2209bc28dde496aabf8420..443442ad0e445dd12df29c748a4a8f39faf59fde 100644 --- a/drivers/infiniband/hw/erdma/erdma_main.c +++ b/drivers/infiniband/hw/erdma/erdma_main.c @@ -1,39 +1,22 @@ -// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// SPDX-License-Identifier: GPL-2.0 OR 
BSD-3-Clause /* Authors: Cheng Xu */ /* Kai Shen */ /* Copyright (c) 2020-2022, Alibaba Group. */ #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include - +#include #include -#include - -#include -#include #include #include "erdma.h" #include "erdma_cm.h" -#include "erdma_debug.h" -#include "erdma_hw.h" -#include "erdma_ioctl.h" -#include "erdma_stats.h" #include "erdma_verbs.h" MODULE_AUTHOR("Cheng Xu "); -MODULE_DESCRIPTION("Alibaba elasticRDMA adapter driver"); +MODULE_AUTHOR("Kai Shen "); +MODULE_DESCRIPTION("Alibaba elasticRDMA adapter driver (preview)"); MODULE_LICENSE("Dual BSD/GPL"); __u32 dprint_mask; @@ -48,6 +31,83 @@ static unsigned int vector_num = ERDMA_NUM_MSIX_VEC; module_param(vector_num, uint, 0444); MODULE_PARM_DESC(vector_num, "number of compeletion vectors"); +static int erdma_netdev_event(struct notifier_block *nb, unsigned long event, + void *arg) +{ + struct net_device *netdev = netdev_notifier_info_to_dev(arg); + struct erdma_dev *dev = container_of(nb, struct erdma_dev, netdev_nb); + + dprint(DBG_CTRL, " netdev:%s,ns:%p: Event %lu to erdma_dev %p\n", + netdev->name, dev_net(netdev), event, dev); + + if (dev->netdev == NULL || dev->netdev != netdev) + goto done; + + switch (event) { + case NETDEV_UP: + dev->state = IB_PORT_ACTIVE; + erdma_port_event(dev, IB_EVENT_PORT_ACTIVE); + break; + case NETDEV_DOWN: + dev->state = IB_PORT_DOWN; + erdma_port_event(dev, IB_EVENT_PORT_ERR); + break; + case NETDEV_CHANGEMTU: + if (dev->mtu != netdev->mtu) { + erdma_set_mtu(dev, netdev->mtu); + dev->mtu = netdev->mtu; + } + break; + case NETDEV_REGISTER: + case NETDEV_UNREGISTER: + case NETDEV_CHANGEADDR: + case NETDEV_GOING_DOWN: + case NETDEV_CHANGE: + default: + break; + } + +done: + return NOTIFY_OK; +} + +static int erdma_enum_and_get_netdev(struct erdma_dev *dev) +{ + struct net_device *netdev; + int ret = -ENODEV; + + /* Already binded to a net_device, so we skip. */ + if (dev->netdev) + return 0; + + rtnl_lock(); + for_each_netdev(&init_net, netdev) { + /* + * In erdma, the paired netdev and ibdev should have the same + * MAC address. erdma can get the value from its PCIe bar + * registers. Since erdma can not get the paired netdev + * reference directly, we do a traverse here to get the paired + * netdev. 
+ */ + if (ether_addr_equal_unaligned(netdev->perm_addr, + dev->attrs.peer_addr)) { + ret = ib_device_set_netdev(&dev->ibdev, netdev, 1); + if (ret) { + rtnl_unlock(); + ibdev_warn(&dev->ibdev, + "failed (%d) to link netdev", ret); + return ret; + } + dev->netdev = netdev; + break; + } + } + + rtnl_unlock(); + + return ret; +} + static int erdma_device_register(struct erdma_dev *dev) { struct ib_device *ibdev = &dev->ibdev; @@ -65,12 +125,13 @@ static int erdma_device_register(struct erdma_dev *dev) if (ret < 0) return ret; + ret = erdma_enum_and_get_netdev(dev); + if (ret) + return -EPROBE_DEFER; + + dev->mtu = dev->netdev->mtu; addrconf_addr_eui48((u8 *)&ibdev->node_guid, dev->netdev->dev_addr); - ibdev->phys_port_cnt = 1; - ret = ib_device_set_netdev(ibdev, dev->netdev, 1); - if (ret) - return ret; ret = ib_register_device(ibdev, ibdev->name, &dev->pdev->dev); if (ret) { dev_err(&dev->pdev->dev, @@ -79,6 +140,14 @@ static int erdma_device_register(struct erdma_dev *dev) return ret; } + dev->netdev_nb.notifier_call = erdma_netdev_event; + ret = register_netdevice_notifier(&dev->netdev_nb); + if (ret) { + ibdev_err(&dev->ibdev, "failed to register notifier.\n"); + ib_unregister_device(ibdev); + return ret; + } + dprint(DBG_DM, " Registered '%s' for interface '%s',HWaddr=%02x.%02x.%02x.%02x.%02x.%02x\n", ibdev->name, dev->netdev->name, *(__u8 *)dev->netdev->dev_addr, @@ -88,105 +157,9 @@ static int erdma_device_register(struct erdma_dev *dev) *((__u8 *)dev->netdev->dev_addr + 4), *((__u8 *)dev->netdev->dev_addr + 5)); - dev->is_registered = 1; - return 0; } -int erdma_find_netdev_and_register_ibdev(struct erdma_dev *dev) -{ - struct net *net; - struct net_device *ndev; - - rtnl_lock(); - down_read(&net_rwsem); - for_each_net(net) - for_each_netdev(net, ndev) { - if (ether_addr_equal_unaligned(ndev->perm_addr, dev->attrs.peer_addr)) { - dev->netdev = ndev; - break; - } - } - up_read(&net_rwsem); - rtnl_unlock(); - - if (dev->netdev) - return erdma_device_register(dev); - - return -ENODEV; -} - -static void erdma_device_deregister(struct erdma_dev *edev) -{ - int i; - - ib_unregister_device(&edev->ibdev); - - WARN_ON(atomic_read(&edev->num_ctx)); - WARN_ON(atomic_read(&edev->num_cep)); - i = 0; - - while (!list_empty(&edev->cep_list)) { - struct erdma_cep *cep = - list_entry(edev->cep_list.next, struct erdma_cep, devq); - list_del(&cep->devq); - dprint(DBG_ON, ": Free CEP (0x%p), state: %d\n", cep, - cep->state); - kfree(cep); - i++; - } - if (i) - pr_warn("erdma device deregister: free'd %d CEPs\n", i); -} - -static int erdma_netdev_event(struct notifier_block *nb, unsigned long event, - void *arg) -{ - struct net_device *netdev = netdev_notifier_info_to_dev(arg); - struct erdma_dev *dev = container_of(nb, struct erdma_dev, netdev_nb); - - dprint(DBG_CTRL, " netdev:%s,ns:%p: Event %lu to erdma_dev %p\n", - netdev->name, dev_net(netdev), event, dev); - - if ((dev->netdev == NULL && event != NETDEV_REGISTER) || - (dev->netdev != NULL && dev->netdev != netdev)) - return NOTIFY_DONE; - - switch (event) { - case NETDEV_UP: - dev->state = IB_PORT_ACTIVE; - erdma_port_event(dev, IB_EVENT_PORT_ACTIVE); - break; - case NETDEV_DOWN: - dev->state = IB_PORT_DOWN; - erdma_port_event(dev, IB_EVENT_PORT_ERR); - break; - case NETDEV_REGISTER: - if (!compat_mode && - ether_addr_equal_unaligned(netdev->perm_addr, - dev->attrs.peer_addr)) { - dev->netdev = netdev; - dev->state = IB_PORT_INIT; - if (!dev->is_registered) { - dprint(DBG_DM, - ": new erdma lowlevel device for %s\n", - netdev->name); - 
erdma_device_register(dev); - } - } - break; - case NETDEV_UNREGISTER: - case NETDEV_CHANGEADDR: - case NETDEV_CHANGEMTU: - case NETDEV_GOING_DOWN: - case NETDEV_CHANGE: - default: - break; - } - - return NOTIFY_OK; -} - static irqreturn_t erdma_comm_irq_handler(int irq, void *data) { struct erdma_dev *dev = data; @@ -223,7 +196,9 @@ static void erdma_dwqe_resource_init(struct erdma_dev *dev) dev->attrs.dwqe_pages = type0; dev->attrs.dwqe_entries = type1 * ERDMA_DWQE_TYPE1_CNT_PER_PAGE; - dev_info(&dev->pdev->dev, "grp_num:%d, total pages:%d, type0:%d, type1:%d, type1_db_cnt:%d\n", + dev_info( + &dev->pdev->dev, + "grp_num:%d, total pages:%d, type0:%d, type1:%d, type1_db_cnt:%d\n", dev->attrs.grp_num, total_pages, type0, type1, type1 * 16); } @@ -263,16 +238,37 @@ static void erdma_comm_irq_uninit(struct erdma_dev *dev) free_irq(dev->comm_irq.msix_vector, dev); } +static int erdma_hw_resp_pool_init(struct erdma_dev *dev) +{ + dev->resp_pool = + dma_pool_create("erdma_resp_pool", &dev->pdev->dev, + ERDMA_HW_RESP_SIZE, ERDMA_HW_RESP_SIZE, 0); + if (!dev->resp_pool) + return -ENOMEM; + + return 0; +} + +static void erdma_hw_resp_pool_destroy(struct erdma_dev *dev) +{ + dma_pool_destroy(dev->resp_pool); +} + static int erdma_device_init(struct erdma_dev *dev, struct pci_dev *pdev) { int ret; erdma_dwqe_resource_init(dev); + ret = erdma_hw_resp_pool_init(dev); + if (ret) + return ret; ret = dma_set_mask_and_coherent(&pdev->dev, - DMA_BIT_MASK(ERDMA_PCI_WIDTH)); - if (ret) + DMA_BIT_MASK(ERDMA_PCI_WIDTH)); + if (ret) { + erdma_hw_resp_pool_destroy(dev); return ret; + } dma_set_max_seg_size(&pdev->dev, UINT_MAX); @@ -280,6 +276,34 @@ static int erdma_device_init(struct erdma_dev *dev, struct pci_dev *pdev) } static void erdma_device_uninit(struct erdma_dev *dev) +{ + erdma_hw_resp_pool_destroy(dev); +} + +static int erdma_wait_hw_init_done(struct erdma_dev *dev) +{ + int i; + + erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, + FIELD_PREP(ERDMA_REG_DEV_CTRL_INIT_MASK, 1)); + + for (i = 0; i < ERDMA_WAIT_DEV_DONE_CNT; i++) { + if (erdma_reg_read32_filed(dev, ERDMA_REGS_DEV_ST_REG, + ERDMA_REG_DEV_ST_INIT_DONE_MASK)) + break; + + msleep(ERDMA_REG_ACCESS_WAIT_MS); + } + + if (i == ERDMA_WAIT_DEV_DONE_CNT) { + dev_err(&dev->pdev->dev, "wait init done failed.\n"); + return -ETIMEDOUT; + } + + return 0; +} + +static void erdma_hw_stop(struct erdma_dev *dev) { u32 ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_RESET_MASK, 1); @@ -293,10 +317,9 @@ static const struct pci_device_id erdma_pci_tbl[] = { static int erdma_probe_dev(struct pci_dev *pdev) { - int err; struct erdma_dev *dev; + int bars, err; u32 version; - int bars; err = pci_enable_device(pdev); if (err) { @@ -348,7 +371,7 @@ static int erdma_probe_dev(struct pci_dev *pdev) err = erdma_request_vectors(dev); if (err) - goto err_iounmap_func_bar; + goto err_uninit_device; err = erdma_comm_irq_init(dev); if (err) @@ -362,18 +385,24 @@ static int erdma_probe_dev(struct pci_dev *pdev) if (err) goto err_uninit_aeq; - err = erdma_ceqs_init(dev); + err = erdma_wait_hw_init_done(dev); if (err) goto err_uninit_cmdq; + err = erdma_ceqs_init(dev); + if (err) + goto err_stop_hw; + msleep(500); erdma_finish_cmdq_init(dev); return 0; +err_stop_hw: + erdma_hw_stop(dev); + err_uninit_cmdq: - erdma_device_uninit(dev); erdma_cmdq_destroy(dev); err_uninit_aeq: @@ -385,6 +414,9 @@ static int erdma_probe_dev(struct pci_dev *pdev) err_free_vectors: pci_free_irq_vectors(dev->pdev); +err_uninit_device: + erdma_device_uninit(dev); + err_iounmap_func_bar: devm_iounmap(&pdev->dev, 
dev->func_bar); @@ -405,19 +437,15 @@ static void erdma_remove_dev(struct pci_dev *pdev) struct erdma_dev *dev = pci_get_drvdata(pdev); erdma_ceqs_uninit(dev); - - erdma_device_uninit(dev); - + erdma_hw_stop(dev); erdma_cmdq_destroy(dev); erdma_aeq_destroy(dev); erdma_comm_irq_uninit(dev); pci_free_irq_vectors(dev->pdev); - + erdma_device_uninit(dev); devm_iounmap(&pdev->dev, dev->func_bar); pci_release_selected_regions(pdev, ERDMA_BAR_MASK); - ib_dealloc_device(&dev->ibdev); - pci_disable_device(pdev); } @@ -435,7 +463,9 @@ static int erdma_check_version(struct erdma_dev *dev) u8 fw_major = (dev->attrs.fw_version >> 16); u8 fw_medium = (dev->attrs.fw_version >> 8); - return (fw_major != ERDMA_MAJOR_VER || fw_medium != ERDMA_MEDIUM_VER) ? -1 : 0; + return (fw_major != ERDMA_MAJOR_VER || fw_medium != ERDMA_MEDIUM_VER) ? + -1 : + 0; } #define ERDMA_GET_CAP(name, cap) FIELD_GET(ERDMA_CMD_DEV_CAP_##name##_MASK, cap) @@ -462,6 +492,7 @@ static int erdma_dev_attrs_init(struct erdma_dev *dev) dev->attrs.max_qp = ERDMA_NQP_PER_QBLOCK * ERDMA_GET_CAP(QBLOCK, cap1); dev->attrs.max_mr = dev->attrs.max_qp << 1; dev->attrs.max_cq = dev->attrs.max_qp << 1; + dev->attrs.flags = ERDMA_GET_CAP(FLAGS, cap0); dev->attrs.max_send_wr = ERDMA_MAX_SEND_WR; dev->attrs.max_ord = ERDMA_MAX_ORD; @@ -496,7 +527,6 @@ static int erdma_res_cb_init(struct erdma_dev *dev) dev->res_cb[i].bitmap = kcalloc(BITS_TO_LONGS(dev->res_cb[i].max_cap), sizeof(unsigned long), GFP_KERNEL); - /* We will free the memory in erdma_res_cb_free */ if (!dev->res_cb[i].bitmap) goto err; } @@ -559,9 +589,8 @@ static const struct ib_device_ops erdma_device_ops = { .req_notify_cq = erdma_req_notify_cq, .reg_user_mr = erdma_reg_user_mr, .get_netdev = erdma_get_netdev, - .drain_sq = erdma_drain_sq, - .drain_rq = erdma_drain_rq, .query_pkey = erdma_query_pkey, + .modify_cq = erdma_modify_cq, INIT_RDMA_OBJ_SIZE(ib_cq, erdma_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_pd, erdma_pd, ibpd), @@ -570,7 +599,6 @@ static const struct ib_device_ops erdma_device_ops = { static const struct ib_device_ops erdma_compat_ops = { .get_link_layer = erdma_get_link_layer, - .query_pkey = erdma_query_pkey }; static int erdma_ib_device_add(struct pci_dev *pdev) @@ -578,9 +606,7 @@ static int erdma_ib_device_add(struct pci_dev *pdev) struct erdma_dev *dev = pci_get_drvdata(pdev); struct ib_device *ibdev = &dev->ibdev; u64 mac; - int ret = 0; - - dprint(DBG_INIT, "init erdma_dev(%p)\n", dev); + int ret; erdma_stats_init(dev); @@ -641,33 +667,40 @@ static int erdma_ib_device_add(struct pci_dev *pdev) atomic_set(&dev->num_ctx, 0); - dprint(DBG_INIT, "ib device create ok.\n"); - mac = erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_L_REG); mac |= (u64)erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_H_REG) << 32; - dev_info(&dev->pdev->dev, "assoc netdev mac addr is 0x%llx.\n", - mac); + dev_info(&dev->pdev->dev, "assoc netdev mac addr is 0x%llx.\n", mac); u64_to_ether_addr(mac, dev->attrs.peer_addr); - dev->netdev = NULL; - if (compat_mode) { - ret = erdma_find_netdev_and_register_ibdev(dev); - if (ret) - goto err_out; + dev->db_pool = dma_pool_create("erdma_db", &pdev->dev, ERDMA_DB_SIZE, + ERDMA_DB_SIZE, 0); + if (!dev->db_pool) { + ret = -ENOMEM; + goto err_out; } - dev->netdev_nb.notifier_call = erdma_netdev_event; - ret = register_netdevice_notifier(&dev->netdev_nb); + dev->reflush_wq = alloc_workqueue("erdma-reflush-wq", WQ_UNBOUND, + WQ_UNBOUND_MAX_ACTIVE); + if (!dev->reflush_wq) { + ret = -ENOMEM; + goto free_pool; + } + + ret = erdma_device_register(dev); if (ret) - goto err_out; + 
goto free_wq; + + dev->ibdev.use_cq_dim = true; return 0; +free_wq: + destroy_workqueue(dev->reflush_wq); +free_pool: + dma_pool_destroy(dev->db_pool); err_out: - if (dev->is_registered && compat_mode) - ib_unregister_device(&dev->ibdev); xa_destroy(&dev->qp_xa); xa_destroy(&dev->cq_xa); @@ -681,16 +714,20 @@ static void erdma_ib_device_remove(struct pci_dev *pdev) { struct erdma_dev *dev = pci_get_drvdata(pdev); + unregister_netdevice_notifier(&dev->netdev_nb); - if (dev->is_registered) { - erdma_device_deregister(dev); - dev->is_registered = 0; - } + ib_unregister_device(&dev->ibdev); + + WARN_ON(atomic_read(&dev->num_ctx)); + WARN_ON(atomic_read(&dev->num_cep)); + WARN_ON(!list_empty(&dev->cep_list)); erdma_res_cb_free(dev); xa_destroy(&dev->qp_xa); xa_destroy(&dev->cq_xa); + dma_pool_destroy(dev->db_pool); + destroy_workqueue(dev->reflush_wq); } static int erdma_probe(struct pci_dev *pdev, const struct pci_device_id *ent) diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c index 243bb0666226f8ff16d9936c0f7504f7f1633d08..886fcd58ce8c8b1bd9b482e0d5b0d799ebbd5cf3 100644 --- a/drivers/infiniband/hw/erdma/erdma_qp.c +++ b/drivers/infiniband/hw/erdma/erdma_qp.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* Authors: Cheng Xu */ /* Kai Shen */ @@ -6,16 +6,6 @@ /* Authors: Bernard Metzler */ /* Copyright (c) 2008-2019, IBM Corporation */ -#include -#include -#include -#include - -#include -#include -#include - -#include "erdma.h" #include "erdma_cm.h" #include "erdma_verbs.h" @@ -32,7 +22,7 @@ void erdma_qp_llp_close(struct erdma_qp *qp) case ERDMA_QP_STATE_RTR: case ERDMA_QP_STATE_IDLE: case ERDMA_QP_STATE_TERMINATE: - qp_attrs.state = ERDMA_QP_STATE_CLOSING; + qp_attrs.state = ERDMA_QP_STATE_ERROR; erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE); break; case ERDMA_QP_STATE_CLOSING: @@ -97,6 +87,8 @@ static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp, tp = tcp_sk(qp->cep->sock->sk); + qp->attrs.remote_cookie = be32_to_cpu(qp->cep->mpa.ext_data.cookie); + req.cookie = be32_to_cpu(qp->cep->mpa.ext_data.cookie); req.dip = to_sockaddr_in(remote_addr).sin_addr.s_addr; req.sip = to_sockaddr_in(local_addr).sin_addr.s_addr; @@ -120,8 +112,7 @@ static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp, FIELD_PREP(ERDMA_CMD_MODIFY_QP_CC_MASK, qp->attrs.cc) | FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp)); - return erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL, - NULL); + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); } static int erdma_modify_qp_state_to_rts_compat(struct erdma_qp *qp, @@ -145,17 +136,17 @@ static int erdma_modify_qp_state_to_rts_compat(struct erdma_qp *qp, req.sip = qp->attrs.laddr.in.sin_addr.s_addr; if (req.dip < req.sip) { - req.dport = COMPAT_PORT_BASE + - ((QP_ID(qp) >> 16) & 0xF); + req.dport = COMPAT_PORT_BASE + ((QP_ID(qp) >> 16) & 0xF); req.sport = QP_ID(qp); - } else if (req.dip == req.sip) { /* if dip == sip, must have lqpn != rqpn */ + } else if (req.dip == + req.sip) { /* if dip == sip, must have lqpn != rqpn */ if (QP_ID(qp) < qp->attrs.remote_qp_num) { - req.dport = COMPAT_PORT_BASE + - ((QP_ID(qp) >> 16) & 0xF); + req.dport = + COMPAT_PORT_BASE + ((QP_ID(qp) >> 16) & 0xF); req.sport = QP_ID(qp); } else { req.sport = COMPAT_PORT_BASE + - ((qp->attrs.remote_qp_num >> 16) & 0xF); + ((qp->attrs.remote_qp_num >> 16) & 0xF); req.dport = qp->attrs.remote_qp_num; } } else { @@ -167,8 +158,7 
@@ static int erdma_modify_qp_state_to_rts_compat(struct erdma_qp *qp, req.send_nxt = req.sport * 4; req.recv_nxt = req.dport * 4; - return erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL, - NULL); + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); } static int erdma_modify_qp_state_to_stop(struct erdma_qp *qp, @@ -186,14 +176,14 @@ static int erdma_modify_qp_state_to_stop(struct erdma_qp *qp, req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, attrs->state) | FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp)); - return erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL, - NULL); + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); } int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, enum erdma_qp_attr_mask mask) { int drop_conn, ret = 0; + bool need_reflush = false; if (!mask) return 0; @@ -206,9 +196,11 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, case ERDMA_QP_STATE_RTR: if (attrs->state == ERDMA_QP_STATE_RTS) { if (compat_mode) - ret = erdma_modify_qp_state_to_rts_compat(qp, attrs, mask); + ret = erdma_modify_qp_state_to_rts_compat( + qp, attrs, mask); else - ret = erdma_modify_qp_state_to_rts(qp, attrs, mask); + ret = erdma_modify_qp_state_to_rts(qp, attrs, + mask); } else if (attrs->state == ERDMA_QP_STATE_ERROR) { qp->attrs.state = ERDMA_QP_STATE_ERROR; if (qp->cep) { @@ -217,6 +209,9 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, } ret = erdma_modify_qp_state_to_stop(qp, attrs, mask); + /* We apply to kernel qp first. */ + if (rdma_is_kernel_res(&qp->ibqp.res)) + need_reflush = true; } break; case ERDMA_QP_STATE_RTS: @@ -227,7 +222,11 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, attrs->state == ERDMA_QP_STATE_ERROR) { drop_conn = 1; if (!(qp->attrs.flags & ERDMA_QP_IN_DESTROY)) - ret = erdma_modify_qp_state_to_stop(qp, attrs, mask); + ret = erdma_modify_qp_state_to_stop(qp, attrs, + mask); + /* We apply to kernel qp first. 
*/ + if (rdma_is_kernel_res(&qp->ibqp.res)) + need_reflush = true; } if (drop_conn) @@ -252,6 +251,12 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, break; } + if (need_reflush && !ret) { + qp->flags |= ERDMA_QP_IN_FLUSHING; + mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork, + usecs_to_jiffies(100)); + } + return ret; } @@ -311,7 +316,7 @@ static int fill_inline_data(struct erdma_qp *qp, qp->attrs.sq_size, SQEBB_SHIFT); if (!remain_size) break; - }; + } i++; } @@ -474,7 +479,7 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi, FIELD_PREP(ERDMA_SQE_MR_MTT_CNT_MASK, mr->mem.mtt_nents); - if (mr->mem.mtt_nents < ERDMA_MAX_INLINE_MTT_ENTRIES) { + if (mr->mem.mtt_nents <= ERDMA_MAX_INLINE_MTT_ENTRIES) { attrs |= FIELD_PREP(ERDMA_SQE_MR_MTT_TYPE_MASK, 0); /* Copy SGLs to SQE content to accelerate */ memcpy(get_queue_entry(qp->kern_qp.sq_buf, idx + 1, @@ -536,8 +541,8 @@ static void kick_sq_db(struct erdma_qp *qp, u16 pi) writeq(db_data, qp->kern_qp.hw_sq_db); } -static int erdma_post_send_internal(struct ib_qp *ibqp, const struct ib_send_wr *send_wr, - const struct ib_send_wr **bad_send_wr, bool is_last) +int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr, + const struct ib_send_wr **bad_send_wr) { struct erdma_qp *qp = to_eqp(ibqp); int ret = 0; @@ -549,14 +554,6 @@ static int erdma_post_send_internal(struct ib_qp *ibqp, const struct ib_send_wr return -EINVAL; spin_lock_irqsave(&qp->kern_qp.sq_lock, flags); - if (unlikely(qp->kern_qp.sq_shutdown)) { - *bad_send_wr = send_wr; - ret = -EINVAL; - goto out; - } - if (unlikely(is_last)) - qp->kern_qp.sq_shutdown = true; - sq_pi = qp->kern_qp.sq_pi; while (wr) { @@ -576,16 +573,13 @@ static int erdma_post_send_internal(struct ib_qp *ibqp, const struct ib_send_wr wr = wr->next; } -out: spin_unlock_irqrestore(&qp->kern_qp.sq_lock, flags); - return ret; -} + if (unlikely(qp->flags & ERDMA_QP_IN_FLUSHING)) + mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork, + usecs_to_jiffies(100)); -int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr, - const struct ib_send_wr **bad_send_wr) -{ - return erdma_post_send_internal(ibqp, send_wr, bad_send_wr, false); + return ret; } static int erdma_post_recv_one(struct erdma_qp *qp, @@ -618,8 +612,8 @@ static int erdma_post_recv_one(struct erdma_qp *qp, return 0; } -static int erdma_post_recv_internal(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr, - const struct ib_recv_wr **bad_recv_wr, bool is_last) +int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr, + const struct ib_recv_wr **bad_recv_wr) { const struct ib_recv_wr *wr = recv_wr; struct erdma_qp *qp = to_eqp(ibqp); @@ -627,13 +621,6 @@ static int erdma_post_recv_internal(struct ib_qp *ibqp, const struct ib_recv_wr int ret = 0; spin_lock_irqsave(&qp->kern_qp.rq_lock, flags); - if (unlikely(qp->kern_qp.rq_shutdown)) { - *bad_recv_wr = recv_wr; - ret = -EINVAL; - goto out; - } - if (unlikely(is_last)) - qp->kern_qp.rq_shutdown = true; while (wr) { ret = erdma_post_recv_one(qp, wr); @@ -643,97 +630,12 @@ static int erdma_post_recv_internal(struct ib_qp *ibqp, const struct ib_recv_wr } wr = wr->next; } -out: - spin_unlock_irqrestore(&qp->kern_qp.rq_lock, flags); - return ret; -} -int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr, - const struct ib_recv_wr **bad_recv_wr) -{ - return erdma_post_recv_internal(ibqp, recv_wr, bad_recv_wr, false); -} - -struct ib_drain_cqe { - struct ib_cqe cqe; - struct completion done; 
-}; - -static void ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc) -{ - struct ib_drain_cqe *cqe = container_of(wc->wr_cqe, struct ib_drain_cqe, - cqe); - - complete(&cqe->done); -} - -static void erdma_drain_qp(struct ib_qp *qp) -{ - struct ib_drain_cqe sdrain, rdrain; - const struct ib_send_wr *bad_swr; - const struct ib_recv_wr *bad_rwr; - struct ib_recv_wr rwr = {}; - struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; - struct ib_rdma_wr swr = { - .wr = { - .next = NULL, - { .wr_cqe = &sdrain.cqe, }, - .opcode = IB_WR_RDMA_WRITE, - .send_flags = IB_SEND_SIGNALED, - }, - }; - int ret, cnt; - - rwr.wr_cqe = &rdrain.cqe; - rdrain.cqe.done = ib_drain_qp_done; - init_completion(&rdrain.done); - - ret = erdma_post_recv_internal(qp, &rwr, &bad_rwr, true); - if (ret) { - WARN_ONCE(ret, "failed to drain recv queue: %d", ret); - return; - } - - sdrain.cqe.done = ib_drain_qp_done; - init_completion(&sdrain.done); - - ret = erdma_post_send_internal(qp, &swr.wr, &bad_swr, true); - if (ret) { - WARN_ONCE(ret, "failed to drain send queue: %d", ret); - return; - } - - ret = ib_modify_qp(qp, &attr, IB_QP_STATE); - if (ret) { - WARN_ONCE(ret, "failed to modify qp to ERR: %d", ret); - return; - } - - cnt = 0; - while (wait_for_completion_timeout(&sdrain.done, HZ / 10) <= 0 && cnt < 50) { - ib_process_cq_direct(qp->send_cq, -1); - cnt++; - } - - cnt = 0; - while (wait_for_completion_timeout(&rdrain.done, HZ / 10) <= 0 && cnt < 50) { - ib_process_cq_direct(qp->recv_cq, -1); - cnt++; - } -} - -void erdma_drain_rq(struct ib_qp *ibqp) -{ - struct erdma_qp *qp = to_eqp(ibqp); + spin_unlock_irqrestore(&qp->kern_qp.rq_lock, flags); - if (qp->attrs.state != ERDMA_QP_STATE_ERROR) - erdma_drain_qp(ibqp); -} + if (unlikely(qp->flags & ERDMA_QP_IN_FLUSHING)) + mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork, + usecs_to_jiffies(100)); -void erdma_drain_sq(struct ib_qp *ibqp) -{ - struct erdma_qp *qp = to_eqp(ibqp); - - if (qp->attrs.state != ERDMA_QP_STATE_ERROR) - erdma_drain_qp(ibqp); + return ret; } diff --git a/drivers/infiniband/hw/erdma/erdma_stats.c b/drivers/infiniband/hw/erdma/erdma_stats.c index 4df2290291bf89072224d1262d18b634e0947e4f..b8442cdd4261a8ca6e040f8de407e55495b47cd2 100644 --- a/drivers/infiniband/hw/erdma/erdma_stats.c +++ b/drivers/infiniband/hw/erdma/erdma_stats.c @@ -3,12 +3,11 @@ /* Authors: Cheng Xu */ /* Kai Shen */ /* Copyright (c) 2020-2022, Alibaba Group. 
*/ -//#include "kcompat.h" #include "erdma.h" -#include "erdma_stats.h" -static const char * const erdma_stats_names[] = { + +static const char *const erdma_stats_names[] = { [ERDMA_STATS_IW_LISTEN_CREATE] = "listen_create_cnt", [ERDMA_STATS_IW_LISTEN_IPV6] = "listen_ipv6_cnt", [ERDMA_STATS_IW_LISTEN_SUCCESS] = "listen_success_cnt", @@ -62,32 +61,48 @@ static const char * const erdma_stats_names[] = { [ERDMA_STATS_CMD_REG_USR_MR] = "verbs_reg_usr_mr_cnt", [ERDMA_STATS_CMD_REG_USR_MR_FAILED] = "verbs_reg_usr_mr_failed_cnt", + [ERDMA_STATS_TX_REQS_CNT] = "hw_tx_reqs_cnt", + [ERDMA_STATS_TX_PACKETS_CNT] = "hw_tx_packets_cnt", + [ERDMA_STATS_TX_BYTES_CNT] = "hw_tx_bytes_cnt", + [ERDMA_STATS_TX_DISABLE_DROP_CNT] = "hw_disable_drop_cnt", + [ERDMA_STATS_TX_BPS_METER_DROP_CNT] = "hw_bps_limit_drop_cnt", + [ERDMA_STATS_TX_PPS_METER_DROP_CNT] = "hw_pps_limit_drop_cnt", + [ERDMA_STATS_RX_PACKETS_CNT] = "hw_rx_packets_cnt", + [ERDMA_STATS_RX_BYTES_CNT] = "hw_rx_bytes_cnt", + [ERDMA_STATS_RX_DISABLE_DROP_CNT] = "hw_rx_disable_drop_cnt", + [ERDMA_STATS_RX_BPS_METER_DROP_CNT] = "hw_rx_bps_limit_drop_cnt", + [ERDMA_STATS_RX_PPS_METER_DROP_CNT] = "hw_rx_pps_limit_drop_cnt", }; -struct rdma_hw_stats *erdma_alloc_hw_stats(struct ib_device *ibdev, port_t port_num) +struct rdma_hw_stats *erdma_alloc_hw_stats(struct ib_device *ibdev, + port_t port_num) { - return rdma_alloc_hw_stats_struct(erdma_stats_names, - ERDMA_STATS_MAX, RDMA_HW_STATS_DEFAULT_LIFESPAN); + return rdma_alloc_hw_stats_struct(erdma_stats_names, ERDMA_STATS_MAX, + RDMA_HW_STATS_DEFAULT_LIFESPAN); } -int erdma_get_hw_stats(struct ib_device *ibdev, - struct rdma_hw_stats *stats, port_t port_num, int index) +int erdma_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, + port_t port_num, int index) { struct erdma_dev *dev = to_edev(ibdev); - atomic64_set(&dev->stats.value[ERDMA_STATS_CMDQ_SUBMITTED], dev->cmdq.sq.total_cmds); - atomic64_set(&dev->stats.value[ERDMA_STATS_CMDQ_COMP], dev->cmdq.sq.total_comp_cmds); + atomic64_set(&dev->stats.value[ERDMA_STATS_CMDQ_SUBMITTED], + dev->cmdq.sq.total_cmds); + atomic64_set(&dev->stats.value[ERDMA_STATS_CMDQ_COMP], + dev->cmdq.sq.total_comp_cmds); atomic64_set(&dev->stats.value[ERDMA_STATS_CMDQ_EQ_NOTIFY], - atomic64_read(&dev->cmdq.eq.notify_num)); + atomic64_read(&dev->cmdq.eq.notify_num)); atomic64_set(&dev->stats.value[ERDMA_STATS_CMDQ_EQ_EVENT], - atomic64_read(&dev->cmdq.eq.event_num)); + atomic64_read(&dev->cmdq.eq.event_num)); atomic64_set(&dev->stats.value[ERDMA_STATS_CMDQ_CQ_ARMED], - atomic64_read(&dev->cmdq.cq.armed_num)); - atomic64_set(&dev->stats.value[ERDMA_STATS_AEQ_EVENT], atomic64_read(&dev->aeq.event_num)); + atomic64_read(&dev->cmdq.cq.armed_num)); + atomic64_set(&dev->stats.value[ERDMA_STATS_AEQ_EVENT], + atomic64_read(&dev->aeq.event_num)); atomic64_set(&dev->stats.value[ERDMA_STATS_AEQ_NOTIFY], - atomic64_read(&dev->aeq.notify_num)); + atomic64_read(&dev->aeq.notify_num)); - memcpy(&stats->value[0], &dev->stats.value[0], sizeof(u64) * ERDMA_STATS_MAX); + memcpy(&stats->value[0], &dev->stats.value[0], + sizeof(u64) * ERDMA_STATS_MAX); return stats->num_counters; } diff --git a/drivers/infiniband/hw/erdma/erdma_stats.h b/drivers/infiniband/hw/erdma/erdma_stats.h index 2bbfd437bd81f0ac9af222469807260369653c4b..d2fcf25ddb75b9ca0b3e5c5bbc68572bf668450e 100644 --- a/drivers/infiniband/hw/erdma/erdma_stats.h +++ b/drivers/infiniband/hw/erdma/erdma_stats.h @@ -7,10 +7,10 @@ #ifndef __ERDMA_STATS_H__ #define __ERDMA_STATS_H__ -//#include "kcompat.h" #include typedef u8 
port_t; + #define ERDMA_INC_CNT(dev, name) \ atomic64_inc(&dev->stats.value[ERDMA_STATS_##name]) @@ -68,6 +68,19 @@ enum erdma_hw_stats_index { ERDMA_STATS_CMD_REG_USR_MR, ERDMA_STATS_CMD_REG_USR_MR_FAILED, + ERDMA_STATS_TX_REQS_CNT, + ERDMA_STATS_TX_PACKETS_CNT, + ERDMA_STATS_TX_BYTES_CNT, + ERDMA_STATS_TX_DISABLE_DROP_CNT, + ERDMA_STATS_TX_BPS_METER_DROP_CNT, + ERDMA_STATS_TX_PPS_METER_DROP_CNT, + + ERDMA_STATS_RX_PACKETS_CNT, + ERDMA_STATS_RX_BYTES_CNT, + ERDMA_STATS_RX_DISABLE_DROP_CNT, + ERDMA_STATS_RX_BPS_METER_DROP_CNT, + ERDMA_STATS_RX_PPS_METER_DROP_CNT, + ERDMA_STATS_MAX }; diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index 377baf9a6be47548d91cf4d176becdd391bea939..986dbb136048f9e74aa08c6bf7253506d8baea57 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -9,32 +9,21 @@ /* Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. */ -//#include "kcompat.h" - -#include -#include -#include -#include +#include #include #include -#include -#include -#include #include -#include #include -#include #include + #include "erdma.h" #include "erdma_cm.h" -#include "erdma_hw.h" #include "erdma_verbs.h" -#include "erdma_debug.h" - extern bool compat_mode; -static int create_qp_cmd(struct erdma_dev *dev, struct erdma_qp *qp, bool is_user) +static int create_qp_cmd(struct erdma_dev *dev, struct erdma_qp *qp, + bool is_user) { struct erdma_cmdq_create_qp_req req; struct erdma_pd *pd = to_epd(qp->ibqp.pd); @@ -53,7 +42,7 @@ static int create_qp_cmd(struct erdma_dev *dev, struct erdma_qp *qp, bool is_use FIELD_PREP(ERDMA_CMD_CREATE_QP_PD_MASK, pd->pdn); if (!is_user) { - u32 pgsz_range = ilog2(SZ_1M) - PAGE_SHIFT; + u32 pgsz_range = ilog2(SZ_1M) - ERDMA_HW_PAGE_SHIFT; req.sq_cqn_mtt_cfg = FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK, @@ -73,21 +62,19 @@ static int create_qp_cmd(struct erdma_dev *dev, struct erdma_qp *qp, bool is_use req.rq_buf_addr = qp->kern_qp.rq_buf_dma_addr; req.sq_buf_addr = qp->kern_qp.sq_buf_dma_addr; - req.sq_db_info_dma_addr = qp->kern_qp.sq_buf_dma_addr + - (qp->attrs.sq_size << SQEBB_SHIFT); - req.rq_db_info_dma_addr = qp->kern_qp.rq_buf_dma_addr + - (qp->attrs.rq_size << RQE_SHIFT); + req.sq_db_info_dma_addr = qp->kern_qp.sq_db_info_dma_addr; + req.rq_db_info_dma_addr = qp->kern_qp.rq_db_info_dma_addr; } else { user_qp = &qp->user_qp; req.sq_cqn_mtt_cfg = FIELD_PREP( ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK, - ilog2(user_qp->sq_mtt.page_size) - PAGE_SHIFT); + ilog2(user_qp->sq_mtt.page_size) - ERDMA_HW_PAGE_SHIFT); req.sq_cqn_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn); req.rq_cqn_mtt_cfg = FIELD_PREP( ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK, - ilog2(user_qp->rq_mtt.page_size) - PAGE_SHIFT); + ilog2(user_qp->rq_mtt.page_size) - ERDMA_HW_PAGE_SHIFT); req.rq_cqn_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn); @@ -118,12 +105,11 @@ static int create_qp_cmd(struct erdma_dev *dev, struct erdma_qp *qp, bool is_use req.rq_mtt_entry[2] = user_qp->rq_mtt.mtt_entry[3]; } - req.sq_db_info_dma_addr = user_qp->sq_db_info_dma_addr; req.rq_db_info_dma_addr = user_qp->rq_db_info_dma_addr; } - err = erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), &resp0, + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0, &resp1); if (err) { dev_err(&dev->pdev->dev, @@ -178,11 +164,11 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr) } post_cmd: - return erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), 
NULL, - NULL); + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); } -static int create_cq_cmd(struct erdma_dev *dev, struct erdma_cq *cq, bool is_user) +static int create_cq_cmd(struct erdma_dev *dev, struct erdma_cq *cq, + bool is_user) { int err; struct erdma_cmdq_create_cq_req req; @@ -199,7 +185,7 @@ static int create_cq_cmd(struct erdma_dev *dev, struct erdma_cq *cq, bool is_use if (!is_user) { page_size = SZ_32M; req.cfg0 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK, - ilog2(page_size) - PAGE_SHIFT); + ilog2(page_size) - ERDMA_HW_PAGE_SHIFT); req.qbuf_addr_l = lower_32_bits(cq->kern_cq.qbuf_dma_addr); req.qbuf_addr_h = upper_32_bits(cq->kern_cq.qbuf_dma_addr); @@ -212,8 +198,9 @@ static int create_cq_cmd(struct erdma_dev *dev, struct erdma_cq *cq, bool is_use cq->kern_cq.qbuf_dma_addr + (cq->depth << CQE_SHIFT); } else { mtt = &cq->user_cq.qbuf_mtt; - req.cfg0 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK, - ilog2(mtt->page_size) - PAGE_SHIFT); + req.cfg0 |= + FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK, + ilog2(mtt->page_size) - ERDMA_HW_PAGE_SHIFT); if (mtt->mtt_nents == 1) { req.qbuf_addr_l = lower_32_bits(*(u64 *)mtt->mtt_buf); req.qbuf_addr_h = upper_32_bits(*(u64 *)mtt->mtt_buf); @@ -230,8 +217,7 @@ static int create_cq_cmd(struct erdma_dev *dev, struct erdma_cq *cq, bool is_use req.cq_db_info_addr = cq->user_cq.db_info_dma_addr; } - err = erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL, - NULL); + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); if (err) { dev_err(&dev->pdev->dev, "ERROR: err code = %d, cmd of create cq failed.\n", @@ -272,15 +258,14 @@ static inline void erdma_free_idx(struct erdma_resource_cb *res_cb, u32 idx) u32 used; spin_lock_irqsave(&res_cb->lock, flags); - used = test_and_clear_bit(idx, res_cb->bitmap); + used = __test_and_clear_bit(idx, res_cb->bitmap); spin_unlock_irqrestore(&res_cb->lock, flags); WARN_ON(!used); } - static struct rdma_user_mmap_entry * -erdma_user_mmap_entry_insert(struct ib_ucontext *uctx, u64 address, - u32 size, u8 mmap_flag, u64 *mmap_offset) +erdma_user_mmap_entry_insert(struct ib_ucontext *uctx, u64 address, u32 size, + u8 mmap_flag, u64 *mmap_offset) { struct erdma_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL); @@ -294,8 +279,7 @@ erdma_user_mmap_entry_insert(struct ib_ucontext *uctx, u64 address, size = PAGE_ALIGN(size); - ret = rdma_user_mmap_entry_insert(uctx, &entry->rdma_entry, - size); + ret = rdma_user_mmap_entry_insert(uctx, &entry->rdma_entry, size); if (ret) { kfree(entry); return NULL; @@ -322,7 +306,8 @@ int erdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, attr->max_qp_rd_atom = dev->attrs.max_ord; attr->max_qp_init_rd_atom = dev->attrs.max_ird; attr->max_res_rd_atom = dev->attrs.max_qp * dev->attrs.max_ird; - attr->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS; + attr->device_cap_flags = + IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS; ibdev->local_dma_lkey = dev->attrs.local_dma_key; attr->max_send_sge = dev->attrs.max_send_sge; attr->max_recv_sge = dev->attrs.max_recv_sge; @@ -334,6 +319,12 @@ int erdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, attr->max_mw = dev->attrs.max_mw; attr->max_fast_reg_page_list_len = ERDMA_MAX_FRMR_PA; attr->page_size_cap = ERDMA_PAGE_SIZE_SUPPORT; + + if (dev->attrs.flags & ERDMA_DEV_CAP_FLAGS_ATOMIC) { + attr->atomic_cap = IB_ATOMIC_GLOB; + attr->masked_atomic_cap = IB_ATOMIC_GLOB; + } + attr->fw_ver = 
((u64)(dev->attrs.fw_version >> 16) << 32) | (((dev->attrs.fw_version >> 8) & 0xFF) << 16) | ((dev->attrs.fw_version & 0xFF)); @@ -375,6 +366,7 @@ int erdma_query_port(struct ib_device *ibdev, port_t port, attr->gid_tbl_len = 16; else attr->gid_tbl_len = 1; + attr->pkey_tbl_len = 1; attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP; attr->max_msg_sz = -1; if (dev->state == IB_PORT_ACTIVE) @@ -390,19 +382,18 @@ int erdma_get_port_immutable(struct ib_device *ibdev, port_t port, { if (compat_mode) { port_immutable->gid_tbl_len = 16; - port_immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; + port_immutable->core_cap_flags = + RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; port_immutable->max_mad_size = IB_MGMT_MAD_SIZE; } else { port_immutable->gid_tbl_len = 1; port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; - } return 0; } -int erdma_query_pkey(struct ib_device *ibdev, port_t port, u16 index, - u16 *pkey) +int erdma_query_pkey(struct ib_device *ibdev, port_t port, u16 index, u16 *pkey) { if (index > 0) return -EINVAL; @@ -447,6 +438,21 @@ int erdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) return 0; } +static void erdma_flush_worker(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct erdma_qp *qp = + container_of(dwork, struct erdma_qp, reflush_dwork); + struct erdma_cmdq_reflush_req req; + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, + CMDQ_OPCODE_REFLUSH); + req.qpn = QP_ID(qp); + req.sq_pi = qp->kern_qp.sq_pi; + req.rq_pi = qp->kern_qp.rq_pi; + erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL, NULL); +} + static int erdma_qp_validate_cap(struct erdma_dev *dev, struct ib_qp_init_attr *attrs) { @@ -485,16 +491,24 @@ static void free_kernel_qp(struct erdma_qp *qp) vfree(qp->kern_qp.rwr_tbl); if (qp->kern_qp.sq_buf) - dma_free_coherent( - &dev->pdev->dev, - WARPPED_BUFSIZE(qp->attrs.sq_size << SQEBB_SHIFT), - qp->kern_qp.sq_buf, qp->kern_qp.sq_buf_dma_addr); + dma_free_coherent(&dev->pdev->dev, + qp->attrs.sq_size << SQEBB_SHIFT, + qp->kern_qp.sq_buf, + qp->kern_qp.sq_buf_dma_addr); if (qp->kern_qp.rq_buf) - dma_free_coherent( - &dev->pdev->dev, - WARPPED_BUFSIZE(qp->attrs.rq_size << RQE_SHIFT), - qp->kern_qp.rq_buf, qp->kern_qp.rq_buf_dma_addr); + dma_free_coherent(&dev->pdev->dev, + qp->attrs.rq_size << RQE_SHIFT, + qp->kern_qp.rq_buf, + qp->kern_qp.rq_buf_dma_addr); + + if (qp->kern_qp.sq_db_info) + dma_pool_free(dev->db_pool, qp->kern_qp.sq_db_info, + qp->kern_qp.sq_db_info_dma_addr); + + if (qp->kern_qp.rq_db_info) + dma_pool_free(dev->db_pool, qp->kern_qp.rq_db_info, + qp->kern_qp.rq_db_info_dma_addr); } static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp, @@ -502,7 +516,6 @@ static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp, { struct erdma_kqp *kqp = &qp->kern_qp; int ret = -ENOMEM; - int size; if (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) kqp->sig_all = 1; @@ -511,8 +524,8 @@ static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp, kqp->sq_ci = 0; kqp->rq_pi = 0; kqp->rq_ci = 0; - kqp->hw_sq_db = - dev->func_bar + (ERDMA_SDB_SHARED_PAGE_INDEX << PAGE_SHIFT); + kqp->hw_sq_db = dev->func_bar + + (ERDMA_SDB_SHARED_PAGE_INDEX << ERDMA_HW_PAGE_SHIFT); kqp->hw_rq_db = dev->func_bar + ERDMA_BAR_RQDB_SPACE_OFFSET; kqp->swr_tbl = vmalloc(qp->attrs.sq_size * sizeof(u64)); @@ -520,30 +533,40 @@ static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp, if (!kqp->swr_tbl || !kqp->rwr_tbl) goto err_out; - size = (qp->attrs.sq_size << SQEBB_SHIFT) + 
ERDMA_EXTRA_BUFFER_SIZE; - kqp->sq_buf = dma_alloc_coherent(&dev->pdev->dev, size, + kqp->sq_buf = dma_alloc_coherent(&dev->pdev->dev, + qp->attrs.sq_size << SQEBB_SHIFT, &kqp->sq_buf_dma_addr, GFP_KERNEL); if (!kqp->sq_buf) goto err_out; - size = (qp->attrs.rq_size << RQE_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE; - kqp->rq_buf = dma_alloc_coherent(&dev->pdev->dev, size, + kqp->rq_buf = dma_alloc_coherent(&dev->pdev->dev, + qp->attrs.rq_size << RQE_SHIFT, &kqp->rq_buf_dma_addr, GFP_KERNEL); if (!kqp->rq_buf) goto err_out; - kqp->sq_db_info = kqp->sq_buf + (qp->attrs.sq_size << SQEBB_SHIFT); - kqp->rq_db_info = kqp->rq_buf + (qp->attrs.rq_size << RQE_SHIFT); + kqp->sq_db_info = dma_pool_alloc(dev->db_pool, GFP_KERNEL, + &kqp->sq_db_info_dma_addr); + if (!kqp->sq_db_info) + goto err_out; + + kqp->rq_db_info = dma_pool_alloc(dev->db_pool, GFP_KERNEL, + &kqp->rq_db_info_dma_addr); + if (!kqp->rq_db_info) + goto err_out; if (attrs->create_flags & IB_QP_CREATE_IWARP_WITHOUT_CM) { - struct iw_ext_conn_param *param = (struct iw_ext_conn_param *)(attrs->qp_context); + struct iw_ext_conn_param *param = + (struct iw_ext_conn_param *)(attrs->qp_context); if (param == NULL) { ret = -EINVAL; goto err_out; } if (param->sk_addr.family != PF_INET) { - ibdev_err(&dev->ibdev, "IPv4 address is required for connection without CM.\n"); + ibdev_err_ratelimited( + &dev->ibdev, + "IPv4 address is required for connection without CM.\n"); ret = -EINVAL; goto err_out; } @@ -563,11 +586,10 @@ static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp, return ret; } -static int -get_mtt_entries(struct ib_udata *udata, struct erdma_ucontext *ctx, struct erdma_mem *mem, - u64 start, u64 len, int access, u64 virt, - unsigned long req_page_size, u8 force_indirect_mtt, - bool is_mr) +static int get_mtt_entries(struct ib_udata *udata, struct erdma_ucontext *ctx, + struct erdma_mem *mem, u64 start, u64 len, + int access, u64 virt, unsigned long req_page_size, + u8 force_indirect_mtt, bool is_mr) { struct erdma_dev *dev = to_edev(ctx->ibucontext.device); struct ib_block_iter biter; @@ -584,7 +606,6 @@ get_mtt_entries(struct ib_udata *udata, struct erdma_ucontext *ctx, struct erdma mem->va = virt; mem->len = len; mem->page_size = ib_umem_find_best_pgsz(mem->umem, req_page_size, virt); - mem->page_offset = start & (mem->page_size - 1); mem->mtt_nents = ib_umem_num_dma_blocks(mem->umem, mem->page_size); mem->page_cnt = mem->mtt_nents; @@ -603,6 +624,7 @@ get_mtt_entries(struct ib_udata *udata, struct erdma_ucontext *ctx, struct erdma mem->mtt_type = ERDMA_MR_INLINE_MTT; phy_addr = mem->mtt_entry; } + rdma_umem_for_each_dma_block(mem->umem, &biter, mem->page_size) { *phy_addr = rdma_block_iter_dma_address(&biter); phy_addr++; @@ -644,7 +666,8 @@ static void put_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem) } } -static int erdma_map_user_dbrecords(struct ib_udata *udata, struct erdma_ucontext *uctx, +static int erdma_map_user_dbrecords(struct ib_udata *udata, + struct erdma_ucontext *uctx, u64 dbrecords_va, struct erdma_user_dbrecords_page **dbr_page, dma_addr_t *dma_addr) @@ -706,15 +729,15 @@ erdma_unmap_user_dbrecords(struct erdma_ucontext *ctx, mutex_unlock(&ctx->dbrecords_page_mutex); } -static int init_user_qp(struct erdma_qp *qp, struct ib_udata *udata, struct erdma_ucontext *uctx, - u64 va, u32 len, u64 db_info_va) +static int init_user_qp(struct erdma_qp *qp, struct ib_udata *udata, + struct erdma_ucontext *uctx, u64 va, u32 len, + u64 db_info_va) { - dma_addr_t db_info_dma_addr; u32 rq_offset; int ret; - if 
(len < (PAGE_ALIGN(qp->attrs.sq_size * SQEBB_SIZE) + + if (len < (ALIGN(qp->attrs.sq_size * SQEBB_SIZE, ERDMA_HW_PAGE_SIZE) + qp->attrs.rq_size * RQE_SIZE)) return -EINVAL; @@ -724,7 +747,7 @@ static int init_user_qp(struct erdma_qp *qp, struct ib_udata *udata, struct erdm if (ret) return ret; - rq_offset = PAGE_ALIGN(qp->attrs.sq_size << SQEBB_SHIFT); + rq_offset = ALIGN(qp->attrs.sq_size << SQEBB_SHIFT, ERDMA_HW_PAGE_SIZE); qp->user_qp.rq_offset = rq_offset; ret = get_mtt_entries(udata, uctx, &qp->user_qp.rq_mtt, va + rq_offset, @@ -763,11 +786,11 @@ static void free_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx) int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, struct ib_udata *udata) { - struct erdma_qp *qp = to_eqp(ibqp); struct erdma_dev *dev = to_edev(ibqp->device); - struct erdma_ucontext *uctx; - struct erdma_ureq_create_qp ureq; struct erdma_uresp_create_qp uresp; + struct erdma_qp *qp = to_eqp(ibqp); + struct erdma_ureq_create_qp ureq; + struct erdma_ucontext *uctx; int ret; uctx = rdma_udata_to_drv_context(udata, struct erdma_ucontext, @@ -831,6 +854,8 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, goto err_out_xa; } + INIT_DELAYED_WORK(&qp->reflush_dwork, erdma_flush_worker); + qp->attrs.max_send_sge = attrs->cap.max_send_sge; qp->attrs.max_recv_sge = attrs->cap.max_recv_sge; qp->attrs.state = ERDMA_QP_STATE_IDLE; @@ -880,7 +905,7 @@ struct ib_qp *erdma_kzalloc_qp(struct ib_pd *ibpd, if (ret) goto err_free; - /* clear the field, otherwise core code will have problems. */ + /* clear the field, otherwise core code will have problems. */ qp->ibqp.res.task = NULL; return &qp->ibqp; err_free: @@ -1135,8 +1160,7 @@ int erdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) req.cfg = FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, ibmr->lkey >> 8) | FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, ibmr->lkey & 0xFF); - ret = erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL, - NULL); + ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); if (ret) { ERDMA_INC_CNT(dev, CMD_DEREG_MR_FAILED); dev_err(&dev->pdev->dev, @@ -1163,12 +1187,13 @@ int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) ERDMA_INC_CNT(dev, CMD_DESTROY_CQ); + hrtimer_cancel(&cq->dim.timer); + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_DESTROY_CQ); req.cqn = cq->cqn; - err = erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL, - NULL); + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); if (err) { dev_err(&dev->pdev->dev, "ERROR: err code = %d, cmd of destroy cq failed.\n", @@ -1189,12 +1214,13 @@ int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) return 0; } -static void erdma_ib_lock_cqs(struct erdma_cq *send_cq, struct erdma_cq *recv_cq) +static void erdma_ib_lock_cqs(struct erdma_cq *send_cq, + struct erdma_cq *recv_cq) __acquires(&send_cq->kern_cq.lock) __acquires(&recv_cq->kern_cq.lock) { if (send_cq) { if (recv_cq) { - if (send_cq->cqn < recv_cq->cqn) { + if (send_cq->cqn < recv_cq->cqn) { spin_lock(&send_cq->kern_cq.lock); spin_lock_nested(&recv_cq->kern_cq.lock, SINGLE_DEPTH_NESTING); @@ -1219,12 +1245,13 @@ static void erdma_ib_lock_cqs(struct erdma_cq *send_cq, struct erdma_cq *recv_cq } } -static void erdma_ib_unlock_cqs(struct erdma_cq *send_cq, struct erdma_cq *recv_cq) +static void erdma_ib_unlock_cqs(struct erdma_cq *send_cq, + struct erdma_cq *recv_cq) __releases(&send_cq->kern_cq.lock) __releases(&recv_cq->kern_cq.lock) { if (send_cq) { if (recv_cq) { - if 
(send_cq->cqn < recv_cq->cqn) { + if (send_cq->cqn < recv_cq->cqn) { spin_unlock(&recv_cq->kern_cq.lock); spin_unlock(&send_cq->kern_cq.lock); } else if (send_cq->cqn == recv_cq->cqn) { @@ -1273,12 +1300,13 @@ int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE); up_write(&qp->state_lock); + cancel_delayed_work_sync(&qp->reflush_dwork); + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_DESTROY_QP); req.qpn = QP_ID(qp); - err = erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL, - NULL); + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); if (err) { dev_err(&dev->pdev->dev, "ERROR: err code = %d, cmd of destroy qp failed.\n", @@ -1291,16 +1319,7 @@ int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) wait_for_completion(&qp->safe_free); if (rdma_is_kernel_res(&qp->ibqp.res)) { - vfree(qp->kern_qp.swr_tbl); - vfree(qp->kern_qp.rwr_tbl); - dma_free_coherent( - &dev->pdev->dev, - WARPPED_BUFSIZE(qp->attrs.rq_size << RQE_SHIFT), - qp->kern_qp.rq_buf, qp->kern_qp.rq_buf_dma_addr); - dma_free_coherent( - &dev->pdev->dev, - WARPPED_BUFSIZE(qp->attrs.sq_size << SQEBB_SHIFT), - qp->kern_qp.sq_buf, qp->kern_qp.sq_buf_dma_addr); + free_kernel_qp(qp); } else { put_mtt_entries(dev, &qp->user_qp.sq_mtt); put_mtt_entries(dev, &qp->user_qp.rq_mtt); @@ -1368,8 +1387,8 @@ void erdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry) static void alloc_db_resources(struct erdma_dev *dev, struct erdma_ucontext *ctx) { - u32 bitmap_idx; struct erdma_devattr *attrs = &dev->attrs; + u32 bitmap_idx, hw_page_idx; if (attrs->disable_dwqe) goto alloc_normal_db; @@ -1382,11 +1401,9 @@ static void alloc_db_resources(struct erdma_dev *dev, spin_unlock(&dev->db_bitmap_lock); ctx->sdb_type = ERDMA_SDB_PAGE; - ctx->sdb_idx = bitmap_idx; - ctx->sdb_page_idx = bitmap_idx; + ctx->sdb_bitmap_idx = bitmap_idx; ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET + - (bitmap_idx << PAGE_SHIFT); - ctx->sdb_page_off = 0; + (bitmap_idx << ERDMA_HW_PAGE_SHIFT); return; } @@ -1397,13 +1414,12 @@ static void alloc_db_resources(struct erdma_dev *dev, spin_unlock(&dev->db_bitmap_lock); ctx->sdb_type = ERDMA_SDB_ENTRY; - ctx->sdb_idx = bitmap_idx; - ctx->sdb_page_idx = attrs->dwqe_pages + - bitmap_idx / ERDMA_DWQE_TYPE1_CNT_PER_PAGE; - ctx->sdb_page_off = bitmap_idx % ERDMA_DWQE_TYPE1_CNT_PER_PAGE; - + ctx->sdb_bitmap_idx = bitmap_idx; + hw_page_idx = attrs->dwqe_pages + + bitmap_idx / ERDMA_DWQE_TYPE1_CNT_PER_PAGE; + ctx->sdb_entid = bitmap_idx % ERDMA_DWQE_TYPE1_CNT_PER_PAGE; ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET + - (ctx->sdb_page_idx << PAGE_SHIFT); + (hw_page_idx << ERDMA_HW_PAGE_SHIFT); return; } @@ -1412,11 +1428,8 @@ static void alloc_db_resources(struct erdma_dev *dev, alloc_normal_db: ctx->sdb_type = ERDMA_SDB_SHARED; - ctx->sdb_idx = 0; - ctx->sdb_page_idx = ERDMA_SDB_SHARED_PAGE_INDEX; - ctx->sdb_page_off = 0; - - ctx->sdb = dev->func_bar_addr + (ctx->sdb_page_idx << PAGE_SHIFT); + ctx->sdb = dev->func_bar_addr + + (ERDMA_SDB_SHARED_PAGE_INDEX << ERDMA_HW_PAGE_SHIFT); } static void erdma_uctx_user_mmap_entries_remove(struct erdma_ucontext *uctx) @@ -1448,11 +1461,6 @@ int erdma_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *udata) ctx->rdb = dev->func_bar_addr + ERDMA_BAR_RQDB_SPACE_OFFSET; ctx->cdb = dev->func_bar_addr + ERDMA_BAR_CQDB_SPACE_OFFSET; - if (udata->outlen < sizeof(uresp)) { - ret = -EINVAL; - goto err_out; - } - ctx->sq_db_mmap_entry = 
erdma_user_mmap_entry_insert( ibctx, (u64)ctx->sdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.sdb); if (!ctx->sq_db_mmap_entry) { @@ -1476,9 +1484,13 @@ int erdma_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *udata) uresp.dev_id = dev->pdev->device; uresp.sdb_type = ctx->sdb_type; - uresp.sdb_offset = ctx->sdb_page_off; + uresp.sdb_entid = ctx->sdb_entid; + uresp.sdb_off = ctx->sdb & ~PAGE_MASK; + uresp.rdb_off = ctx->rdb & ~PAGE_MASK; + uresp.cdb_off = ctx->cdb & ~PAGE_MASK; - ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + ret = ib_copy_to_udata(udata, &uresp, + min(sizeof(uresp), udata->outlen)); if (ret) goto err_out; @@ -1503,9 +1515,9 @@ void erdma_dealloc_ucontext(struct ib_ucontext *ibctx) spin_lock(&dev->db_bitmap_lock); if (ctx->sdb_type == ERDMA_SDB_PAGE) - clear_bit(ctx->sdb_idx, dev->sdb_page); + clear_bit(ctx->sdb_bitmap_idx, dev->sdb_page); else if (ctx->sdb_type == ERDMA_SDB_ENTRY) - clear_bit(ctx->sdb_idx, dev->sdb_entry); + clear_bit(ctx->sdb_bitmap_idx, dev->sdb_entry); erdma_uctx_user_mmap_entries_remove(ctx); spin_unlock(&dev->db_bitmap_lock); @@ -1533,9 +1545,10 @@ int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct rdma_ah_attr *ah_attr; const struct ib_gid_attr *sgid_attr; + if (compat_mode) { - dprint(DBG_QP, "attr mask: %x, av: %d, state:%d\n", - attr_mask, attr_mask & IB_QP_AV, attr_mask & IB_QP_STATE); + dprint(DBG_QP, "attr mask: %x, av: %d, state:%d\n", attr_mask, + attr_mask & IB_QP_AV, attr_mask & IB_QP_STATE); if (attr_mask & IB_QP_AV) { ah_attr = &attr->ah_attr; @@ -1543,18 +1556,24 @@ int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, dprint(DBG_QP, "attr_type:%d\n", ah_attr->type); dprint(DBG_QP, "gid type:%u, sgid: %pI6\n", - rdma_gid_attr_network_type(sgid_attr), sgid_attr->gid.raw); - dprint(DBG_QP, "dgid: %pI6\n", rdma_ah_read_grh(ah_attr)->dgid.raw); + rdma_gid_attr_network_type(sgid_attr), + sgid_attr->gid.raw); + dprint(DBG_QP, "dgid: %pI6\n", + rdma_ah_read_grh(ah_attr)->dgid.raw); - rdma_gid2ip((struct sockaddr *)&qp->attrs.laddr, &sgid_attr->gid); + rdma_gid2ip((struct sockaddr *)&qp->attrs.laddr, + &sgid_attr->gid); rdma_gid2ip((struct sockaddr *)&qp->attrs.raddr, - &rdma_ah_read_grh(ah_attr)->dgid); - dprint(DBG_QP, "laddr:0x%x\n", ntohl(qp->attrs.laddr.in.sin_addr.s_addr)); - dprint(DBG_QP, "raddr:0x%x\n", ntohl(qp->attrs.raddr.in.sin_addr.s_addr)); + &rdma_ah_read_grh(ah_attr)->dgid); + dprint(DBG_QP, "laddr:0x%x\n", + ntohl(qp->attrs.laddr.in.sin_addr.s_addr)); + dprint(DBG_QP, "raddr:0x%x\n", + ntohl(qp->attrs.raddr.in.sin_addr.s_addr)); } if (attr_mask & IB_QP_DEST_QPN) { - dprint(DBG_QP, "get remote qpn %u\n", attr->dest_qp_num); + dprint(DBG_QP, "get remote qpn %u\n", + attr->dest_qp_num); qp->attrs.remote_qp_num = attr->dest_qp_num; } @@ -1645,15 +1664,16 @@ int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, return 0; } -static int erdma_init_user_cq(struct ib_udata *udata, struct erdma_ucontext *uctx, - struct erdma_cq *cq, struct erdma_ureq_create_cq *ureq) +static int erdma_init_user_cq(struct ib_udata *udata, + struct erdma_ucontext *uctx, struct erdma_cq *cq, + struct erdma_ureq_create_cq *ureq) { struct erdma_dev *dev = to_edev(cq->ibcq.device); int ret; ret = get_mtt_entries(udata, uctx, &cq->user_cq.qbuf_mtt, ureq->qbuf_va, - ureq->qbuf_len, 0, ureq->qbuf_va, - SZ_64M - SZ_4K, 1, false); + ureq->qbuf_len, 0, ureq->qbuf_va, SZ_64M - SZ_4K, + 1, false); if (ret) return ret; @@ -1749,6 +1769,9 @@ int erdma_create_cq(struct ib_cq *ibcq, 
const struct ib_cq_init_attr *attr, if (ret) goto err_free_res; + hrtimer_init(&cq->dim.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + cq->dim.timer.function = cq_timer_fn; + return 0; err_free_res: @@ -1767,7 +1790,7 @@ int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, return ret; } -struct net_device *erdma_get_netdev(struct ib_device *device, u8 port_num) +struct net_device *erdma_get_netdev(struct ib_device *device, port_t port_num) { struct erdma_dev *edev = to_edev(device); @@ -1781,6 +1804,17 @@ void erdma_disassociate_ucontext(struct ib_ucontext *ibcontext) { } +void erdma_set_mtu(struct erdma_dev *dev, u32 mtu) +{ + struct erdma_cmdq_config_mtu_req req; + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON, + CMDQ_OPCODE_CONF_MTU); + req.mtu = mtu; + + erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); +} + void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason) { struct ib_event event; @@ -1796,3 +1830,41 @@ void erdma_destroy_ah(struct ib_ah *ibah, u32 flags) { return; } + +int erdma_query_hw_stats(struct erdma_dev *dev) +{ + struct erdma_cmdq_query_stats_resp *stats; + struct erdma_cmdq_query_req req; + dma_addr_t dma_addr; + int err; + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON, + CMDQ_OPCODE_GET_STATS); + + stats = dma_pool_alloc(dev->resp_pool, GFP_KERNEL, &dma_addr); + if (!stats) + return -ENOMEM; + + req.target_addr = dma_addr; + req.target_length = ERDMA_HW_RESP_SIZE; + /* Clear the magic field. */ + stats->hdr.magic = 0; + + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + if (err) + goto out; + + if (stats->hdr.magic != 0x5566) { + err = -EINVAL; + goto out; + } + + memcpy(&dev->stats.value[ERDMA_STATS_TX_REQS_CNT], &stats->tx_req_cnt, + sizeof(__u64) * (ERDMA_STATS_RX_PPS_METER_DROP_CNT - + ERDMA_STATS_TX_REQS_CNT + 1)); + +out: + dma_pool_free(dev->resp_pool, stats, dma_addr); + + return err; +} diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h index ade781ca6eac1ff4c2dc739efbe8060c1958ad72..87a8c652a42256d2158c9dec6c9046871f1c74c7 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.h +++ b/drivers/infiniband/hw/erdma/erdma_verbs.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* Authors: Cheng Xu */ /* Kai Shen */ @@ -7,20 +7,11 @@ #ifndef __ERDMA_VERBS_H__ #define __ERDMA_VERBS_H__ -#include - -#include -#include -#include - -//#include "kcompat.h" #include "erdma.h" -#include "erdma_cm.h" -#include "erdma_hw.h" /* RDMA Capability. */ #define ERDMA_MAX_PD (128 * 1024) -#define ERDMA_MAX_SEND_WR 4096 +#define ERDMA_MAX_SEND_WR 8192 #define ERDMA_MAX_ORD 128 #define ERDMA_MAX_IRD 128 #define ERDMA_MAX_SGE_RD 1 @@ -44,9 +35,8 @@ struct erdma_ucontext { struct ib_ucontext ibucontext; u32 sdb_type; - u32 sdb_idx; - u32 sdb_page_idx; - u32 sdb_page_off; + u32 sdb_bitmap_idx; + u32 sdb_entid; u64 sdb; u64 rdb; u64 cdb; @@ -80,16 +70,18 @@ struct erdma_pd { #define ERDMA_MR_INLINE_MTT 0 #define ERDMA_MR_INDIRECT_MTT 1 -#define ERDMA_MR_ACC_LR BIT(0) -#define ERDMA_MR_ACC_LW BIT(1) -#define ERDMA_MR_ACC_RR BIT(2) -#define ERDMA_MR_ACC_RW BIT(3) +#define ERDMA_MR_ACC_RA BIT(0) +#define ERDMA_MR_ACC_LR BIT(1) +#define ERDMA_MR_ACC_LW BIT(2) +#define ERDMA_MR_ACC_RR BIT(3) +#define ERDMA_MR_ACC_RW BIT(4) static inline u8 to_erdma_access_flags(int access) { return (access & IB_ACCESS_REMOTE_READ ? ERDMA_MR_ACC_RR : 0) | (access & IB_ACCESS_LOCAL_WRITE ?
ERDMA_MR_ACC_LW : 0) | - (access & IB_ACCESS_REMOTE_WRITE ? ERDMA_MR_ACC_RW : 0); + (access & IB_ACCESS_REMOTE_WRITE ? ERDMA_MR_ACC_RW : 0) | + (access & IB_ACCESS_REMOTE_ATOMIC ? ERDMA_MR_ACC_RA : 0); } struct erdma_mem { @@ -139,7 +131,6 @@ struct erdma_kqp { u16 sq_pi; u16 sq_ci; u64 *swr_tbl; - bool sq_shutdown; void *hw_sq_db; void *sq_buf; dma_addr_t sq_buf_dma_addr; @@ -149,12 +140,14 @@ struct erdma_kqp { u16 rq_pi; u16 rq_ci; u64 *rwr_tbl; - bool rq_shutdown; void *hw_rq_db; void *rq_buf; dma_addr_t rq_buf_dma_addr; void *rq_db_info; + dma_addr_t sq_db_info_dma_addr; + dma_addr_t rq_db_info_dma_addr; + u8 sig_all; }; @@ -170,7 +163,8 @@ enum erdma_qp_state { }; enum erdma_qp_flags { - ERDMA_QP_IN_DESTROY = (1 << 0) + ERDMA_QP_IN_DESTROY = (1 << 0), + ERDMA_QP_IN_FLUSHING = (1 << 1), }; enum erdma_qp_attr_mask { @@ -194,6 +188,8 @@ struct erdma_qp_attrs { u32 max_recv_sge; u32 cookie; u32 flags; + + u32 remote_cookie; #define ERDMA_QP_ACTIVE 0 #define ERDMA_QP_PASSIVE 1 u8 qp_type; @@ -229,6 +225,9 @@ struct erdma_qp { struct erdma_cq *rcq; struct erdma_qp_attrs attrs; + unsigned long flags; + struct delayed_work reflush_dwork; + }; struct erdma_kcq_info { @@ -249,6 +248,12 @@ struct erdma_ucq_info { dma_addr_t db_info_dma_addr; }; +struct erdma_dim { + enum ib_cq_notify_flags flags; + struct hrtimer timer; + u16 timeout; +}; + struct erdma_cq { struct ib_cq ibcq; u32 cqn; @@ -260,6 +265,7 @@ struct erdma_cq { struct erdma_kcq_info kern_cq; struct erdma_ucq_info user_cq; }; + struct erdma_dim dim; }; #define QP_ID(qp) ((qp)->ibqp.qp_num) @@ -312,6 +318,8 @@ to_emmap(struct rdma_user_mmap_entry *ibmmap) return container_of(ibmmap, struct erdma_user_mmap_entry, rdma_entry); } +enum hrtimer_restart cq_timer_fn(struct hrtimer *t); + int erdma_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *data); void erdma_dealloc_ucontext(struct ib_ucontext *ibctx); int erdma_query_device(struct ib_device *dev, struct ib_device_attr *attr, @@ -320,6 +328,7 @@ int erdma_get_port_immutable(struct ib_device *dev, port_t port, struct ib_port_immutable *ib_port_immutable); int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *data); + int erdma_query_port(struct ib_device *dev, port_t port, struct ib_port_attr *attr); int erdma_query_gid(struct ib_device *dev, port_t port, int idx, @@ -327,7 +336,7 @@ int erdma_query_gid(struct ib_device *dev, port_t port, int idx, int erdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *data); int erdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); struct ib_qp *erdma_kzalloc_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *attr, - struct ib_udata *data); + struct ib_udata *data); int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mask, struct ib_qp_init_attr *init_attr); int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mask, @@ -355,15 +364,17 @@ int erdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); void erdma_disassociate_ucontext(struct ib_ucontext *ibcontext); void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason); +void erdma_set_mtu(struct erdma_dev *dev, u32 mtu); -struct net_device *erdma_get_netdev(struct ib_device *device, u8 port_num); +struct net_device *erdma_get_netdev(struct ib_device *device, port_t port_num); enum rdma_link_layer erdma_get_link_layer(struct ib_device *dev, port_t port_num); int erdma_query_pkey(struct ib_device *ibdev, port_t port, u16 index, - u16 *pkey); -void 
erdma_drain_rq(struct ib_qp *ibqp); -void erdma_drain_sq(struct ib_qp *ibqp); + u16 *pkey); void erdma_destroy_ah(struct ib_ah *ibah, u32 flags); +int erdma_modify_cq(struct ib_cq *ibcq, u16 cq_count, u16 cq_period); + +int erdma_query_hw_stats(struct erdma_dev *dev); #endif diff --git a/include/uapi/rdma/erdma-abi.h b/include/uapi/rdma/erdma-abi.h index cd409b9cfca897ab4f231ed1e9a1275c1ce09202..455046415983589d36c3cdf6401fc2c70742f26a 100644 --- a/include/uapi/rdma/erdma-abi.h +++ b/include/uapi/rdma/erdma-abi.h @@ -11,8 +11,8 @@ #define ERDMA_ABI_VERSION 1 struct erdma_ureq_create_cq { - __u64 db_record_va; - __u64 qbuf_va; + __aligned_u64 db_record_va; + __aligned_u64 qbuf_va; __u32 qbuf_len; __u32 rsvd0; }; @@ -23,8 +23,8 @@ struct erdma_uresp_create_cq { }; struct erdma_ureq_create_qp { - __u64 db_record_va; - __u64 qbuf_va; + __aligned_u64 db_record_va; + __aligned_u64 qbuf_va; __u32 qbuf_len; __u32 rsvd0; }; @@ -40,10 +40,13 @@ struct erdma_uresp_alloc_ctx { __u32 dev_id; __u32 pad; __u32 sdb_type; - __u32 sdb_offset; - __u64 sdb; - __u64 rdb; - __u64 cdb; + __u32 sdb_entid; + __aligned_u64 sdb; + __aligned_u64 rdb; + __aligned_u64 cdb; + __u32 sdb_off; + __u32 rdb_off; + __u32 cdb_off; }; #endif