From 1c8e95df22f274142ce4cf833c303b54ae573145 Mon Sep 17 00:00:00 2001
From: Kai Shen
Date: Mon, 29 May 2023 09:39:19 +0000
Subject: [PATCH] anolis: RDMA/erdma: Backport erdma driver update

ANBZ: #293

Backport erdma driver update since there are many updates.

Signed-off-by: Kai Shen
Reviewed-by: Tony Lu <tonylu@linux.alibaba.com>
Reviewed-by: Cheng You
Link: https://gitee.com/anolis/cloud-kernel/pulls/1693
---
 drivers/infiniband/hw/erdma/Kconfig        |   12 +-
 drivers/infiniband/hw/erdma/Makefile       |    2 +-
 drivers/infiniband/hw/erdma/erdma.h        |   16 +-
 drivers/infiniband/hw/erdma/erdma_cm.c     |  261 +++--
 drivers/infiniband/hw/erdma/erdma_cm.h     |   13 +-
 drivers/infiniband/hw/erdma/erdma_cmdq.c   |   17 +-
 drivers/infiniband/hw/erdma/erdma_compat.c |  206 ++++
 drivers/infiniband/hw/erdma/erdma_compat.h |   33 +
 drivers/infiniband/hw/erdma/erdma_cq.c     |    8 +-
 drivers/infiniband/hw/erdma/erdma_eq.c     |   18 +-
 drivers/infiniband/hw/erdma/erdma_hw.h     |   80 +-
 drivers/infiniband/hw/erdma/erdma_ioctl.c  |   84 +-
 drivers/infiniband/hw/erdma/erdma_ioctl.h  |    3 +-
 drivers/infiniband/hw/erdma/erdma_main.c   |  139 ++-
 drivers/infiniband/hw/erdma/erdma_qp.c     |  142 ++-
 drivers/infiniband/hw/erdma/erdma_stats.c  |    1 +
 drivers/infiniband/hw/erdma/erdma_stats.h  |    3 +-
 drivers/infiniband/hw/erdma/erdma_verbs.c  | 1031 +++++++++++++-------
 drivers/infiniband/hw/erdma/erdma_verbs.h  |   74 +-
 drivers/infiniband/hw/erdma/kcompat.h      |   39 +
 20 files changed, 1479 insertions(+), 703 deletions(-)
 create mode 100644 drivers/infiniband/hw/erdma/erdma_compat.c
 create mode 100644 drivers/infiniband/hw/erdma/erdma_compat.h
 create mode 100644 drivers/infiniband/hw/erdma/kcompat.h

diff --git a/drivers/infiniband/hw/erdma/Kconfig b/drivers/infiniband/hw/erdma/Kconfig
index 719dfb14e96e..f035d2d9ab31 100644
--- a/drivers/infiniband/hw/erdma/Kconfig
+++ b/drivers/infiniband/hw/erdma/Kconfig
@@ -1,10 +1,12 @@
 # SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
 config INFINIBAND_ERDMA
-        tristate "Elastic ERDMA support"
-        depends on PCI_MSI && 64BIT && !CPU_BIG_ENDIAN && X86_64
+        tristate "Alibaba Elastic RDMA Adapter (ERDMA) support"
+        depends on PCI_MSI && 64BIT
+        depends on INFINIBAND_ADDR_TRANS
         depends on INFINIBAND_USER_ACCESS
         help
-          This driver supports Alibaba Elastic RDMA Adapter (ERDMA).
+          This is a RDMA/iWarp driver for Alibaba Elastic RDMA Adapter(ERDMA),
+          which supports RDMA features in Alibaba cloud environment.
 
-          To compile this driver as a module, choose M here.
-          The module will be called erdma.
+          To compile this driver as module, choose M here. The module will be
+          called erdma.
diff --git a/drivers/infiniband/hw/erdma/Makefile b/drivers/infiniband/hw/erdma/Makefile index 5bd9304a5ec7..b272645f1321 100644 --- a/drivers/infiniband/hw/erdma/Makefile +++ b/drivers/infiniband/hw/erdma/Makefile @@ -2,5 +2,5 @@ obj-$(CONFIG_INFINIBAND_ERDMA) += erdma.o erdma-y :=\ - erdma_cm.o erdma_cq.o erdma_main.o erdma_qp.o erdma_verbs.o \ + erdma_cm.o erdma_cq.o erdma_main.o erdma_qp.o erdma_verbs.o erdma_compat.o\ erdma_cmdq.o erdma_eq.o erdma_ioctl.o erdma_stats.o diff --git a/drivers/infiniband/hw/erdma/erdma.h b/drivers/infiniband/hw/erdma/erdma.h index e111541486cb..a96a6bd5a2f6 100644 --- a/drivers/infiniband/hw/erdma/erdma.h +++ b/drivers/infiniband/hw/erdma/erdma.h @@ -7,29 +7,22 @@ #ifndef __ERDMA_H__ #define __ERDMA_H__ +#include "kcompat.h" #include #include #include #include #include -#include "erdma_debug.h" #include "erdma_hw.h" #include "erdma_ioctl.h" #include "erdma_stats.h" -#ifndef RDMA_DRIVER_ERDMA -#define RDMA_DRIVER_ERDMA 19 -#endif - -#define ERDMA_MAJOR_VER 0 -#define ERDMA_MEDIUM_VER 2 -#define ERDMA_MINOR_VER 35 #define DRV_MODULE_NAME "erdma" #define ERDMA_NODE_DESC "Elastic RDMA(iWARP) stack" -typedef u8 port_t; +extern bool compat_mode; struct erdma_stats { atomic64_t value[ERDMA_STATS_MAX]; @@ -143,11 +136,14 @@ struct erdma_devattr { u32 fw_version; unsigned char peer_addr[ETH_ALEN]; + unsigned long cap_flags; int numa_node; enum erdma_cc_alg cc; - u8 flags; + u8 retrans_num; + u8 rsvd; u32 grp_num; + u32 max_ceqs; int irq_num; bool disable_dwqe; diff --git a/drivers/infiniband/hw/erdma/erdma_cm.c b/drivers/infiniband/hw/erdma/erdma_cm.c index 1b76506c8076..b0308eb889ea 100644 --- a/drivers/infiniband/hw/erdma/erdma_cm.c +++ b/drivers/infiniband/hw/erdma/erdma_cm.c @@ -111,7 +111,7 @@ static struct erdma_cep *erdma_cep_alloc(struct erdma_dev *dev) spin_unlock_irqrestore(&dev->lock, flags); atomic_inc(&dev->num_cep); - dprint(DBG_CM, "(CEP 0x%p): New Object\n", cep); + ibdev_dbg(&dev->ibdev, "(CEP 0x%p): New Object\n", cep); return cep; } @@ -151,7 +151,7 @@ static void erdma_put_work(struct erdma_cm_work *work) static void erdma_cep_set_inuse(struct erdma_cep *cep) { unsigned long flags; - dprint(DBG_CM, " (CEP 0x%p): use %d\n", cep, cep->in_use); + ibdev_dbg(&cep->dev->ibdev, " (CEP 0x%p): use %d\n", cep, cep->in_use); spin_lock_irqsave(&cep->lock, flags); while (cep->in_use) { @@ -160,7 +160,8 @@ static void erdma_cep_set_inuse(struct erdma_cep *cep) if (signal_pending(current)) flush_signals(current); - dprint(DBG_CM, " (CEP 0x%p): use %d\n", cep, cep->in_use); + ibdev_dbg(&cep->dev->ibdev, " (CEP 0x%p): use %d\n", cep, + cep->in_use); spin_lock_irqsave(&cep->lock, flags); } @@ -172,7 +173,7 @@ static void erdma_cep_set_free(struct erdma_cep *cep) { unsigned long flags; - dprint(DBG_CM, " (CEP 0x%p): use %d\n", cep, cep->in_use); + ibdev_dbg(&cep->dev->ibdev, " (CEP 0x%p): use %d\n", cep, cep->in_use); spin_lock_irqsave(&cep->lock, flags); cep->in_use = 0; @@ -227,7 +228,7 @@ static int erdma_cm_alloc_work(struct erdma_cep *cep, int num) if (!work) { if (!(list_empty(&cep->work_freelist))) erdma_cm_free_work(cep); - dprint(DBG_ON, " Failed\n"); + ibdev_dbg(&cep->dev->ibdev, " CEP alloc work failed\n"); return -ENOMEM; } work->cep = cep; @@ -271,10 +272,11 @@ static int erdma_cm_upcall(struct erdma_cep *cep, enum iw_cm_event_type reason, getname_peer(cep->sock, &event.remote_addr); } - dprint(DBG_CM, - " (QP%d): cep=0x%p, id=0x%p, dev(id)=%s, reason=%d, status=%d\n", - cep->qp ? 
QP_ID(cep->qp) : -1, cep, cm_id, cm_id->device->name, - reason, status); + ibdev_dbg( + &cep->dev->ibdev, + " (QP%d): cep=0x%p, id=0x%p, dev(id)=%s, reason=%d, status=%d\n", + cep->qp ? QP_ID(cep->qp) : -1, cep, cm_id, cm_id->device->name, + reason, status); return cm_id->event_handler(cm_id, &event); } @@ -289,10 +291,11 @@ void erdma_qp_cm_drop(struct erdma_qp *qp) /* * Immediately close socket */ - dprint(DBG_CM, - "(): immediate close, cep=0x%p, state=%d, id=0x%p, sock=0x%p, QP%d\n", - cep, cep->state, cep->cm_id, cep->sock, - cep->qp ? QP_ID(cep->qp) : -1); + ibdev_dbg( + &qp->dev->ibdev, + "(): immediate close, cep=0x%p, state=%d, id=0x%p, sock=0x%p, QP%d\n", + cep, cep->state, cep->cm_id, cep->sock, + cep->qp ? QP_ID(cep->qp) : -1); erdma_cep_set_inuse(cep); @@ -340,8 +343,8 @@ void erdma_qp_cm_drop(struct erdma_qp *qp) void erdma_cep_put(struct erdma_cep *cep) { - dprint(DBG_CM, "(CEP 0x%p): New refcount: %d\n", cep, - kref_read(&cep->ref) - 1); + ibdev_dbg(&cep->dev->ibdev, "(CEP 0x%p): New refcount: %d\n", cep, + kref_read(&cep->ref) - 1); WARN_ON(kref_read(&cep->ref) < 1); kref_put(&cep->ref, __erdma_cep_dealloc); @@ -351,8 +354,8 @@ void erdma_cep_get(struct erdma_cep *cep) { kref_get(&cep->ref); - dprint(DBG_CM, "(CEP 0x%p): New refcount: %d\n", cep, - kref_read(&cep->ref)); + ibdev_dbg(&cep->dev->ibdev, "(CEP 0x%p): New refcount: %d\n", cep, + kref_read(&cep->ref)); } static int erdma_send_mpareqrep(struct erdma_cep *cep, const void *pdata, @@ -511,7 +514,7 @@ static int erdma_recv_mpa_rr(struct erdma_cep *cep) return 0; if (ret) { - dprint(DBG_CM, " ERROR: %d:\n", ret); + ibdev_dbg(&cep->dev->ibdev, " ERROR: %d:\n", ret); return ret; } @@ -539,7 +542,8 @@ static int erdma_recv_mpa_rr(struct erdma_cep *cep) cep->mpa.bytes_rcvd += rcvd; if (to_rcv == rcvd) { - dprint(DBG_CM, " %d bytes private_data received\n", pd_len); + ibdev_dbg(&cep->dev->ibdev, " %d bytes private_data received\n", + pd_len); return 0; } @@ -697,8 +701,9 @@ static void erdma_accept_newconn(struct erdma_cep *cep) ret = kernel_accept(s, &new_s, O_NONBLOCK); if (ret != 0) { - dprint(DBG_CM | DBG_ON, - "(cep=0x%p): ERROR: kernel_accept(): rv=%d\n", cep, ret); + ibdev_dbg(&cep->dev->ibdev, + "(cep=0x%p): ERROR: kernel_accept(): rv=%d\n", cep, + ret); goto error; } @@ -707,9 +712,10 @@ static void erdma_accept_newconn(struct erdma_cep *cep) erdma_cep_get(new_cep); new_s->sk->sk_user_data = new_cep; - dprint(DBG_CM, - "(cep=0x%p, s=0x%p, new_s=0x%p): New LLP connection accepted\n", - cep, s, new_s); + ibdev_dbg( + &cep->dev->ibdev, + "(cep=0x%p, s=0x%p, new_s=0x%p): New LLP connection accepted\n", + cep, s, new_s); tcp_sock_set_nodelay(new_s->sk); new_cep->state = ERDMA_EPSTATE_AWAIT_MPAREQ; @@ -722,7 +728,8 @@ static void erdma_accept_newconn(struct erdma_cep *cep) erdma_cep_get(cep); if (atomic_read(&new_s->sk->sk_rmem_alloc)) { - dprint(DBG_CM, "(cep=0x%p): Immediate MPA req.\n", cep); + ibdev_dbg(&cep->dev->ibdev, "(cep=0x%p): Immediate MPA req.\n", + cep); /* MPA REQ already queued */ erdma_cep_set_inuse(new_cep); ret = erdma_proc_mpareq(new_cep); @@ -751,7 +758,7 @@ static void erdma_accept_newconn(struct erdma_cep *cep) erdma_socket_disassoc(new_s); sock_release(new_s); } - dprint(DBG_CM | DBG_ON, "(cep=0x%p): ERROR: rv=%d\n", cep, ret); + ibdev_dbg(&cep->dev->ibdev, "(cep=0x%p): ERROR: rv=%d\n", cep, ret); } static int erdma_newconn_connected(struct erdma_cep *cep) @@ -786,8 +793,8 @@ static void erdma_cm_work_handler(struct work_struct *w) cep = work->cep; dev = cep->dev; - dprint(DBG_CM, " (QP%d): WORK 
type: %d, CEP: 0x%p, state: %d\n", - cep->qp ? QP_ID(cep->qp) : -1, work->type, cep, cep->state); + ibdev_dbg(&dev->ibdev, " (QP%d): WORK type: %d, CEP: 0x%p, state: %d\n", + cep->qp ? QP_ID(cep->qp) : -1, work->type, cep, cep->state); erdma_cep_set_inuse(cep); @@ -902,10 +909,11 @@ static void erdma_cm_work_handler(struct work_struct *w) } if (release_cep) { - dprint(DBG_CM, - " (CEP 0x%p): Release: mpa_timer=%s, sock=0x%p, QP%d, id=0x%p\n", - cep, cep->mpa_timer ? "y" : "n", cep->sock, - cep->qp ? QP_ID(cep->qp) : -1, cep->cm_id); + ibdev_dbg( + &cep->dev->ibdev, + " (CEP 0x%p): Release: mpa_timer=%s, sock=0x%p, QP%d, id=0x%p\n", + cep, cep->mpa_timer ? "y" : "n", cep->sock, + cep->qp ? QP_ID(cep->qp) : -1, cep->cm_id); erdma_cancel_mpatimer(cep); cep->state = ERDMA_EPSTATE_CLOSED; @@ -941,7 +949,8 @@ static void erdma_cm_work_handler(struct work_struct *w) } erdma_cep_set_free(cep); - dprint(DBG_CM, " (Exit): WORK type: %d, CEP: 0x%p\n", work->type, cep); + ibdev_dbg(&cep->dev->ibdev, " (Exit): WORK type: %d, CEP: 0x%p\n", + work->type, cep); erdma_put_work(work); erdma_cep_put(cep); } @@ -974,9 +983,9 @@ int erdma_cm_queue_work(struct erdma_cep *cep, enum erdma_work_type type) delay = CONNECT_TIMEOUT; } - dprint(DBG_CM, - " (QP%d): WORK type: %d, CEP: 0x%p, work 0x%p, timeout %lu\n", - cep->qp ? QP_ID(cep->qp) : -1, type, cep, work, delay); + ibdev_dbg(&cep->dev->ibdev, + " (QP%d): WORK type: %d, CEP: 0x%p, work 0x%p, timeout %lu\n", + cep->qp ? QP_ID(cep->qp) : -1, type, cep, work, delay); queue_delayed_work(erdma_cm_wq, &work->work, delay); @@ -993,7 +1002,8 @@ static void erdma_cm_llp_data_ready(struct sock *sk) if (!cep) goto out; - dprint(DBG_CM, "(): cep 0x%p, state: %d\n", cep, cep->state); + ibdev_dbg(&cep->dev->ibdev, "(): cep 0x%p, state: %d\n", cep, + cep->state); if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ || cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) @@ -1007,7 +1017,8 @@ static void erdma_cm_llp_error_report(struct sock *sk) { struct erdma_cep *cep = sk_to_cep(sk); - dprint(DBG_CM, "(): error: %d, state: %d\n", sk->sk_err, sk->sk_state); + ibdev_dbg(&cep->dev->ibdev, "(): error: %d, state: %d\n", sk->sk_err, + sk->sk_state); if (cep) cep->sk_error_report(sk); @@ -1027,8 +1038,8 @@ static void erdma_cm_llp_state_change(struct sock *sk) } orig_state_change = cep->sk_state_change; - dprint(DBG_CM, "(): cep: 0x%p, state: %d, tcp_state: %d\n", cep, - cep->state, sk->sk_state); + ibdev_dbg(&cep->dev->ibdev, "(): cep: 0x%p, state: %d, tcp_state: %d\n", + cep, cep->state, sk->sk_state); switch (sk->sk_state) { case TCP_ESTABLISHED: @@ -1050,21 +1061,25 @@ static void erdma_cm_llp_state_change(struct sock *sk) } static int kernel_bindconnect(struct socket *s, struct sockaddr *laddr, - int laddrlen, struct sockaddr *raddr, - int raddrlen, int flags) + struct sockaddr *raddr, int flags) { + size_t size = laddr->sa_family == AF_INET ? + sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6); int ret; sock_set_reuseaddr(s->sk); - ret = s->ops->bind(s, laddr, laddrlen); + ret = s->ops->bind(s, laddr, size); if (ret) { - ((struct sockaddr_in *)laddr)->sin_port = 0; - ret = s->ops->bind(s, laddr, laddrlen); + if (laddr->sa_family == AF_INET) + ((struct sockaddr_in *)laddr)->sin_port = 0; + else + ((struct sockaddr_in6 *)laddr)->sin6_port = 0; + ret = s->ops->bind(s, laddr, size); if (ret) return ret; } - ret = s->ops->connect(s, raddr, raddrlen, flags); + ret = s->ops->connect(s, raddr, size, flags); return ret < 0 ? 
ret : 0; } @@ -1077,6 +1092,7 @@ int erdma_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params) struct sockaddr *laddr = (struct sockaddr *)&id->m_local_addr; struct sockaddr *raddr = (struct sockaddr *)&id->m_remote_addr; u16 pd_len = params->private_data_len; + bool v4 = false; int ret; ERDMA_INC_CNT(dev, IW_CONNECT); @@ -1092,7 +1108,12 @@ int erdma_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params) return -EINVAL; } - if (laddr->sa_family != AF_INET || raddr->sa_family != AF_INET) { + if (laddr->sa_family == AF_INET && raddr->sa_family == AF_INET) { + v4 = true; + } else if (laddr->sa_family != AF_INET6 || raddr->sa_family != AF_INET6) { + ERDMA_INC_CNT(dev, IW_CONNECT_FAILED); + return -EAFNOSUPPORT; + } else if (!(dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_IPV6)) { ERDMA_INC_CNT(dev, IW_CONNECT_FAILED); return -EAFNOSUPPORT; } @@ -1104,20 +1125,21 @@ int erdma_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params) } erdma_qp_get(qp); - dprint(DBG_CM, "(id=0x%p, QP%d): dev(id)=%s, netdev=%s\n", id, - QP_ID(qp), dev->ibdev.name, dev->netdev->name); - dprint(DBG_CM, - "(id=0x%p, QP%d): laddr=(0x%x,%d,mport %d), raddr=(0x%x,%d,mport %d)\n", - id, QP_ID(qp), - ntohl(to_sockaddr_in(id->local_addr).sin_addr.s_addr), - ntohs(to_sockaddr_in(id->local_addr).sin_port), - ntohs(to_sockaddr_in(id->m_local_addr).sin_port), - ntohl(to_sockaddr_in(id->remote_addr).sin_addr.s_addr), - ntohs(to_sockaddr_in(id->remote_addr).sin_port), - ntohs(to_sockaddr_in(id->m_remote_addr).sin_port)); - - ret = __sock_create(current->nsproxy->net_ns, AF_INET, - SOCK_STREAM, IPPROTO_TCP, &s, 1); + ibdev_dbg(&dev->ibdev, "(id=0x%p, QP%d): dev(id)=%s, netdev=%s\n", id, + QP_ID(qp), dev->ibdev.name, dev->netdev->name); + ibdev_dbg( + &dev->ibdev, + "(id=0x%p, QP%d): laddr=(0x%x,%d,mport %d), raddr=(0x%x,%d,mport %d)\n", + id, QP_ID(qp), + ntohl(to_sockaddr_in(id->local_addr).sin_addr.s_addr), + ntohs(to_sockaddr_in(id->local_addr).sin_port), + ntohs(to_sockaddr_in(id->m_local_addr).sin_port), + ntohl(to_sockaddr_in(id->remote_addr).sin_addr.s_addr), + ntohs(to_sockaddr_in(id->remote_addr).sin_port), + ntohs(to_sockaddr_in(id->m_remote_addr).sin_port)); + + ret = __sock_create(current->nsproxy->net_ns, v4 ? 
AF_INET : AF_INET6, + SOCK_STREAM, IPPROTO_TCP, &s, 1); if (ret < 0) goto error_put_qp; @@ -1154,8 +1176,8 @@ int erdma_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params) cep->ord = params->ord; cep->state = ERDMA_EPSTATE_CONNECTING; - dprint(DBG_CM, " (id=0x%p, QP%d): pd_len = %u\n", id, QP_ID(qp), - pd_len); + ibdev_dbg(&dev->ibdev, " (id=0x%p, QP%d): pd_len = %u\n", id, QP_ID(qp), + pd_len); erdma_cep_socket_assoc(cep, s); @@ -1171,8 +1193,7 @@ int erdma_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params) params->private_data_len); } - ret = kernel_bindconnect(s, laddr, sizeof(*laddr), raddr, - sizeof(*raddr), O_NONBLOCK); + ret = kernel_bindconnect(s, laddr, raddr, O_NONBLOCK); if (ret != -EINPROGRESS && ret != 0) { goto error_disassoc; } else if (ret == 0) { @@ -1189,7 +1210,7 @@ int erdma_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params) return 0; error_disassoc: - dprint(DBG_CM, " Failed: %d\n", ret); + ibdev_dbg(&dev->ibdev, " Failed: %d\n", ret); kfree(cep->private_data); cep->private_data = NULL; cep->pd_len = 0; @@ -1330,7 +1351,8 @@ int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) erdma_cep_set_free(cep); - dprint(DBG_CM, "(id=0x%p, QP%d): Exit\n", id, QP_ID(qp)); + ibdev_dbg(&dev->ibdev, "(id=0x%p, QP%d): Exit\n", id, + QP_ID(qp)); ERDMA_INC_CNT(dev, IW_ACCEPT_SUCCESS); return 0; } @@ -1382,8 +1404,9 @@ int erdma_reject(struct iw_cm_id *id, const void *pdata, u8 plen) return -ECONNRESET; } - dprint(DBG_CM, "(id=0x%p): cep->state=%d\n", id, cep->state); - dprint(DBG_CM, " Reject: %d: %x\n", plen, plen ? *(char *)pdata : 0); + ibdev_dbg(&dev->ibdev, "(id=0x%p): cep->state=%d\n", id, cep->state); + ibdev_dbg(&dev->ibdev, " Reject: %d: %x\n", plen, + plen ? *(char *)pdata : 0); if (__mpa_rr_revision(cep->mpa.hdr.params.bits) == MPA_REVISION_EXT_1) { cep->mpa.hdr.params.bits |= MPA_RR_FLAG_REJECT; /* reject */ @@ -1404,7 +1427,6 @@ int erdma_reject(struct iw_cm_id *id, const void *pdata, u8 plen) int erdma_create_listen(struct iw_cm_id *id, int backlog) { - struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr); struct erdma_dev *dev = to_edev(id->device); int addr_family = id->local_addr.ss_family; struct erdma_cep *cep = NULL; @@ -1415,11 +1437,12 @@ int erdma_create_listen(struct iw_cm_id *id, int backlog) if (addr_family != AF_INET) { ERDMA_INC_CNT(dev, IW_LISTEN_IPV6); - return -EAFNOSUPPORT; + if (!(dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_IPV6)) + return -EAFNOSUPPORT; } - ret = __sock_create(current->nsproxy->net_ns, addr_family, - SOCK_STREAM, IPPROTO_TCP, &s, 1); + ret = __sock_create(current->nsproxy->net_ns, addr_family, SOCK_STREAM, + IPPROTO_TCP, &s, 1); if (ret < 0) { ERDMA_INC_CNT(dev, IW_LISTEN_FAILED); return ret; @@ -1428,11 +1451,20 @@ int erdma_create_listen(struct iw_cm_id *id, int backlog) sock_set_reuseaddr(s->sk); /* For wildcard addr, limit binding to current device only */ - if (ipv4_is_zeronet(laddr->sin_addr.s_addr)) - s->sk->sk_bound_dev_if = dev->netdev->ifindex; - ret = s->ops->bind(s, (struct sockaddr *)laddr, + if (addr_family == AF_INET) { + struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr); + + if (ipv4_is_zeronet(laddr->sin_addr.s_addr)) + s->sk->sk_bound_dev_if = dev->netdev->ifindex; + ret = s->ops->bind(s, (struct sockaddr *)laddr, sizeof(struct sockaddr_in)); + } else { + struct sockaddr_in6 *laddr = &to_sockaddr_in6(id->local_addr); + + ret = s->ops->bind(s, (struct sockaddr *)laddr, + sizeof(struct sockaddr_in6)); + } if (ret) goto error; @@ -1464,10 +1496,11 @@ int 
erdma_create_listen(struct iw_cm_id *id, int backlog) INIT_LIST_HEAD((struct list_head *)id->provider_data); } - dprint(DBG_CM, - "(id=0x%p): dev(id)=%s, netdev=%s, id->provider_data=0x%p, cep=0x%p\n", - id, id->device->name, to_edev(id->device)->netdev->name, - id->provider_data, cep); + ibdev_dbg( + &dev->ibdev, + "(id=0x%p): dev(id)=%s, netdev=%s, id->provider_data=0x%p, cep=0x%p\n", + id, id->device->name, to_edev(id->device)->netdev->name, + id->provider_data, cep); list_add_tail(&cep->listenq, (struct list_head *)id->provider_data); cep->state = ERDMA_EPSTATE_LISTENING; @@ -1477,7 +1510,7 @@ int erdma_create_listen(struct iw_cm_id *id, int backlog) return 0; error: - dprint(DBG_CM, " Failed: %d\n", ret); + ibdev_dbg(&dev->ibdev, " Failed: %d\n", ret); if (cep) { erdma_cep_set_inuse(cep); @@ -1512,8 +1545,9 @@ static void erdma_drop_listeners(struct iw_cm_id *id) list_entry(p, struct erdma_cep, listenq); list_del(p); - dprint(DBG_CM, "(id=0x%p): drop CEP 0x%p, state %d\n", id, cep, - cep->state); + ibdev_dbg(&cep->dev->ibdev, + "(id=0x%p): drop CEP 0x%p, state %d\n", id, cep, + cep->state); erdma_cep_set_inuse(cep); if (cep->cm_id) { @@ -1535,8 +1569,8 @@ int erdma_destroy_listen(struct iw_cm_id *id) { struct erdma_dev *dev = to_edev(id->device); - dprint(DBG_CM, "(id=0x%p): dev(id)=%s, netdev=%s\n", id, - id->device->name, dev->netdev->name); + ibdev_dbg(&dev->ibdev, "(id=0x%p): dev(id)=%s, netdev=%s\n", id, + id->device->name, dev->netdev->name); if (!id->provider_data) return 0; @@ -1549,65 +1583,11 @@ int erdma_destroy_listen(struct iw_cm_id *id) return 0; } -extern bool compat_mode; -struct socket *rsvd_sock[16]; - -static int erdma_port_init(void) -{ - int ret = 0, i, j; - struct sockaddr_in laddr; - - if (!compat_mode) - return 0; - - for (i = 0; i < 16; i++) { - ret = __sock_create(current->nsproxy->net_ns, AF_INET, - SOCK_STREAM, IPPROTO_TCP, &rsvd_sock[i], 1); - if (ret < 0) - goto err_out; - memset(&laddr, 0, sizeof(struct sockaddr_in)); - laddr.sin_port = htons(COMPAT_PORT_BASE + i); - ret = rsvd_sock[i]->ops->bind(rsvd_sock[i], (struct sockaddr *)&laddr, - sizeof(struct sockaddr_in)); - if (ret) { - sock_release(rsvd_sock[i]); - goto err_out; - } - } - - return 0; - -err_out: - for (j = 0; j < i; j++) - sock_release(rsvd_sock[j]); - - return ret; -} - -static void erdma_port_release(void) -{ - int i; - - if (!compat_mode) - return; - - for (i = 0; i < 16; i++) - sock_release(rsvd_sock[i]); -} - int erdma_cm_init(void) { - int ret; - - ret = erdma_port_init(); - if (ret) - return ret; - erdma_cm_wq = create_singlethread_workqueue("erdma_cm_wq"); - if (!erdma_cm_wq) { - erdma_port_release(); + if (!erdma_cm_wq) return -ENOMEM; - } return 0; } @@ -1616,5 +1596,4 @@ void erdma_cm_exit(void) { if (erdma_cm_wq) destroy_workqueue(erdma_cm_wq); - erdma_port_release(); } diff --git a/drivers/infiniband/hw/erdma/erdma_cm.h b/drivers/infiniband/hw/erdma/erdma_cm.h index 6d5db98e1b88..2a6939b9a011 100644 --- a/drivers/infiniband/hw/erdma/erdma_cm.h +++ b/drivers/infiniband/hw/erdma/erdma_cm.h @@ -24,8 +24,6 @@ #define MPA_KEY_SIZE 16 #define MPA_DEFAULT_HDR_LEN 28 -#define COMPAT_PORT_BASE 0x7790 - struct mpa_rr_params { __be16 bits; __be16 pd_len; @@ -35,11 +33,11 @@ struct mpa_rr_params { * MPA request/response Hdr bits & fields */ enum { - MPA_RR_FLAG_MARKERS = __cpu_to_be16(0x8000), - MPA_RR_FLAG_CRC = __cpu_to_be16(0x4000), - MPA_RR_FLAG_REJECT = __cpu_to_be16(0x2000), - MPA_RR_RESERVED = __cpu_to_be16(0x1f00), - MPA_RR_MASK_REVISION = __cpu_to_be16(0x00ff) + MPA_RR_FLAG_MARKERS 
= cpu_to_be16(0x8000), + MPA_RR_FLAG_CRC = cpu_to_be16(0x4000), + MPA_RR_FLAG_REJECT = cpu_to_be16(0x2000), + MPA_RR_RESERVED = cpu_to_be16(0x1f00), + MPA_RR_MASK_REVISION = cpu_to_be16(0x00ff) }; /* @@ -140,6 +138,7 @@ struct erdma_cm_work { }; #define to_sockaddr_in(a) (*(struct sockaddr_in *)(&(a))) +#define to_sockaddr_in6(a) (*(struct sockaddr_in6 *)(&(a))) static inline int getname_peer(struct socket *s, struct sockaddr_storage *a) { diff --git a/drivers/infiniband/hw/erdma/erdma_cmdq.c b/drivers/infiniband/hw/erdma/erdma_cmdq.c index dcb185f5465e..7f2f6e08c41d 100644 --- a/drivers/infiniband/hw/erdma/erdma_cmdq.c +++ b/drivers/infiniband/hw/erdma/erdma_cmdq.c @@ -275,7 +275,7 @@ static void *get_next_valid_cmdq_cqe(struct erdma_cmdq *cmdq) __be32 *cqe = get_queue_entry(cmdq->cq.qbuf, cmdq->cq.ci, cmdq->cq.depth, CQE_SHIFT); u32 owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK, - __be32_to_cpu(READ_ONCE(*cqe))); + be32_to_cpu(READ_ONCE(*cqe))); return owner ^ !!(cmdq->cq.ci & cmdq->cq.depth) ? cqe : NULL; } @@ -313,7 +313,6 @@ static int erdma_poll_single_cmd_completion(struct erdma_cmdq *cmdq) __be32 *cqe; u16 ctx_id; u64 *sqe; - int i; cqe = get_next_valid_cmdq_cqe(cmdq); if (!cqe) @@ -322,8 +321,8 @@ static int erdma_poll_single_cmd_completion(struct erdma_cmdq *cmdq) cmdq->cq.ci++; dma_rmb(); - hdr0 = __be32_to_cpu(*cqe); - sqe_idx = __be32_to_cpu(*(cqe + 1)); + hdr0 = be32_to_cpu(*cqe); + sqe_idx = be32_to_cpu(*(cqe + 1)); sqe = get_queue_entry(cmdq->sq.qbuf, sqe_idx, cmdq->sq.depth, SQEBB_SHIFT); @@ -334,11 +333,9 @@ static int erdma_poll_single_cmd_completion(struct erdma_cmdq *cmdq) comp_wait->cmd_status = ERDMA_CMD_STATUS_FINISHED; comp_wait->comp_status = FIELD_GET(ERDMA_CQE_HDR_SYNDROME_MASK, hdr0); + be32_to_cpu_array(comp_wait->comp_data, cqe + 2, 4); cmdq->sq.ci += cmdq->sq.wqebb_cnt; - for (i = 0; i < 4; i++) - comp_wait->comp_data[i] = __be32_to_cpu(*(cqe + 2 + i)); - if (cmdq->use_event) complete(&comp_wait->wait_event); @@ -433,6 +430,7 @@ void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op) int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size, u64 *resp0, u64 *resp1) { + struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq); struct erdma_comp_wait *comp_wait; int ret; @@ -477,5 +475,10 @@ int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size, out: up(&cmdq->credits); + if (ret) + ibdev_err_ratelimited(&dev->ibdev, + "CMD(hdr 0x%llx) return with error %d\n", + *(u64 *)req, ret); + return ret; } diff --git a/drivers/infiniband/hw/erdma/erdma_compat.c b/drivers/infiniband/hw/erdma/erdma_compat.c new file mode 100644 index 000000000000..fc8f53c476e2 --- /dev/null +++ b/drivers/infiniband/hw/erdma/erdma_compat.c @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include +#include + +#include "erdma_verbs.h" + +bool compat_mode; +module_param(compat_mode, bool, 0444); +MODULE_PARM_DESC(compat_mode, "compat mode support"); + +u16 reserve_ports_base = 0x7790; +module_param(reserve_ports_base, ushort, 0444); +MODULE_PARM_DESC(reserve_ports_base, "ports reserved in RoCE mode"); + + +int erdma_create_ah(struct ib_ah *ibah, + struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) +{ + return -EOPNOTSUPP; +} + + +int erdma_destroy_ah(struct ib_ah *ibah, u32 flags) +{ + + return -EOPNOTSUPP; +} + +int erdma_query_pkey(struct ib_device *ibdev, port_t port, u16 index, u16 *pkey) +{ + if (index > 0) + return -EINVAL; + + *pkey = 0xffff; + return 0; +} + +enum 
rdma_link_layer erdma_get_link_layer(struct ib_device *dev, + port_t port_num) +{ + return IB_LINK_LAYER_ETHERNET; +} + +int erdma_add_gid(const struct ib_gid_attr *attr, void **context) +{ + return 0; +} + +int erdma_del_gid(const struct ib_gid_attr *attr, void **context) +{ + return 0; +} + +void erdma_gen_port_from_qpn(u32 sip, u32 dip, u32 lqpn, u32 rqpn, u16 *sport, + u16 *dport) +{ + /* select lqpn 0, select rqpn 1 */ + u32 select_type = 1; + + lqpn &= 0xFFFFF; + rqpn &= 0xFFFFF; + + if (dip < sip || (dip == sip && lqpn < rqpn)) + select_type = 0; + + if (select_type) { + *sport = reserve_ports_base + upper_16_bits(rqpn); + *dport = lower_16_bits(rqpn); + } else { + *dport = reserve_ports_base + upper_16_bits(lqpn); + *sport = lower_16_bits(lqpn); + } +} + +static int erdma_av_from_attr(struct erdma_qp *qp, struct ib_qp_attr *attr) +{ + struct rdma_ah_attr *ah_attr = &attr->ah_attr; + const struct ib_gid_attr *sgid_attr = ah_attr->grh.sgid_attr; + enum rdma_network_type ntype; + union ib_gid sgid; + + if (ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE) { + ibdev_dbg(&qp->dev->ibdev, "unsupport ah_attr type %u.\n", + ah_attr->type); + return -EOPNOTSUPP; + } + + ntype = rdma_gid_attr_network_type(sgid_attr); + sgid = sgid_attr->gid; + + ibdev_dbg(&qp->dev->ibdev, "gid type:%u, sgid: %pI6\n", ntype, + sgid.raw); + + rdma_gid2ip((struct sockaddr *)&qp->attrs.laddr, &sgid); + rdma_gid2ip((struct sockaddr *)&qp->attrs.raddr, + &rdma_ah_read_grh(ah_attr)->dgid); + + ibdev_dbg(&qp->dev->ibdev, "dgid: %pI6\n", + rdma_ah_read_grh(ah_attr)->dgid.raw); + + ibdev_dbg(&qp->dev->ibdev, "laddr:0x%x\n", + ntohl(qp->attrs.laddr.in.sin_addr.s_addr)); + ibdev_dbg(&qp->dev->ibdev, "raddr:0x%x\n", + ntohl(qp->attrs.raddr.in.sin_addr.s_addr)); + return 0; +} + +int erdma_handle_compat_attr(struct erdma_qp *qp, struct ib_qp_attr *attr, + int attr_mask) +{ + ibdev_dbg(&qp->dev->ibdev, "attr mask: %x, av: %d, state:%d\n", + attr_mask, attr_mask & IB_QP_AV, attr_mask & IB_QP_STATE); + + if (attr_mask & IB_QP_AV) + erdma_av_from_attr(qp, attr); + + if (attr_mask & IB_QP_DEST_QPN) { + ibdev_dbg(&qp->dev->ibdev, "get remote qpn %u\n", + attr->dest_qp_num); + qp->attrs.remote_qp_num = attr->dest_qp_num; + } + + if (attr_mask & IB_QP_SQ_PSN) { + ibdev_dbg(&qp->dev->ibdev, "get sqsn:%u\n", attr->sq_psn); + qp->attrs.sq_psn = attr->sq_psn; + } + + if (attr_mask & IB_QP_RQ_PSN) { + ibdev_dbg(&qp->dev->ibdev, "get rqsn:%u\n", attr->rq_psn); + qp->attrs.rq_psn = attr->rq_psn; + } + + return 0; +} + +struct socket *rsvd_sock[16]; + +static int erdma_port_init(void) +{ + struct sockaddr_in laddr; + int ret = 0, i, j; + + for (i = 0; i < 16; i++) { + ret = __sock_create(current->nsproxy->net_ns, AF_INET, + SOCK_STREAM, IPPROTO_TCP, &rsvd_sock[i], 1); + if (ret < 0) + goto err_out; + memset(&laddr, 0, sizeof(struct sockaddr_in)); + laddr.sin_port = htons(reserve_ports_base + i); + ret = rsvd_sock[i]->ops->bind(rsvd_sock[i], + (struct sockaddr *)&laddr, + sizeof(struct sockaddr_in)); + if (ret) { + sock_release(rsvd_sock[i]); + goto err_out; + } + } + + return 0; + +err_out: + for (j = 0; j < i; j++) + sock_release(rsvd_sock[j]); + + return ret; +} + +static void erdma_port_release(void) +{ + int i; + + if (!compat_mode) + return; + + for (i = 0; i < 16; i++) + sock_release(rsvd_sock[i]); +} + +int erdma_compat_init(void) +{ + int ret; + + if (!compat_mode) + return 0; + + + ret = erdma_port_init(); + + return ret; +} + +void erdma_compat_exit(void) +{ + if (!compat_mode) + return; + + erdma_port_release(); + +} diff --git 
a/drivers/infiniband/hw/erdma/erdma_compat.h b/drivers/infiniband/hw/erdma/erdma_compat.h new file mode 100644 index 000000000000..b83a5aad547d --- /dev/null +++ b/drivers/infiniband/hw/erdma/erdma_compat.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ + +/* Authors: Cheng Xu */ +/* Kai Shen */ +/* Copyright (c) 2020-2022, Alibaba Group. */ + +#ifndef __ERDMA_SW_H__ +#define __ERDMA_SW_H__ + +#include "kcompat.h" +#include "erdma_verbs.h" + +int erdma_compat_init(void); +void erdma_compat_exit(void); + +void erdma_gen_port_from_qpn(u32 sip, u32 dip, u32 lqpn, u32 rqpn, u16 *sport, + u16 *dport); + +int erdma_handle_compat_attr(struct erdma_qp *qp, struct ib_qp_attr *attr, + int attr_mask); + +int erdma_add_gid(const struct ib_gid_attr *attr, void **context); + +int erdma_del_gid(const struct ib_gid_attr *attr, void **context); + +int erdma_create_ah(struct ib_ah *ibah, + struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata); + + +int erdma_destroy_ah(struct ib_ah *ibah, u32 flags); + +#endif diff --git a/drivers/infiniband/hw/erdma/erdma_cq.c b/drivers/infiniband/hw/erdma/erdma_cq.c index d9eae90e94cf..e51faa2b8993 100644 --- a/drivers/infiniband/hw/erdma/erdma_cq.c +++ b/drivers/infiniband/hw/erdma/erdma_cq.c @@ -11,7 +11,7 @@ static void *get_next_valid_cqe(struct erdma_cq *cq) __be32 *cqe = get_queue_entry(cq->kern_cq.qbuf, cq->kern_cq.ci, cq->depth, CQE_SHIFT); u32 owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK, - __be32_to_cpu(READ_ONCE(*cqe))); + be32_to_cpu(READ_ONCE(*cqe))); return owner ^ !!(cq->kern_cq.ci & cq->depth) ? cqe : NULL; } @@ -37,6 +37,7 @@ int erdma_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) unsigned long irq_flags; int ret = 0; + spin_lock_irqsave(&cq->kern_cq.lock, irq_flags); if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && get_next_valid_cqe(cq)) { @@ -159,6 +160,10 @@ static int erdma_poll_one_cqe(struct erdma_cq *cq, struct ib_wc *wc) } else { id_table = kern_qp->rwr_tbl; depth = qp->attrs.rq_size; + /* Prevent rqe out of range from HW */ + if (kern_qp->rq_pi - wqe_idx == 0 || + (u16)(kern_qp->rq_pi - wqe_idx) > depth) + syndrome = ERDMA_WC_GENERAL_ERR; } wc->wr_id = id_table[wqe_idx & (depth - 1)]; wc->byte_len = be32_to_cpu(cqe->size); @@ -190,6 +195,7 @@ int erdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) unsigned long flags; int npolled, ret; + spin_lock_irqsave(&cq->kern_cq.lock, flags); for (npolled = 0; npolled < num_entries;) { diff --git a/drivers/infiniband/hw/erdma/erdma_eq.c b/drivers/infiniband/hw/erdma/erdma_eq.c index 0ed611bfb6e3..bf335404ddd3 100644 --- a/drivers/infiniband/hw/erdma/erdma_eq.c +++ b/drivers/infiniband/hw/erdma/erdma_eq.c @@ -284,10 +284,24 @@ static void erdma_ceq_uninit_one(struct erdma_dev *dev, u16 ceqn) int erdma_ceqs_init(struct erdma_dev *dev) { + u64 req_hdr, cap0, cap1; u32 i, j; int err; - for (i = 0; i < dev->attrs.irq_num - 1; i++) { + erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_RDMA, + CMDQ_OPCODE_QUERY_DEVICE); + + err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0, + &cap1); + if (err) + return err; + +#define ERDMA_GET_CAP(name, cap) FIELD_GET(ERDMA_CMD_DEV_CAP_##name##_MASK, cap) + + dev->attrs.max_ceqs = + min((size_t)8 * (size_t)ERDMA_GET_CAP(QBLOCK, cap1), (size_t)dev->attrs.irq_num); + + for (i = 0; i < dev->attrs.max_ceqs - 1; i++) { err = erdma_ceq_init_one(dev, i); if (err) goto out_err; @@ -314,7 +328,7 @@ void erdma_ceqs_uninit(struct erdma_dev *dev) { u32 i; - for (i = 0; i < dev->attrs.irq_num - 1; i++) { + for 
(i = 0; i < dev->attrs.max_ceqs - 1; i++) { erdma_free_ceq_irq(dev, i); erdma_ceq_uninit_one(dev, i); } diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h index 087aae76dd95..9981774672c5 100644 --- a/drivers/infiniband/hw/erdma/erdma_hw.h +++ b/drivers/infiniband/hw/erdma/erdma_hw.h @@ -10,9 +10,6 @@ #include #include -#define ERDMA_HW_PAGE_SHIFT 12 -#define ERDMA_HW_PAGE_SIZE 4096 - /* PCIe device related definition. */ #define PCI_VENDOR_ID_ALIBABA 0x1ded @@ -49,17 +46,6 @@ #define ERDMA_CMDQ_CQ_DB_HOST_ADDR_REG 0x68 #define ERDMA_CMDQ_EQ_DB_HOST_ADDR_REG 0x70 #define ERDMA_AEQ_DB_HOST_ADDR_REG 0x78 -#define ERDMA_REGS_STATS_TSO_IN_PKTS_REG 0x80 -#define ERDMA_REGS_STATS_TSO_OUT_PKTS_REG 0x88 -#define ERDMA_REGS_STATS_TSO_OUT_BYTES_REG 0x90 -#define ERDMA_REGS_STATS_TX_DROP_PKTS_REG 0x98 -#define ERDMA_REGS_STATS_TX_BPS_METER_DROP_PKTS_REG 0xa0 -#define ERDMA_REGS_STATS_TX_PPS_METER_DROP_PKTS_REG 0xa8 -#define ERDMA_REGS_STATS_RX_PKTS_REG 0xc0 -#define ERDMA_REGS_STATS_RX_BYTES_REG 0xc8 -#define ERDMA_REGS_STATS_RX_DROP_PKTS_REG 0xd0 -#define ERDMA_REGS_STATS_RX_BPS_METER_DROP_PKTS_REG 0xd8 -#define ERDMA_REGS_STATS_RX_PPS_METER_DROP_PKTS_REG 0xe0 #define ERDMA_REGS_CEQ_DB_BASE_REG 0x100 #define ERDMA_CMDQ_SQDB_REG 0x200 #define ERDMA_CMDQ_CQDB_REG 0x300 @@ -78,11 +64,11 @@ #define ERDMA_BAR_SQDB_SPACE_OFFSET ERDMA_BAR_DB_SPACE_BASE #define ERDMA_BAR_SQDB_SPACE_SIZE (384 * 1024) -#define ERDMA_BAR_RQDB_SPACE_OFFSET \ +#define ERDMA_BAR_RQDB_SPACE_OFFSET \ (ERDMA_BAR_SQDB_SPACE_OFFSET + ERDMA_BAR_SQDB_SPACE_SIZE) #define ERDMA_BAR_RQDB_SPACE_SIZE (96 * 1024) -#define ERDMA_BAR_CQDB_SPACE_OFFSET \ +#define ERDMA_BAR_CQDB_SPACE_OFFSET \ (ERDMA_BAR_RQDB_SPACE_OFFSET + ERDMA_BAR_RQDB_SPACE_SIZE) /* Doorbell page resources related. */ @@ -115,6 +101,10 @@ #define ERDMA_PAGE_SIZE_SUPPORT 0x7FFFF000 +/* Hardware page size definition */ +#define ERDMA_HW_PAGE_SHIFT 12 +#define ERDMA_HW_PAGE_SIZE 4096 + /* WQE related. 
*/ #define EQE_SIZE 16 #define EQE_SHIFT 4 @@ -162,6 +152,7 @@ enum CMDQ_COMMON_OPCODE { CMDQ_OPCODE_CONF_MTU = 3, CMDQ_OPCODE_GET_STATS = 4, CMDQ_OPCODE_QUERY_EQC = 6, + CMDQ_OPCODE_SET_RETRANS_NUM = 7, }; /* cmdq-SQE HDR */ @@ -204,6 +195,11 @@ struct erdma_cmdq_config_mtu_req { u32 mtu; }; +struct erdma_cmdq_set_retrans_num_req { + u64 hdr; + u32 retrans_num; +}; + /* create_cq cfg0 */ #define ERDMA_CMD_CREATE_CQ_DEPTH_MASK GENMASK(31, 24) #define ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK GENMASK(23, 20) @@ -226,6 +222,7 @@ struct erdma_cmdq_create_cq_req { /* regmr/deregmr cfg0 */ #define ERDMA_CMD_MR_VALID_MASK BIT(31) +#define ERDMA_CMD_MR_VERSION_MASK GENMASK(30, 28) #define ERDMA_CMD_MR_KEY_MASK GENMASK(27, 20) #define ERDMA_CMD_MR_MPT_IDX_MASK GENMASK(19, 0) @@ -233,10 +230,10 @@ struct erdma_cmdq_create_cq_req { #define ERDMA_CMD_REGMR_PD_MASK GENMASK(31, 12) #define ERDMA_CMD_REGMR_TYPE_MASK GENMASK(7, 6) #define ERDMA_CMD_REGMR_RIGHT_MASK GENMASK(5, 1) -#define ERDMA_CMD_REGMR_ACC_MODE_MASK BIT(0) /* regmr cfg2 */ #define ERDMA_CMD_REGMR_PAGESIZE_MASK GENMASK(31, 27) +#define ERDMA_CMD_REGMR_PBL_PAGESIZE_MASK GENMASK(26, 24) #define ERDMA_CMD_REGMR_MTT_TYPE_MASK GENMASK(21, 20) #define ERDMA_CMD_REGMR_MTT_CNT_MASK GENMASK(19, 0) @@ -247,7 +244,14 @@ struct erdma_cmdq_reg_mr_req { u64 start_va; u32 size; u32 cfg2; - u64 phy_addr[4]; + union { + u64 phy_addr[4]; + struct { + u64 rsvd; + u32 size_h; + u32 mtt_cnt_h; + }; + }; }; struct erdma_cmdq_dereg_mr_req { @@ -260,6 +264,10 @@ struct erdma_cmdq_dereg_mr_req { #define ERDMA_CMD_MODIFY_QP_CC_MASK GENMASK(23, 20) #define ERDMA_CMD_MODIFY_QP_QPN_MASK GENMASK(19, 0) +#define ERDMA_CMD_MODIFY_QP_IPV6_MASK BIT(31) +#define ERDMA_CMD_MODIFY_QP_IW_OOB_MASK BIT(30) +#define ERDMA_CMD_MODIFY_QP_RQPN_MASK GENMASK(19, 0) + struct erdma_cmdq_modify_qp_req { u64 hdr; u32 cfg; @@ -270,6 +278,11 @@ struct erdma_cmdq_modify_qp_req { __be16 dport; u32 send_nxt; u32 recv_nxt; + u32 rsvd0; + u32 rsvd1; + __be32 flow_label; + u8 ipv6_daddr[16]; + u8 ipv6_saddr[16]; }; /* create qp cfg0 */ @@ -337,6 +350,8 @@ struct erdma_cmdq_reflush_req { enum { ERDMA_DEV_CAP_FLAGS_ATOMIC = 1 << 7, ERDMA_DEV_CAP_FLAGS_QUERY_QC = 1 << 6, + ERDMA_DEV_CAP_FLAGS_MTT_VA = 1 << 5, + ERDMA_DEV_CAP_FLAGS_IPV6 = 1 << 4, }; #define ERDMA_CMD_INFO0_FW_VER_MASK GENMASK_ULL(31, 0) @@ -364,9 +379,9 @@ struct erdma_cqe { }; struct erdma_sge { - __aligned_le64 laddr; + __aligned_le64 addr; __le32 length; - __le32 lkey; + __le32 key; }; /* Receive Queue Element */ @@ -394,10 +409,11 @@ struct erdma_rqe { #define ERDMA_SQE_HDR_WQEBB_INDEX_MASK GENMASK_ULL(15, 0) /* REG MR attrs */ -#define ERDMA_SQE_MR_MODE_MASK BIT(0) +#define ERDMA_SQE_MR_PGSZ_AVAIL_MASK BIT_ULL(0) #define ERDMA_SQE_MR_ACCESS_MASK GENMASK(5, 1) #define ERDMA_SQE_MR_MTT_TYPE_MASK GENMASK(7, 6) #define ERDMA_SQE_MR_MTT_CNT_MASK GENMASK(31, 12) +#define ERDMA_SQE_MR_PGSZ_MASK GENMASK(4, 0) struct erdma_write_sqe { __le64 hdr; @@ -410,7 +426,7 @@ struct erdma_write_sqe { __le32 rsvd; - struct erdma_sge sgl[0]; + struct erdma_sge sgl[]; }; struct erdma_send_sqe { @@ -421,7 +437,7 @@ struct erdma_send_sqe { }; __le32 length; - struct erdma_sge sgl[0]; + struct erdma_sge sgl[]; }; struct erdma_readreq_sqe { @@ -434,13 +450,23 @@ struct erdma_readreq_sqe { __le32 rsvd; }; +struct erdma_atomic_sqe { + __le64 hdr; + __le64 rsvd; + __le64 fetchadd_swap_data; + __le64 cmp_data; + + struct erdma_sge remote; + struct erdma_sge sgl; +}; + struct erdma_reg_mr_sqe { __le64 hdr; __le64 addr; __le32 length; __le32 stag; - __le32 attrs; - 
__le32 rsvd; + __le32 attr0; + __le32 attr1; }; /* EQ related. */ @@ -493,7 +519,9 @@ enum erdma_opcode { ERDMA_OP_REG_MR = 14, ERDMA_OP_LOCAL_INV = 15, ERDMA_OP_READ_WITH_INV = 16, - ERDMA_NUM_OPCODES = 17, + ERDMA_OP_ATOMIC_CAS = 17, + ERDMA_OP_ATOMIC_FAA = 18, + ERDMA_NUM_OPCODES = 19, ERDMA_OP_INVALID = ERDMA_NUM_OPCODES + 1 }; diff --git a/drivers/infiniband/hw/erdma/erdma_ioctl.c b/drivers/infiniband/hw/erdma/erdma_ioctl.c index 6352a00c92b9..af4d8e5df99b 100644 --- a/drivers/infiniband/hw/erdma/erdma_ioctl.c +++ b/drivers/infiniband/hw/erdma/erdma_ioctl.c @@ -92,11 +92,11 @@ static int erdma_ioctl_conf_cmd(struct erdma_dev *edev, edev->attrs.cc = msg->in.config_req.value; else msg->out.config_resp.value = edev->attrs.cc; - } else if (msg->in.opcode == ERDMA_CONFIG_TYPE_LOGLEVEL) { + } else if (msg->in.opcode == ERDMA_CONFIG_TYPE_RETRANS_NUM) { if (msg->in.config_req.is_set) - dprint_mask = msg->in.config_req.value; + ret = erdma_set_retrans_num(edev, msg->in.config_req.value); else - msg->out.config_resp.value = dprint_mask; + msg->out.config_resp.value = edev->attrs.retrans_num; } msg->out.length = 4; @@ -167,6 +167,7 @@ static int fill_cq_info(struct erdma_dev *dev, u32 cqn, struct erdma_cmdq_query_cqc_resp resp; struct rdma_restrack_entry *res; struct erdma_cq *cq; + struct erdma_mem *mtt; int ret; if (cqn == 0) { @@ -191,19 +192,17 @@ static int fill_cq_info(struct erdma_dev *dev, u32 cqn, res = &cq->ibcq.res; info->is_user = !rdma_is_kernel_res(res); + mtt = info->is_user ? &cq->user_cq.qbuf_mtt : + &cq->kern_cq.qbuf_mtt; - if (info->is_user) { - info->mtt.page_size = cq->user_cq.qbuf_mtt.page_size; - info->mtt.page_offset = cq->user_cq.qbuf_mtt.page_offset; - info->mtt.page_cnt = cq->user_cq.qbuf_mtt.page_cnt; - info->mtt.mtt_nents = cq->user_cq.qbuf_mtt.mtt_nents; - memcpy(info->mtt.mtt_entry, cq->user_cq.qbuf_mtt.mtt_entry, - ERDMA_MAX_INLINE_MTT_ENTRIES * sizeof(__u64)); - info->mtt.va = cq->user_cq.qbuf_mtt.va; - info->mtt.len = cq->user_cq.qbuf_mtt.len; - info->mtt_type = cq->user_cq.qbuf_mtt.mtt_type; - } else { - info->qbuf_dma_addr = cq->kern_cq.qbuf_dma_addr; + info->mtt.page_size = mtt->page_size; + info->mtt.page_offset = mtt->page_offset; + info->mtt.page_cnt = mtt->page_cnt; + info->mtt.mtt_nents = mtt->mtt_nents; + info->mtt.va = mtt->va; + info->mtt.len = mtt->len; + + if (!info->is_user) { info->ci = cq->kern_cq.ci; info->cmdsn = cq->kern_cq.cmdsn; info->notify_cnt = cq->kern_cq.notify_cnt; @@ -245,9 +244,9 @@ static int erdma_fill_qp_info(struct erdma_dev *dev, u32 qpn, { struct erdma_cmdq_query_qpc_resp resp; struct rdma_restrack_entry *res; - struct erdma_mem *mtt; + struct erdma_mem *sq_mtt, *rq_mtt; struct erdma_qp *qp; - int i, ret; + int ret; if (qpn == 0) goto query_hw_qpc; @@ -255,6 +254,10 @@ static int erdma_fill_qp_info(struct erdma_dev *dev, u32 qpn, qp = find_qp_by_qpn(dev, qpn); if (!qp) return -EINVAL; + + if (qp->ibqp.qp_type != IB_QPT_RC) + return -EINVAL; + erdma_qp_get(qp); qp_info->hw_info_valid = 0; @@ -298,35 +301,32 @@ static int erdma_fill_qp_info(struct erdma_dev *dev, u32 qpn, if (qp_info->is_user) { qp_info->pid = res->task->pid; get_task_comm(qp_info->buf, res->task); - mtt = &qp->user_qp.sq_mtt; - qp_info->sq_mtt_type = mtt->mtt_type; - qp_info->sq_mtt.page_size = mtt->page_size; - qp_info->sq_mtt.page_offset = mtt->page_offset; - qp_info->sq_mtt.page_cnt = mtt->page_cnt; - qp_info->sq_mtt.mtt_nents = mtt->mtt_nents; - qp_info->sq_mtt.va = mtt->va; - qp_info->sq_mtt.len = mtt->len; - for (i = 0; i < ERDMA_MAX_INLINE_MTT_ENTRIES; 
i++) - qp_info->sq_mtt.mtt_entry[i] = mtt->mtt_entry[i]; - - mtt = &qp->user_qp.rq_mtt; - qp_info->rq_mtt_type = mtt->mtt_type; - qp_info->rq_mtt.page_size = mtt->page_size; - qp_info->rq_mtt.page_offset = mtt->page_offset; - qp_info->rq_mtt.page_cnt = mtt->page_cnt; - qp_info->rq_mtt.mtt_nents = mtt->mtt_nents; - qp_info->rq_mtt.va = mtt->va; - qp_info->rq_mtt.len = mtt->len; - for (i = 0; i < ERDMA_MAX_INLINE_MTT_ENTRIES; i++) - qp_info->rq_mtt.mtt_entry[i] = mtt->mtt_entry[i]; - } else { + } + sq_mtt = qp_info->is_user ? &qp->user_qp.sq_mtt : + &qp->kern_qp.sq_mtt; + + qp_info->sq_mtt.page_size = sq_mtt->page_size; + qp_info->sq_mtt.page_offset = sq_mtt->page_offset; + qp_info->sq_mtt.page_cnt = sq_mtt->page_cnt; + qp_info->sq_mtt.mtt_nents = sq_mtt->mtt_nents; + qp_info->sq_mtt.va = sq_mtt->va; + qp_info->sq_mtt.len = sq_mtt->len; + + rq_mtt = qp_info->is_user ? &qp->user_qp.rq_mtt : + &qp->kern_qp.rq_mtt; + + qp_info->rq_mtt.page_size = rq_mtt->page_size; + qp_info->rq_mtt.page_offset = rq_mtt->page_offset; + qp_info->rq_mtt.page_cnt = rq_mtt->page_cnt; + qp_info->rq_mtt.mtt_nents = rq_mtt->mtt_nents; + qp_info->rq_mtt.va = rq_mtt->va; + qp_info->rq_mtt.len = rq_mtt->len; + + if (!qp_info->is_user) { qp_info->sqci = qp->kern_qp.sq_ci; qp_info->sqpi = qp->kern_qp.sq_pi; qp_info->rqci = qp->kern_qp.rq_ci; qp_info->rqpi = qp->kern_qp.rq_pi; - - qp_info->sqbuf_dma = qp->kern_qp.sq_buf_dma_addr; - qp_info->rqbuf_dma = qp->kern_qp.rq_buf_dma_addr; qp_info->sqdbrec_dma = qp->kern_qp.sq_db_info_dma_addr; qp_info->rqdbrec_dma = qp->kern_qp.rq_db_info_dma_addr; } diff --git a/drivers/infiniband/hw/erdma/erdma_ioctl.h b/drivers/infiniband/hw/erdma/erdma_ioctl.h index c0f4a2cb0789..505f1b3f592d 100644 --- a/drivers/infiniband/hw/erdma/erdma_ioctl.h +++ b/drivers/infiniband/hw/erdma/erdma_ioctl.h @@ -60,7 +60,8 @@ enum erdma_info_type { enum erdma_config_type { ERDMA_CONFIG_TYPE_CC = 0, - ERDMA_CONFIG_TYPE_LOGLEVEL, + ERDMA_CONFIG_TYPE_DISCARD0, + ERDMA_CONFIG_TYPE_RETRANS_NUM, ERDMA_CONFIG_MAX }; diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c index 443442ad0e44..edd04a22280b 100644 --- a/drivers/infiniband/hw/erdma/erdma_main.c +++ b/drivers/infiniband/hw/erdma/erdma_main.c @@ -16,29 +16,22 @@ MODULE_AUTHOR("Cheng Xu "); MODULE_AUTHOR("Kai Shen "); -MODULE_DESCRIPTION("Alibaba elasticRDMA adapter driver (preview)"); +MODULE_DESCRIPTION("Alibaba elasticRDMA adapter driver"); MODULE_LICENSE("Dual BSD/GPL"); -__u32 dprint_mask; -module_param(dprint_mask, uint, 0644); -MODULE_PARM_DESC(dprint_mask, "debug information print level"); - -bool compat_mode; -module_param(compat_mode, bool, 0444); -MODULE_PARM_DESC(compat_mode, "compat mode support"); - static unsigned int vector_num = ERDMA_NUM_MSIX_VEC; module_param(vector_num, uint, 0444); MODULE_PARM_DESC(vector_num, "number of compeletion vectors"); + static int erdma_netdev_event(struct notifier_block *nb, unsigned long event, void *arg) { struct net_device *netdev = netdev_notifier_info_to_dev(arg); struct erdma_dev *dev = container_of(nb, struct erdma_dev, netdev_nb); - dprint(DBG_CTRL, " netdev:%s,ns:%p: Event %lu to erdma_dev %p\n", - netdev->name, dev_net(netdev), event, dev); + ibdev_dbg(&dev->ibdev, " netdev:%s,ns:%p: Event %lu to erdma_dev %p\n", + netdev->name, dev_net(netdev), event, dev); if (dev->netdev == NULL || dev->netdev != netdev) goto done; @@ -119,19 +112,20 @@ static int erdma_device_register(struct erdma_dev *dev) * So, generating the ibdev's name from mac address of the binded * 
netdev. */ - ret = snprintf(ibdev->name, IB_DEVICE_NAME_MAX, "%s_%.2x%.2x%.2x", - DRV_MODULE_NAME, dev->attrs.peer_addr[3], - dev->attrs.peer_addr[4], dev->attrs.peer_addr[5]); - if (ret < 0) - return ret; + strlcpy(ibdev->name, "erdma_%d", IB_DEVICE_NAME_MAX); ret = erdma_enum_and_get_netdev(dev); if (ret) return -EPROBE_DEFER; dev->mtu = dev->netdev->mtu; + erdma_set_mtu(dev, dev->mtu); addrconf_addr_eui48((u8 *)&ibdev->node_guid, dev->netdev->dev_addr); + ret = erdma_set_retrans_num(dev, ERDMA_DEFAULT_RETRANS_NUM); + if (ret) + dev->attrs.retrans_num = 0; + ret = ib_register_device(ibdev, ibdev->name, &dev->pdev->dev); if (ret) { dev_err(&dev->pdev->dev, @@ -148,14 +142,15 @@ static int erdma_device_register(struct erdma_dev *dev) return ret; } - dprint(DBG_DM, - " Registered '%s' for interface '%s',HWaddr=%02x.%02x.%02x.%02x.%02x.%02x\n", - ibdev->name, dev->netdev->name, *(__u8 *)dev->netdev->dev_addr, - *((__u8 *)dev->netdev->dev_addr + 1), - *((__u8 *)dev->netdev->dev_addr + 2), - *((__u8 *)dev->netdev->dev_addr + 3), - *((__u8 *)dev->netdev->dev_addr + 4), - *((__u8 *)dev->netdev->dev_addr + 5)); + ibdev_dbg( + &dev->ibdev, + " Registered '%s' for interface '%s',HWaddr=%02x.%02x.%02x.%02x.%02x.%02x\n", + ibdev->name, dev->netdev->name, *(__u8 *)dev->netdev->dev_addr, + *((__u8 *)dev->netdev->dev_addr + 1), + *((__u8 *)dev->netdev->dev_addr + 2), + *((__u8 *)dev->netdev->dev_addr + 3), + *((__u8 *)dev->netdev->dev_addr + 4), + *((__u8 *)dev->netdev->dev_addr + 5)); return 0; } @@ -205,14 +200,37 @@ static void erdma_dwqe_resource_init(struct erdma_dev *dev) static int erdma_request_vectors(struct erdma_dev *dev) { int expect_irq_num = min(num_possible_cpus() + 1, vector_num); +#ifdef HAVE_NO_PCI_IRQ_NEW_API + int i; + struct msix_entry *msix_entry = + kmalloc_array(expect_irq_num, sizeof(*msix_entry), GFP_KERNEL); + if (!msix_entry) + return -ENOMEM; + + for (i = 0; i < expect_irq_num; ++i) + msix_entry[i].entry = i; + dev->attrs.irq_num = + pci_enable_msix_range(dev->pdev, msix_entry, 1, expect_irq_num); +#else dev->attrs.irq_num = pci_alloc_irq_vectors(dev->pdev, 1, expect_irq_num, PCI_IRQ_MSIX); +#endif if (dev->attrs.irq_num <= 0) { dev_err(&dev->pdev->dev, "request irq vectors failed(%d)\n", dev->attrs.irq_num); +#ifdef HAVE_NO_PCI_IRQ_NEW_API + kfree(msix_entry); +#endif return -ENOSPC; } +#ifdef HAVE_NO_PCI_IRQ_NEW_API + dev->comm_irq.msix_vector = msix_entry[0].vector; + for (i = 1; i < dev->attrs.irq_num; i++) + dev->ceqs[i - 1].irq.msix_vector = msix_entry[i].vector; + kfree(msix_entry); +#endif + return 0; } @@ -303,11 +321,45 @@ static int erdma_wait_hw_init_done(struct erdma_dev *dev) return 0; } -static void erdma_hw_stop(struct erdma_dev *dev) +static int erdma_hw_stop(struct erdma_dev *dev, bool wait) { u32 ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_RESET_MASK, 1); + int i; erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl); + + if (!wait) + return 0; + + for (i = 0; i < 50; i++) { + if (erdma_reg_read32_filed(dev, ERDMA_REGS_DEV_ST_REG, + ERDMA_REG_DEV_ST_RESET_DONE_MASK)) + break; + + msleep(ERDMA_REG_ACCESS_WAIT_MS); + } + + if (i == 50) { + dev_err(&dev->pdev->dev, "wait reset done timeout.\n"); + return -ETIME; + } + + return 0; +} + +static int erdma_preinit_proc(struct erdma_dev *dev) +{ + u32 version = + be32_to_cpu(erdma_reg_read32(dev, ERDMA_REGS_VERSION_REG)); + + switch (version) { + case 0: + return -ENODEV; + case 2: + return erdma_hw_stop(dev, true); + default: + return 0; + } } static const struct pci_device_id erdma_pci_tbl[] = { @@ -319,7 +371,6 @@ 
static int erdma_probe_dev(struct pci_dev *pdev) { struct erdma_dev *dev; int bars, err; - u32 version; err = pci_enable_device(pdev); if (err) { @@ -358,12 +409,9 @@ static int erdma_probe_dev(struct pci_dev *pdev) goto err_release_bars; } - version = erdma_reg_read32(dev, ERDMA_REGS_VERSION_REG); - if (version == 0) { - /* we knows that it is a non-functional function. */ - err = -ENODEV; + err = erdma_preinit_proc(dev); + if (err) goto err_iounmap_func_bar; - } err = erdma_device_init(dev, pdev); if (err) @@ -400,7 +448,7 @@ static int erdma_probe_dev(struct pci_dev *pdev) return 0; err_stop_hw: - erdma_hw_stop(dev); + erdma_hw_stop(dev, false); err_uninit_cmdq: erdma_cmdq_destroy(dev); @@ -412,7 +460,11 @@ static int erdma_probe_dev(struct pci_dev *pdev) erdma_comm_irq_uninit(dev); err_free_vectors: +#ifdef HAVE_NO_PCI_IRQ_NEW_API + pci_disable_msix(dev->pdev); +#else pci_free_irq_vectors(dev->pdev); +#endif err_uninit_device: erdma_device_uninit(dev); @@ -437,11 +489,15 @@ static void erdma_remove_dev(struct pci_dev *pdev) struct erdma_dev *dev = pci_get_drvdata(pdev); erdma_ceqs_uninit(dev); - erdma_hw_stop(dev); + erdma_hw_stop(dev, false); erdma_cmdq_destroy(dev); erdma_aeq_destroy(dev); erdma_comm_irq_uninit(dev); +#ifdef HAVE_NO_PCI_IRQ_NEW_API + pci_disable_msix(dev->pdev); +#else pci_free_irq_vectors(dev->pdev); +#endif erdma_device_uninit(dev); devm_iounmap(&pdev->dev, dev->func_bar); pci_release_selected_regions(pdev, ERDMA_BAR_MASK); @@ -492,7 +548,7 @@ static int erdma_dev_attrs_init(struct erdma_dev *dev) dev->attrs.max_qp = ERDMA_NQP_PER_QBLOCK * ERDMA_GET_CAP(QBLOCK, cap1); dev->attrs.max_mr = dev->attrs.max_qp << 1; dev->attrs.max_cq = dev->attrs.max_qp << 1; - dev->attrs.flags = ERDMA_GET_CAP(FLAGS, cap0); + dev->attrs.cap_flags = ERDMA_GET_CAP(FLAGS, cap0); dev->attrs.max_send_wr = ERDMA_MAX_SEND_WR; dev->attrs.max_ord = ERDMA_MAX_ORD; @@ -591,6 +647,7 @@ static const struct ib_device_ops erdma_device_ops = { .get_netdev = erdma_get_netdev, .query_pkey = erdma_query_pkey, .modify_cq = erdma_modify_cq, + .get_vector_affinity = erdma_get_vector_affinity, INIT_RDMA_OBJ_SIZE(ib_cq, erdma_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_pd, erdma_pd, ibpd), @@ -599,8 +656,11 @@ static const struct ib_device_ops erdma_device_ops = { static const struct ib_device_ops erdma_compat_ops = { .get_link_layer = erdma_get_link_layer, + .add_gid = erdma_add_gid, + .del_gid = erdma_del_gid, }; + static int erdma_ib_device_add(struct pci_dev *pdev) { struct erdma_dev *dev = pci_get_drvdata(pdev); @@ -728,6 +788,7 @@ static void erdma_ib_device_remove(struct pci_dev *pdev) xa_destroy(&dev->cq_xa); dma_pool_destroy(dev->db_pool); destroy_workqueue(dev->reflush_wq); + } static int erdma_probe(struct pci_dev *pdev, const struct pci_device_id *ent) @@ -766,10 +827,14 @@ static __init int erdma_init_module(void) { int ret; - ret = erdma_cm_init(); + ret = erdma_compat_init(); if (ret) return ret; + ret = erdma_cm_init(); + if (ret) + goto uninit_compat; + ret = erdma_chrdev_init(); if (ret) goto uninit_cm; @@ -784,9 +849,10 @@ static __init int erdma_init_module(void) uninit_chrdev: erdma_chrdev_destroy(); - uninit_cm: erdma_cm_exit(); +uninit_compat: + erdma_compat_exit(); return ret; } @@ -796,6 +862,7 @@ static void __exit erdma_exit_module(void) pci_unregister_driver(&erdma_pci_driver); erdma_chrdev_destroy(); erdma_cm_exit(); + erdma_compat_exit(); } module_init(erdma_init_module); diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c index 
886fcd58ce8c..ad2fe518400d 100644 --- a/drivers/infiniband/hw/erdma/erdma_qp.c +++ b/drivers/infiniband/hw/erdma/erdma_qp.c @@ -6,11 +6,10 @@ /* Authors: Bernard Metzler */ /* Copyright (c) 2008-2019, IBM Corporation */ +#include "kcompat.h" #include "erdma_cm.h" #include "erdma_verbs.h" -extern bool compat_mode; - void erdma_qp_llp_close(struct erdma_qp *qp) { struct erdma_qp_attrs qp_attrs; @@ -54,21 +53,34 @@ static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, enum erdma_qp_attr_mask mask) { - int ret; struct erdma_dev *dev = qp->dev; struct erdma_cmdq_modify_qp_req req; struct tcp_sock *tp; struct erdma_cep *cep = qp->cep; struct sockaddr_storage local_addr, remote_addr; + int ret; if (qp->attrs.connect_without_cm) { - req.cookie = qp->ibqp.qp_num; - req.dip = htonl(qp->attrs.dip); - req.sip = htonl(qp->attrs.sip); + req.cookie = FIELD_PREP(ERDMA_CMD_MODIFY_QP_IW_OOB_MASK, 1) | + FIELD_PREP(ERDMA_CMD_MODIFY_QP_RQPN_MASK, qp->attrs.remote_qp_num); + if (((struct sockaddr_in *)&qp->attrs.raddr)->sin_family == AF_INET) { + req.dip = qp->attrs.raddr.in.sin_addr.s_addr; + req.sip = qp->attrs.laddr.in.sin_addr.s_addr; + } else if (dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_IPV6) { + memcpy(req.ipv6_daddr, &qp->attrs.raddr.in6.sin6_addr.s6_addr, + sizeof(struct in6_addr)); + memcpy(req.ipv6_saddr, &qp->attrs.laddr.in6.sin6_addr.s6_addr, + sizeof(struct in6_addr)); + req.cookie |= FIELD_PREP(ERDMA_CMD_MODIFY_QP_IPV6_MASK, 1); + req.flow_label = 0; + } else { + return -EAFNOSUPPORT; + } req.dport = htons(qp->attrs.dport); req.sport = htons(qp->attrs.sport); req.send_nxt = 0; req.recv_nxt = 0; + goto without_cep; } if (!(mask & ERDMA_QP_ATTR_LLP_HANDLE)) @@ -90,10 +102,24 @@ static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp, qp->attrs.remote_cookie = be32_to_cpu(qp->cep->mpa.ext_data.cookie); req.cookie = be32_to_cpu(qp->cep->mpa.ext_data.cookie); - req.dip = to_sockaddr_in(remote_addr).sin_addr.s_addr; - req.sip = to_sockaddr_in(local_addr).sin_addr.s_addr; - req.dport = to_sockaddr_in(remote_addr).sin_port; - req.sport = to_sockaddr_in(local_addr).sin_port; + if (qp->cep->sock->sk->sk_family == AF_INET) { + req.dip = to_sockaddr_in(remote_addr).sin_addr.s_addr; + req.sip = to_sockaddr_in(local_addr).sin_addr.s_addr; + req.dport = to_sockaddr_in(remote_addr).sin_port; + req.sport = to_sockaddr_in(local_addr).sin_port; + } else if (dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_IPV6) { + req.cookie = FIELD_PREP(ERDMA_CMD_MODIFY_QP_IPV6_MASK, 1) | + FIELD_PREP(ERDMA_CMD_MODIFY_QP_RQPN_MASK, req.cookie); + memcpy(req.ipv6_daddr, &to_sockaddr_in6(remote_addr).sin6_addr, + sizeof(struct in6_addr)); + memcpy(req.ipv6_saddr, &to_sockaddr_in6(local_addr).sin6_addr, + sizeof(struct in6_addr)); + req.dport = to_sockaddr_in6(remote_addr).sin6_port; + req.sport = to_sockaddr_in6(local_addr).sin6_port; + req.flow_label = to_sockaddr_in6(remote_addr).sin6_flowinfo; + } else { + return -EAFNOSUPPORT; + } req.send_nxt = tp->snd_nxt; /* rsvd tcp seq for mpa-rsp in server. 
*/ @@ -130,31 +156,28 @@ static int erdma_modify_qp_state_to_rts_compat(struct erdma_qp *qp, req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, qp->attrs.state) | FIELD_PREP(ERDMA_CMD_MODIFY_QP_CC_MASK, qp->attrs.cc) | FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp)); - - req.cookie = qp->attrs.remote_qp_num; - req.dip = qp->attrs.raddr.in.sin_addr.s_addr; - req.sip = qp->attrs.laddr.in.sin_addr.s_addr; - - if (req.dip < req.sip) { - req.dport = COMPAT_PORT_BASE + ((QP_ID(qp) >> 16) & 0xF); - req.sport = QP_ID(qp); - } else if (req.dip == - req.sip) { /* if dip == sip, must have lqpn != rqpn */ - if (QP_ID(qp) < qp->attrs.remote_qp_num) { - req.dport = - COMPAT_PORT_BASE + ((QP_ID(qp) >> 16) & 0xF); - req.sport = QP_ID(qp); - } else { - req.sport = COMPAT_PORT_BASE + - ((qp->attrs.remote_qp_num >> 16) & 0xF); - req.dport = qp->attrs.remote_qp_num; - } + req.cookie = FIELD_PREP(ERDMA_CMD_MODIFY_QP_RQPN_MASK, qp->attrs.remote_qp_num); + + if (((struct sockaddr_in *)&qp->attrs.raddr)->sin_family == AF_INET) { + req.dip = qp->attrs.raddr.in.sin_addr.s_addr; + req.sip = qp->attrs.laddr.in.sin_addr.s_addr; + } else if (dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_IPV6) { + req.cookie |= FIELD_PREP(ERDMA_CMD_MODIFY_QP_IPV6_MASK, 1); + memcpy(req.ipv6_daddr, &qp->attrs.raddr.in6.sin6_addr.s6_addr, + sizeof(struct in6_addr)); + memcpy(req.ipv6_saddr, &qp->attrs.laddr.in6.sin6_addr.s6_addr, + sizeof(struct in6_addr)); + req.flow_label = 0; } else { - req.sport = COMPAT_PORT_BASE + - ((qp->attrs.remote_qp_num >> 16) & 0xF); - req.dport = qp->attrs.remote_qp_num; + return -EAFNOSUPPORT; } + erdma_gen_port_from_qpn(req.sip, req.dip, QP_ID(qp), + qp->attrs.remote_qp_num, &req.sport, + &req.dport); + req.sport = htons(req.sport); + req.dport = htons(req.dport); + req.send_nxt = req.sport * 4; req.recv_nxt = req.dport * 4; @@ -362,15 +385,16 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi, u32 wqe_size, wqebb_cnt, hw_op, flags, sgl_offset; u32 idx = *pi & (qp->attrs.sq_size - 1); enum ib_wr_opcode op = send_wr->opcode; + struct erdma_atomic_sqe *atomic_sqe; struct erdma_readreq_sqe *read_sqe; struct erdma_reg_mr_sqe *regmr_sge; struct erdma_write_sqe *write_sqe; struct erdma_send_sqe *send_sqe; struct ib_rdma_wr *rdma_wr; - struct erdma_mr *mr; + struct erdma_sge *sge; __le32 *length_field; + struct erdma_mr *mr; u64 wqe_hdr, *entry; - struct ib_sge *sge; u32 attrs; int ret; @@ -437,9 +461,9 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi, sge = get_queue_entry(qp->kern_qp.sq_buf, idx + 1, qp->attrs.sq_size, SQEBB_SHIFT); - sge->addr = rdma_wr->remote_addr; - sge->lkey = rdma_wr->rkey; - sge->length = send_wr->sg_list[0].length; + sge->addr = cpu_to_le64(rdma_wr->remote_addr); + sge->key = cpu_to_le32(rdma_wr->rkey); + sge->length = cpu_to_le32(send_wr->sg_list[0].length); wqe_size = sizeof(struct erdma_readreq_sqe) + send_wr->num_sge * sizeof(struct ib_sge); @@ -471,10 +495,15 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi, mr->access = ERDMA_MR_ACC_LR | to_erdma_access_flags(reg_wr(send_wr)->access); + if (compat_mode) + mr->access = mr->access | ERDMA_MR_ACC_RW; + regmr_sge->addr = cpu_to_le64(mr->ibmr.iova); regmr_sge->length = cpu_to_le32(mr->ibmr.length); regmr_sge->stag = cpu_to_le32(reg_wr(send_wr)->key); - attrs = FIELD_PREP(ERDMA_SQE_MR_MODE_MASK, 0) | + regmr_sge->attr1 = + FIELD_PREP(ERDMA_SQE_MR_PGSZ_MASK, ilog2(mr->ibmr.page_size)); + attrs = FIELD_PREP(ERDMA_SQE_MR_PGSZ_AVAIL_MASK, 1) | FIELD_PREP(ERDMA_SQE_MR_ACCESS_MASK, mr->access) | 
FIELD_PREP(ERDMA_SQE_MR_MTT_CNT_MASK, mr->mem.mtt_nents); @@ -484,7 +513,7 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi, /* Copy SGLs to SQE content to accelerate */ memcpy(get_queue_entry(qp->kern_qp.sq_buf, idx + 1, qp->attrs.sq_size, SQEBB_SHIFT), - mr->mem.mtt_buf, MTT_SIZE(mr->mem.mtt_nents)); + mr->mem.pbl->buf, MTT_SIZE(mr->mem.mtt_nents)); wqe_size = sizeof(struct erdma_reg_mr_sqe) + MTT_SIZE(mr->mem.mtt_nents); } else { @@ -492,7 +521,7 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi, wqe_size = sizeof(struct erdma_reg_mr_sqe); } - regmr_sge->attrs = cpu_to_le32(attrs); + regmr_sge->attr0 = cpu_to_le32(attrs); goto out; case IB_WR_LOCAL_INV: wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, @@ -501,6 +530,35 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi, regmr_sge->stag = cpu_to_le32(send_wr->ex.invalidate_rkey); wqe_size = sizeof(struct erdma_reg_mr_sqe); goto out; + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + atomic_sqe = (struct erdma_atomic_sqe *)entry; + if (op == IB_WR_ATOMIC_CMP_AND_SWP) { + wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, + ERDMA_OP_ATOMIC_CAS); + atomic_sqe->fetchadd_swap_data = + cpu_to_le64(atomic_wr(send_wr)->swap); + atomic_sqe->cmp_data = + cpu_to_le64(atomic_wr(send_wr)->compare_add); + } else { + wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, + ERDMA_OP_ATOMIC_FAA); + atomic_sqe->fetchadd_swap_data = + cpu_to_le64(atomic_wr(send_wr)->compare_add); + } + + sge = get_queue_entry(qp->kern_qp.sq_buf, idx + 1, + qp->attrs.sq_size, SQEBB_SHIFT); + sge->addr = cpu_to_le64(atomic_wr(send_wr)->remote_addr); + sge->key = cpu_to_le32(atomic_wr(send_wr)->rkey); + sge++; + + sge->addr = cpu_to_le64(send_wr->sg_list[0].addr); + sge->key = cpu_to_le32(send_wr->sg_list[0].lkey); + sge->length = cpu_to_le32(send_wr->sg_list[0].length); + + wqe_size = sizeof(*atomic_sqe); + goto out; default: return -EOPNOTSUPP; } @@ -544,15 +602,16 @@ static void kick_sq_db(struct erdma_qp *qp, u16 pi) int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr, const struct ib_send_wr **bad_send_wr) { + const struct ib_send_wr *wr = send_wr; struct erdma_qp *qp = to_eqp(ibqp); int ret = 0; - const struct ib_send_wr *wr = send_wr; unsigned long flags; u16 sq_pi; if (!send_wr) return -EINVAL; + spin_lock_irqsave(&qp->kern_qp.sq_lock, flags); sq_pi = qp->kern_qp.sq_pi; @@ -620,6 +679,7 @@ int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr, unsigned long flags; int ret = 0; + spin_lock_irqsave(&qp->kern_qp.rq_lock, flags); while (wr) { diff --git a/drivers/infiniband/hw/erdma/erdma_stats.c b/drivers/infiniband/hw/erdma/erdma_stats.c index b8442cdd4261..756ebc0b6350 100644 --- a/drivers/infiniband/hw/erdma/erdma_stats.c +++ b/drivers/infiniband/hw/erdma/erdma_stats.c @@ -74,6 +74,7 @@ static const char *const erdma_stats_names[] = { [ERDMA_STATS_RX_PPS_METER_DROP_CNT] = "hw_rx_pps_limit_drop_cnt", }; + struct rdma_hw_stats *erdma_alloc_hw_stats(struct ib_device *ibdev, port_t port_num) { diff --git a/drivers/infiniband/hw/erdma/erdma_stats.h b/drivers/infiniband/hw/erdma/erdma_stats.h index d2fcf25ddb75..c4f7c950421b 100644 --- a/drivers/infiniband/hw/erdma/erdma_stats.h +++ b/drivers/infiniband/hw/erdma/erdma_stats.h @@ -7,10 +7,9 @@ #ifndef __ERDMA_STATS_H__ #define __ERDMA_STATS_H__ +#include "kcompat.h" #include -typedef u8 port_t; - #define ERDMA_INC_CNT(dev, name) \ atomic64_inc(&dev->stats.value[ERDMA_STATS_##name]) diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c 
b/drivers/infiniband/hw/erdma/erdma_verbs.c index 6bcf88adc2c2..5631d95ba52f 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -9,7 +9,10 @@ /* Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. */ +#include "kcompat.h" + #include +#include #include #include #include @@ -20,14 +23,64 @@ #include "erdma_cm.h" #include "erdma_verbs.h" +bool rand_qpn; +module_param(rand_qpn, bool, 0444); +MODULE_PARM_DESC(rand_qpn, "randomized qpn"); + extern bool compat_mode; +static void assemble_qbuf_mtt_for_cmd(struct erdma_mem *mtt, u32 *cfg, + u64 *addr0, u64 *addr1) +{ + struct erdma_pbl *pbl = mtt->pbl; + + if (mtt->mtt_nents > ERDMA_MAX_INLINE_MTT_ENTRIES) { + *addr0 = pbl->buf_dma; + *cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK, + ERDMA_MR_INDIRECT_MTT); + } else { + *addr0 = pbl->buf[0]; + memcpy(addr1, pbl->buf + 1, MTT_SIZE(mtt->mtt_nents - 1)); + *cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK, + ERDMA_MR_INLINE_MTT); + } +} + +static void create_qp_mtt_cfg(struct erdma_cmdq_create_qp_req *req, + struct erdma_mem *sq_mtt, struct erdma_mem *rq_mtt, + u32 scqn, u32 rcqn) +{ + req->sq_cqn_mtt_cfg = FIELD_PREP( + ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK, + ilog2(sq_mtt->page_size) - ERDMA_HW_PAGE_SHIFT); + req->sq_cqn_mtt_cfg |= + FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, scqn); + + req->rq_cqn_mtt_cfg = FIELD_PREP( + ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK, + ilog2(rq_mtt->page_size) - ERDMA_HW_PAGE_SHIFT); + req->rq_cqn_mtt_cfg |= + FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, rcqn); + + req->sq_mtt_cfg = sq_mtt->page_offset; + req->sq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, + sq_mtt->mtt_nents); + + req->rq_mtt_cfg = rq_mtt->page_offset; + req->rq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, + rq_mtt->mtt_nents); + + assemble_qbuf_mtt_for_cmd(sq_mtt, &req->sq_mtt_cfg, + &req->sq_buf_addr, req->sq_mtt_entry); + assemble_qbuf_mtt_for_cmd(rq_mtt, &req->rq_mtt_cfg, + &req->rq_buf_addr, req->rq_mtt_entry); +} + static int create_qp_cmd(struct erdma_dev *dev, struct erdma_qp *qp, bool is_user) { - struct erdma_cmdq_create_qp_req req; struct erdma_pd *pd = to_epd(qp->ibqp.pd); - struct erdma_uqp *user_qp; + struct erdma_cmdq_create_qp_req req; u64 resp0, resp1; int err; @@ -41,109 +94,61 @@ static int create_qp_cmd(struct erdma_dev *dev, struct erdma_qp *qp, ilog2(qp->attrs.rq_size)) | FIELD_PREP(ERDMA_CMD_CREATE_QP_PD_MASK, pd->pdn); - if (!is_user) { - u32 pgsz_range = ilog2(SZ_1M) - ERDMA_HW_PAGE_SHIFT; - - req.sq_cqn_mtt_cfg = - FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK, - pgsz_range) | - FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn); - req.rq_cqn_mtt_cfg = - FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK, - pgsz_range) | - FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn); - - req.sq_mtt_cfg = - FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_OFFSET_MASK, 0) | - FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, 1) | - FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK, - ERDMA_MR_INLINE_MTT); - req.rq_mtt_cfg = req.sq_mtt_cfg; + create_qp_mtt_cfg(&req, is_user ? &qp->user_qp.sq_mtt : &qp->kern_qp.sq_mtt, + is_user ? 
&qp->user_qp.rq_mtt : &qp->kern_qp.rq_mtt, + qp->scq->cqn, qp->rcq->cqn); - req.rq_buf_addr = qp->kern_qp.rq_buf_dma_addr; - req.sq_buf_addr = qp->kern_qp.sq_buf_dma_addr; - req.sq_db_info_dma_addr = qp->kern_qp.sq_db_info_dma_addr; - req.rq_db_info_dma_addr = qp->kern_qp.rq_db_info_dma_addr; - } else { - user_qp = &qp->user_qp; - req.sq_cqn_mtt_cfg = FIELD_PREP( - ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK, - ilog2(user_qp->sq_mtt.page_size) - ERDMA_HW_PAGE_SHIFT); - req.sq_cqn_mtt_cfg |= - FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn); - - req.rq_cqn_mtt_cfg = FIELD_PREP( - ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK, - ilog2(user_qp->rq_mtt.page_size) - ERDMA_HW_PAGE_SHIFT); - req.rq_cqn_mtt_cfg |= - FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn); - - req.sq_mtt_cfg = user_qp->sq_mtt.page_offset; - req.sq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, - user_qp->sq_mtt.mtt_nents) | - FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK, - user_qp->sq_mtt.mtt_type); - - req.rq_mtt_cfg = user_qp->rq_mtt.page_offset; - req.rq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, - user_qp->rq_mtt.mtt_nents) | - FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK, - user_qp->rq_mtt.mtt_type); - - req.sq_buf_addr = user_qp->sq_mtt.mtt_entry[0]; - req.rq_buf_addr = user_qp->rq_mtt.mtt_entry[0]; - - if (user_qp->sq_mtt.mtt_type == ERDMA_MR_INLINE_MTT) { - req.sq_mtt_entry[0] = user_qp->sq_mtt.mtt_entry[1]; - req.sq_mtt_entry[1] = user_qp->sq_mtt.mtt_entry[2]; - req.sq_mtt_entry[2] = user_qp->sq_mtt.mtt_entry[3]; - } - - if (user_qp->rq_mtt.mtt_type == ERDMA_MR_INLINE_MTT) { - req.rq_mtt_entry[0] = user_qp->rq_mtt.mtt_entry[1]; - req.rq_mtt_entry[1] = user_qp->rq_mtt.mtt_entry[2]; - req.rq_mtt_entry[2] = user_qp->rq_mtt.mtt_entry[3]; - } - - req.sq_db_info_dma_addr = user_qp->sq_db_info_dma_addr; - req.rq_db_info_dma_addr = user_qp->rq_db_info_dma_addr; - } + req.sq_db_info_dma_addr = is_user ? qp->user_qp.sq_db_info_dma_addr : + qp->kern_qp.sq_db_info_dma_addr; + req.rq_db_info_dma_addr = is_user ? 
qp->user_qp.rq_db_info_dma_addr : + qp->kern_qp.rq_db_info_dma_addr; err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0, &resp1); - if (err) { - dev_err(&dev->pdev->dev, - "ERROR: err code = %d, cmd of create qp failed.\n", - err); + if (err) return err; - } qp->attrs.cookie = FIELD_GET(ERDMA_CMDQ_CREATE_QP_RESP_COOKIE_MASK, resp0); - return err; + return 0; } static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr) { - struct erdma_cmdq_reg_mr_req req; struct erdma_pd *pd = to_epd(mr->ibmr.pd); - u64 *phy_addr; - int i; + struct erdma_cmdq_reg_mr_req req; + u32 mtt_type; erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_REG_MR); + if (mr->type == ERDMA_MR_TYPE_FRMR || + mr->mem.page_cnt > ERDMA_MAX_INLINE_MTT_ENTRIES) { + if (mr->mem.pbl->continuous) { + req.phy_addr[0] = mr->mem.pbl->buf_dma; + mtt_type = ERDMA_MR_INDIRECT_MTT; + } else { + req.phy_addr[0] = sg_dma_address(mr->mem.pbl->sglist); + mtt_type = mr->mem.pbl->level; + } + } else { + memcpy(req.phy_addr, mr->mem.pbl->buf, + MTT_SIZE(mr->mem.page_cnt)); + mtt_type = ERDMA_MR_INLINE_MTT; + } + req.cfg0 = FIELD_PREP(ERDMA_CMD_MR_VALID_MASK, mr->valid) | FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, mr->ibmr.lkey & 0xFF) | FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, mr->ibmr.lkey >> 8); req.cfg1 = FIELD_PREP(ERDMA_CMD_REGMR_PD_MASK, pd->pdn) | FIELD_PREP(ERDMA_CMD_REGMR_TYPE_MASK, mr->type) | - FIELD_PREP(ERDMA_CMD_REGMR_RIGHT_MASK, mr->access) | - FIELD_PREP(ERDMA_CMD_REGMR_ACC_MODE_MASK, 0); + FIELD_PREP(ERDMA_CMD_REGMR_RIGHT_MASK, mr->access); req.cfg2 = FIELD_PREP(ERDMA_CMD_REGMR_PAGESIZE_MASK, ilog2(mr->mem.page_size)) | - FIELD_PREP(ERDMA_CMD_REGMR_MTT_TYPE_MASK, mr->mem.mtt_type) | + FIELD_PREP(ERDMA_CMD_REGMR_MTT_TYPE_MASK, mtt_type) | FIELD_PREP(ERDMA_CMD_REGMR_MTT_CNT_MASK, mr->mem.page_cnt); + /* Clear this field because hardware will check it. */ + req.size = 0; if (mr->type == ERDMA_MR_TYPE_DMA) goto post_cmd; @@ -153,16 +158,20 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr) req.size = mr->mem.len; } - if (mr->type == ERDMA_MR_TYPE_FRMR || - mr->mem.mtt_type == ERDMA_MR_INDIRECT_MTT) { - phy_addr = req.phy_addr; - *phy_addr = mr->mem.mtt_entry[0]; - } else { - phy_addr = req.phy_addr; - for (i = 0; i < mr->mem.mtt_nents; i++) - *phy_addr++ = mr->mem.mtt_entry[i]; + if (!mr->mem.pbl->continuous && mr->mem.pbl->level > 1) { + req.cfg0 |= FIELD_PREP(ERDMA_CMD_MR_VERSION_MASK, 1); + req.cfg2 |= FIELD_PREP(ERDMA_CMD_REGMR_PBL_PAGESIZE_MASK, + PAGE_SHIFT - ERDMA_HW_PAGE_SHIFT); + req.size_h = upper_32_bits(mr->mem.len); + req.mtt_cnt_h = mr->mem.page_cnt >> 20; + ibdev_dbg(&dev->ibdev, + "cfg0 %x, cfg2 %x, size_h %u, mtt_cmt_h %u\n", + req.cfg0, req.cfg2, req.size_h, req.mtt_cnt_h); + ibdev_dbg(&dev->ibdev, "mtt_0_level: 0x%llx\n", + req.phy_addr[0]); } + post_cmd: return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); } @@ -170,10 +179,9 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr) static int create_cq_cmd(struct erdma_dev *dev, struct erdma_cq *cq, bool is_user) { - int err; struct erdma_cmdq_create_cq_req req; - u32 page_size; - struct erdma_mem *mtt; + struct erdma_mem *mtt = is_user ? 
&cq->user_cq.qbuf_mtt : + &cq->kern_cq.qbuf_mtt; erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_CREATE_CQ); @@ -182,50 +190,27 @@ static int create_cq_cmd(struct erdma_dev *dev, struct erdma_cq *cq, FIELD_PREP(ERDMA_CMD_CREATE_CQ_DEPTH_MASK, ilog2(cq->depth)); req.cfg1 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_EQN_MASK, cq->assoc_eqn); - if (!is_user) { - page_size = SZ_32M; - req.cfg0 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK, - ilog2(page_size) - ERDMA_HW_PAGE_SHIFT); - req.qbuf_addr_l = lower_32_bits(cq->kern_cq.qbuf_dma_addr); - req.qbuf_addr_h = upper_32_bits(cq->kern_cq.qbuf_dma_addr); - - req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK, 1) | - FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK, + req.cfg0 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK, + ilog2(mtt->page_size) - ERDMA_HW_PAGE_SHIFT); + if (mtt->mtt_nents == 1) { + req.qbuf_addr_l = lower_32_bits(mtt->pbl->buf[0]); + req.qbuf_addr_h = upper_32_bits(mtt->pbl->buf[0]); + req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK, ERDMA_MR_INLINE_MTT); - - req.first_page_offset = 0; - req.cq_db_info_addr = - cq->kern_cq.qbuf_dma_addr + (cq->depth << CQE_SHIFT); } else { - mtt = &cq->user_cq.qbuf_mtt; - req.cfg0 |= - FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK, - ilog2(mtt->page_size) - ERDMA_HW_PAGE_SHIFT); - if (mtt->mtt_nents == 1) { - req.qbuf_addr_l = lower_32_bits(*(u64 *)mtt->mtt_buf); - req.qbuf_addr_h = upper_32_bits(*(u64 *)mtt->mtt_buf); - } else { - req.qbuf_addr_l = lower_32_bits(mtt->mtt_entry[0]); - req.qbuf_addr_h = upper_32_bits(mtt->mtt_entry[0]); - } - req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK, - mtt->mtt_nents); + req.qbuf_addr_l = lower_32_bits(mtt->pbl->buf_dma); + req.qbuf_addr_h = upper_32_bits(mtt->pbl->buf_dma); req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK, - mtt->mtt_type); - - req.first_page_offset = mtt->page_offset; - req.cq_db_info_addr = cq->user_cq.db_info_dma_addr; + ERDMA_MR_INDIRECT_MTT); } + req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK, + mtt->mtt_nents); - err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); - if (err) { - dev_err(&dev->pdev->dev, - "ERROR: err code = %d, cmd of create cq failed.\n", - err); - return err; - } + req.first_page_offset = mtt->page_offset; + req.cq_db_info_addr = is_user ? 
cq->user_cq.db_info_dma_addr : + cq->kern_cq.db_info_dma_addr; - return 0; + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); } static int erdma_alloc_idx(struct erdma_resource_cb *res_cb) @@ -263,6 +248,7 @@ static inline void erdma_free_idx(struct erdma_resource_cb *res_cb, u32 idx) WARN_ON(!used); } + static struct rdma_user_mmap_entry * erdma_user_mmap_entry_insert(struct ib_ucontext *uctx, u64 address, u32 size, u8 mmap_flag, u64 *mmap_offset) @@ -320,7 +306,7 @@ int erdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, attr->max_fast_reg_page_list_len = ERDMA_MAX_FRMR_PA; attr->page_size_cap = ERDMA_PAGE_SIZE_SUPPORT; - if (dev->attrs.flags & ERDMA_DEV_CAP_FLAGS_ATOMIC) { + if (dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_ATOMIC) { attr->atomic_cap = IB_ATOMIC_GLOB; attr->masked_atomic_cap = IB_ATOMIC_GLOB; } @@ -385,6 +371,7 @@ int erdma_get_port_immutable(struct ib_device *ibdev, port_t port, port_immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; port_immutable->max_mad_size = IB_MGMT_MAD_SIZE; + port_immutable->pkey_tbl_len = 1; } else { port_immutable->gid_tbl_len = 1; port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; @@ -393,21 +380,6 @@ int erdma_get_port_immutable(struct ib_device *ibdev, port_t port, return 0; } -int erdma_query_pkey(struct ib_device *ibdev, port_t port, u16 index, u16 *pkey) -{ - if (index > 0) - return -EINVAL; - - *pkey = 0xffff; - return 0; -} - -enum rdma_link_layer erdma_get_link_layer(struct ib_device *dev, - port_t port_num) -{ - return IB_LINK_LAYER_ETHERNET; -} - int erdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct erdma_pd *pd = to_epd(ibpd); @@ -427,6 +399,7 @@ int erdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) return 0; } + int erdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct erdma_dev *dev = to_edev(ibpd->device); @@ -434,6 +407,7 @@ int erdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) ERDMA_INC_CNT(dev, CMD_DEALLOC_PD); + erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_PD], pd->pdn); return 0; } @@ -456,6 +430,13 @@ static void erdma_flush_worker(struct work_struct *work) static int erdma_qp_validate_cap(struct erdma_dev *dev, struct ib_qp_init_attr *attrs) { + ibdev_dbg( + &dev->ibdev, + "create_qp_cap:send_wr(%u),recv_wr(%u),send_sge(%u),recv_sge(%u),inline(%u)\n", + attrs->cap.max_send_wr, attrs->cap.max_recv_wr, + attrs->cap.max_send_sge, attrs->cap.max_recv_sge, + attrs->cap.max_inline_data); + if ((attrs->cap.max_send_wr > dev->attrs.max_send_wr) || (attrs->cap.max_recv_wr > dev->attrs.max_recv_wr) || (attrs->cap.max_send_sge > dev->attrs.max_send_sge) || @@ -483,35 +464,7 @@ static int erdma_qp_validate_attr(struct erdma_dev *dev, return 0; } -static void free_kernel_qp(struct erdma_qp *qp) -{ - struct erdma_dev *dev = qp->dev; - - vfree(qp->kern_qp.swr_tbl); - vfree(qp->kern_qp.rwr_tbl); - - if (qp->kern_qp.sq_buf) - dma_free_coherent(&dev->pdev->dev, - qp->attrs.sq_size << SQEBB_SHIFT, - qp->kern_qp.sq_buf, - qp->kern_qp.sq_buf_dma_addr); - - if (qp->kern_qp.rq_buf) - dma_free_coherent(&dev->pdev->dev, - qp->attrs.rq_size << RQE_SHIFT, - qp->kern_qp.rq_buf, - qp->kern_qp.rq_buf_dma_addr); - - if (qp->kern_qp.sq_db_info) - dma_pool_free(dev->db_pool, qp->kern_qp.sq_db_info, - qp->kern_qp.sq_db_info_dma_addr); - - if (qp->kern_qp.rq_db_info) - dma_pool_free(dev->db_pool, qp->kern_qp.rq_db_info, - qp->kern_qp.rq_db_info_dma_addr); -} - -static int update_kernel_qp_oob_attr(struct erdma_qp *qp) +static int 
update_kernel_qp_oob_attr(struct erdma_qp *qp, struct ib_qp_attr *attr, int attr_mask) { struct iw_ext_conn_param *param = (struct iw_ext_conn_param *)(qp->ibqp.qp_context); @@ -522,20 +475,428 @@ static int update_kernel_qp_oob_attr(struct erdma_qp *qp) if (param == NULL) return -EINVAL; - if (param->sk_addr.family != PF_INET) { - ibdev_err_ratelimited( - &qp->dev->ibdev, - "IPv4 address is required for connection without CM.\n"); + if (attr_mask & IB_QP_DEST_QPN) + qp->attrs.remote_qp_num = attr->dest_qp_num; + + if (param->sk_addr.family == AF_INET) { + ((struct sockaddr_in *)&qp->attrs.raddr)->sin_family = AF_INET; + ((struct sockaddr_in *)&qp->attrs.laddr)->sin_family = AF_INET; + qp->attrs.raddr.in.sin_addr.s_addr = param->sk_addr.daddr_v4; + qp->attrs.laddr.in.sin_addr.s_addr = param->sk_addr.saddr_v4; + } else if (param->sk_addr.family == AF_INET6) { + ((struct sockaddr_in6 *)&qp->attrs.raddr)->sin6_family = AF_INET6; + ((struct sockaddr_in6 *)&qp->attrs.laddr)->sin6_family = AF_INET6; + memcpy(&qp->attrs.raddr.in6.sin6_addr, &param->sk_addr.daddr_v6, + sizeof(struct in6_addr)); + memcpy(&qp->attrs.laddr.in6.sin6_addr, &param->sk_addr.saddr_v6, + sizeof(struct in6_addr)); + } else { return -EINVAL; } - qp->attrs.sip = ntohl(param->sk_addr.saddr_v4); - qp->attrs.dip = ntohl(param->sk_addr.daddr_v4); qp->attrs.dport = ntohs(param->sk_addr.dport); qp->attrs.sport = param->sk_addr.sport; return 0; } +static struct erdma_pbl *erdma_create_cont_pbl(struct erdma_dev *dev, + size_t size) +{ + struct erdma_pbl *pbl; + int ret = -ENOMEM; + + pbl = kzalloc(sizeof(*pbl), GFP_KERNEL); + if (!pbl) + return ERR_PTR(-ENOMEM); + + pbl->size = size; + pbl->buf = kzalloc(pbl->size, GFP_KERNEL); + if (!pbl->buf) + goto err_free_pbl; + + pbl->continuous = true; + pbl->buf_dma = dma_map_single(&dev->pdev->dev, pbl->buf, pbl->size, + DMA_TO_DEVICE); + if (dma_mapping_error(&dev->pdev->dev, pbl->buf_dma)) + goto err_free_pbl_buf; + + return pbl; + +err_free_pbl_buf: + kfree(pbl->buf); + +err_free_pbl: + kfree(pbl); + + return ERR_PTR(ret); +} + +static u32 vmalloc_to_sgl(struct erdma_dev *dev, struct scatterlist **sgl_ptr, + void *buf, u64 len) +{ + u32 npages, i, nsg; + struct scatterlist *sglist; + struct page *pg; + + npages = DIV_ROUND_UP(len, PAGE_SIZE); + sglist = vzalloc(npages * sizeof(struct scatterlist)); + if (!sglist) + return 0; + + sg_init_table(sglist, npages); + for (i = 0; i < npages; i++) { + pg = vmalloc_to_page(buf); + if (!pg) + goto err; + sg_set_page(&sglist[i], pg, PAGE_SIZE, 0); + buf += PAGE_SIZE; + } + + nsg = dma_map_sg(&dev->pdev->dev, sglist, npages, DMA_BIDIRECTIONAL); + if (!nsg) + goto err; + + if (nsg != npages) + ibdev_warn(&dev->ibdev, "sgl len before DMA: %u sgl len after DMA: %u\n", + npages, nsg); + + *sgl_ptr = sglist; + return nsg; + +err: + vfree(sglist); + return 0; +} + +static int erdma_create_pbl_buf_sg(struct erdma_dev *dev, struct erdma_pbl *pbl) +{ + struct scatterlist *sglist; + u32 nsg; + + /* Failed if buf is not page aligned */ + if ((uintptr_t)pbl->buf & ~PAGE_MASK) + return -EINVAL; + + nsg = vmalloc_to_sgl(dev, &sglist, pbl->buf, pbl->size); + if (!nsg) + return -ENOMEM; + + pbl->sglist = sglist; + pbl->nsg = nsg; + + return 0; +} + +static void erdma_destroy_pbl_buf_sg(struct erdma_dev *dev, + struct erdma_pbl *pbl) +{ + dma_unmap_sg(&dev->pdev->dev, pbl->sglist, pbl->nsg, DMA_TO_DEVICE); + vfree(pbl->sglist); +} + +static struct erdma_pbl *erdma_create_scatter_pbl(struct erdma_dev *dev, + size_t size) +{ + struct erdma_pbl *pbl; + int ret = -ENOMEM; + 
pbl = kzalloc(sizeof(*pbl), GFP_KERNEL); + if (!pbl) + return NULL; + + pbl->size = ALIGN(size, PAGE_SIZE); + pbl->buf = vzalloc(pbl->size); + pbl->continuous = false; + if (!pbl->buf) + goto err_free_pbl; + + ret = erdma_create_pbl_buf_sg(dev, pbl); + if (ret) + goto err_free_pbl_buf; + + ibdev_dbg(&dev->ibdev, "create scatter pbl, size:%lu, nsg:%u\n", + pbl->size, pbl->nsg); + + return pbl; + +err_free_pbl_buf: + vfree(pbl->buf); + +err_free_pbl: + kfree(pbl); + + return ERR_PTR(ret); +} + +static void erdma_destroy_scatter_pbl(struct erdma_dev *dev, + struct erdma_pbl *pbl) +{ + erdma_destroy_pbl_buf_sg(dev, pbl); + vfree(pbl->buf); + kfree(pbl); +} + +static void erdma_init_middle_pbl(struct erdma_pbl *pbl, + struct erdma_pbl *next_pbl) +{ + struct scatterlist *sg; + u32 idx = 0, i; + + for_each_sg(next_pbl->sglist, sg, next_pbl->nsg, i) + pbl->buf[idx++] = sg_dma_address(sg); +} + +static struct erdma_pbl *erdma_create_pbl(struct erdma_dev *dev, size_t size, + bool force_continuous) +{ + struct erdma_pbl *pbl, *tmp_pbl; + int ret, level = 0; + + ibdev_dbg(&dev->ibdev, "create_pbl, size:%lu, force cont:%d\n", size, + force_continuous); + + if (!(dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_MTT_VA)) + force_continuous = true; + + if (force_continuous) + return erdma_create_cont_pbl(dev, size); + + pbl = erdma_create_scatter_pbl(dev, size); + if (IS_ERR(pbl)) + return pbl; + level = 1; + + /* convergence the pbl table. */ + while (pbl->nsg != 1 && level <= 3) { + tmp_pbl = erdma_create_scatter_pbl(dev, MTT_SIZE(pbl->nsg)); + if (IS_ERR(tmp_pbl)) { + ret = PTR_ERR(tmp_pbl); + goto err_free_pbl; + } + erdma_init_middle_pbl(tmp_pbl, pbl); + tmp_pbl->low_level = pbl; + pbl = tmp_pbl; + level++; + } + + if (level > 3) { + ret = -ENOMEM; + goto err_free_pbl; + } + + pbl->level = level; + ibdev_dbg(&dev->ibdev, "top pbl: level:%d, dma_addr 0x%llx\n", + pbl->level, pbl->sglist[0].dma_address); + + return pbl; + +err_free_pbl: + while (pbl) { + tmp_pbl = pbl->low_level; + erdma_destroy_scatter_pbl(dev, pbl); + pbl = tmp_pbl; + } + + return ERR_PTR(ret); +} + +static void fill_mtt_entries_with_sgl(struct scatterlist *sgl, u64 *page_list, + u64 nents) +{ + u32 i, entry, chunk_pages, idx = 0; + u64 pg_addr; + struct scatterlist *sg; + + for_each_sg(sgl, sg, nents, entry) { + chunk_pages = sg_dma_len(sg) >> PAGE_SHIFT; + for (i = 0; i < chunk_pages; i++) { + pg_addr = sg_dma_address(sg) + (i << PAGE_SHIFT); + + if ((entry + i) == 0) + page_list[idx] = pg_addr & PAGE_MASK; + else if (!(pg_addr & ~PAGE_MASK)) + page_list[idx] = pg_addr; + else + continue; + idx++; + } + } +} + +static void erdma_init_pbl_leaf(struct erdma_mem *mem, struct erdma_pbl *pbl) +{ + u64 *page_list = pbl->buf; + bool is_user = !mem->type; + u32 idx = 0; + struct ib_block_iter biter; + + if (is_user && mem->umem) { + rdma_umem_for_each_dma_block(mem->umem, &biter, mem->page_size) + page_list[idx++] = rdma_block_iter_dma_address(&biter); + return; + } + + fill_mtt_entries_with_sgl(is_user ? mem->umem->sg_head.sgl : mem->kmem->sgl, + page_list, is_user ? 
mem->umem->nmap : mem->kmem->nmap); +} + +static void erdma_init_bottom_pbl(struct erdma_dev *dev, struct erdma_mem *mem) +{ + struct erdma_pbl *pbl = mem->pbl; + + while (pbl->low_level) + pbl = pbl->low_level; + + erdma_init_pbl_leaf(mem, pbl); +} + +static void erdma_destroy_pbl(struct erdma_dev *dev, struct erdma_pbl *pbl) +{ + struct erdma_pbl *tmp_pbl; + + if (pbl->continuous) { + dma_unmap_single(&dev->pdev->dev, pbl->buf_dma, pbl->size, + DMA_TO_DEVICE); + kfree(pbl->buf); + kfree(pbl); + } else { + while (pbl) { + tmp_pbl = pbl->low_level; + erdma_destroy_scatter_pbl(dev, pbl); + pbl = tmp_pbl; + } + } +} + +static void erdma_mem_free(struct erdma_dev *dev, struct erdma_mem *mem) +{ + switch (mem->type) { + case ERDMA_UMEM: + if (mem->umem) { + ib_umem_release(mem->umem); + mem->umem = NULL; + } + break; + case ERDMA_KMEM: + if (mem->kmem) { + if (mem->kmem->sgl) { + dma_unmap_sg(&dev->pdev->dev, mem->kmem->sgl, + mem->kmem->nmap, DMA_TO_DEVICE); + vfree(mem->kmem->sgl); + mem->kmem->sgl = NULL; + } + kfree(mem->kmem); + mem->kmem = NULL; + } + break; + default: + break; + } +} + +static u32 range_num_blocks(u64 start, u64 len, u64 blk_sz) +{ + return (ALIGN(start + len, blk_sz) - ALIGN_DOWN(start, blk_sz)) / blk_sz; +} + +static int get_mtt_entries(void *data, struct erdma_ucontext *ctx, + struct erdma_mem *mem, u64 start, u64 len, + int access, u64 virt, unsigned long req_page_size, + bool is_mr) +{ + int ret; + bool is_user = ctx ? true : false; + struct erdma_dev *dev = is_user ? to_edev(ctx->ibucontext.device) : + (struct erdma_dev *)data; + + if (is_user) { + mem->type = ERDMA_UMEM; + mem->umem = ib_umem_get(&dev->ibdev, start, len, access); + if (IS_ERR(mem->umem)) { + ret = PTR_ERR(mem->umem); + mem->umem = NULL; + return ret; + } + } else { + mem->type = ERDMA_KMEM; + mem->kmem = kzalloc(sizeof(struct erdma_kmem), GFP_KERNEL); + if (!mem->kmem) + return -ENOMEM; + + mem->kmem->nmap = vmalloc_to_sgl(dev, &mem->kmem->sgl, (void *)start, len); + if (!mem->kmem->nmap) { + kfree(mem->kmem); + mem->kmem = NULL; + return -ENOMEM; + } + } + + mem->va = virt; + mem->len = len; + mem->page_size = is_user ? ib_umem_find_best_pgsz(mem->umem, req_page_size, virt) : + PAGE_SIZE; + mem->page_offset = start & (mem->page_size - 1); + mem->mtt_nents = is_user ? 
ib_umem_num_dma_blocks(mem->umem, mem->page_size) : + range_num_blocks(mem->va, mem->len, mem->page_size); + mem->page_cnt = mem->mtt_nents; + + ibdev_dbg(&dev->ibdev, "page_size:%u, page_offset:%u, mtt_nents:%u\n", + mem->page_size, mem->page_offset, mem->page_cnt); + + mem->pbl = erdma_create_pbl(dev, MTT_SIZE(mem->page_cnt), !is_mr); + if (IS_ERR(mem->pbl)) { + ret = PTR_ERR(mem->pbl); + goto error_ret; + } + + erdma_init_bottom_pbl(dev, mem); + + return 0; + +error_ret: + erdma_mem_free(dev, mem); + + return ret; +} + +static void put_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem) +{ + if (mem->pbl) { + erdma_destroy_pbl(dev, mem->pbl); + mem->pbl = NULL; + } + + erdma_mem_free(dev, mem); +} + +static void free_kernel_qp(struct erdma_qp *qp) +{ + struct erdma_dev *dev = qp->dev; + + vfree(qp->kern_qp.swr_tbl); + vfree(qp->kern_qp.rwr_tbl); + + if (qp->kern_qp.sq_buf) { + put_mtt_entries(dev, &qp->kern_qp.sq_mtt); + vfree(qp->kern_qp.sq_buf); + qp->kern_qp.sq_buf = NULL; + } + + if (qp->kern_qp.rq_buf) { + put_mtt_entries(dev, &qp->kern_qp.rq_mtt); + vfree(qp->kern_qp.rq_buf); + qp->kern_qp.rq_buf = NULL; + } + + if (qp->kern_qp.sq_db_info) + dma_pool_free(dev->db_pool, qp->kern_qp.sq_db_info, + qp->kern_qp.sq_db_info_dma_addr); + + if (qp->kern_qp.rq_db_info) + dma_pool_free(dev->db_pool, qp->kern_qp.rq_db_info, + qp->kern_qp.rq_db_info_dma_addr); +} + static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp, struct ib_qp_init_attr *attrs) { @@ -558,18 +919,26 @@ static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp, if (!kqp->swr_tbl || !kqp->rwr_tbl) goto err_out; - kqp->sq_buf = dma_alloc_coherent(&dev->pdev->dev, - qp->attrs.sq_size << SQEBB_SHIFT, - &kqp->sq_buf_dma_addr, GFP_KERNEL); + kqp->sq_buf = vmalloc(qp->attrs.sq_size << SQEBB_SHIFT); if (!kqp->sq_buf) goto err_out; - kqp->rq_buf = dma_alloc_coherent(&dev->pdev->dev, - qp->attrs.rq_size << RQE_SHIFT, - &kqp->rq_buf_dma_addr, GFP_KERNEL); + ret = get_mtt_entries(dev, NULL, &kqp->sq_mtt, (u64)kqp->sq_buf, + qp->attrs.sq_size << SQEBB_SHIFT, 0, + (u64)kqp->sq_buf, 0, false); + if (ret) + goto err_out; + + kqp->rq_buf = vmalloc(qp->attrs.rq_size << RQE_SHIFT); if (!kqp->rq_buf) goto err_out; + ret = get_mtt_entries(dev, NULL, &kqp->rq_mtt, (u64)kqp->rq_buf, + qp->attrs.rq_size << RQE_SHIFT, 0, (u64)kqp->rq_buf, + 0, false); + if (ret) + goto err_out; + kqp->sq_db_info = dma_pool_alloc(dev->db_pool, GFP_KERNEL, &kqp->sq_db_info_dma_addr); if (!kqp->sq_db_info) @@ -596,8 +965,10 @@ static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp, goto err_out; } qp->attrs.connect_without_cm = true; - qp->attrs.sip = ntohl(param->sk_addr.saddr_v4); - qp->attrs.dip = ntohl(param->sk_addr.daddr_v4); + ((struct sockaddr_in *)&qp->attrs.raddr)->sin_family = AF_INET; + ((struct sockaddr_in *)&qp->attrs.laddr)->sin_family = AF_INET; + qp->attrs.raddr.in.sin_addr.s_addr = param->sk_addr.daddr_v4; + qp->attrs.laddr.in.sin_addr.s_addr = param->sk_addr.saddr_v4; qp->attrs.dport = ntohs(param->sk_addr.dport); qp->attrs.sport = param->sk_addr.sport; } @@ -611,86 +982,6 @@ static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp, return ret; } -static int get_mtt_entries(struct ib_udata *udata, struct erdma_ucontext *ctx, - struct erdma_mem *mem, u64 start, u64 len, - int access, u64 virt, unsigned long req_page_size, - u8 force_indirect_mtt, bool is_mr) -{ - struct erdma_dev *dev = to_edev(ctx->ibucontext.device); - struct ib_block_iter biter; - uint64_t *phy_addr = NULL; - int ret = 0; - 
- mem->umem = ib_umem_get(&dev->ibdev, start, len, access); - if (IS_ERR(mem->umem)) { - ret = PTR_ERR(mem->umem); - mem->umem = NULL; - return ret; - } - - mem->va = virt; - mem->len = len; - mem->page_size = ib_umem_find_best_pgsz(mem->umem, req_page_size, virt); - mem->page_offset = start & (mem->page_size - 1); - mem->mtt_nents = ib_umem_num_dma_blocks(mem->umem, mem->page_size); - mem->page_cnt = mem->mtt_nents; - - if (mem->page_cnt > ERDMA_MAX_INLINE_MTT_ENTRIES || - force_indirect_mtt) { - mem->mtt_type = ERDMA_MR_INDIRECT_MTT; - mem->mtt_buf = - alloc_pages_exact(MTT_SIZE(mem->page_cnt), GFP_KERNEL); - if (!mem->mtt_buf) { - ret = -ENOMEM; - goto error_ret; - } - phy_addr = mem->mtt_buf; - } else { - mem->mtt_type = ERDMA_MR_INLINE_MTT; - phy_addr = mem->mtt_entry; - } - - rdma_umem_for_each_dma_block(mem->umem, &biter, mem->page_size) { - *phy_addr = rdma_block_iter_dma_address(&biter); - phy_addr++; - } - if (mem->mtt_type == ERDMA_MR_INDIRECT_MTT) { - mem->mtt_entry[0] = - dma_map_single(&dev->pdev->dev, mem->mtt_buf, - MTT_SIZE(mem->page_cnt), DMA_TO_DEVICE); - if (dma_mapping_error(&dev->pdev->dev, mem->mtt_entry[0])) { - free_pages_exact(mem->mtt_buf, MTT_SIZE(mem->page_cnt)); - mem->mtt_buf = NULL; - ret = -ENOMEM; - goto error_ret; - } - } - - return 0; - -error_ret: - if (mem->umem) { - ib_umem_release(mem->umem); - mem->umem = NULL; - } - - return ret; -} - -static void put_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem) -{ - if (mem->mtt_buf) { - dma_unmap_single(&dev->pdev->dev, mem->mtt_entry[0], - MTT_SIZE(mem->page_cnt), DMA_TO_DEVICE); - free_pages_exact(mem->mtt_buf, MTT_SIZE(mem->page_cnt)); - } - - if (mem->umem) { - ib_umem_release(mem->umem); - mem->umem = NULL; - } -} - static int erdma_map_user_dbrecords(struct ib_udata *udata, struct erdma_ucontext *uctx, u64 dbrecords_va, @@ -768,7 +1059,7 @@ static int init_user_qp(struct erdma_qp *qp, struct ib_udata *udata, ret = get_mtt_entries(udata, uctx, &qp->user_qp.sq_mtt, va, qp->attrs.sq_size << SQEBB_SHIFT, 0, va, - (SZ_1M - SZ_4K), 0, false); + (SZ_1M - SZ_4K), false); if (ret) return ret; @@ -777,7 +1068,7 @@ static int init_user_qp(struct erdma_qp *qp, struct ib_udata *udata, ret = get_mtt_entries(udata, uctx, &qp->user_qp.rq_mtt, va + rq_offset, qp->attrs.rq_size << RQE_SHIFT, 0, va + rq_offset, - (SZ_1M - SZ_4K), 0, false); + (SZ_1M - SZ_4K), false); if (ret) goto put_sq_mtt; @@ -816,6 +1107,7 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, struct erdma_qp *qp = to_eqp(ibqp); struct erdma_ureq_create_qp ureq; struct erdma_ucontext *uctx; + u32 next_idx; int ret; uctx = rdma_udata_to_drv_context(udata, struct erdma_ucontext, @@ -840,6 +1132,10 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, kref_init(&qp->ref); init_completion(&qp->safe_free); + if (rand_qpn) { + get_random_bytes(&next_idx, sizeof(u32)); + dev->next_alloc_qpn = next_idx % dev->attrs.max_qp; + } ret = xa_alloc_cyclic(&dev->qp_xa, &qp->ibqp.qp_num, qp, XA_LIMIT(1, dev->attrs.max_qp - 1), &dev->next_alloc_qpn, GFP_KERNEL); @@ -1038,35 +1334,23 @@ struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, mr->mem.page_size = PAGE_SIZE; /* update it later. 
*/ mr->mem.page_cnt = max_num_sg; - mr->mem.mtt_type = ERDMA_MR_INDIRECT_MTT; - mr->mem.mtt_buf = - alloc_pages_exact(MTT_SIZE(mr->mem.page_cnt), GFP_KERNEL); - if (!mr->mem.mtt_buf) { - ret = -ENOMEM; - goto out_remove_stag; - } - mr->mem.mtt_entry[0] = - dma_map_single(&dev->pdev->dev, mr->mem.mtt_buf, - MTT_SIZE(mr->mem.page_cnt), DMA_TO_DEVICE); - if (dma_mapping_error(&dev->pdev->dev, mr->mem.mtt_entry[0])) { - ret = -ENOMEM; - goto out_free_mtt; + mr->mem.pbl = erdma_create_pbl(dev, MTT_SIZE(max_num_sg), true); + if (IS_ERR(mr->mem.pbl)) { + ret = PTR_ERR(mr->mem.pbl); + goto out_remove_stag; } ret = regmr_cmd(dev, mr); if (ret) { ret = -EIO; - goto out_dma_unmap; + goto out_destroy_pbl; } return &mr->ibmr; -out_dma_unmap: - dma_unmap_single(&dev->pdev->dev, mr->mem.mtt_entry[0], - MTT_SIZE(mr->mem.page_cnt), DMA_TO_DEVICE); -out_free_mtt: - free_pages_exact(mr->mem.mtt_buf, MTT_SIZE(mr->mem.page_cnt)); +out_destroy_pbl: + erdma_destroy_pbl(dev, mr->mem.pbl); out_remove_stag: erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX], @@ -1087,7 +1371,7 @@ static int erdma_set_page(struct ib_mr *ibmr, u64 addr) if (mr->mem.mtt_nents >= mr->mem.page_cnt) return -1; - *((u64 *)mr->mem.mtt_buf + mr->mem.mtt_nents) = addr; + mr->mem.pbl->buf[mr->mem.mtt_nents] = addr; mr->mem.mtt_nents++; return 0; @@ -1119,6 +1403,10 @@ struct ib_mr *erdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, ERDMA_INC_CNT(dev, CMD_REG_USR_MR); + ibdev_dbg(&dev->ibdev, + "start:0x%llx, len:%llu, virt:0x%llx, access:0x%x\n", start, + len, virt, access); + if (!len || len > dev->attrs.max_mr_size) { ibdev_err(&dev->ibdev, "ERROR: Out of mr size: %llu, max %llu\n", len, @@ -1132,7 +1420,7 @@ struct ib_mr *erdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, return ERR_PTR(-ENOMEM); ret = get_mtt_entries(udata, uctx, &mr->mem, start, len, access, virt, - SZ_2G - SZ_4K, 0, true); + SZ_2G - SZ_4K, true); if (ret) goto err_out_free; @@ -1145,6 +1433,8 @@ struct ib_mr *erdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, mr->mem.va = virt; mr->mem.len = len; mr->access = ERDMA_MR_ACC_LR | to_erdma_access_flags(access); + if (compat_mode) + mr->access = mr->access | ERDMA_MR_ACC_RW; mr->valid = 1; mr->type = ERDMA_MR_TYPE_NORMAL; @@ -1188,7 +1478,8 @@ int erdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); if (ret) { ERDMA_INC_CNT(dev, CMD_DEREG_MR_FAILED); - dev_err(&dev->pdev->dev, + dev_err_ratelimited( + &dev->pdev->dev, "ERROR: err code = %d, cmd of dereg mr failed.\n", ret); return ret; } @@ -1201,6 +1492,21 @@ int erdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) return 0; } +static void free_kernel_cq(struct erdma_dev *dev, struct erdma_kcq_info *kcq) +{ + if (kcq->qbuf) { + put_mtt_entries(dev, &kcq->qbuf_mtt); + vfree(kcq->qbuf); + kcq->qbuf = NULL; + } + + if (kcq->db_record) { + dma_pool_free(dev->db_pool, kcq->db_record, + kcq->db_info_dma_addr); + kcq->db_record = NULL; + } +} + int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct erdma_cq *cq = to_ecq(ibcq); @@ -1210,6 +1516,7 @@ int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) int err; struct erdma_cmdq_destroy_cq_req req; + ERDMA_INC_CNT(dev, CMD_DESTROY_CQ); hrtimer_cancel(&cq->dim.timer); @@ -1220,16 +1527,15 @@ int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); if (err) { - dev_err(&dev->pdev->dev, + dev_err_ratelimited( + 
&dev->pdev->dev, "ERROR: err code = %d, cmd of destroy cq failed.\n", err); ERDMA_INC_CNT(dev, CMD_DESTROY_CQ_FAILED); return err; } if (rdma_is_kernel_res(&cq->ibcq.res)) { - dma_free_coherent(&dev->pdev->dev, - WARPPED_BUFSIZE(cq->depth << CQE_SHIFT), - cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr); + free_kernel_cq(dev, &cq->kern_cq); } else { erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page); put_mtt_entries(dev, &cq->user_cq.qbuf_mtt); @@ -1310,8 +1616,6 @@ int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) struct erdma_cmdq_destroy_qp_req req; unsigned long flags; - ERDMA_INC_CNT(dev, CMD_DESTROY_QP); - if (rdma_is_kernel_res(&qp->ibqp.res)) { local_irq_save(flags); erdma_ib_lock_cqs(qp->scq, qp->rcq); @@ -1320,6 +1624,9 @@ int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) local_irq_restore(flags); } + + ERDMA_INC_CNT(dev, CMD_DESTROY_QP); + down_write(&qp->state_lock); qp_attrs.state = ERDMA_QP_STATE_ERROR; erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE); @@ -1333,7 +1640,8 @@ int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); if (err) { - dev_err(&dev->pdev->dev, + dev_err_ratelimited( + &dev->pdev->dev, "ERROR: err code = %d, cmd of destroy qp failed.\n", err); ERDMA_INC_CNT(dev, CMD_DESTROY_QP_FAILED); @@ -1481,6 +1789,7 @@ int erdma_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *udata) INIT_LIST_HEAD(&ctx->dbrecords_page_list); mutex_init(&ctx->dbrecords_page_mutex); + alloc_db_resources(dev, ctx); ctx->rdb = dev->func_bar_addr + ERDMA_BAR_RQDB_SPACE_OFFSET; @@ -1531,6 +1840,7 @@ int erdma_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *udata) return ret; } + void erdma_dealloc_ucontext(struct ib_ucontext *ibctx) { struct erdma_ucontext *ctx = to_ectx(ibctx); @@ -1564,60 +1874,20 @@ static int ib_qp_state_to_erdma_qp_state[IB_QPS_ERR + 1] = { int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { - struct erdma_qp_attrs new_attrs; enum erdma_qp_attr_mask erdma_attr_mask = 0; struct erdma_qp *qp = to_eqp(ibqp); + struct erdma_qp_attrs new_attrs; int ret = 0; - struct rdma_ah_attr *ah_attr; - const struct ib_gid_attr *sgid_attr; + if (attr_mask & IB_QP_OOB_CONN_ATTR) { - ret = update_kernel_qp_oob_attr(qp); + ret = update_kernel_qp_oob_attr(qp, attr, attr_mask); if (ret) return ret; } - if (compat_mode) { - dprint(DBG_QP, "attr mask: %x, av: %d, state:%d\n", attr_mask, - attr_mask & IB_QP_AV, attr_mask & IB_QP_STATE); - - if (attr_mask & IB_QP_AV) { - ah_attr = &attr->ah_attr; - sgid_attr = ah_attr->grh.sgid_attr; - - dprint(DBG_QP, "attr_type:%d\n", ah_attr->type); - dprint(DBG_QP, "gid type:%u, sgid: %pI6\n", - rdma_gid_attr_network_type(sgid_attr), - sgid_attr->gid.raw); - dprint(DBG_QP, "dgid: %pI6\n", - rdma_ah_read_grh(ah_attr)->dgid.raw); - - rdma_gid2ip((struct sockaddr *)&qp->attrs.laddr, - &sgid_attr->gid); - rdma_gid2ip((struct sockaddr *)&qp->attrs.raddr, - &rdma_ah_read_grh(ah_attr)->dgid); - dprint(DBG_QP, "laddr:0x%x\n", - ntohl(qp->attrs.laddr.in.sin_addr.s_addr)); - dprint(DBG_QP, "raddr:0x%x\n", - ntohl(qp->attrs.raddr.in.sin_addr.s_addr)); - } - - if (attr_mask & IB_QP_DEST_QPN) { - dprint(DBG_QP, "get remote qpn %u\n", - attr->dest_qp_num); - qp->attrs.remote_qp_num = attr->dest_qp_num; - } - - if (attr_mask & IB_QP_SQ_PSN) { - dprint(DBG_QP, "get sqsn:%u\n", attr->sq_psn); - qp->attrs.sq_psn = attr->sq_psn; - } - - if (attr_mask & IB_QP_RQ_PSN) { - 
dprint(DBG_QP, "get rqsn:%u\n", attr->rq_psn); - qp->attrs.rq_psn = attr->rq_psn; - } - } + if (compat_mode) + erdma_handle_compat_attr(qp, attr, attr_mask); memset(&new_attrs, 0, sizeof(new_attrs)); @@ -1704,7 +1974,7 @@ static int erdma_init_user_cq(struct ib_udata *udata, ret = get_mtt_entries(udata, uctx, &cq->user_cq.qbuf_mtt, ureq->qbuf_va, ureq->qbuf_len, 0, ureq->qbuf_va, SZ_64M - SZ_4K, - 1, false); + false); if (ret) return ret; @@ -1719,22 +1989,41 @@ static int erdma_init_user_cq(struct ib_udata *udata, static int erdma_init_kernel_cq(struct erdma_cq *cq) { + int ret; + u64 cq_sz = cq->depth << CQE_SHIFT; struct erdma_dev *dev = to_edev(cq->ibcq.device); - cq->kern_cq.qbuf = - dma_alloc_coherent(&dev->pdev->dev, - WARPPED_BUFSIZE(cq->depth << CQE_SHIFT), - &cq->kern_cq.qbuf_dma_addr, GFP_KERNEL); - if (!cq->kern_cq.qbuf) - return -ENOMEM; + cq->kern_cq.qbuf = vzalloc(cq_sz); + if (!cq->kern_cq.qbuf) { + ret = -ENOMEM; + goto err_out; + } + + ret = get_mtt_entries(dev, NULL, &cq->kern_cq.qbuf_mtt, (u64)cq->kern_cq.qbuf, + cq_sz, 0, (u64)cq->kern_cq.qbuf, 0, false); + if (ret) + goto err_free_qbuf; + + cq->kern_cq.db_record = dma_pool_alloc(dev->db_pool, GFP_KERNEL, + &cq->kern_cq.db_info_dma_addr); + if (!cq->kern_cq.db_record) { + ret = -ENOMEM; + goto err_free_mtt; + } - cq->kern_cq.db_record = - (u64 *)(cq->kern_cq.qbuf + (cq->depth << CQE_SHIFT)); spin_lock_init(&cq->kern_cq.lock); /* use default cqdb addr */ cq->kern_cq.db = dev->func_bar + ERDMA_BAR_CQDB_SPACE_OFFSET; return 0; + +err_free_mtt: + put_mtt_entries(dev, &cq->kern_cq.qbuf_mtt); +err_free_qbuf: + vfree(cq->kern_cq.qbuf); + cq->kern_cq.qbuf = NULL; +err_out: + return ret; } int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, @@ -1810,9 +2099,7 @@ int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, erdma_unmap_user_dbrecords(uctx, &cq->user_cq.user_dbr_page); put_mtt_entries(dev, &cq->user_cq.qbuf_mtt); } else { - dma_free_coherent(&dev->pdev->dev, - WARPPED_BUFSIZE(depth << CQE_SHIFT), - cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr); + free_kernel_cq(dev, &cq->kern_cq); } err_out_xa: @@ -1821,6 +2108,7 @@ int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, return ret; } + struct net_device *erdma_get_netdev(struct ib_device *device, port_t port_num) { struct erdma_dev *edev = to_edev(device); @@ -1846,6 +2134,25 @@ void erdma_set_mtu(struct erdma_dev *dev, u32 mtu) erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); } +int erdma_set_retrans_num(struct erdma_dev *dev, u32 retrans_num) +{ + struct erdma_cmdq_set_retrans_num_req req; + int ret; + + if (retrans_num == 0 || retrans_num > 0xffUL) + return -EINVAL; + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON, + CMDQ_OPCODE_SET_RETRANS_NUM); + req.retrans_num = retrans_num; + + ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + if (!ret) + dev->attrs.retrans_num = retrans_num; + + return ret; +} + void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason) { struct ib_event event; @@ -1857,11 +2164,6 @@ void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason) ib_dispatch_event(&event); } -void erdma_destroy_ah(struct ib_ah *ibah, u32 flags) -{ - return; -} - int erdma_query_hw_stats(struct erdma_dev *dev) { struct erdma_cmdq_query_stats_resp *stats; @@ -1899,3 +2201,10 @@ int erdma_query_hw_stats(struct erdma_dev *dev) return err; } + +const struct cpumask *erdma_get_vector_affinity(struct ib_device *ibdev, int comp_vector) +{ + 
struct erdma_dev *dev = to_edev(ibdev); + + return &dev->ceqs[comp_vector].irq.affinity_hint_mask; +} diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h index 87a8c652a422..e91c53e36a1f 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.h +++ b/drivers/infiniband/hw/erdma/erdma_verbs.h @@ -20,6 +20,7 @@ #define ERDMA_MAX_RECV_SGE 1 #define ERDMA_MAX_INLINE (sizeof(struct erdma_sge) * (ERDMA_MAX_SEND_SGE)) #define ERDMA_MAX_FRMR_PA 512 +#define ERDMA_DEFAULT_RETRANS_NUM 24 enum { ERDMA_MMAP_IO_NC = 0, /* no cache */ @@ -59,7 +60,7 @@ struct erdma_pd { * MemoryRegion definition. */ #define ERDMA_MAX_INLINE_MTT_ENTRIES 4 -#define MTT_SIZE(mtt_cnt) (mtt_cnt << 3) /* per mtt takes 8 Bytes. */ +#define MTT_SIZE(mtt_cnt) ((mtt_cnt) << 3) /* per mtt takes 8 Bytes. */ #define ERDMA_MR_MAX_MTT_CNT 524288 #define ERDMA_MTT_ENTRY_SIZE 8 @@ -84,19 +85,48 @@ static inline u8 to_erdma_access_flags(int access) (access & IB_ACCESS_REMOTE_ATOMIC ? ERDMA_MR_ACC_RA : 0); } +struct erdma_pbl { + u64 *buf; + size_t size; + + bool continuous; + union { + dma_addr_t buf_dma; + struct { + struct scatterlist *sglist; + u32 nsg; + u32 level; + }; + }; + + struct erdma_pbl *low_level; +}; + +enum erdma_mem_type { + ERDMA_UMEM = 0, + ERDMA_KMEM = 1, +}; + +struct erdma_kmem { + struct scatterlist *sgl; + u64 nmap; +}; + struct erdma_mem { - struct ib_umem *umem; - void *mtt_buf; - u32 mtt_type; + enum erdma_mem_type type; + union { + struct ib_umem *umem; + struct erdma_kmem *kmem; + }; u32 page_size; u32 page_offset; u32 page_cnt; u32 mtt_nents; + struct erdma_pbl *pbl; + u64 va; u64 len; - - u64 mtt_entry[ERDMA_MAX_INLINE_MTT_ENTRIES]; }; struct erdma_mr { @@ -133,7 +163,6 @@ struct erdma_kqp { u64 *swr_tbl; void *hw_sq_db; void *sq_buf; - dma_addr_t sq_buf_dma_addr; void *sq_db_info; spinlock_t rq_lock ____cacheline_aligned; @@ -142,9 +171,11 @@ struct erdma_kqp { u64 *rwr_tbl; void *hw_rq_db; void *rq_buf; - dma_addr_t rq_buf_dma_addr; void *rq_db_info; + struct erdma_mem sq_mtt; + struct erdma_mem rq_mtt; + dma_addr_t sq_db_info_dma_addr; dma_addr_t rq_db_info_dma_addr; @@ -162,11 +193,6 @@ enum erdma_qp_state { ERDMA_QP_STATE_COUNT = 8 }; -enum erdma_qp_flags { - ERDMA_QP_IN_DESTROY = (1 << 0), - ERDMA_QP_IN_FLUSHING = (1 << 1), -}; - enum erdma_qp_attr_mask { ERDMA_QP_ATTR_STATE = (1 << 0), ERDMA_QP_ATTR_LLP_HANDLE = (1 << 2), @@ -177,6 +203,11 @@ enum erdma_qp_attr_mask { ERDMA_QP_ATTR_MPA = (1 << 7) }; +enum erdma_qp_flags { + ERDMA_QP_IN_DESTROY = (1 << 0), + ERDMA_QP_IN_FLUSHING = (1 << 1), +}; + struct erdma_qp_attrs { enum erdma_qp_state state; enum erdma_cc_alg cc; /* Congestion control algorithm */ @@ -195,8 +226,6 @@ struct erdma_qp_attrs { u8 qp_type; u8 pd_len; bool connect_without_cm; - __u32 sip; - __u32 dip; __u16 sport; __u16 dport; union { @@ -216,6 +245,9 @@ struct erdma_qp { struct erdma_cep *cep; struct rw_semaphore state_lock; + unsigned long flags; + struct delayed_work reflush_dwork; + union { struct erdma_kqp kern_qp; struct erdma_uqp user_qp; @@ -225,14 +257,13 @@ struct erdma_qp { struct erdma_cq *rcq; struct erdma_qp_attrs attrs; - unsigned long flags; - struct delayed_work reflush_dwork; }; struct erdma_kcq_info { void *qbuf; - dma_addr_t qbuf_dma_addr; + struct erdma_mem qbuf_mtt; + dma_addr_t db_info_dma_addr; u32 ci; u32 cmdsn; u32 notify_cnt; @@ -265,6 +296,7 @@ struct erdma_cq { struct erdma_kcq_info kern_cq; struct erdma_ucq_info user_cq; }; + struct erdma_dim dim; }; @@ -365,16 +397,18 @@ int erdma_map_mr_sg(struct ib_mr *ibmr, 
struct scatterlist *sg, int sg_nents, void erdma_disassociate_ucontext(struct ib_ucontext *ibcontext); void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason); void erdma_set_mtu(struct erdma_dev *dev, u32 mtu); +int erdma_set_retrans_num(struct erdma_dev *dev, u32 retrans_num); struct net_device *erdma_get_netdev(struct ib_device *device, port_t port_num); enum rdma_link_layer erdma_get_link_layer(struct ib_device *dev, port_t port_num); int erdma_query_pkey(struct ib_device *ibdev, port_t port, u16 index, u16 *pkey); - -void erdma_destroy_ah(struct ib_ah *ibah, u32 flags); int erdma_modify_cq(struct ib_cq *ibcq, u16 cq_count, u16 cq_period); int erdma_query_hw_stats(struct erdma_dev *dev); +const struct cpumask *erdma_get_vector_affinity(struct ib_device *ibdev, int comp_vector); + +#include "erdma_compat.h" #endif diff --git a/drivers/infiniband/hw/erdma/kcompat.h b/drivers/infiniband/hw/erdma/kcompat.h new file mode 100644 index 000000000000..70dae74fb2d2 --- /dev/null +++ b/drivers/infiniband/hw/erdma/kcompat.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ + +/* Authors: Cheng Xu */ +/* Kai Shen */ +/* Copyright (c) 2020-2022, Alibaba Group. */ + +/* + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. + */ + +#ifndef __KCOMPAT_H__ +#define __KCOMPAT_H__ + +#include +#include +#include + +#define ERDMA_MAJOR_VER 0 +#define ERDMA_MEDIUM_VER 2 +#define ERDMA_MINOR_VER 36 + +#include +#ifndef RDMA_DRIVER_ERDMA +#define RDMA_DRIVER_ERDMA 19 +#endif + +#ifndef upper_16_bits +#define upper_16_bits(n) ((u16)((n) >> 16)) +#define lower_16_bits(n) ((u16)((n) & 0xffff)) +#endif + +typedef u8 port_t; + +#include +#include +#include +#include + +#endif -- Gitee