diff --git a/Documentation/networking/smc-sysctl.rst b/Documentation/networking/smc-sysctl.rst index a93857e580b0d56222d46cb70a412714fc41f3ff..2c4b5c2181f785bb8390e222c154828d1807d085 100644 --- a/Documentation/networking/smc-sysctl.rst +++ b/Documentation/networking/smc-sysctl.rst @@ -34,3 +34,28 @@ smcr_buf_type - INTEGER - 1 - Use virtually contiguous buffers - 2 - Mixed use of the two types. Try physically contiguous buffers first. If not available, use virtually contiguous buffers then. + +smcr_testlink_time - INTEGER + How frequently SMC-R link sends out TEST_LINK LLC messages to confirm + viability, after the last activity of connections on it. Value 0 means + disabling TEST_LINK. + + Default: 30 seconds. + +wmem - INTEGER + Initial size of send buffer used by SMC sockets. + The default value inherits from net.ipv4.tcp_wmem[1]. + + The minimum value is 16KiB and there is no hard limit for max value, but + only allowed 512KiB for SMC-R and 1MiB for SMC-D. + + Default: 16K + +rmem - INTEGER + Initial size of receive buffer (RMB) used by SMC sockets. + The default value inherits from net.ipv4.tcp_rmem[1]. + + The minimum value is 16KiB and there is no hard limit for max value, but + only allowed 512KiB for SMC-R and 1MiB for SMC-D. + + Default: 128K diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c index 26cc943d2034046e2ef8e956a53dbe46c0228b8d..1adb00ca0a0a4c005a7f3cbe89630bff16440016 100644 --- a/drivers/s390/net/ism_drv.c +++ b/drivers/s390/net/ism_drv.c @@ -409,20 +409,19 @@ static void ism_create_system_eid(void) memcpy(&SYSTEM_EID.type, tmp, 4); } -static void ism_get_system_eid(struct smcd_dev *smcd, u8 **eid) +static u8 *ism_get_system_eid(void) { - *eid = &SYSTEM_EID.seid_string[0]; + return SYSTEM_EID.seid_string; } static u16 ism_get_chid(struct smcd_dev *smcd) { - struct ism_dev *ismdev; + struct ism_dev *ism = (struct ism_dev *)smcd->priv; - ismdev = (struct ism_dev *)smcd->priv; - if (!ismdev || !ismdev->pdev) + if (!ism || !ism->pdev) return 0; - return to_zpci(ismdev->pdev)->pchid; + return to_zpci(ism->pdev)->pchid; } static void ism_handle_event(struct ism_dev *ism) @@ -444,6 +443,7 @@ static irqreturn_t ism_handle_irq(int irq, void *data) struct ism_dev *ism = data; unsigned long bit, end; unsigned long *bv; + u16 dmbemask; bv = (void *) &ism->sba->dmb_bits[ISM_DMB_WORD_OFFSET]; end = sizeof(ism->sba->dmb_bits) * BITS_PER_BYTE - ISM_DMB_BIT_OFFSET; @@ -457,9 +457,10 @@ static irqreturn_t ism_handle_irq(int irq, void *data) break; clear_bit_inv(bit, bv); + dmbemask = ism->sba->dmbe_mask[bit + ISM_DMB_BIT_OFFSET]; ism->sba->dmbe_mask[bit + ISM_DMB_BIT_OFFSET] = 0; barrier(); - smcd_handle_irq(ism->smcd, bit + ISM_DMB_BIT_OFFSET); + smcd_handle_irq(ism->smcd, bit + ISM_DMB_BIT_OFFSET, dmbemask); } if (ism->sba->e) { diff --git a/include/net/netns/smc.h b/include/net/netns/smc.h index e1253a8e0233ff1fe1a922e1e24ae3d111f80e8b..647233c0f423c7999725fd29d049263aa7b562b9 100644 --- a/include/net/netns/smc.h +++ b/include/net/netns/smc.h @@ -26,8 +26,9 @@ struct netns_smc { #endif unsigned int sysctl_autocorking_size; unsigned int sysctl_smcr_buf_type; - int sysctl_wmem_default; - int sysctl_rmem_default; + int sysctl_smcr_testlink_time; + int sysctl_wmem; + int sysctl_rmem; int sysctl_tcp2smc; int sysctl_allow_different_subnet; int sysctl_disable_multiple_link; diff --git a/include/net/smc.h b/include/net/smc.h index 743b4fe74346659b4ecabc731a935d9ee9ad54d3..ed406e50d50ea7d3f402fa0b93060baea202561e 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -68,7 +68,7 @@ struct smcd_ops { int (*move_data)(struct smcd_dev *dev, u64 dmb_tok, unsigned int idx, bool sf, unsigned int offset, void *data, unsigned int size); - void (*get_system_eid)(struct smcd_dev *dev, u8 **eid); + u8* (*get_system_eid)(void); u16 (*get_chid)(struct smcd_dev *dev); }; @@ -97,5 +97,5 @@ int smcd_register_dev(struct smcd_dev *smcd); void smcd_unregister_dev(struct smcd_dev *smcd); void smcd_free_dev(struct smcd_dev *smcd); void smcd_handle_event(struct smcd_dev *dev, struct smcd_event *event); -void smcd_handle_irq(struct smcd_dev *dev, unsigned int bit); +void smcd_handle_irq(struct smcd_dev *dev, unsigned int bit, u16 dmbemask); #endif /* _SMC_H */ diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index bcab1a9102b4c82c3078ad69e04ea8f9e25fc720..dcbaf94bb4955feb3d18cf41c95b32d1dcfb1819 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include @@ -419,8 +418,8 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock, sk->sk_state = SMC_INIT; sk->sk_destruct = smc_destruct; sk->sk_protocol = protocol; - sk->sk_sndbuf = net->smc.sysctl_wmem_default; - sk->sk_rcvbuf = net->smc.sysctl_rmem_default; + WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(net->smc.sysctl_wmem)); + WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(net->smc.sysctl_rmem)); smc = smc_sk(sk); smc->keep_clcsock = false; for (i = 0; i < SMC_MAX_TCP_LISTEN_WORKS; i++) { @@ -483,6 +482,8 @@ static int smc_bind(struct socket *sock, struct sockaddr *uaddr, smc->clcsock->sk->sk_reuse = SK_CAN_REUSE; else smc->clcsock->sk->sk_reuse = sk->sk_reuse; + + smc->clcsock->sk->sk_reuseport = sk->sk_reuseport; rc = kernel_bind(smc->clcsock, uaddr, addr_len); out_rel: @@ -1968,7 +1969,6 @@ static void smc_listen_out_connected(struct smc_sock *new_smc) { struct sock *newsmcsk = &new_smc->sk; - sk_refcnt_debug_inc(newsmcsk); if (newsmcsk->sk_state == SMC_INIT) newsmcsk->sk_state = SMC_ACTIVE; @@ -3505,14 +3505,14 @@ static int __init smc_init(void) rc = register_pernet_subsys(&smc_net_stat_ops); if (rc) - return rc; + goto out_pernet_subsys; smc_ism_init(); smc_clc_init(); rc = smc_nl_init(); if (rc) - goto out_pernet_subsys; + goto out_pernet_subsys_stat; rc = smc_pnet_init(); if (rc) @@ -3624,6 +3624,8 @@ static int __init smc_init(void) smc_pnet_exit(); out_nl: smc_nl_exit(); +out_pernet_subsys_stat: + unregister_pernet_subsys(&smc_net_stat_ops); out_pernet_subsys: unregister_pernet_subsys(&smc_net_ops); @@ -3660,3 +3662,4 @@ MODULE_DESCRIPTION("smc socket address family"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_SMC); MODULE_ALIAS_TCP_ULP("smc"); +MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 5c985a86f186f841961f923a6a221b84e1fb7308..2086fabf016b9b13069d3b83022211fb77932039 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -924,7 +924,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) } memcpy(lgr->pnet_id, ibdev->pnetid[ibport - 1], SMC_MAX_PNETID_LEN); - if (smc_wr_alloc_lgr_mem(lgr)) + rc = smc_wr_alloc_lgr_mem(lgr); + if (rc) goto free_wq; smc_llc_lgr_init(lgr, smc); @@ -2310,7 +2311,7 @@ static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr, static int smcr_buf_map_usable_links(struct smc_link_group *lgr, struct smc_buf_desc *buf_desc, bool is_rmb) { - int i, rc = 0, lnk_cnt = 0; + int i, rc = 0, cnt = 0; /* protect against parallel link reconfiguration */ mutex_lock(&lgr->llc_conf_mutex); @@ -2323,11 +2324,11 @@ static int smcr_buf_map_usable_links(struct smc_link_group *lgr, rc = -ENOMEM; goto out; } - lnk_cnt++; + cnt++; } out: mutex_unlock(&lgr->llc_conf_mutex); - if (!lnk_cnt) + if (!rc && !cnt) rc = -EINVAL; return rc; } @@ -2378,14 +2379,13 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) bool is_dgraded = false; struct mutex *lock; /* lock buffer list */ int sk_buf_size; - int rc = 0; if (is_rmb) /* use socket recv buffer size (w/o overhead) as start value */ - sk_buf_size = smc->sk.sk_rcvbuf / 2; + sk_buf_size = smc->sk.sk_rcvbuf; else /* use socket send buffer size (w/o overhead) as start value */ - sk_buf_size = smc->sk.sk_sndbuf / 2; + sk_buf_size = smc->sk.sk_sndbuf; for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb); bufsize_short >= 0; bufsize_short--) { @@ -2435,17 +2435,16 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) return PTR_ERR(buf_desc); if (!is_smcd) { - rc = smcr_buf_map_usable_links(lgr, buf_desc, is_rmb); - if (rc) { + if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) { smcr_buf_unuse(buf_desc, is_rmb, lgr); - return rc; + return -ENOMEM; } } if (is_rmb) { conn->rmb_desc = buf_desc; conn->rmbe_size_short = bufsize_short; - smc->sk.sk_rcvbuf = bufsize * 2; + smc->sk.sk_rcvbuf = bufsize; atomic_set(&conn->bytes_to_rcv, 0); conn->rmbe_update_limit = smc_rmb_wnd_update_limit(buf_desc->len); @@ -2453,7 +2452,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */ } else { conn->sndbuf_desc = buf_desc; - smc->sk.sk_sndbuf = bufsize * 2; + smc->sk.sk_sndbuf = bufsize; atomic_set(&conn->sndbuf_space, bufsize); } return 0; diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index bbe00b50b6662379241672f7f4bd91db00a3c65f..fea662f95bd81bd516f73f799bc0f28cdb620c9e 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -275,3 +275,4 @@ module_init(smc_diag_init); module_exit(smc_diag_exit); MODULE_LICENSE("GPL"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 43 /* AF_SMC */); +MODULE_ALIAS_GENL_FAMILY(SMCR_GENL_FAMILY_NAME); diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index fd28cc498b984b1bc9e7ac4bef313e36dc332243..98de2bc6148332b7a4e5dd1b6401480111d5d658 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -32,17 +32,6 @@ int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *smcd) vlan_id); } -int smc_ism_write(struct smcd_dev *smcd, const struct smc_ism_position *pos, - void *data, size_t len) -{ - int rc; - - rc = smcd->ops->move_data(smcd, pos->token, pos->index, pos->signal, - pos->offset, data, len); - - return rc < 0 ? rc : 0; -} - void smc_ism_get_system_eid(u8 **eid) { if (!smc_ism_v2_capable) @@ -439,7 +428,7 @@ int smcd_register_dev(struct smcd_dev *smcd) if (list_empty(&smcd_dev_list.list)) { u8 *system_eid = NULL; - smcd->ops->get_system_eid(smcd, &system_eid); + system_eid = smcd->ops->get_system_eid(); if (system_eid[24] != '0' || system_eid[28] != '0') { smc_ism_v2_capable = true; memcpy(smc_ism_v2_system_eid, system_eid, @@ -518,13 +507,13 @@ void smcd_handle_event(struct smcd_dev *smcd, struct smcd_event *event) EXPORT_SYMBOL_GPL(smcd_handle_event); /* SMCD Device interrupt handler. Called from ISM device interrupt handler. - * Parameters are smcd device pointer and DMB number. Find the connection and - * schedule the tasklet for this connection. + * Parameters are smcd device pointer, DMB number, and the DMBE bitmask. + * Find the connection and schedule the tasklet for this connection. * * Context: * - Function called in IRQ context from ISM device driver IRQ handler. */ -void smcd_handle_irq(struct smcd_dev *smcd, unsigned int dmbno) +void smcd_handle_irq(struct smcd_dev *smcd, unsigned int dmbno, u16 dmbemask) { struct smc_connection *conn = NULL; unsigned long flags; diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h index 004b22a13ffadde88cc93deac8295e4d0e1282bc..d6b2db604fe8fcc01bd3817b2420de8cd0f1bd6b 100644 --- a/net/smc/smc_ism.h +++ b/net/smc/smc_ism.h @@ -28,13 +28,6 @@ struct smc_ism_vlanid { /* VLAN id set on ISM device */ refcount_t refcnt; /* Reference count */ }; -struct smc_ism_position { /* ISM device position to write to */ - u64 token; /* Token of DMB */ - u32 offset; /* Offset into DMBE */ - u8 index; /* Index of DMBE */ - u8 signal; /* Generate interrupt on owner side */ -}; - struct smcd_dev; int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *dev); @@ -45,12 +38,21 @@ int smc_ism_put_vlan(struct smcd_dev *dev, unsigned short vlan_id); int smc_ism_register_dmb(struct smc_link_group *lgr, int buf_size, struct smc_buf_desc *dmb_desc); int smc_ism_unregister_dmb(struct smcd_dev *dev, struct smc_buf_desc *dmb_desc); -int smc_ism_write(struct smcd_dev *dev, const struct smc_ism_position *pos, - void *data, size_t len); int smc_ism_signal_shutdown(struct smc_link_group *lgr); void smc_ism_get_system_eid(u8 **eid); u16 smc_ism_get_chid(struct smcd_dev *dev); bool smc_ism_is_v2_capable(void); void smc_ism_init(void); int smcd_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb); + +static inline int smc_ism_write(struct smcd_dev *smcd, u64 dmb_tok, + unsigned int idx, bool sf, unsigned int offset, + void *data, size_t len) +{ + int rc; + + rc = smcd->ops->move_data(smcd, dmb_tok, idx, sf, offset, data, len); + return rc < 0 ? rc : 0; +} + #endif diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index f9fb12382f9e61991bb4a89a7131f68206ea6331..791b453293492349343f3deb6bef9b88885e22fa 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -2219,7 +2219,7 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc) init_waitqueue_head(&lgr->llc_flow_waiter); init_waitqueue_head(&lgr->llc_msg_waiter); mutex_init(&lgr->llc_conf_mutex); - lgr->llc_testlink_time = READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time); + lgr->llc_testlink_time = READ_ONCE(net->smc.sysctl_smcr_testlink_time); } /* called after lgr was removed from lgr_list */ diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index f8a14643faf4ff080c51d535920651bc2a9551ea..d0c941e20beec6bce638709f1520d1f8cad2f387 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -19,6 +19,7 @@ #define SMC_LLC_WAIT_FIRST_TIME (5 * HZ) #define SMC_LLC_WAIT_TIME (2 * HZ) +#define SMC_LLC_TESTLINK_DEFAULT_TIME (30 * HZ) #define SMC_LLC_ANNOUNCE_CR_MAX_RETRY (1) diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c index 2b2bf13fc986355cf17c4a8fcca88e5124b4d650..e583bf0be6880cb815340570afea5be1d735d9f3 100644 --- a/net/smc/smc_sysctl.c +++ b/net/smc/smc_sysctl.c @@ -16,14 +16,13 @@ #include "smc.h" #include "smc_core.h" +#include "smc_llc.h" #include "smc_sysctl.h" -#include "smc_core.h" +static int two = 2; static int min_sndbuf = SMC_BUF_MIN_SIZE; static int min_rcvbuf = SMC_BUF_MIN_SIZE; -static int two = 2; - static struct ctl_table smc_table[] = { { .procname = "autocorking_size", @@ -42,20 +41,27 @@ static struct ctl_table smc_table[] = { .extra2 = &two, }, { - .procname = "wmem_default", - .data = &init_net.smc.sysctl_wmem_default, - .maxlen = sizeof(init_net.smc.sysctl_wmem_default), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_sndbuf, + .procname = "smcr_testlink_time", + .data = &init_net.smc.sysctl_smcr_testlink_time, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, }, { - .procname = "rmem_default", - .data = &init_net.smc.sysctl_rmem_default, - .maxlen = sizeof(init_net.smc.sysctl_rmem_default), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_rcvbuf, + .procname = "wmem", + .data = &init_net.smc.sysctl_wmem, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &min_sndbuf, + }, + { + .procname = "rmem", + .data = &init_net.smc.sysctl_rmem, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &min_rcvbuf, }, { .procname = "tcp2smc", @@ -152,8 +158,9 @@ int __net_init smc_sysctl_net_init(struct net *net) net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE; net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS; - net->smc.sysctl_wmem_default = 256 * 1024; - net->smc.sysctl_rmem_default = 384 * 1024; + net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME; + WRITE_ONCE(net->smc.sysctl_wmem, READ_ONCE(net->ipv4.sysctl_tcp_wmem[1])); + WRITE_ONCE(net->smc.sysctl_rmem, READ_ONCE(net->ipv4.sysctl_tcp_rmem[1])); net->smc.sysctl_tcp2smc = 0; net->smc.sysctl_allow_different_subnet = 1; net->smc.sysctl_keep_first_contact_clcsock = 1; diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index c7beaa1f38d9c23d092ca93dc370cdb477592564..5e84bd07dfed68e69620e7b70e1842b22061fc22 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -326,15 +326,11 @@ int smc_tx_sendpage(struct smc_sock *smc, struct page *page, int offset, int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len, u32 offset, int signal) { - struct smc_ism_position pos; int rc; - memset(&pos, 0, sizeof(pos)); - pos.token = conn->peer_token; - pos.index = conn->peer_rmbe_idx; - pos.offset = conn->tx_off + offset; - pos.signal = signal; - rc = smc_ism_write(conn->lgr->smcd, &pos, data, len); + rc = smc_ism_write(conn->lgr->smcd, conn->peer_token, + conn->peer_rmbe_idx, signal, conn->tx_off + offset, + data, len); if (rc) conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; return rc;