diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 794d62228c8cd3433efff6a25cb199d5c5e68516..95e18a2df6d7ce0ac0df86d2af6a34f26894b1cf 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1106,6 +1106,20 @@ static int smc_find_proposal_devices(struct smc_sock *smc,
 		ini->smcr_version &= ~SMC_V1;
 	/* else RDMA is supported for this connection */
 
+	/* make sure SMC_V1 ibdev still available */
+	if (ini->smcr_version & SMC_V1) {
+		mutex_lock(&smc_ib_devices.mutex);
+		if (list_empty(&ini->ib_dev->list)) {
+			ini->ib_dev = NULL;
+			ini->ib_port = 0;
+			ini->smcr_version &= ~SMC_V1;
+		} else {
+			/* put in __smc_connect */
+			smc_ib_get_pending_device(ini->ib_dev);
+		}
+		mutex_unlock(&smc_ib_devices.mutex);
+	}
+
 	ini->smc_type_v1 = smc_indicated_type(ini->smcd_version & SMC_V1,
 					      ini->smcr_version & SMC_V1);
 
@@ -1125,6 +1139,20 @@ static int smc_find_proposal_devices(struct smc_sock *smc,
 		ini->smcr_version &= ~SMC_V2;
 	ini->check_smcrv2 = false;
 
+	/* make sure SMC_V2 ibdev still available */
+	if (ini->smcr_version & SMC_V2) {
+		mutex_lock(&smc_ib_devices.mutex);
+		if (list_empty(&ini->smcrv2.ib_dev_v2->list)) {
+			ini->smcrv2.ib_dev_v2 = NULL;
+			ini->smcrv2.ib_port_v2 = 0;
+			ini->smcr_version &= ~SMC_V2;
+		} else {
+			/* put in __smc_connect */
+			smc_ib_get_pending_device(ini->smcrv2.ib_dev_v2);
+		}
+		mutex_unlock(&smc_ib_devices.mutex);
+	}
+
 	ini->smc_type_v2 = smc_indicated_type(ini->smcd_version & SMC_V2,
 					      ini->smcr_version & SMC_V2);
 
@@ -1581,6 +1609,10 @@ static int __smc_connect(struct smc_sock *smc)
 	if (rc)
 		goto vlan_cleanup;
 
+	if (ini->smcrv2.ib_dev_v2)
+		smc_ib_put_pending_device(ini->smcrv2.ib_dev_v2);
+	if (ini->ib_dev)
+		smc_ib_put_pending_device(ini->ib_dev);
 	SMC_STAT_CLNT_SUCC_INC(sock_net(smc->clcsock->sk), aclc);
 	smc_connect_ism_vlan_cleanup(smc, ini);
 	kfree(buf);
@@ -1591,6 +1623,10 @@ static int __smc_connect(struct smc_sock *smc)
 	smc_connect_ism_vlan_cleanup(smc, ini);
 	kfree(buf);
 fallback:
+	if (ini->smcrv2.ib_dev_v2)
+		smc_ib_put_pending_device(ini->smcrv2.ib_dev_v2);
+	if (ini->ib_dev)
+		smc_ib_put_pending_device(ini->ib_dev);
 	kfree(ini);
 	return smc_connect_decline_fallback(smc, rc, version);
 }
@@ -2295,6 +2331,17 @@ static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc,
 		smc_find_ism_store_rc(rc, ini);
 		goto not_found;
 	}
+	/* make sure SMC_V2 ibdev still available */
+	mutex_lock(&smc_ib_devices.mutex);
+	if (list_empty(&ini->smcrv2.ib_dev_v2->list)) {
+		mutex_unlock(&smc_ib_devices.mutex);
+		smc_find_ism_store_rc(SMC_CLC_DECL_NOSMCRDEV, ini);
+		goto not_found;
+	}
+	/* put below or in smc_listen_work */
+	smc_ib_get_pending_device(ini->smcrv2.ib_dev_v2);
+	mutex_unlock(&smc_ib_devices.mutex);
+
 	if (!ini->smcrv2.uses_gateway)
 		memcpy(ini->smcrv2.nexthop_mac, pclc->lcl.mac, ETH_ALEN);
 
@@ -2310,6 +2357,7 @@ static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc,
 		return;
 	ini->smcr_version = smcr_version;
 	smc_find_ism_store_rc(rc, ini);
+	smc_ib_put_pending_device(ini->smcrv2.ib_dev_v2);
 
 not_found:
 	ini->smcr_version &= ~SMC_V2;
@@ -2335,6 +2383,18 @@ static int smc_find_rdma_v1_device_serv(struct smc_sock *new_smc,
 		/* no RDMA device found */
 		return SMC_CLC_DECL_NOSMCDEV;
 	}
+	/* make sure SMC_V1 ibdev still available */
+	mutex_lock(&smc_ib_devices.mutex);
+	if (list_empty(&ini->ib_dev->list)) {
+		ini->ib_dev = NULL;
+		ini->ib_port = 0;
+		mutex_unlock(&smc_ib_devices.mutex);
+		return SMC_CLC_DECL_NOSMCDEV;
+	}
+	/* put in smc_listen_work */
+	smc_ib_get_pending_device(ini->ib_dev);
+	mutex_unlock(&smc_ib_devices.mutex);
+
 	rc = smc_listen_rdma_init(new_smc, ini);
 	if (rc)
 		return rc;
@@ -2537,6 +2597,10 @@ static void smc_listen_work(struct work_struct *work)
 			goto out_unlock;
 		mutex_unlock(&smc_server_lgr_pending);
 	}
+	if (ini->smcrv2.ib_dev_v2)
+		smc_ib_put_pending_device(ini->smcrv2.ib_dev_v2);
+	if (ini->ib_dev)
+		smc_ib_put_pending_device(ini->ib_dev);
 	smc_conn_save_peer_info(new_smc, cclc);
 	smc_listen_out_connected(new_smc);
 	if (newclcsock->sk)
@@ -2544,6 +2608,10 @@ static void smc_listen_work(struct work_struct *work)
 	goto out_free;
 
 out_unlock:
+	if (ini->smcrv2.ib_dev_v2)
+		smc_ib_put_pending_device(ini->smcrv2.ib_dev_v2);
+	if (ini->ib_dev)
+		smc_ib_put_pending_device(ini->ib_dev);
 	mutex_unlock(&smc_server_lgr_pending);
 out_decl:
 	smc_listen_decline(new_smc, rc, ini ? ini->first_contact_local : 0,
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index c0b1131776a353ec93d48d08304518bece409b70..92c8021ba7c43486592c1c08d126b7abfe60eefc 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -907,14 +907,6 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
 		lnk->smcibdev = ini->ib_dev;
 		lnk->ibport = ini->ib_port;
 	}
-
-	if (!lnk->smcibdev->ibdev) {
-		/* check if smcibdev still available */
-		memset(lnk, 0, sizeof(struct smc_link));
-		lnk->state = SMC_LNK_UNUSED;
-		return SMC_CLC_DECL_NOSMCRDEV;
-	}
-
 	get_device(&lnk->smcibdev->ibdev->dev);
 	atomic_inc(&lnk->smcibdev->lnk_cnt);
 	refcount_set(&lnk->refcnt, 1); /* link refcnt is set to 1 */
@@ -988,14 +980,12 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
 /* create a new SMC link group */
 static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
 {
-	struct smc_ib_device *ibdev;
 	struct smc_link_group *lgr;
 	struct list_head *lgr_list;
 	struct smcd_dev *smcd;
 	struct smc_link *lnk;
 	spinlock_t *lgr_lock;
 	u8 link_idx;
-	int ibport;
 	int rc = 0;
 	int i;
 
@@ -1050,6 +1040,9 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
 		atomic_inc(&ini->ism_dev[ini->ism_selected]->lgr_cnt);
 	} else {
 		/* SMC-R specific settings */
+		struct smc_ib_device *ibdev;
+		int ibport;
+
 		lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
 		lgr->smc_version = ini->smcr_version;
 		memcpy(lgr->peer_systemid, ini->peer_systemid,
@@ -1073,19 +1066,12 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
 			lgr->credits_en = 0;
 			lgr->use_rwwi = 0;
 		}
+		memcpy(lgr->pnet_id, ibdev->pnetid[ibport - 1],
+		       SMC_MAX_PNETID_LEN);
+
 		rc = smc_lgr_link_stats_init(lgr);
 		if (rc)
 			goto free_wq;
-
-		mutex_lock(&smc_ib_devices.mutex);
-		if (list_empty(&ibdev->list) ||
-		    test_bit(ibport, ibdev->ports_going_away)) {
-			/* ibdev unavailable */
-			rc = SMC_CLC_DECL_NOSMCRDEV;
-			goto free_stats;
-		}
-		memcpy(lgr->pnet_id, ibdev->pnetid[ibport - 1],
-		       SMC_MAX_PNETID_LEN);
 		rc = smc_wr_alloc_lgr_mem(lgr);
 		if (rc)
 			goto free_stats;
@@ -1110,16 +1096,12 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
 	spin_lock_bh(lgr_lock);
 	list_add_tail(&lgr->list, lgr_list);
 	spin_unlock_bh(lgr_lock);
-	if (!ini->is_smcd)
-		mutex_unlock(&smc_ib_devices.mutex);
 	return 0;
 
 free_stats:
 	if (!ini->is_smcd)
 		smc_lgr_link_stats_free(lgr);
 free_wq:
-	if (!ini->is_smcd)
-		mutex_unlock(&smc_ib_devices.mutex);
 	destroy_workqueue(lgr->tx_wq);
 free_lgr:
 	kfree(lgr);
@@ -1128,16 +1110,10 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
 		smc_ism_put_vlan(ini->ism_dev[ini->ism_selected], ini->vlan_id);
 out:
 	if (rc < 0) {
-		switch (rc) {
-		case -ENOMEM:
+		if (rc == -ENOMEM)
 			rc = SMC_CLC_DECL_MEM;
-			break;
-		case SMC_CLC_DECL_NOSMCRDEV:
-			break;
-		default:
+		else
 			rc = SMC_CLC_DECL_INTERR;
-			break;
-		}
 	}
 	return rc;
 }
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 5f472883d709a753d198b331cbf6703c2df9b22d..d0df482f537ef574adc8e66fee2728b74eedbecd 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -41,6 +41,11 @@ struct smc_ib_devices smc_ib_devices = { /* smc-registered ib devices */
 	.list = LIST_HEAD_INIT(smc_ib_devices.list),
 };
 
+struct smc_ib_devices smc_ib_devices_dump = {	/* smc-registered ib devices */
+	.mutex = __MUTEX_INITIALIZER(smc_ib_devices_dump.mutex),
+	.list = LIST_HEAD_INIT(smc_ib_devices_dump.list),
+};
+
 u8 local_systemid[SMC_SYSTEMID_LEN];		/* unique system identifier */
 
 static void smc_ib_modify_qp_iw_extension(struct smc_link *lnk)
@@ -214,6 +219,7 @@ static int smc_ib_fill_mac(struct smc_ib_device *smcibdev, u8 ibport)
 		if (!ndev)
 			return -ENODEV;
 		ether_addr_copy(smcibdev->mac[ibport - 1], ndev->dev_addr);
+		dev_put(ndev);
 		return 0;
 	}
 
@@ -377,16 +383,18 @@ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
 	struct ib_device *ibdev = smcibdev->ibdev;
 	const struct ib_gid_attr *attr;
 	const struct net_device *ndev;
+	bool iwarp_ndev = false;
 	int i;
 
+	iwarp_ndev = smc_ib_is_iwarp(ibdev, ibport) && ibdev->ops.get_netdev;
+
 	for (i = 0; i < smcibdev->pattr[ibport - 1].gid_tbl_len; i++) {
 		attr = rdma_get_gid_attr(ibdev, ibport, i);
 		if (IS_ERR(attr))
 			continue;
 
 		rcu_read_lock();
-		if (smc_ib_is_iwarp(ibdev, ibport) &&
-		    ibdev->ops.get_netdev)
+		if (iwarp_ndev)
 			ndev = ibdev->ops.get_netdev(ibdev, ibport);
 		else
 			ndev = rdma_read_gid_attr_ndev_rcu(attr);
@@ -397,11 +405,15 @@ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
 		     vlan_dev_vlan_id(ndev) == vlan_id))) {
 			if (!smc_ib_determine_gid_rcu(ndev, attr, gid,
 						      sgid_index, smcrv2)) {
+				if (iwarp_ndev)
+					dev_put((struct net_device *)ndev);
 				rcu_read_unlock();
 				rdma_put_gid_attr(attr);
 				return 0;
 			}
 		}
+		if (ndev && iwarp_ndev)
+			dev_put((struct net_device *)ndev);
 		rcu_read_unlock();
 		rdma_put_gid_attr(attr);
 	}
@@ -1135,6 +1147,17 @@ bool smc_ib_is_iwarp(struct ib_device *ibdev, u8 ibport)
 	return rdma_protocol_iwarp(ibdev, ibport);
 }
 
+void smc_ib_get_pending_device(struct smc_ib_device *smcibdev)
+{
+	refcount_inc(&smcibdev->lnk_pending_cnt);
+}
+
+void smc_ib_put_pending_device(struct smc_ib_device *smcibdev)
+{
+	if (refcount_dec_and_test(&smcibdev->lnk_pending_cnt))
+		wake_up(&smcibdev->lnks_pending);
+}
+
 /* Reserve socket ports of each net namespace which can be accessed
  * by eRDMA (iWARP) device for out-bound RC establishment.
  */
@@ -1229,11 +1252,18 @@ static int smc_ib_add_dev(struct ib_device *ibdev)
 	}
 	INIT_WORK(&smcibdev->port_event_work, smc_ib_port_event_work);
 	atomic_set(&smcibdev->lnk_cnt, 0);
+	refcount_set(&smcibdev->lnk_pending_cnt, 1);
 	init_waitqueue_head(&smcibdev->lnks_deleted);
+	init_waitqueue_head(&smcibdev->lnks_pending);
 	mutex_init(&smcibdev->mutex);
 	mutex_lock(&smc_ib_devices.mutex);
 	list_add_tail(&smcibdev->list, &smc_ib_devices.list);
 	mutex_unlock(&smc_ib_devices.mutex);
+
+	mutex_lock(&smc_ib_devices_dump.mutex);
+	list_add_tail(&smcibdev->list_dump, &smc_ib_devices_dump.list);
+	mutex_unlock(&smc_ib_devices_dump.mutex);
+
 	ib_set_client_data(ibdev, &smc_ib_client, smcibdev);
 	INIT_IB_EVENT_HANDLER(&smcibdev->event_handler, smcibdev->ibdev,
 			      smc_ib_global_event_handler);
@@ -1272,6 +1302,9 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data)
 	mutex_lock(&smc_ib_devices.mutex);
 	list_del_init(&smcibdev->list); /* remove from smc_ib_devices */
 	mutex_unlock(&smc_ib_devices.mutex);
+	smc_ib_put_pending_device(smcibdev);
+	wait_event(smcibdev->lnks_pending, /* wait for no pending usage */
+		   !refcount_read(&smcibdev->lnk_pending_cnt));
 	pr_info_ratelimited("smc: removing ib device %s\n",
 			    smcibdev->ibdev->name);
 	smc_smcr_terminate_all(smcibdev);
@@ -1279,6 +1312,9 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data)
 	smc_ib_cleanup_per_ibdev(smcibdev);
 	ib_unregister_event_handler(&smcibdev->event_handler);
 	cancel_work_sync(&smcibdev->port_event_work);
+	mutex_lock(&smc_ib_devices_dump.mutex);
+	list_del_init(&smcibdev->list_dump); /* remove from smc_ib_devices_dump */
+	mutex_unlock(&smc_ib_devices_dump.mutex);
 	kfree(smcibdev);
 }
 
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index 709c383e776aad97e2018c6d7099e40dfcfb4351..b6bb49b409f027504075ec778ef5941583107cb0 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -31,6 +31,7 @@ struct smc_ib_devices { /* list of smc ib devices definition */
 };
 
 extern struct smc_ib_devices	smc_ib_devices; /* list of smc ib devices */
+extern struct smc_ib_devices	smc_ib_devices_dump;
 extern struct smc_lgr_list smc_lgr_list; /* list of linkgroups */
 extern struct smc_lgr_list smc_lgr_stats_list; /* list of statistic linkgroups */
 
@@ -43,6 +44,7 @@ struct smc_ib_cq { /* ib_cq wrapper for smc */
 
 struct smc_ib_device {				/* ib-device infos for smc */
 	struct list_head	list;
+	struct list_head	list_dump;
 	struct ib_device	*ibdev;
 	struct ib_port_attr	pattr[SMC_MAX_PORTS];	/* ib dev. port attrs */
 	struct ib_event_handler	event_handler;	/* global ib_event handler */
@@ -59,7 +61,9 @@ struct smc_ib_device { /* ib-device infos for smc */
 	unsigned long		port_event_mask;
 	DECLARE_BITMAP(ports_going_away, SMC_MAX_PORTS);
 	atomic_t		lnk_cnt;	/* number of links on ibdev */
+	refcount_t		lnk_pending_cnt;/* number of links attempt to use ibdev */
 	wait_queue_head_t	lnks_deleted;	/* wait 4 removal of all links*/
+	wait_queue_head_t	lnks_pending;	/* wait 4 pending establish of links */
 	struct mutex		mutex;		/* protect dev setup+cleanup */
 	atomic_t		lnk_cnt_by_port[SMC_MAX_PORTS];
 						/* number of links per port */
@@ -124,5 +128,7 @@ int smc_ib_find_route(__be32 saddr, __be32 daddr,
 		      u8 nexthop_mac[], u8 *uses_gateway);
 bool smc_ib_is_valid_local_systemid(void);
 bool smc_ib_is_iwarp(struct ib_device *ibdev, u8 ibport);
+void smc_ib_get_pending_device(struct smc_ib_device *smcibdev);
+void smc_ib_put_pending_device(struct smc_ib_device *smcibdev);
 int smcr_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb);
 #endif
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 661e92fb4909fecb207e2d4bde82b59bd8ed84a3..520c1a15665f403b45d799caeb0a86a15662786c 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -1098,6 +1098,7 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
 	struct smc_llc_msg_add_link *llc = &qentry->msg.add_link;
 	enum smc_lgr_type lgr_new_t = SMC_LGR_SYMMETRIC;
 	struct smc_link_group *lgr = smc_get_lgr(link);
+	struct smc_ib_device *ibdev_selected = NULL;
 	struct smc_init_info *ini = NULL;
 	struct smc_link *lnk_new = NULL;
 	int lnk_idx, rc = 0;
@@ -1137,14 +1138,29 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
 		ini->ib_dev = link->smcibdev;
 		ini->ib_port = link->ibport;
 	}
+
+	mutex_lock(&smc_ib_devices.mutex);
+	if (lgr->smc_version == SMC_V2)
+		ibdev_selected = ini->smcrv2.ib_dev_v2;
+	else if (lgr->smc_version < SMC_V2)
+		ibdev_selected = ini->ib_dev;
+	if (list_empty(&ibdev_selected->list)) {
+		rc = -ENODEV;
+		ibdev_selected = NULL;
+		mutex_unlock(&smc_ib_devices.mutex);
+		goto out_reject;
+	}
+	smc_ib_get_pending_device(ibdev_selected); /* put below */
+	mutex_unlock(&smc_ib_devices.mutex);
+
 	lnk_idx = smc_llc_alloc_alt_link(lgr, lgr_new_t);
 	if (lnk_idx < 0)
-		goto out_reject;
+		goto out_pending_dev;
 	lnk_new = &lgr->lnk[lnk_idx];
 	lnk_new->iw_conn_param = link->iw_conn_param;
 	rc = smcr_link_init(lgr, lnk_new, lnk_idx, ini);
 	if (rc)
-		goto out_reject;
+		goto out_pending_dev;
 	smc_llc_save_add_link_info(lnk_new, llc);
 	lnk_new->link_id = llc->link_num;	/* SMC server assigns link id */
 	smc_llc_link_set_uid(lnk_new);
@@ -1172,11 +1188,15 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
 		}
 	}
 	rc = smc_llc_cli_conf_link(link, ini, lnk_new, lgr_new_t);
-	if (!rc)
+	if (!rc) {
+		smc_ib_put_pending_device(ibdev_selected);
 		goto out;
+	}
 out_clear_lnk:
 	lnk_new->state = SMC_LNK_INACTIVE;
 	smcr_link_clear(lnk_new, false);
+out_pending_dev:
+	smc_ib_put_pending_device(ibdev_selected);
 out_reject:
 	smc_llc_cli_add_link_reject(qentry);
 out:
@@ -1469,6 +1489,7 @@ int smc_llc_srv_add_link(struct smc_link *link,
 			 struct smc_llc_qentry *req_qentry)
 {
 	enum smc_lgr_type lgr_new_t = SMC_LGR_SYMMETRIC;
+	struct smc_ib_device *ibdev_selected = NULL;
 	struct smc_link_group *lgr = link->lgr;
 	struct smc_llc_msg_add_link *add_llc;
 	struct smc_llc_qentry *qentry = NULL;
@@ -1512,16 +1533,31 @@ int smc_llc_srv_add_link(struct smc_link *link,
 		ini->ib_dev = link->smcibdev;
 		ini->ib_port = link->ibport;
 	}
+
+	mutex_lock(&smc_ib_devices.mutex);
+	if (lgr->smc_version == SMC_V2)
+		ibdev_selected = ini->smcrv2.ib_dev_v2;
+	else if (lgr->smc_version < SMC_V2)
+		ibdev_selected = ini->ib_dev;
+	if (list_empty(&ibdev_selected->list)) {
+		rc = -ENODEV;
+		ibdev_selected = NULL;
+		mutex_unlock(&smc_ib_devices.mutex);
+		goto out;
+	}
+	smc_ib_get_pending_device(ibdev_selected); /* put below */
+	mutex_unlock(&smc_ib_devices.mutex);
+
 	lnk_idx = smc_llc_alloc_alt_link(lgr, lgr_new_t);
 	if (lnk_idx < 0) {
 		rc = 0;
-		goto out;
+		goto out_dev;
 	}
 
 	lgr->lnk[lnk_idx].iw_conn_param = link->iw_conn_param;
 	rc = smcr_link_init(lgr, &lgr->lnk[lnk_idx], lnk_idx, ini);
 	if (rc)
-		goto out;
+		goto out_dev;
 	link_new = &lgr->lnk[lnk_idx];
 
 	rc = smcr_buf_map_lgr(link_new);
@@ -1571,6 +1607,8 @@ int smc_llc_srv_add_link(struct smc_link *link,
 	rc = smc_llc_srv_conf_link(link, link_new, lgr_new_t);
 	if (rc)
 		goto out_err;
+
+	smc_ib_put_pending_device(ibdev_selected);
 	kfree(ini);
 	return 0;
 out_err:
@@ -1578,6 +1616,8 @@ int smc_llc_srv_add_link(struct smc_link *link,
 		link_new->state = SMC_LNK_INACTIVE;
 		smcr_link_clear(link_new, false);
 	}
+out_dev:
+	smc_ib_put_pending_device(ibdev_selected);
 out:
 	kfree(ini);
 	if (send_req_add_link_resp)