From 7562a13d5a8ce9bc5020705da5f50221021f5a2c Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Fri, 1 May 2020 12:48:01 +0200 Subject: [PATCH 01/13] net/smc: multiple link support for rmb buffer registration The CONFIRM_RKEY LLC processing handles all links in one LLC message. Move the call to this processing out of smcr_link_reg_rmb() which does processing per link, into smcr_lgr_reg_rmbs() which is responsible for link group level processing. Move smcr_link_reg_rmb() into module smc_core.c. >From af_smc.c now call smcr_lgr_reg_rmbs() to register new rmbs on all available links. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 54 ++++++++++++++++------------------------------ net/smc/smc_core.c | 16 ++++++++++++++ net/smc/smc_core.h | 1 + 3 files changed, 36 insertions(+), 35 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index bd9662d06896..20d6d3fbb86c 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -337,46 +337,30 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc) smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC); } -/* register a new rmb, send confirm_rkey msg to register with peer */ -static int smcr_link_reg_rmb(struct smc_link *link, - struct smc_buf_desc *rmb_desc, bool conf_rkey) -{ - if (!rmb_desc->is_reg_mr[link->link_idx]) { - /* register memory region for new rmb */ - if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) { - rmb_desc->is_reg_err = true; - return -EFAULT; - } - rmb_desc->is_reg_mr[link->link_idx] = true; - } - if (!conf_rkey) - return 0; - - /* exchange confirm_rkey msg with peer */ - if (!rmb_desc->is_conf_rkey) { - if (smc_llc_do_confirm_rkey(link, rmb_desc)) { - rmb_desc->is_reg_err = true; - return -EFAULT; - } - rmb_desc->is_conf_rkey = true; - } - return 0; -} - /* register the new rmb on all links */ -static int smcr_lgr_reg_rmbs(struct smc_link_group *lgr, +static int smcr_lgr_reg_rmbs(struct smc_link 
*link, struct smc_buf_desc *rmb_desc) { - int i, rc; + struct smc_link_group *lgr = link->lgr; + int i, rc = 0; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { if (lgr->lnk[i].state != SMC_LNK_ACTIVE) continue; - rc = smcr_link_reg_rmb(&lgr->lnk[i], rmb_desc, true); + rc = smcr_link_reg_rmb(&lgr->lnk[i], rmb_desc); if (rc) - return rc; + goto out; } - return 0; + + /* exchange confirm_rkey msg with peer */ + rc = smc_llc_do_confirm_rkey(link, rmb_desc); + if (rc) { + rc = -EFAULT; + goto out; + } + rmb_desc->is_conf_rkey = true; +out: + return rc; } static int smcr_clnt_conf_first_link(struct smc_sock *smc) @@ -408,7 +392,7 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc) smc_wr_remember_qp_attr(link); - if (smcr_link_reg_rmb(link, smc->conn.rmb_desc, false)) + if (smcr_link_reg_rmb(link, smc->conn.rmb_desc)) return SMC_CLC_DECL_ERR_REGRMB; /* confirm_rkey is implicit on 1st contact */ @@ -670,7 +654,7 @@ static int smc_connect_rdma(struct smc_sock *smc, return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK, ini->cln_first_contact); } else { - if (smcr_lgr_reg_rmbs(smc->conn.lgr, smc->conn.rmb_desc)) + if (smcr_lgr_reg_rmbs(link, smc->conn.rmb_desc)) return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB, ini->cln_first_contact); } @@ -1045,7 +1029,7 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc) link->lgr->type = SMC_LGR_SINGLE; - if (smcr_link_reg_rmb(link, smc->conn.rmb_desc, false)) + if (smcr_link_reg_rmb(link, smc->conn.rmb_desc)) return SMC_CLC_DECL_ERR_REGRMB; /* send CONFIRM LINK request to client over the RoCE fabric */ @@ -1220,7 +1204,7 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact) struct smc_connection *conn = &new_smc->conn; if (local_contact != SMC_FIRST_CONTACT) { - if (smcr_lgr_reg_rmbs(conn->lgr, conn->rmb_desc)) + if (smcr_lgr_reg_rmbs(conn->lnk, conn->rmb_desc)) return SMC_CLC_DECL_ERR_REGRMB; } smc_rmb_sync_sg_for_device(&new_smc->conn); diff --git a/net/smc/smc_core.c 
b/net/smc/smc_core.c index 3539ceef9a97..de6bc36fe9a7 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1127,6 +1127,22 @@ free_table: return rc; } +/* register a new rmb on IB device */ +int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc) +{ + if (list_empty(&link->lgr->list)) + return -ENOLINK; + if (!rmb_desc->is_reg_mr[link->link_idx]) { + /* register memory region for new rmb */ + if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) { + rmb_desc->is_reg_err = true; + return -EFAULT; + } + rmb_desc->is_reg_mr[link->link_idx] = true; + } + return 0; +} + static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr, bool is_rmb, int bufsize) { diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index f12474cc666c..fd512188d2c6 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -367,6 +367,7 @@ void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr); int smc_core_init(void); void smc_core_exit(void); +int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc); static inline struct smc_link_group *smc_get_lgr(struct smc_link *link) { return link->lgr; From 4a3641c160873fe6b6bcff00a6ea15e7430d8d42 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Fri, 1 May 2020 12:48:02 +0200 Subject: [PATCH 02/13] net/smc: unmapping of buffers to support multiple links With the support of multiple links that are created and cleared there is a need to unmap one link from all current buffers. Add unmapping by link and by rmb. And make smcr_link_clear() available to be called from the LLC layer. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/smc/smc_core.c | 76 +++++++++++++++++++++++++++++++++++----------- net/smc/smc_core.h | 1 + 2 files changed, 60 insertions(+), 17 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index de6bc36fe9a7..d5ecea490b4e 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -498,14 +498,69 @@ void smc_conn_free(struct smc_connection *conn) smc_lgr_schedule_free_work(lgr); } -static void smcr_link_clear(struct smc_link *lnk) +/* unregister a link from a buf_desc */ +static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb, + struct smc_link *lnk) +{ + if (is_rmb) + buf_desc->is_reg_mr[lnk->link_idx] = false; + if (!buf_desc->is_map_ib[lnk->link_idx]) + return; + if (is_rmb) { + if (buf_desc->mr_rx[lnk->link_idx]) { + smc_ib_put_memory_region( + buf_desc->mr_rx[lnk->link_idx]); + buf_desc->mr_rx[lnk->link_idx] = NULL; + } + smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE); + } else { + smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE); + } + sg_free_table(&buf_desc->sgt[lnk->link_idx]); + buf_desc->is_map_ib[lnk->link_idx] = false; +} + +/* unmap all buffers of lgr for a deleted link */ +static void smcr_buf_unmap_lgr(struct smc_link *lnk) +{ + struct smc_link_group *lgr = lnk->lgr; + struct smc_buf_desc *buf_desc, *bf; + int i; + + for (i = 0; i < SMC_RMBE_SIZES; i++) { + mutex_lock(&lgr->rmbs_lock); + list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) + smcr_buf_unmap_link(buf_desc, true, lnk); + mutex_unlock(&lgr->rmbs_lock); + mutex_lock(&lgr->sndbufs_lock); + list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i], + list) + smcr_buf_unmap_link(buf_desc, false, lnk); + mutex_unlock(&lgr->sndbufs_lock); + } +} + +static void smcr_rtoken_clear_link(struct smc_link *lnk) +{ + struct smc_link_group *lgr = lnk->lgr; + int i; + + for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { + lgr->rtokens[i][lnk->link_idx].rkey = 0; + lgr->rtokens[i][lnk->link_idx].dma_addr = 0; + } +} + +void smcr_link_clear(struct smc_link 
*lnk) { struct smc_ib_device *smcibdev; - if (lnk->peer_qpn == 0) + if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED) return; lnk->peer_qpn = 0; smc_llc_link_clear(lnk); + smcr_buf_unmap_lgr(lnk); + smcr_rtoken_clear_link(lnk); smc_ib_modify_qp_reset(lnk); smc_wr_free_link(lnk); smc_ib_destroy_queue_pair(lnk); @@ -522,23 +577,10 @@ static void smcr_link_clear(struct smc_link *lnk) static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb, struct smc_buf_desc *buf_desc) { - struct smc_link *lnk; int i; - for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - lnk = &lgr->lnk[i]; - if (!buf_desc->is_map_ib[lnk->link_idx]) - continue; - if (is_rmb) { - if (buf_desc->mr_rx[lnk->link_idx]) - smc_ib_put_memory_region( - buf_desc->mr_rx[lnk->link_idx]); - smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE); - } else { - smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE); - } - sg_free_table(&buf_desc->sgt[lnk->link_idx]); - } + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) + smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]); if (buf_desc->pages) __free_pages(buf_desc->pages, buf_desc->order); diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index fd512188d2c6..fa532a423fd7 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -367,6 +367,7 @@ void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr); int smc_core_init(void); void smc_core_exit(void); +void smcr_link_clear(struct smc_link *lnk); int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc); static inline struct smc_link_group *smc_get_lgr(struct smc_link *link) { From fb33d27727254618aaf6bc2fedcb0fda1d5c0239 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Fri, 1 May 2020 12:48:03 +0200 Subject: [PATCH 03/13] net/smc: map and register buffers for a new link Introduce support to map and register all current buffers for a new link. 
smcr_buf_map_lgr() will map used buffers for a new link and smcr_buf_reg_lgr() can be called to register used buffers on the IB device of the new link. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_core.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++ net/smc/smc_core.h | 2 ++ 2 files changed, 62 insertions(+) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index d5ecea490b4e..0e87f652caea 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1185,6 +1185,66 @@ int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc) return 0; } +static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock, + struct list_head *lst, bool is_rmb) +{ + struct smc_buf_desc *buf_desc, *bf; + int rc = 0; + + mutex_lock(lock); + list_for_each_entry_safe(buf_desc, bf, lst, list) { + if (!buf_desc->used) + continue; + rc = smcr_buf_map_link(buf_desc, is_rmb, lnk); + if (rc) + goto out; + } +out: + mutex_unlock(lock); + return rc; +} + +/* map all used buffers of lgr for a new link */ +int smcr_buf_map_lgr(struct smc_link *lnk) +{ + struct smc_link_group *lgr = lnk->lgr; + int i, rc = 0; + + for (i = 0; i < SMC_RMBE_SIZES; i++) { + rc = _smcr_buf_map_lgr(lnk, &lgr->rmbs_lock, + &lgr->rmbs[i], true); + if (rc) + return rc; + rc = _smcr_buf_map_lgr(lnk, &lgr->sndbufs_lock, + &lgr->sndbufs[i], false); + if (rc) + return rc; + } + return 0; +} + +/* register all used buffers of lgr for a new link */ +int smcr_buf_reg_lgr(struct smc_link *lnk) +{ + struct smc_link_group *lgr = lnk->lgr; + struct smc_buf_desc *buf_desc, *bf; + int i, rc = 0; + + mutex_lock(&lgr->rmbs_lock); + for (i = 0; i < SMC_RMBE_SIZES; i++) { + list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) { + if (!buf_desc->used) + continue; + rc = smcr_link_reg_rmb(lnk, buf_desc); + if (rc) + goto out; + } + } +out: + mutex_unlock(&lgr->rmbs_lock); + return rc; +} + static struct smc_buf_desc *smcr_new_buf_create(struct 
smc_link_group *lgr, bool is_rmb, int bufsize) { diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index fa532a423fd7..61ddb5264936 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -368,6 +368,8 @@ int smc_core_init(void); void smc_core_exit(void); void smcr_link_clear(struct smc_link *lnk); +int smcr_buf_map_lgr(struct smc_link *lnk); +int smcr_buf_reg_lgr(struct smc_link *lnk); int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc); static inline struct smc_link_group *smc_get_lgr(struct smc_link *link) { From fbed3b37c89633eb602f4ec8e30186e601b793e5 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Fri, 1 May 2020 12:48:04 +0200 Subject: [PATCH 04/13] net/smc: extend smc_llc_send_add_link() and smc_llc_send_delete_link() All LLC sends are done from worker context only, so remove the prep functions which were used to build the message before it was sent, and add the function content into the respective send function smc_llc_send_add_link() and smc_llc_send_delete_link(). Extend smc_llc_send_add_link() to include the qp_mtu value in the LLC message, which is needed to establish a link after the initial link was created. Extend smc_llc_send_delete_link() to contain a link_id and a reason code for the link deletion in the LLC message, which is needed when a specific link should be deleted. And add the list of existing DELETE_LINK reason codes. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/smc/smc_core.c | 3 +- net/smc/smc_llc.c | 89 ++++++++++++++++++++++++---------------------- net/smc/smc_llc.h | 16 +++++++-- 3 files changed, 62 insertions(+), 46 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 0e87f652caea..c905675017c7 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -199,7 +199,8 @@ void smc_lgr_cleanup_early(struct smc_connection *conn) static int smcr_link_send_delete(struct smc_link *lnk, bool orderly) { if (lnk->state == SMC_LNK_ACTIVE && - !smc_llc_send_delete_link(lnk, SMC_LLC_REQ, orderly)) { + !smc_llc_send_delete_link(lnk, 0, SMC_LLC_REQ, orderly, + SMC_LLC_DEL_PROG_INIT_TERM)) { return 0; } return -ENOTCONN; diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 327cf30b98cc..171835926db6 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -58,7 +58,13 @@ struct smc_llc_msg_add_link { /* type 0x02 */ u8 sender_gid[SMC_GID_SIZE]; u8 sender_qp_num[3]; u8 link_num; - u8 flags2; /* QP mtu */ +#if defined(__BIG_ENDIAN_BITFIELD) + u8 reserved3 : 4, + qp_mtu : 4; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u8 qp_mtu : 4, + reserved3 : 4; +#endif u8 initial_psn[3]; u8 reserved[8]; }; @@ -427,26 +433,9 @@ static int smc_llc_send_delete_rkey(struct smc_link *link, return rc; } -/* prepare an add link message */ -static void smc_llc_prep_add_link(struct smc_llc_msg_add_link *addllc, - struct smc_link *link, u8 mac[], u8 gid[], - enum smc_llc_reqresp reqresp) -{ - memset(addllc, 0, sizeof(*addllc)); - addllc->hd.common.type = SMC_LLC_ADD_LINK; - addllc->hd.length = sizeof(struct smc_llc_msg_add_link); - if (reqresp == SMC_LLC_RESP) { - addllc->hd.flags |= SMC_LLC_FLAG_RESP; - /* always reject more links for now */ - addllc->hd.flags |= SMC_LLC_FLAG_ADD_LNK_REJ; - addllc->hd.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH; - } - memcpy(addllc->sender_mac, mac, ETH_ALEN); - memcpy(addllc->sender_gid, gid, SMC_GID_SIZE); -} - /* send ADD LINK request or response */ int 
smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[], + struct smc_link *link_new, enum smc_llc_reqresp reqresp) { struct smc_llc_msg_add_link *addllc; @@ -458,32 +447,33 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[], if (rc) return rc; addllc = (struct smc_llc_msg_add_link *)wr_buf; - smc_llc_prep_add_link(addllc, link, mac, gid, reqresp); + + memset(addllc, 0, sizeof(*addllc)); + addllc->hd.common.type = SMC_LLC_ADD_LINK; + addllc->hd.length = sizeof(struct smc_llc_msg_add_link); + if (reqresp == SMC_LLC_RESP) + addllc->hd.flags |= SMC_LLC_FLAG_RESP; + memcpy(addllc->sender_mac, mac, ETH_ALEN); + memcpy(addllc->sender_gid, gid, SMC_GID_SIZE); + if (link_new) { + addllc->link_num = link_new->link_id; + hton24(addllc->sender_qp_num, link_new->roce_qp->qp_num); + hton24(addllc->initial_psn, link_new->psn_initial); + if (reqresp == SMC_LLC_REQ) + addllc->qp_mtu = link_new->path_mtu; + else + addllc->qp_mtu = min(link_new->path_mtu, + link_new->peer_mtu); + } /* send llc message */ rc = smc_wr_tx_send(link, pend); return rc; } -/* prepare a delete link message */ -static void smc_llc_prep_delete_link(struct smc_llc_msg_del_link *delllc, - struct smc_link *link, - enum smc_llc_reqresp reqresp, bool orderly) -{ - memset(delllc, 0, sizeof(*delllc)); - delllc->hd.common.type = SMC_LLC_DELETE_LINK; - delllc->hd.length = sizeof(struct smc_llc_msg_add_link); - if (reqresp == SMC_LLC_RESP) - delllc->hd.flags |= SMC_LLC_FLAG_RESP; - /* DEL_LINK_ALL because only 1 link supported */ - delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL; - if (orderly) - delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY; - delllc->link_num = link->link_id; -} - /* send DELETE LINK request or response */ -int smc_llc_send_delete_link(struct smc_link *link, - enum smc_llc_reqresp reqresp, bool orderly) +int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id, + enum smc_llc_reqresp reqresp, bool orderly, + u32 reason) { struct smc_llc_msg_del_link *delllc; 
struct smc_wr_tx_pend_priv *pend; @@ -494,7 +484,19 @@ int smc_llc_send_delete_link(struct smc_link *link, if (rc) return rc; delllc = (struct smc_llc_msg_del_link *)wr_buf; - smc_llc_prep_delete_link(delllc, link, reqresp, orderly); + + memset(delllc, 0, sizeof(*delllc)); + delllc->hd.common.type = SMC_LLC_DELETE_LINK; + delllc->hd.length = sizeof(struct smc_llc_msg_del_link); + if (reqresp == SMC_LLC_RESP) + delllc->hd.flags |= SMC_LLC_FLAG_RESP; + if (orderly) + delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY; + if (link_del_id) + delllc->link_num = link_del_id; + else + delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL; + delllc->reason = htonl(reason); /* send llc message */ rc = smc_wr_tx_send(link, pend); return rc; @@ -547,12 +549,13 @@ static void smc_llc_rx_delete_link(struct smc_link *link, smc_lgr_forget(lgr); if (lgr->role == SMC_SERV) { /* client asks to delete this link, send request */ - smc_llc_prep_delete_link(llc, link, SMC_LLC_REQ, true); + smc_llc_send_delete_link(link, 0, SMC_LLC_REQ, true, + SMC_LLC_DEL_PROG_INIT_TERM); } else { /* server requests to delete this link, send response */ - smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP, true); + smc_llc_send_delete_link(link, 0, SMC_LLC_RESP, true, + SMC_LLC_DEL_PROG_INIT_TERM); } - smc_llc_send_message(link, llc); smc_lgr_terminate_sched(lgr); } diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index 48029a5e14c3..d2c50d3e43a6 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -35,6 +35,16 @@ enum smc_llc_msg_type { SMC_LLC_DELETE_RKEY = 0x09, }; +/* LLC DELETE LINK Request Reason Codes */ +#define SMC_LLC_DEL_LOST_PATH 0x00010000 +#define SMC_LLC_DEL_OP_INIT_TERM 0x00020000 +#define SMC_LLC_DEL_PROG_INIT_TERM 0x00030000 +#define SMC_LLC_DEL_PROT_VIOL 0x00040000 +#define SMC_LLC_DEL_NO_ASYM_NEEDED 0x00050000 +/* LLC DELETE LINK Response Reason Codes */ +#define SMC_LLC_DEL_NOLNK 0x00100000 /* Unknown Link ID (no link) */ +#define SMC_LLC_DEL_NOLGR 0x00200000 /* Unknown Link Group */ + 
/* returns a usable link of the link group, or NULL */ static inline struct smc_link *smc_llc_usable_link(struct smc_link_group *lgr) { @@ -50,9 +60,11 @@ static inline struct smc_link *smc_llc_usable_link(struct smc_link_group *lgr) int smc_llc_send_confirm_link(struct smc_link *lnk, enum smc_llc_reqresp reqresp); int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[], + struct smc_link *link_new, enum smc_llc_reqresp reqresp); -int smc_llc_send_delete_link(struct smc_link *link, - enum smc_llc_reqresp reqresp, bool orderly); +int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id, + enum smc_llc_reqresp reqresp, bool orderly, + u32 reason); void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc); void smc_llc_lgr_clear(struct smc_link_group *lgr); int smc_llc_link_init(struct smc_link *link); From d550066776aae3bb31e0240cab24f62e33c47fd3 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Fri, 1 May 2020 12:48:05 +0200 Subject: [PATCH 05/13] net/smc: mutex to protect the lgr against parallel reconfigurations Introduce llc_conf_mutex in the link group which is used to protect the buffers and lgr states against parallel link reconfiguration. This ensures that new connections do not start to register buffers with the links of a link group when link creation or termination is running. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/smc/af_smc.c | 9 +++++++++ net/smc/smc_core.c | 26 ++++++++++++++++++++++---- net/smc/smc_core.h | 2 ++ net/smc/smc_llc.c | 9 +-------- 4 files changed, 34 insertions(+), 12 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 20d6d3fbb86c..6663a63be9e4 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -344,6 +344,13 @@ static int smcr_lgr_reg_rmbs(struct smc_link *link, struct smc_link_group *lgr = link->lgr; int i, rc = 0; + rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY); + if (rc) + return rc; + /* protect against parallel smc_llc_cli_rkey_exchange() and + * parallel smcr_link_reg_rmb() + */ + mutex_lock(&lgr->llc_conf_mutex); for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { if (lgr->lnk[i].state != SMC_LNK_ACTIVE) continue; @@ -360,6 +367,8 @@ static int smcr_lgr_reg_rmbs(struct smc_link *link, } rmb_desc->is_conf_rkey = true; out: + mutex_unlock(&lgr->llc_conf_mutex); + smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl); return rc; } diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index c905675017c7..4c3af05d76a5 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -448,11 +448,21 @@ out: static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc, struct smc_link_group *lgr) { + int rc; + if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) { /* unregister rmb with peer */ - smc_llc_do_delete_rkey(lgr, rmb_desc); - rmb_desc->is_conf_rkey = false; + rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY); + if (!rc) { + /* protect against smc_llc_cli_rkey_exchange() */ + mutex_lock(&lgr->llc_conf_mutex); + smc_llc_do_delete_rkey(lgr, rmb_desc); + rmb_desc->is_conf_rkey = false; + mutex_unlock(&lgr->llc_conf_mutex); + smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl); + } } + if (rmb_desc->is_reg_err) { /* buf registration failed, reuse not possible */ mutex_lock(&lgr->rmbs_lock); @@ -552,6 +562,7 @@ static void smcr_rtoken_clear_link(struct smc_link *lnk) } } +/* must be called under lgr->llc_conf_mutex lock */ void 
smcr_link_clear(struct smc_link *lnk) { struct smc_ib_device *smcibdev; @@ -1170,7 +1181,9 @@ free_table: return rc; } -/* register a new rmb on IB device */ +/* register a new rmb on IB device, + * must be called under lgr->llc_conf_mutex lock + */ int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc) { if (list_empty(&link->lgr->list)) @@ -1224,7 +1237,9 @@ int smcr_buf_map_lgr(struct smc_link *lnk) return 0; } -/* register all used buffers of lgr for a new link */ +/* register all used buffers of lgr for a new link, + * must be called under lgr->llc_conf_mutex lock + */ int smcr_buf_reg_lgr(struct smc_link *lnk) { struct smc_link_group *lgr = lnk->lgr; @@ -1278,6 +1293,8 @@ static int smcr_buf_map_usable_links(struct smc_link_group *lgr, { int i, rc = 0; + /* protect against parallel link reconfiguration */ + mutex_lock(&lgr->llc_conf_mutex); for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { struct smc_link *lnk = &lgr->lnk[i]; @@ -1289,6 +1306,7 @@ static int smcr_buf_map_usable_links(struct smc_link_group *lgr, } } out: + mutex_unlock(&lgr->llc_conf_mutex); return rc; } diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 61ddb5264936..aa198dd0f0e4 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -248,6 +248,8 @@ struct smc_link_group { /* queue for llc events */ spinlock_t llc_event_q_lock; /* protects llc_event_q */ + struct mutex llc_conf_mutex; + /* protects lgr reconfig. 
*/ struct work_struct llc_event_work; /* llc event worker */ wait_queue_head_t llc_waiter; diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 171835926db6..ceed3c89926f 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -848,6 +848,7 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc) spin_lock_init(&lgr->llc_event_q_lock); spin_lock_init(&lgr->llc_flow_lock); init_waitqueue_head(&lgr->llc_waiter); + mutex_init(&lgr->llc_conf_mutex); lgr->llc_testlink_time = net->ipv4.sysctl_tcp_keepalive_time; } @@ -897,9 +898,6 @@ int smc_llc_do_confirm_rkey(struct smc_link *send_link, struct smc_llc_qentry *qentry = NULL; int rc = 0; - rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY); - if (rc) - return rc; rc = smc_llc_send_confirm_rkey(send_link, rmb_desc); if (rc) goto out; @@ -911,7 +909,6 @@ int smc_llc_do_confirm_rkey(struct smc_link *send_link, out: if (qentry) smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); - smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl); return rc; } @@ -927,9 +924,6 @@ int smc_llc_do_delete_rkey(struct smc_link_group *lgr, if (!send_link) return -ENOLINK; - rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY); - if (rc) - return rc; /* protected by llc_flow control */ rc = smc_llc_send_delete_rkey(send_link, rmb_desc); if (rc) @@ -942,7 +936,6 @@ int smc_llc_do_delete_rkey(struct smc_link_group *lgr, out: if (qentry) smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); - smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl); return rc; } From 35dcf7ec02dcff16504bc52a368822254f889f00 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Fri, 1 May 2020 12:48:06 +0200 Subject: [PATCH 06/13] net/smc: remember PNETID of IB device for later device matching The PNETID is needed to find an alternate link for a link group. Save the PNETID of the link that is used to create the link group for later device matching. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/smc/smc_core.c | 2 ++ net/smc/smc_core.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 4c3af05d76a5..d7ab92fc5b15 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -413,6 +413,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT; memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer, SMC_SYSTEMID_LEN); + memcpy(lgr->pnet_id, ini->ib_dev->pnetid[ini->ib_port - 1], + SMC_MAX_PNETID_LEN); smc_llc_lgr_init(lgr, smc); link_idx = SMC_SINGLE_LINK; diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index aa198dd0f0e4..413eaad50c7f 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -244,6 +244,8 @@ struct smc_link_group { u8 next_link_id; enum smc_lgr_type type; /* redundancy state */ + u8 pnet_id[SMC_MAX_PNETID_LEN + 1]; + /* pnet id of this lgr */ struct list_head llc_event_q; /* queue for llc events */ spinlock_t llc_event_q_lock; From 1f90a05d9ff907c70456e7c9d7058372679a88c6 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Fri, 1 May 2020 12:48:07 +0200 Subject: [PATCH 07/13] net/smc: add smcr_port_add() and smcr_link_up() processing Call smcr_port_add() when an IB event reports a new active IB device. smcr_port_add() will start a work which either triggers the local ADD_LINK processing, or sends an ADD_LINK LLC message to the SMC server to initiate the processing. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/smc/smc_core.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++ net/smc/smc_core.h | 1 + net/smc/smc_ib.c | 1 + 3 files changed, 88 insertions(+) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index d7ab92fc5b15..20bc9e46bf52 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -44,10 +44,19 @@ static struct smc_lgr_list smc_lgr_list = { /* established link groups */ static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */ static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted); +struct smc_ib_up_work { + struct work_struct work; + struct smc_link_group *lgr; + struct smc_ib_device *smcibdev; + u8 ibport; +}; + static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb, struct smc_buf_desc *buf_desc); static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft); +static void smc_link_up_work(struct work_struct *work); + /* return head of link group list and its lock for a given link group */ static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr, spinlock_t **lgr_lock) @@ -928,6 +937,83 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) } } +/* link is up - establish alternate link if applicable */ +static void smcr_link_up(struct smc_link_group *lgr, + struct smc_ib_device *smcibdev, u8 ibport) +{ + struct smc_link *link = NULL; + + if (list_empty(&lgr->list) || + lgr->type == SMC_LGR_SYMMETRIC || + lgr->type == SMC_LGR_ASYMMETRIC_PEER) + return; + + if (lgr->role == SMC_SERV) { + /* trigger local add link processing */ + link = smc_llc_usable_link(lgr); + if (!link) + return; + /* tbd: call smc_llc_srv_add_link_local(link); */ + } else { + /* invite server to start add link processing */ + u8 gid[SMC_GID_SIZE]; + + if (smc_ib_determine_gid(smcibdev, ibport, lgr->vlan_id, gid, + NULL)) + return; + if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) { + /* some other llc task is ongoing */ + wait_event_interruptible_timeout(lgr->llc_waiter, + (lgr->llc_flow_lcl.type == 
SMC_LLC_FLOW_NONE), + SMC_LLC_WAIT_TIME); + } + if (list_empty(&lgr->list) || + !smc_ib_port_active(smcibdev, ibport)) + return; /* lgr or device no longer active */ + link = smc_llc_usable_link(lgr); + if (!link) + return; + smc_llc_send_add_link(link, smcibdev->mac[ibport - 1], gid, + NULL, SMC_LLC_REQ); + } +} + +void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport) +{ + struct smc_ib_up_work *ib_work; + struct smc_link_group *lgr, *n; + + list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) { + if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id, + SMC_MAX_PNETID_LEN) || + lgr->type == SMC_LGR_SYMMETRIC || + lgr->type == SMC_LGR_ASYMMETRIC_PEER) + continue; + ib_work = kmalloc(sizeof(*ib_work), GFP_KERNEL); + if (!ib_work) + continue; + INIT_WORK(&ib_work->work, smc_link_up_work); + ib_work->lgr = lgr; + ib_work->smcibdev = smcibdev; + ib_work->ibport = ibport; + schedule_work(&ib_work->work); + } +} + +static void smc_link_up_work(struct work_struct *work) +{ + struct smc_ib_up_work *ib_work = container_of(work, + struct smc_ib_up_work, + work); + struct smc_link_group *lgr = ib_work->lgr; + + if (list_empty(&lgr->list)) + goto out; + smcr_link_up(lgr, ib_work->smcibdev, ib_work->ibport); +out: + kfree(ib_work); +} + /* Determine vlan of internal TCP socket. 
* @vlan_id: address to store the determined vlan id into */ diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 413eaad50c7f..86453ad83491 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -345,6 +345,7 @@ void smc_lgr_forget(struct smc_link_group *lgr); void smc_lgr_cleanup_early(struct smc_connection *conn); void smc_lgr_terminate_sched(struct smc_link_group *lgr); void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport); +void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport); void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan); void smc_smcd_terminate_all(struct smcd_dev *dev); diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index c090678a3e5a..545fb0bc3714 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -252,6 +252,7 @@ static void smc_ib_port_event_work(struct work_struct *work) smc_port_terminate(smcibdev, port_idx + 1); } else { clear_bit(port_idx, smcibdev->ports_going_away); + smcr_port_add(smcibdev, port_idx + 1); } } } From 541afa10c126b6c22c2a805a559c70cc41fd156e Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Fri, 1 May 2020 12:48:08 +0200 Subject: [PATCH 08/13] net/smc: add smcr_port_err() and smcr_link_down() processing Call smcr_port_err() when an IB event reports an inactive IB device. smcr_port_err() calls smcr_link_down() for all affected links. smcr_link_down() either triggers the local DELETE_LINK processing, or sends a DELETE_LINK LLC message to the SMC server to initiate the processing. The old handler function smc_port_terminate() is removed. Add helper smcr_link_down_cond() to take a link down conditionally, and smcr_link_down_cond_sched() to schedule the link_down processing to a work. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/smc/smc_core.c | 119 +++++++++++++++++++++++++++++++++------------ net/smc/smc_core.h | 6 ++- net/smc/smc_ib.c | 2 +- net/smc/smc_llc.h | 3 ++ 4 files changed, 98 insertions(+), 32 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 20bc9e46bf52..62108e0cd529 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -56,6 +56,7 @@ static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb, static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft); static void smc_link_up_work(struct work_struct *work); +static void smc_link_down_work(struct work_struct *work); /* return head of link group list and its lock for a given link group */ static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr, @@ -320,6 +321,7 @@ static int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, lnk->smcibdev = ini->ib_dev; lnk->ibport = ini->ib_port; lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu; + INIT_WORK(&lnk->link_down_wrk, smc_link_down_work); if (!ini->ib_dev->initialized) { rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev); if (rc) @@ -818,36 +820,6 @@ void smc_lgr_terminate_sched(struct smc_link_group *lgr) schedule_work(&lgr->terminate_work); } -/* Called when IB port is terminated */ -void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport) -{ - struct smc_link_group *lgr, *l; - LIST_HEAD(lgr_free_list); - int i; - - spin_lock_bh(&smc_lgr_list.lock); - list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) { - if (lgr->is_smcd) - continue; - /* tbd - terminate only when no more links are active */ - for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (!smc_link_usable(&lgr->lnk[i])) - continue; - if (lgr->lnk[i].smcibdev == smcibdev && - lgr->lnk[i].ibport == ibport) { - list_move(&lgr->list, &lgr_free_list); - lgr->freeing = 1; - } - } - } - spin_unlock_bh(&smc_lgr_list.lock); - - list_for_each_entry_safe(lgr, l, &lgr_free_list, list) { - 
list_del_init(&lgr->list); - __smc_lgr_terminate(lgr, false); - } -} - /* Called when peer lgr shutdown (regularly or abnormally) is received */ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan) { @@ -1000,6 +972,79 @@ void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport) } } +/* link is down - switch connections to alternate link, + * must be called under lgr->llc_conf_mutex lock + */ +static void smcr_link_down(struct smc_link *lnk) +{ + struct smc_link_group *lgr = lnk->lgr; + struct smc_link *to_lnk; + int del_link_id; + + if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list)) + return; + + smc_ib_modify_qp_reset(lnk); + to_lnk = NULL; + /* tbd: call to_lnk = smc_switch_conns(lgr, lnk, true); */ + if (!to_lnk) { /* no backup link available */ + smcr_link_clear(lnk); + return; + } + lgr->type = SMC_LGR_SINGLE; + del_link_id = lnk->link_id; + + if (lgr->role == SMC_SERV) { + /* trigger local delete link processing */ + } else { + if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) { + /* another llc task is ongoing */ + mutex_unlock(&lgr->llc_conf_mutex); + wait_event_interruptible_timeout(lgr->llc_waiter, + (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE), + SMC_LLC_WAIT_TIME); + mutex_lock(&lgr->llc_conf_mutex); + } + smc_llc_send_delete_link(to_lnk, del_link_id, SMC_LLC_REQ, true, + SMC_LLC_DEL_LOST_PATH); + } +} + +/* must be called under lgr->llc_conf_mutex lock */ +void smcr_link_down_cond(struct smc_link *lnk) +{ + if (smc_link_downing(&lnk->state)) + smcr_link_down(lnk); +} + +/* will get the lgr->llc_conf_mutex lock */ +void smcr_link_down_cond_sched(struct smc_link *lnk) +{ + if (smc_link_downing(&lnk->state)) + schedule_work(&lnk->link_down_wrk); +} + +void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport) +{ + struct smc_link_group *lgr, *n; + int i; + + list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) { + if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id, + SMC_MAX_PNETID_LEN)) + 
continue; /* lgr is not affected */ + if (list_empty(&lgr->list)) + continue; + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + struct smc_link *lnk = &lgr->lnk[i]; + + if (smc_link_usable(lnk) && + lnk->smcibdev == smcibdev && lnk->ibport == ibport) + smcr_link_down_cond_sched(lnk); + } + } +} + static void smc_link_up_work(struct work_struct *work) { struct smc_ib_up_work *ib_work = container_of(work, @@ -1014,6 +1059,20 @@ out: kfree(ib_work); } +static void smc_link_down_work(struct work_struct *work) +{ + struct smc_link *link = container_of(work, struct smc_link, + link_down_wrk); + struct smc_link_group *lgr = link->lgr; + + if (list_empty(&lgr->list)) + return; + wake_up_interruptible_all(&lgr->llc_waiter); + mutex_lock(&lgr->llc_conf_mutex); + smcr_link_down(link); + mutex_unlock(&lgr->llc_conf_mutex); +} + /* Determine vlan of internal TCP socket. * @vlan_id: address to store the determined vlan id into */ diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 86453ad83491..da3cddbd1651 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -117,6 +117,7 @@ struct smc_link { u8 link_id; /* unique # within link group */ u8 link_idx; /* index in lgr link array */ struct smc_link_group *lgr; /* parent link group */ + struct work_struct link_down_wrk; /* wrk to bring link down */ enum smc_link_state state; /* state of link */ struct delayed_work llc_testlink_wrk; /* testlink worker */ @@ -344,8 +345,8 @@ struct smc_clc_msg_local; void smc_lgr_forget(struct smc_link_group *lgr); void smc_lgr_cleanup_early(struct smc_connection *conn); void smc_lgr_terminate_sched(struct smc_link_group *lgr); -void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport); void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport); +void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport); void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan); void smc_smcd_terminate_all(struct smcd_dev *dev); @@ -376,6 +377,9 @@ void 
smcr_link_clear(struct smc_link *lnk); int smcr_buf_map_lgr(struct smc_link *lnk); int smcr_buf_reg_lgr(struct smc_link *lnk); int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc); +void smcr_link_down_cond(struct smc_link *lnk); +void smcr_link_down_cond_sched(struct smc_link *lnk); + static inline struct smc_link_group *smc_get_lgr(struct smc_link *link) { return link->lgr; diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 545fb0bc3714..2c743caad69a 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -249,7 +249,7 @@ static void smc_ib_port_event_work(struct work_struct *work) clear_bit(port_idx, &smcibdev->port_event_mask); if (!smc_ib_port_active(smcibdev, port_idx + 1)) { set_bit(port_idx, smcibdev->ports_going_away); - smc_port_terminate(smcibdev, port_idx + 1); + smcr_port_err(smcibdev, port_idx + 1); } else { clear_bit(port_idx, smcibdev->ports_going_away); smcr_port_add(smcibdev, port_idx + 1); diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index d2c50d3e43a6..4ed4486e5082 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -35,6 +35,9 @@ enum smc_llc_msg_type { SMC_LLC_DELETE_RKEY = 0x09, }; +#define smc_link_downing(state) \ + (cmpxchg(state, SMC_LNK_ACTIVE, SMC_LNK_INACTIVE) == SMC_LNK_ACTIVE) + /* LLC DELETE LINK Request Reason Codes */ #define SMC_LLC_DEL_LOST_PATH 0x00010000 #define SMC_LLC_DEL_OP_INIT_TERM 0x00020000 From 87523930a16eb57ebb20318e92b5df4b64fe8b20 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Fri, 1 May 2020 12:48:09 +0200 Subject: [PATCH 09/13] net/smc: take link down instead of terminating the link group Use the introduced link down processing in all places where the link group is terminated and take down the affected link only. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/smc/smc_core.c | 7 ++----- net/smc/smc_llc.c | 4 ++-- net/smc/smc_tx.c | 2 +- net/smc/smc_wr.c | 19 ++++++++----------- 4 files changed, 13 insertions(+), 19 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 62108e0cd529..849ae3f9b796 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -884,11 +884,8 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) } else { list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) { for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (lgr->lnk[i].smcibdev == smcibdev) { - list_move(&lgr->list, &lgr_free_list); - lgr->freeing = 1; - break; - } + if (lgr->lnk[i].smcibdev == smcibdev) + smcr_link_down_cond_sched(&lgr->lnk[i]); } } } diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index ceed3c89926f..e478a4c11877 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -556,7 +556,7 @@ static void smc_llc_rx_delete_link(struct smc_link *link, smc_llc_send_delete_link(link, 0, SMC_LLC_RESP, true, SMC_LLC_DEL_PROG_INIT_TERM); } - smc_lgr_terminate_sched(lgr); + smcr_link_down_cond(link); } /* process a confirm_rkey request from peer, remote flow */ @@ -831,7 +831,7 @@ static void smc_llc_testlink_work(struct work_struct *work) if (link->state != SMC_LNK_ACTIVE) return; /* link state changed */ if (rc <= 0) { - smc_lgr_terminate_sched(smc_get_lgr(link)); + smcr_link_down_cond_sched(link); return; } next_interval = link->llc_testlink_time; diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index d74bfe6a90f1..417204572a69 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -283,7 +283,7 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset, rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][link->link_idx].rkey; rc = ib_post_send(link->roce_qp, &rdma_wr->wr, NULL); if (rc) - smc_lgr_terminate_sched(lgr); + smcr_link_down_cond_sched(link); return rc; } diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 93223628c002..031e6c9561b1 100644 --- 
a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -120,8 +120,8 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) sizeof(link->wr_tx_bufs[i])); clear_bit(i, link->wr_tx_mask); } - /* terminate connections of this link group abnormally */ - smc_lgr_terminate_sched(smc_get_lgr(link)); + /* terminate link */ + smcr_link_down_cond_sched(link); } if (pnd_snd.handler) pnd_snd.handler(&pnd_snd.priv, link, wc->status); @@ -212,8 +212,8 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, (smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY), SMC_WR_TX_WAIT_FREE_SLOT_TIME); if (!rc) { - /* timeout - terminate connections */ - smc_lgr_terminate_sched(lgr); + /* timeout - terminate link */ + smcr_link_down_cond_sched(link); return -EPIPE; } if (idx == link->wr_tx_cnt) @@ -270,7 +270,7 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv) rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], NULL); if (rc) { smc_wr_tx_put_slot(link, priv); - smc_lgr_terminate_sched(smc_get_lgr(link)); + smcr_link_down_cond_sched(link); } return rc; } @@ -294,8 +294,8 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr) (link->wr_reg_state != POSTED), SMC_WR_REG_MR_WAIT_TIME); if (!rc) { - /* timeout - terminate connections */ - smc_lgr_terminate_sched(smc_get_lgr(link)); + /* timeout - terminate link */ + smcr_link_down_cond_sched(link); return -EPIPE; } if (rc == -ERESTARTSYS) @@ -393,10 +393,7 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num) case IB_WC_RETRY_EXC_ERR: case IB_WC_RNR_RETRY_EXC_ERR: case IB_WC_WR_FLUSH_ERR: - /* terminate connections of this link group - * abnormally - */ - smc_lgr_terminate_sched(smc_get_lgr(link)); + smcr_link_down_cond_sched(link); break; default: smc_wr_rx_post(link); /* refill WR RX */ From 33d203302d1cc744a13349d2576c985feb469220 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Fri, 1 May 2020 12:48:10 +0200 Subject: [PATCH 10/13] net/smc: remove DELETE LINK processing from 
smc_core.c Support for multiple links makes the former DELETE LINK processing obsolete which sent one DELETE_LINK LLC message for each single link. Remove this processing from smc_core.c. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_core.c | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 849ae3f9b796..60c708f6de51 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -202,20 +202,6 @@ void smc_lgr_cleanup_early(struct smc_connection *conn) smc_lgr_schedule_free_work_fast(lgr); } -/* Send delete link, either as client to request the initiation - * of the DELETE LINK sequence from server; or as server to - * initiate the delete processing. See smc_llc_rx_delete_link(). - */ -static int smcr_link_send_delete(struct smc_link *lnk, bool orderly) -{ - if (lnk->state == SMC_LNK_ACTIVE && - !smc_llc_send_delete_link(lnk, 0, SMC_LLC_REQ, orderly, - SMC_LLC_DEL_PROG_INIT_TERM)) { - return 0; - } - return -ENOTCONN; -} - static void smc_lgr_free(struct smc_link_group *lgr); static void smc_lgr_free_work(struct work_struct *work) @@ -241,25 +227,6 @@ static void smc_lgr_free_work(struct work_struct *work) return; } list_del_init(&lgr->list); /* remove from smc_lgr_list */ - - if (!lgr->is_smcd && !lgr->terminating) { - bool do_wait = false; - - for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - struct smc_link *lnk = &lgr->lnk[i]; - /* try to send del link msg, on err free immediately */ - if (lnk->state == SMC_LNK_ACTIVE && - !smcr_link_send_delete(lnk, true)) { - /* reschedule in case we never receive a resp */ - smc_lgr_schedule_free_work(lgr); - do_wait = true; - } - } - if (do_wait) { - spin_unlock_bh(lgr_lock); - return; /* wait for resp, see smc_llc_rx_delete_link */ - } - } lgr->freeing = 1; /* this instance does the freeing, no new schedule */ spin_unlock_bh(lgr_lock); cancel_delayed_work(&lgr->free_work); From 
6c868a3edc70ec9819d6a94268625d25e6bc9587 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Fri, 1 May 2020 12:48:11 +0200 Subject: [PATCH 11/13] net/smc: introduce smc_pnet_find_alt_roce() Introduce a new function in smc_pnet.c that searches for an alternate IB device, using an existing link group and a primary IB device. The alternate IB device needs to be active and must have the same PNETID as the link group. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_pnet.c | 15 +++++++++++++-- net/smc/smc_pnet.h | 5 ++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index bd01c71b827a..50c96e843fab 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -777,7 +777,8 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev, /* find a roce device for the given pnetid */ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id, - struct smc_init_info *ini) + struct smc_init_info *ini, + struct smc_ib_device *known_dev) { struct smc_ib_device *ibdev; int i; @@ -785,6 +786,8 @@ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id, ini->ib_dev = NULL; spin_lock(&smc_ib_devices.lock); list_for_each_entry(ibdev, &smc_ib_devices.list, list) { + if (ibdev == known_dev) + continue; for (i = 1; i <= SMC_MAX_PORTS; i++) { if (!rdma_is_port_valid(ibdev->ibdev, i)) continue; @@ -803,6 +806,14 @@ out: spin_unlock(&smc_ib_devices.lock); } +/* find alternate roce device with same pnet_id and vlan_id */ +void smc_pnet_find_alt_roce(struct smc_link_group *lgr, + struct smc_init_info *ini, + struct smc_ib_device *known_dev) +{ + _smc_pnet_find_roce_by_pnetid(lgr->pnet_id, ini, known_dev); +} + /* if handshake network device belongs to a roce device, return its * IB device and port */ @@ -857,7 +868,7 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, smc_pnet_find_rdma_dev(ndev, ini); return; /* pnetid could not be determined */ } - 
_smc_pnet_find_roce_by_pnetid(ndev_pnetid, ini); + _smc_pnet_find_roce_by_pnetid(ndev_pnetid, ini, NULL); } static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h index ea207f8fc6f7..811a65986691 100644 --- a/net/smc/smc_pnet.h +++ b/net/smc/smc_pnet.h @@ -19,6 +19,7 @@ struct smc_ib_device; struct smcd_dev; struct smc_init_info; +struct smc_link_group; /** * struct smc_pnettable - SMC PNET table anchor @@ -48,5 +49,7 @@ void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini); void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini); int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port); int smc_pnetid_by_table_smcd(struct smcd_dev *smcd); - +void smc_pnet_find_alt_roce(struct smc_link_group *lgr, + struct smc_init_info *ini, + struct smc_ib_device *known_dev); #endif From 8574cf4055ab44724ee9a4c30921d3ed853d787c Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Fri, 1 May 2020 12:48:12 +0200 Subject: [PATCH 12/13] net/smc: allocate index for a new link Add smc_llc_alloc_alt_link() to find a free link index for a new link, depending on the new link group type. And update constants for the maximum number of links to 3 (2 symmetric and 1 dangling asymmetric link). These maximum numbers are the same as used by other implementations of the SMC-R protocol. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_core.h | 2 +- net/smc/smc_llc.c | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index da3cddbd1651..eb27f2eb7c8c 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -128,7 +128,7 @@ struct smc_link { /* For now we just allow one parallel link per link group. The SMC protocol * allows more (up to 8). 
*/ -#define SMC_LINKS_PER_LGR_MAX 1 +#define SMC_LINKS_PER_LGR_MAX 3 #define SMC_SINGLE_LINK 0 #define SMC_FIRST_CONTACT 1 /* first contact to a peer */ diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index e478a4c11877..3a25b6ebe3a8 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -541,6 +541,30 @@ static int smc_llc_send_message(struct smc_link *link, void *llcbuf) /********************************* receive ***********************************/ +static int smc_llc_alloc_alt_link(struct smc_link_group *lgr, + enum smc_lgr_type lgr_new_t) +{ + int i; + + if (lgr->type == SMC_LGR_SYMMETRIC || + (lgr->type != SMC_LGR_SINGLE && + (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL || + lgr_new_t == SMC_LGR_ASYMMETRIC_PEER))) + return -EMLINK; + + if (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL || + lgr_new_t == SMC_LGR_ASYMMETRIC_PEER) { + for (i = SMC_LINKS_PER_LGR_MAX - 1; i >= 0; i--) + if (lgr->lnk[i].state == SMC_LNK_UNUSED) + return i; + } else { + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) + if (lgr->lnk[i].state == SMC_LNK_UNUSED) + return i; + } + return -EMLINK; +} + static void smc_llc_rx_delete_link(struct smc_link *link, struct smc_llc_msg_del_link *llc) { From b45e7f98ab7c2d7035d92100ee011584693eccce Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Fri, 1 May 2020 12:48:13 +0200 Subject: [PATCH 13/13] net/smc: llc_add_link_work to handle ADD_LINK LLC requests Introduce a work that is scheduled when a new ADD_LINK LLC request is received. The work will call either the SMC client or SMC server ADD_LINK processing. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/smc/smc_core.h | 1 + net/smc/smc_llc.c | 24 ++++++++++++++++++++++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index eb27f2eb7c8c..555ada9d2423 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -253,6 +253,7 @@ struct smc_link_group { /* protects llc_event_q */ struct mutex llc_conf_mutex; /* protects lgr reconfig. */ + struct work_struct llc_add_link_work; struct work_struct llc_event_work; /* llc event worker */ wait_queue_head_t llc_waiter; diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 3a25b6ebe3a8..50f59746bdf9 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -565,6 +565,24 @@ static int smc_llc_alloc_alt_link(struct smc_link_group *lgr, return -EMLINK; } +/* worker to process an add link message */ +static void smc_llc_add_link_work(struct work_struct *work) +{ + struct smc_link_group *lgr = container_of(work, struct smc_link_group, + llc_add_link_work); + + if (list_empty(&lgr->list)) { + /* link group is terminating */ + smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); + goto out; + } + + /* tbd: call smc_llc_process_cli_add_link(lgr); */ + /* tbd: call smc_llc_process_srv_add_link(lgr); */ +out: + smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl); +} + static void smc_llc_rx_delete_link(struct smc_link *link, struct smc_llc_msg_del_link *llc) { @@ -685,11 +703,11 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry) wake_up_interruptible(&lgr->llc_waiter); } else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) { - /* tbd: schedule_work(&lgr->llc_add_link_work); */ + schedule_work(&lgr->llc_add_link_work); } } else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) { /* as smc server, handle client suggestion */ - /* tbd: schedule_work(&lgr->llc_add_link_work); */ + schedule_work(&lgr->llc_add_link_work); } return; case SMC_LLC_CONFIRM_LINK: @@ -868,6 +886,7 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc) struct net 
*net = sock_net(smc->clcsock->sk); INIT_WORK(&lgr->llc_event_work, smc_llc_event_work); + INIT_WORK(&lgr->llc_add_link_work, smc_llc_add_link_work); INIT_LIST_HEAD(&lgr->llc_event_q); spin_lock_init(&lgr->llc_event_q_lock); spin_lock_init(&lgr->llc_flow_lock); @@ -882,6 +901,7 @@ void smc_llc_lgr_clear(struct smc_link_group *lgr) smc_llc_event_flush(lgr); wake_up_interruptible_all(&lgr->llc_waiter); cancel_work_sync(&lgr->llc_event_work); + cancel_work_sync(&lgr->llc_add_link_work); if (lgr->delayed_event) { kfree(lgr->delayed_event); lgr->delayed_event = NULL;