From 73130a7b1ac92c9f30e0a255951129f4851c5794 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sat, 18 Jun 2022 17:24:23 -0500 Subject: [PATCH 1/7] smb3: fix empty netname context on secondary channels Some servers do not allow null netname contexts, which would cause multichannel to revert to single channel when mounting to some servers (e.g. Azure xSMB). Fixes: 4c14d7043fede ("cifs: populate empty hostnames for extra channels") Reviewed-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index b515140bad8d..5e8c4737b183 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -570,16 +570,18 @@ assemble_neg_contexts(struct smb2_negotiate_req *req, *total_len += ctxt_len; pneg_ctxt += ctxt_len; - ctxt_len = build_netname_ctxt((struct smb2_netname_neg_context *)pneg_ctxt, - server->hostname); - *total_len += ctxt_len; - pneg_ctxt += ctxt_len; - build_posix_ctxt((struct smb2_posix_neg_context *)pneg_ctxt); *total_len += sizeof(struct smb2_posix_neg_context); pneg_ctxt += sizeof(struct smb2_posix_neg_context); - neg_context_count = 4; + if (server->hostname && (server->hostname[0] != 0)) { + ctxt_len = build_netname_ctxt((struct smb2_netname_neg_context *)pneg_ctxt, + server->hostname); + *total_len += ctxt_len; + pneg_ctxt += ctxt_len; + neg_context_count = 4; + } else /* second channels do not have a hostname */ + neg_context_count = 3; if (server->compress_algorithm) { build_compression_ctxt((struct smb2_compression_capabilities_context *) From 9de74996a739bf0b7b5d8c260bd207ad6007442b Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Wed, 22 Jun 2022 12:36:36 -0500 Subject: [PATCH 2/7] smb3: use netname when available on secondary channels Some servers do not allow null netname contexts, which would cause multichannel to revert to single channel when mounting to some servers (e.g. Azure xSMB). The previous patch fixed that by avoiding incorrectly sending the netname context when there would be a null hostname sent in the netname context, while this patch fixes the null hostname for the secondary channel by using the hostname of the primary channel for the secondary channel. Fixes: 4c14d7043fede ("cifs: populate empty hostnames for extra channels") Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 5e8c4737b183..12b4dddaedb0 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -543,6 +543,7 @@ assemble_neg_contexts(struct smb2_negotiate_req *req, struct TCP_Server_Info *server, unsigned int *total_len) { char *pneg_ctxt; + char *hostname = NULL; unsigned int ctxt_len, neg_context_count; if (*total_len > 200) { @@ -574,9 +575,15 @@ assemble_neg_contexts(struct smb2_negotiate_req *req, *total_len += sizeof(struct smb2_posix_neg_context); pneg_ctxt += sizeof(struct smb2_posix_neg_context); - if (server->hostname && (server->hostname[0] != 0)) { + /* + * secondary channels don't have the hostname field populated + * use the hostname field in the primary channel instead + */ + hostname = CIFS_SERVER_IS_CHAN(server) ? + server->primary_server->hostname : server->hostname; + if (hostname && (hostname[0] != 0)) { ctxt_len = build_netname_ctxt((struct smb2_netname_neg_context *)pneg_ctxt, - server->hostname); + hostname); *total_len += ctxt_len; pneg_ctxt += ctxt_len; neg_context_count = 4; From aa45dadd34e44fcd6a9df4b395bee5b5633b4cec Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Sat, 1 Jan 2022 12:50:21 +0000 Subject: [PATCH 3/7] cifs: change iface_list from array to sorted linked list A server's published interface list can change over time, and needs to be updated. We've storing iface_list as a simple array, which makes it difficult to manipulate an existing list. With this change, iface_list is modified into a linked list of interfaces, which is kept sorted by speed. Also added a reference counter for an iface entry, so that each channel can maintain a backpointer to the iface and drop it easily when needed. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/cifs_debug.c | 12 ++- fs/cifs/cifsglob.h | 54 +++++++++- fs/cifs/connect.c | 6 +- fs/cifs/misc.c | 9 +- fs/cifs/sess.c | 80 +++++++-------- fs/cifs/smb2ops.c | 229 ++++++++++++++++++++++--------------------- 6 files changed, 231 insertions(+), 159 deletions(-) diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 1dd995efd5b8..2cfbac8bb965 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -162,6 +162,8 @@ cifs_dump_iface(struct seq_file *m, struct cifs_server_iface *iface) seq_printf(m, "\t\tIPv4: %pI4\n", &ipv4->sin_addr); else if (iface->sockaddr.ss_family == AF_INET6) seq_printf(m, "\t\tIPv6: %pI6\n", &ipv6->sin6_addr); + if (!iface->is_active) + seq_puts(m, "\t\t[for-cleanup]\n"); } static int cifs_debug_files_proc_show(struct seq_file *m, void *v) @@ -221,6 +223,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) struct TCP_Server_Info *server; struct cifs_ses *ses; struct cifs_tcon *tcon; + struct cifs_server_iface *iface; int c, i, j; seq_puts(m, @@ -456,11 +459,10 @@ skip_rdma: if (ses->iface_count) seq_printf(m, "\n\n\tServer interfaces: %zu", ses->iface_count); - for (j = 0; j < ses->iface_count; j++) { - struct cifs_server_iface *iface; - - iface = &ses->iface_list[j]; - seq_printf(m, "\n\t%d)", j+1); + j = 0; + list_for_each_entry(iface, &ses->iface_list, + iface_head) { + seq_printf(m, "\n\t%d)", ++j); cifs_dump_iface(m, iface); if (is_ses_using_iface(ses, iface)) seq_puts(m, "\t\t[CONNECTED]\n"); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index e7737166e5b8..7fc2addc95a3 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -933,15 +933,67 @@ static inline void cifs_set_net_ns(struct TCP_Server_Info *srv, struct net *net) #endif struct cifs_server_iface { + struct list_head iface_head; + struct kref refcount; size_t speed; unsigned int rdma_capable : 1; unsigned int rss_capable : 1; + unsigned int is_active : 1; /* unset if non existent */ struct sockaddr_storage sockaddr; }; +/* release iface when last ref is dropped */ +static inline void +release_iface(struct kref *ref) +{ + struct cifs_server_iface *iface = container_of(ref, + struct cifs_server_iface, + refcount); + list_del_init(&iface->iface_head); + kfree(iface); +} + +/* + * compare two interfaces a and b + * return 0 if everything matches. + * return 1 if a has higher link speed, or rdma capable, or rss capable + * return -1 otherwise. + */ +static inline int +iface_cmp(struct cifs_server_iface *a, struct cifs_server_iface *b) +{ + int cmp_ret = 0; + + WARN_ON(!a || !b); + if (a->speed == b->speed) { + if (a->rdma_capable == b->rdma_capable) { + if (a->rss_capable == b->rss_capable) { + cmp_ret = memcmp(&a->sockaddr, &b->sockaddr, + sizeof(a->sockaddr)); + if (!cmp_ret) + return 0; + else if (cmp_ret > 0) + return 1; + else + return -1; + } else if (a->rss_capable > b->rss_capable) + return 1; + else + return -1; + } else if (a->rdma_capable > b->rdma_capable) + return 1; + else + return -1; + } else if (a->speed > b->speed) + return 1; + else + return -1; +} + struct cifs_chan { unsigned int in_reconnect : 1; /* if session setup in progress for this channel */ struct TCP_Server_Info *server; + struct cifs_server_iface *iface; /* interface in use */ __u8 signkey[SMB3_SIGN_KEY_SIZE]; }; @@ -993,7 +1045,7 @@ struct cifs_ses { */ spinlock_t iface_lock; /* ========= begin: protected by iface_lock ======== */ - struct cifs_server_iface *iface_list; + struct list_head iface_list; size_t iface_count; unsigned long iface_last_update; /* jiffies */ /* ========= end: protected by iface_lock ======== */ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 1849e3411487..248fe1805c17 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1894,9 +1894,11 @@ void cifs_put_smb_ses(struct cifs_ses *ses) int i; for (i = 1; i < chan_count; i++) { - spin_unlock(&ses->chan_lock); + if (ses->chans[i].iface) { + kref_put(&ses->chans[i].iface->refcount, release_iface); + ses->chans[i].iface = NULL; + } cifs_put_tcp_session(ses->chans[i].server, 0); - spin_lock(&ses->chan_lock); ses->chans[i].server = NULL; } } diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index c69e1240d730..0e84e6fcf8ab 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -75,6 +75,7 @@ sesInfoAlloc(void) INIT_LIST_HEAD(&ret_buf->tcon_list); mutex_init(&ret_buf->session_mutex); spin_lock_init(&ret_buf->iface_lock); + INIT_LIST_HEAD(&ret_buf->iface_list); spin_lock_init(&ret_buf->chan_lock); } return ret_buf; @@ -83,6 +84,8 @@ sesInfoAlloc(void) void sesInfoFree(struct cifs_ses *buf_to_free) { + struct cifs_server_iface *iface = NULL, *niface = NULL; + if (buf_to_free == NULL) { cifs_dbg(FYI, "Null buffer passed to sesInfoFree\n"); return; @@ -96,7 +99,11 @@ sesInfoFree(struct cifs_ses *buf_to_free) kfree(buf_to_free->user_name); kfree(buf_to_free->domainName); kfree_sensitive(buf_to_free->auth_key.response); - kfree(buf_to_free->iface_list); + spin_lock(&buf_to_free->iface_lock); + list_for_each_entry_safe(iface, niface, &buf_to_free->iface_list, + iface_head) + kref_put(&iface->refcount, release_iface); + spin_unlock(&buf_to_free->iface_lock); kfree_sensitive(buf_to_free); } diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index d417de354d9d..d7b6e5687df2 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -58,7 +58,7 @@ bool is_ses_using_iface(struct cifs_ses *ses, struct cifs_server_iface *iface) spin_lock(&ses->chan_lock); for (i = 0; i < ses->chan_count; i++) { - if (is_server_using_iface(ses->chans[i].server, iface)) { + if (ses->chans[i].iface == iface) { spin_unlock(&ses->chan_lock); return true; } @@ -151,11 +151,9 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) { int old_chan_count, new_chan_count; int left; - int i = 0; int rc = 0; int tries = 0; - struct cifs_server_iface *ifaces = NULL; - size_t iface_count; + struct cifs_server_iface *iface = NULL, *niface = NULL; spin_lock(&ses->chan_lock); @@ -184,33 +182,17 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) } spin_unlock(&ses->chan_lock); - /* - * Make a copy of the iface list at the time and use that - * instead so as to not hold the iface spinlock for opening - * channels - */ - spin_lock(&ses->iface_lock); - iface_count = ses->iface_count; - if (iface_count <= 0) { - spin_unlock(&ses->iface_lock); - cifs_dbg(VFS, "no iface list available to open channels\n"); - return 0; - } - ifaces = kmemdup(ses->iface_list, iface_count*sizeof(*ifaces), - GFP_ATOMIC); - if (!ifaces) { - spin_unlock(&ses->iface_lock); - return 0; - } - spin_unlock(&ses->iface_lock); - /* * Keep connecting to same, fastest, iface for all channels as * long as its RSS. Try next fastest one if not RSS or channel * creation fails. */ + spin_lock(&ses->iface_lock); + iface = list_first_entry(&ses->iface_list, struct cifs_server_iface, + iface_head); + spin_unlock(&ses->iface_lock); + while (left > 0) { - struct cifs_server_iface *iface; tries++; if (tries > 3*ses->chan_max) { @@ -219,27 +201,46 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) break; } - iface = &ifaces[i]; - if (is_ses_using_iface(ses, iface) && !iface->rss_capable) { - i = (i+1) % iface_count; - continue; + spin_lock(&ses->iface_lock); + if (!ses->iface_count) { + spin_unlock(&ses->iface_lock); + break; } - rc = cifs_ses_add_channel(cifs_sb, ses, iface); - if (rc) { - cifs_dbg(FYI, "failed to open extra channel on iface#%d rc=%d\n", - i, rc); - i = (i+1) % iface_count; - continue; - } + list_for_each_entry_safe_from(iface, niface, &ses->iface_list, + iface_head) { + /* skip ifaces that are unusable */ + if (!iface->is_active || + (is_ses_using_iface(ses, iface) && + !iface->rss_capable)) { + continue; + } + + /* take ref before unlock */ + kref_get(&iface->refcount); + + spin_unlock(&ses->iface_lock); + rc = cifs_ses_add_channel(cifs_sb, ses, iface); + spin_lock(&ses->iface_lock); + + if (rc) { + cifs_dbg(VFS, "failed to open extra channel on iface:%pIS rc=%d\n", + &iface->sockaddr, + rc); + kref_put(&iface->refcount, release_iface); + continue; + } + + cifs_dbg(FYI, "successfully opened new channel on iface:%pIS\n", + &iface->sockaddr); + break; + } + spin_unlock(&ses->iface_lock); - cifs_dbg(FYI, "successfully opened new channel on iface#%d\n", - i); left--; new_chan_count++; } - kfree(ifaces); return new_chan_count - old_chan_count; } @@ -355,6 +356,7 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, spin_unlock(&ses->chan_lock); goto out; } + chan->iface = iface; ses->chan_count++; atomic_set(&ses->chan_seq, 0); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 8543cafdfd34..db4ccf1d235e 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -512,30 +512,135 @@ smb3_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) static int parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, size_t buf_len, - struct cifs_server_iface **iface_list, - size_t *iface_count) + struct cifs_ses *ses) { struct network_interface_info_ioctl_rsp *p; struct sockaddr_in *addr4; struct sockaddr_in6 *addr6; struct iface_info_ipv4 *p4; struct iface_info_ipv6 *p6; - struct cifs_server_iface *info; + struct cifs_server_iface *info = NULL, *iface = NULL, *niface = NULL; + struct cifs_server_iface tmp_iface; ssize_t bytes_left; size_t next = 0; int nb_iface = 0; - int rc = 0; - - *iface_list = NULL; - *iface_count = 0; - - /* - * Fist pass: count and sanity check - */ + int rc = 0, ret = 0; bytes_left = buf_len; p = buf; + + spin_lock(&ses->iface_lock); + /* + * Go through iface_list and do kref_put to remove + * any unused ifaces. ifaces in use will be removed + * when the last user calls a kref_put on it + */ + list_for_each_entry_safe(iface, niface, &ses->iface_list, + iface_head) { + iface->is_active = 0; + kref_put(&iface->refcount, release_iface); + } + spin_unlock(&ses->iface_lock); + while (bytes_left >= sizeof(*p)) { + memset(&tmp_iface, 0, sizeof(tmp_iface)); + tmp_iface.speed = le64_to_cpu(p->LinkSpeed); + tmp_iface.rdma_capable = le32_to_cpu(p->Capability & RDMA_CAPABLE) ? 1 : 0; + tmp_iface.rss_capable = le32_to_cpu(p->Capability & RSS_CAPABLE) ? 1 : 0; + + switch (p->Family) { + /* + * The kernel and wire socket structures have the same + * layout and use network byte order but make the + * conversion explicit in case either one changes. + */ + case INTERNETWORK: + addr4 = (struct sockaddr_in *)&tmp_iface.sockaddr; + p4 = (struct iface_info_ipv4 *)p->Buffer; + addr4->sin_family = AF_INET; + memcpy(&addr4->sin_addr, &p4->IPv4Address, 4); + + /* [MS-SMB2] 2.2.32.5.1.1 Clients MUST ignore these */ + addr4->sin_port = cpu_to_be16(CIFS_PORT); + + cifs_dbg(FYI, "%s: ipv4 %pI4\n", __func__, + &addr4->sin_addr); + break; + case INTERNETWORKV6: + addr6 = (struct sockaddr_in6 *)&tmp_iface.sockaddr; + p6 = (struct iface_info_ipv6 *)p->Buffer; + addr6->sin6_family = AF_INET6; + memcpy(&addr6->sin6_addr, &p6->IPv6Address, 16); + + /* [MS-SMB2] 2.2.32.5.1.2 Clients MUST ignore these */ + addr6->sin6_flowinfo = 0; + addr6->sin6_scope_id = 0; + addr6->sin6_port = cpu_to_be16(CIFS_PORT); + + cifs_dbg(FYI, "%s: ipv6 %pI6\n", __func__, + &addr6->sin6_addr); + break; + default: + cifs_dbg(VFS, + "%s: skipping unsupported socket family\n", + __func__); + goto next_iface; + } + + /* + * The iface_list is assumed to be sorted by speed. + * Check if the new interface exists in that list. + * NEVER change iface. it could be in use. + * Add a new one instead + */ + spin_lock(&ses->iface_lock); + iface = niface = NULL; + list_for_each_entry_safe(iface, niface, &ses->iface_list, + iface_head) { + ret = iface_cmp(iface, &tmp_iface); + if (!ret) { + /* just get a ref so that it doesn't get picked/freed */ + iface->is_active = 1; + kref_get(&iface->refcount); + spin_unlock(&ses->iface_lock); + goto next_iface; + } else if (ret < 0) { + /* all remaining ifaces are slower */ + kref_get(&iface->refcount); + break; + } + } + spin_unlock(&ses->iface_lock); + + /* no match. insert the entry in the list */ + info = kmalloc(sizeof(struct cifs_server_iface), + GFP_KERNEL); + if (!info) { + rc = -ENOMEM; + goto out; + } + memcpy(info, &tmp_iface, sizeof(tmp_iface)); + + /* add this new entry to the list */ + kref_init(&info->refcount); + info->is_active = 1; + + cifs_dbg(FYI, "%s: adding iface %zu\n", __func__, ses->iface_count); + cifs_dbg(FYI, "%s: speed %zu bps\n", __func__, info->speed); + cifs_dbg(FYI, "%s: capabilities 0x%08x\n", __func__, + le32_to_cpu(p->Capability)); + + spin_lock(&ses->iface_lock); + if (!list_entry_is_head(iface, &ses->iface_list, iface_head)) { + list_add_tail(&info->iface_head, &iface->iface_head); + kref_put(&iface->refcount, release_iface); + } else + list_add_tail(&info->iface_head, &ses->iface_list); + spin_unlock(&ses->iface_lock); + + ses->iface_count++; + ses->iface_last_update = jiffies; +next_iface: nb_iface++; next = le32_to_cpu(p->Next); if (!next) { @@ -557,108 +662,21 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, cifs_dbg(VFS, "%s: incomplete interface info\n", __func__); - /* - * Second pass: extract info to internal structure - */ - - *iface_list = kcalloc(nb_iface, sizeof(**iface_list), GFP_KERNEL); - if (!*iface_list) { - rc = -ENOMEM; - goto out; - } - - info = *iface_list; - bytes_left = buf_len; - p = buf; - while (bytes_left >= sizeof(*p)) { - info->speed = le64_to_cpu(p->LinkSpeed); - info->rdma_capable = le32_to_cpu(p->Capability & RDMA_CAPABLE) ? 1 : 0; - info->rss_capable = le32_to_cpu(p->Capability & RSS_CAPABLE) ? 1 : 0; - - cifs_dbg(FYI, "%s: adding iface %zu\n", __func__, *iface_count); - cifs_dbg(FYI, "%s: speed %zu bps\n", __func__, info->speed); - cifs_dbg(FYI, "%s: capabilities 0x%08x\n", __func__, - le32_to_cpu(p->Capability)); - - switch (p->Family) { - /* - * The kernel and wire socket structures have the same - * layout and use network byte order but make the - * conversion explicit in case either one changes. - */ - case INTERNETWORK: - addr4 = (struct sockaddr_in *)&info->sockaddr; - p4 = (struct iface_info_ipv4 *)p->Buffer; - addr4->sin_family = AF_INET; - memcpy(&addr4->sin_addr, &p4->IPv4Address, 4); - - /* [MS-SMB2] 2.2.32.5.1.1 Clients MUST ignore these */ - addr4->sin_port = cpu_to_be16(CIFS_PORT); - - cifs_dbg(FYI, "%s: ipv4 %pI4\n", __func__, - &addr4->sin_addr); - break; - case INTERNETWORKV6: - addr6 = (struct sockaddr_in6 *)&info->sockaddr; - p6 = (struct iface_info_ipv6 *)p->Buffer; - addr6->sin6_family = AF_INET6; - memcpy(&addr6->sin6_addr, &p6->IPv6Address, 16); - - /* [MS-SMB2] 2.2.32.5.1.2 Clients MUST ignore these */ - addr6->sin6_flowinfo = 0; - addr6->sin6_scope_id = 0; - addr6->sin6_port = cpu_to_be16(CIFS_PORT); - - cifs_dbg(FYI, "%s: ipv6 %pI6\n", __func__, - &addr6->sin6_addr); - break; - default: - cifs_dbg(VFS, - "%s: skipping unsupported socket family\n", - __func__); - goto next_iface; - } - - (*iface_count)++; - info++; -next_iface: - next = le32_to_cpu(p->Next); - if (!next) - break; - p = (struct network_interface_info_ioctl_rsp *)((u8 *)p+next); - bytes_left -= next; - } - - if (!*iface_count) { + if (!ses->iface_count) { rc = -EINVAL; goto out; } out: - if (rc) { - kfree(*iface_list); - *iface_count = 0; - *iface_list = NULL; - } return rc; } -static int compare_iface(const void *ia, const void *ib) -{ - const struct cifs_server_iface *a = (struct cifs_server_iface *)ia; - const struct cifs_server_iface *b = (struct cifs_server_iface *)ib; - - return a->speed == b->speed ? 0 : (a->speed > b->speed ? -1 : 1); -} - static int SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon) { int rc; unsigned int ret_data_len = 0; struct network_interface_info_ioctl_rsp *out_buf = NULL; - struct cifs_server_iface *iface_list; - size_t iface_count; struct cifs_ses *ses = tcon->ses; rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, @@ -674,21 +692,10 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon) goto out; } - rc = parse_server_interfaces(out_buf, ret_data_len, - &iface_list, &iface_count); + rc = parse_server_interfaces(out_buf, ret_data_len, ses); if (rc) goto out; - /* sort interfaces from fastest to slowest */ - sort(iface_list, iface_count, sizeof(*iface_list), compare_iface, NULL); - - spin_lock(&ses->iface_lock); - kfree(ses->iface_list); - ses->iface_list = iface_list; - ses->iface_count = iface_count; - ses->iface_last_update = jiffies; - spin_unlock(&ses->iface_lock); - out: kfree(out_buf); return rc; From b54034a73baf9fe31fb3f218c17bd5308a27a1ca Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 3 Jan 2022 08:47:30 +0000 Subject: [PATCH 4/7] cifs: during reconnect, update interface if necessary Going forward, the plan is to periodically query the server for it's interfaces (when multichannel is enabled). This change allows checking for inactive interfaces during reconnect, and reconnect to a new interface if necessary. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/cifsproto.h | 5 +++ fs/cifs/connect.c | 4 +++ fs/cifs/sess.c | 79 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+) diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 3b7366ec03c7..00c4a8aa2649 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -636,6 +636,11 @@ cifs_chan_clear_need_reconnect(struct cifs_ses *ses, bool cifs_chan_needs_reconnect(struct cifs_ses *ses, struct TCP_Server_Info *server); +bool +cifs_chan_is_iface_active(struct cifs_ses *ses, + struct TCP_Server_Info *server); +int +cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server); void extract_unc_hostname(const char *unc, const char **h, size_t *len); int copy_path_name(char *dst, const char *src); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 248fe1805c17..d8aae257649f 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -232,6 +232,10 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + /* check if iface is still active */ + if (!cifs_chan_is_iface_active(ses, server)) + cifs_chan_update_iface(ses, server); + spin_lock(&ses->chan_lock); if (!mark_smb_session && cifs_chan_needs_reconnect(ses, server)) goto next_session; diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index d7b6e5687df2..7c26ee70c8b0 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -146,6 +146,16 @@ cifs_chan_needs_reconnect(struct cifs_ses *ses, return CIFS_CHAN_NEEDS_RECONNECT(ses, chan_index); } +bool +cifs_chan_is_iface_active(struct cifs_ses *ses, + struct TCP_Server_Info *server) +{ + unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + + return ses->chans[chan_index].iface && + ses->chans[chan_index].iface->is_active; +} + /* returns number of channels added */ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) { @@ -244,6 +254,75 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) return new_chan_count - old_chan_count; } +/* + * update the iface for the channel if necessary. + * will return 0 when iface is updated. 1 otherwise + * Must be called with chan_lock held. + */ +int +cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) +{ + unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + struct cifs_server_iface *iface = NULL; + struct cifs_server_iface *old_iface = NULL; + int rc = 0; + + /* primary channel. This can never go away */ + if (!chan_index) + return 0; + + if (ses->chans[chan_index].iface) { + old_iface = ses->chans[chan_index].iface; + if (old_iface->is_active) + return 1; + } + + spin_lock(&ses->iface_lock); + + /* then look for a new one */ + list_for_each_entry(iface, &ses->iface_list, iface_head) { + if (!iface->is_active || + (is_ses_using_iface(ses, iface) && + !iface->rss_capable)) { + continue; + } + kref_get(&iface->refcount); + } + + if (!list_entry_is_head(iface, &ses->iface_list, iface_head)) { + rc = 1; + iface = NULL; + cifs_dbg(FYI, "unable to find a suitable iface\n"); + } + + ses->chans[chan_index].iface = iface; + + /* now drop the ref to the current iface */ + if (old_iface && iface) { + kref_put(&old_iface->refcount, release_iface); + cifs_dbg(FYI, "replacing iface: %pIS with %pIS\n", + &old_iface->sockaddr, + &iface->sockaddr); + } else if (old_iface) { + kref_put(&old_iface->refcount, release_iface); + cifs_dbg(FYI, "releasing ref to iface: %pIS\n", + &old_iface->sockaddr); + } else { + WARN_ON(!iface); + cifs_dbg(FYI, "adding new iface: %pIS\n", &iface->sockaddr); + } + + spin_unlock(&ses->iface_lock); + + /* No iface is found. if secondary chan, drop connection */ + if (!iface && CIFS_SERVER_IS_CHAN(server)) { + cifs_put_tcp_session(server, false); + ses->chans[chan_index].server = NULL; + } + + return rc; +} + /* * If server is a channel of ses, return the corresponding enclosing * cifs_chan otherwise return NULL. From 6e1c1c08cdf3d7d7b8c571c0f032a283af6ca024 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 6 Jun 2022 09:17:56 +0000 Subject: [PATCH 5/7] cifs: periodically query network interfaces from server Currently, we only query the server for network interfaces information at the time of mount, and never afterwards. This can be a problem, especially for services like Azure, where the IP address of the channel endpoints can change over time. With this change, we schedule a 600s polling of this info from the server for each tree connect. An alternative for periodic polling was to do this only at the time of reconnect. But this could delay the reconnect time slightly. Also, there are some challenges w.r.t how we have cifs_reconnect implemented today. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 4 ++++ fs/cifs/cifsproto.h | 2 ++ fs/cifs/connect.c | 28 ++++++++++++++++++++++++++++ fs/cifs/smb2ops.c | 2 +- 4 files changed, 35 insertions(+), 1 deletion(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 7fc2addc95a3..a643c84ff1e9 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -80,6 +80,9 @@ #define SMB_DNS_RESOLVE_INTERVAL_MIN 120 #define SMB_DNS_RESOLVE_INTERVAL_DEFAULT 600 +/* smb multichannel query server interfaces interval in seconds */ +#define SMB_INTERFACE_POLL_INTERVAL 600 + /* maximum number of PDUs in one compound */ #define MAX_COMPOUND 5 @@ -1255,6 +1258,7 @@ struct cifs_tcon { #ifdef CONFIG_CIFS_DFS_UPCALL struct list_head ulist; /* cache update list */ #endif + struct delayed_work query_interfaces; /* query interfaces workqueue job */ }; /* diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 00c4a8aa2649..d59aebefa71c 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -641,6 +641,8 @@ cifs_chan_is_iface_active(struct cifs_ses *ses, struct TCP_Server_Info *server); int cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server); +int +SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon); void extract_unc_hostname(const char *unc, const char **h, size_t *len); int copy_path_name(char *dst, const char *src); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index d8aae257649f..e666d2643ede 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -145,6 +145,25 @@ requeue_resolve: return rc; } +static void smb2_query_server_interfaces(struct work_struct *work) +{ + int rc; + struct cifs_tcon *tcon = container_of(work, + struct cifs_tcon, + query_interfaces.work); + + /* + * query server network interfaces, in case they change + */ + rc = SMB3_request_interfaces(0, tcon); + if (rc) { + cifs_dbg(FYI, "%s: failed to query server interfaces: %d\n", + __func__, rc); + } + + queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, + (SMB_INTERFACE_POLL_INTERVAL * HZ)); +} static void cifs_resolve_server(struct work_struct *work) { @@ -2276,6 +2295,9 @@ cifs_put_tcon(struct cifs_tcon *tcon) list_del_init(&tcon->tcon_list); spin_unlock(&cifs_tcp_ses_lock); + /* cancel polling of interfaces */ + cancel_delayed_work_sync(&tcon->query_interfaces); + if (tcon->use_witness) { int rc; @@ -2513,6 +2535,12 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) tcon->local_lease = ctx->local_lease; INIT_LIST_HEAD(&tcon->pending_opens); + /* schedule query interfaces poll */ + INIT_DELAYED_WORK(&tcon->query_interfaces, + smb2_query_server_interfaces); + queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, + (SMB_INTERFACE_POLL_INTERVAL * HZ)); + spin_lock(&cifs_tcp_ses_lock); list_add(&tcon->tcon_list, &ses->tcon_list); spin_unlock(&cifs_tcp_ses_lock); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index db4ccf1d235e..8802995b2d3d 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -671,7 +671,7 @@ out: return rc; } -static int +int SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon) { int rc; From 8da33fd11c05b7c64ef6456970f2fce61851806e Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Fri, 24 Jun 2022 09:43:59 +0000 Subject: [PATCH 6/7] cifs: avoid deadlocks while updating iface We use cifs_tcp_ses_lock to protect a lot of things. Not only does it protect the lists of connections, sessions, tree connects, open file lists, etc., we also use it to protect some fields in each of it's entries. In this case, cifs_mark_ses_for_reconnect takes the cifs_tcp_ses_lock to traverse the lists, and then calls cifs_update_iface. However, that can end up calling cifs_put_tcp_session, which picks up the same lock again. Avoid this by taking a ref for the session, drop the lock, and then call update iface. Also, in cifs_update_iface, avoid nested locking of iface_lock and chan_lock, as much as possible. When unavoidable, we need to pick iface_lock first. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/connect.c | 15 ++++++++++++--- fs/cifs/sess.c | 33 +++++++++++++++++++++------------ 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index e666d2643ede..8d56325915d0 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -236,7 +236,7 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, bool mark_smb_session) { struct TCP_Server_Info *pserver; - struct cifs_ses *ses; + struct cifs_ses *ses, *nses; struct cifs_tcon *tcon; /* @@ -250,10 +250,19 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + list_for_each_entry_safe(ses, nses, &pserver->smb_ses_list, smb_ses_list) { /* check if iface is still active */ - if (!cifs_chan_is_iface_active(ses, server)) + if (!cifs_chan_is_iface_active(ses, server)) { + /* + * HACK: drop the lock before calling + * cifs_chan_update_iface to avoid deadlock + */ + ses->ses_count++; + spin_unlock(&cifs_tcp_ses_lock); cifs_chan_update_iface(ses, server); + spin_lock(&cifs_tcp_ses_lock); + ses->ses_count--; + } spin_lock(&ses->chan_lock); if (!mark_smb_session && cifs_chan_needs_reconnect(ses, server)) diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 7c26ee70c8b0..b85718f32b53 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -256,29 +256,34 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) /* * update the iface for the channel if necessary. - * will return 0 when iface is updated. 1 otherwise + * will return 0 when iface is updated, 1 if removed, 2 otherwise * Must be called with chan_lock held. */ int cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) { - unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + unsigned int chan_index; struct cifs_server_iface *iface = NULL; struct cifs_server_iface *old_iface = NULL; int rc = 0; - /* primary channel. This can never go away */ - if (!chan_index) + spin_lock(&ses->chan_lock); + chan_index = cifs_ses_get_chan_index(ses, server); + if (!chan_index) { + spin_unlock(&ses->chan_lock); return 0; + } if (ses->chans[chan_index].iface) { old_iface = ses->chans[chan_index].iface; - if (old_iface->is_active) + if (old_iface->is_active) { + spin_unlock(&ses->chan_lock); return 1; + } } + spin_unlock(&ses->chan_lock); spin_lock(&ses->iface_lock); - /* then look for a new one */ list_for_each_entry(iface, &ses->iface_list, iface_head) { if (!iface->is_active || @@ -295,8 +300,6 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) cifs_dbg(FYI, "unable to find a suitable iface\n"); } - ses->chans[chan_index].iface = iface; - /* now drop the ref to the current iface */ if (old_iface && iface) { kref_put(&old_iface->refcount, release_iface); @@ -311,14 +314,20 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) WARN_ON(!iface); cifs_dbg(FYI, "adding new iface: %pIS\n", &iface->sockaddr); } - spin_unlock(&ses->iface_lock); + spin_lock(&ses->chan_lock); + chan_index = cifs_ses_get_chan_index(ses, server); + ses->chans[chan_index].iface = iface; + /* No iface is found. if secondary chan, drop connection */ - if (!iface && CIFS_SERVER_IS_CHAN(server)) { - cifs_put_tcp_session(server, false); + if (!iface && CIFS_SERVER_IS_CHAN(server)) ses->chans[chan_index].server = NULL; - } + + spin_unlock(&ses->chan_lock); + + if (!iface && CIFS_SERVER_IS_CHAN(server)) + cifs_put_tcp_session(server, false); return rc; } From af3a6d1018f02c6dc8388f1f3785a559c7ab5961 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 24 Jun 2022 15:01:43 -0300 Subject: [PATCH 7/7] cifs: update cifs_ses::ip_addr after failover cifs_ses::ip_addr wasn't being updated in cifs_session_setup() when reconnecting SMB sessions thus returning wrong value in /proc/fs/cifs/DebugData. Signed-off-by: Paulo Alcantara (SUSE) Cc: stable@kernel.org Signed-off-by: Steve French --- fs/cifs/connect.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 8d56325915d0..fa29c9aae24b 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -4025,10 +4025,16 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, struct nls_table *nls_info) { int rc = -ENOSYS; + struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&server->dstaddr; + struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr; bool is_binding = false; - spin_lock(&cifs_tcp_ses_lock); + if (server->dstaddr.ss_family == AF_INET6) + scnprintf(ses->ip_addr, sizeof(ses->ip_addr), "%pI6", &addr6->sin6_addr); + else + scnprintf(ses->ip_addr, sizeof(ses->ip_addr), "%pI4", &addr->sin_addr); + if (ses->ses_status != SES_GOOD && ses->ses_status != SES_NEW && ses->ses_status != SES_NEED_RECON) {