From e091853ebdb486fd8bde86b87178fdf3850914fc Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 23 Jun 2021 23:28:46 -0400 Subject: [PATCH 1/8] SUNRPC mark the first transport When an RPC client gets created, its first transport is special and should be marked as the main transport. Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/clnt.c | 1 + 2 files changed, 2 insertions(+) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 13a4eaf385cf..692e5946c029 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -293,6 +293,7 @@ struct rpc_xprt { struct rcu_head rcu; const struct xprt_class *xprt_class; struct rpc_sysfs_xprt *xprt_sysfs; + bool main; /*mark if this is the 1st transport */ }; #if defined(CONFIG_SUNRPC_BACKCHANNEL) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 9bf820bad84c..408618765aa5 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -412,6 +412,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, } rpc_clnt_set_transport(clnt, xprt, timeout); + xprt->main = true; xprt_iter_init(&clnt->cl_xpi, xps); xprt_switch_put(xps); From 0e65ea43d9c7c038e167b20165a0e9ed1e9cca83 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 23 Jun 2021 23:28:47 -0400 Subject: [PATCH 2/8] SUNRPC display xprt's main value in sysfs's xprt_info Display in the xprt's sysfs information whether or not this is the main transport.
Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- net/sunrpc/sysfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c index 2fbaba27d5c6..124f2e1e3409 100644 --- a/net/sunrpc/sysfs.c +++ b/net/sunrpc/sysfs.c @@ -103,10 +103,10 @@ static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj, ret = sprintf(buf, "last_used=%lu\ncur_cong=%lu\ncong_win=%lu\n" "max_num_slots=%u\nmin_num_slots=%u\nnum_reqs=%u\n" "binding_q_len=%u\nsending_q_len=%u\npending_q_len=%u\n" - "backlog_q_len=%u\n", xprt->last_used, xprt->cong, - xprt->cwnd, xprt->max_reqs, xprt->min_reqs, + "backlog_q_len=%u\nmain_xprt=%d\n", xprt->last_used, + xprt->cong, xprt->cwnd, xprt->max_reqs, xprt->min_reqs, xprt->num_reqs, xprt->binding.qlen, xprt->sending.qlen, - xprt->pending.qlen, xprt->backlog.qlen); + xprt->pending.qlen, xprt->backlog.qlen, xprt->main); xprt_put(xprt); return ret + 1; } From a8482488a7d6d320f63a9ee1912dbb5ae5b80a61 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 23 Jun 2021 23:28:48 -0400 Subject: [PATCH 3/8] SUNRPC query transport's source port Provide ability to query transport's source port. 
Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprtsock.h | 1 + net/sunrpc/xprtsock.c | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 3c1423ee74b4..8c2a712cb242 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -10,6 +10,7 @@ int init_socket_xprt(void); void cleanup_socket_xprt(void); +unsigned short get_srcport(struct rpc_xprt *); #define RPC_MIN_RESVPORT (1U) #define RPC_MAX_RESVPORT (65535U) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 2ad4d0df45fe..4611845ec1eb 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1653,6 +1653,13 @@ static int xs_get_srcport(struct sock_xprt *transport) return port; } +unsigned short get_srcport(struct rpc_xprt *xprt) +{ + struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt); + return sock->srcport; +} +EXPORT_SYMBOL(get_srcport); + static unsigned short xs_next_srcport(struct sock_xprt *transport, unsigned short port) { if (transport->srcport != 0) From c1830a63c79aa90f725ed6feaad097473f2b990d Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 23 Jun 2021 23:28:49 -0400 Subject: [PATCH 4/8] SUNRPC for TCP display xprt's source port in sysfs xprt_info The TCP connection's source port is useful for matching connections seen in network traces to the xprts used by the Linux NFS client.
Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- net/sunrpc/sysfs.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c index 124f2e1e3409..08aa503295b7 100644 --- a/net/sunrpc/sysfs.c +++ b/net/sunrpc/sysfs.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "sysfs.h" @@ -103,10 +104,13 @@ static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj, ret = sprintf(buf, "last_used=%lu\ncur_cong=%lu\ncong_win=%lu\n" "max_num_slots=%u\nmin_num_slots=%u\nnum_reqs=%u\n" "binding_q_len=%u\nsending_q_len=%u\npending_q_len=%u\n" - "backlog_q_len=%u\nmain_xprt=%d\n", xprt->last_used, - xprt->cong, xprt->cwnd, xprt->max_reqs, xprt->min_reqs, - xprt->num_reqs, xprt->binding.qlen, xprt->sending.qlen, - xprt->pending.qlen, xprt->backlog.qlen, xprt->main); + "backlog_q_len=%u\nmain_xprt=%d\nsrc_port=%u\n", + xprt->last_used, xprt->cong, xprt->cwnd, xprt->max_reqs, + xprt->min_reqs, xprt->num_reqs, xprt->binding.qlen, + xprt->sending.qlen, xprt->pending.qlen, + xprt->backlog.qlen, xprt->main, + (xprt->xprt_class->ident == XPRT_TRANSPORT_TCP) ? + get_srcport(xprt) : 0); xprt_put(xprt); return ret + 1; } From 5b7eb78486cd9ac58bfbd6d84ea0fe2d9fead03b Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 23 Jun 2021 23:28:50 -0400 Subject: [PATCH 5/8] SUNRPC: take a xprt offline using sysfs Using sysfs's xprt_state attribute, mark a particular transport offline. It will not be picked during the round-robin selection. It's not allowed to take the main (1st created transport associated with the rpc_client) offline. Also bring a transport back online via sysfs by writing "online" and that would allow for this transport to be picked during the round- robin selection. 
Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/sysfs.c | 66 ++++++++++++++++++++++++++++++++++--- net/sunrpc/sysfs.h | 1 + net/sunrpc/xprtmultipath.c | 6 ++-- 4 files changed, 68 insertions(+), 6 deletions(-) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 692e5946c029..b8ed7fa1b4ca 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -427,6 +427,7 @@ void xprt_release_write(struct rpc_xprt *, struct rpc_task *); #define XPRT_BOUND (4) #define XPRT_BINDING (5) #define XPRT_CLOSING (6) +#define XPRT_OFFLINE (7) #define XPRT_CONGESTED (9) #define XPRT_CWND_WAIT (10) #define XPRT_WRITE_SPACE (11) diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c index 08aa503295b7..a30ad18aa7dc 100644 --- a/net/sunrpc/sysfs.c +++ b/net/sunrpc/sysfs.c @@ -68,6 +68,15 @@ rpc_sysfs_xprt_kobj_get_xprt(struct kobject *kobj) return xprt_get(x->xprt); } +static inline struct rpc_xprt_switch * +rpc_sysfs_xprt_kobj_get_xprt_switch(struct kobject *kobj) +{ + struct rpc_sysfs_xprt *x = container_of(kobj, + struct rpc_sysfs_xprt, kobject); + + return xprt_switch_get(x->xprt_switch); +} + static inline struct rpc_xprt_switch * rpc_sysfs_xprt_switch_kobj_get_xprt(struct kobject *kobj) { @@ -122,7 +131,7 @@ static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj, struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); ssize_t ret; int locked, connected, connecting, close_wait, bound, binding, - closing, congested, cwnd_wait, write_space; + closing, congested, cwnd_wait, write_space, offline; if (!xprt) return 0; @@ -140,8 +149,9 @@ static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj, congested = test_bit(XPRT_CONGESTED, &xprt->state); cwnd_wait = test_bit(XPRT_CWND_WAIT, &xprt->state); write_space = test_bit(XPRT_WRITE_SPACE, &xprt->state); + offline = test_bit(XPRT_OFFLINE, &xprt->state); - ret = sprintf(buf, "state=%s %s %s %s %s %s %s %s %s %s\n", + ret = 
sprintf(buf, "state=%s %s %s %s %s %s %s %s %s %s %s\n", locked ? "LOCKED" : "", connected ? "CONNECTED" : "", connecting ? "CONNECTING" : "", @@ -151,7 +161,8 @@ static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj, closing ? "CLOSING" : "", congested ? "CONGESTED" : "", cwnd_wait ? "CWND_WAIT" : "", - write_space ? "WRITE_SPACE" : ""); + write_space ? "WRITE_SPACE" : "", + offline ? "OFFLINE" : ""); } xprt_put(xprt); @@ -233,6 +244,52 @@ out_err: goto out; } +static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); + int offline = 0, online = 0; + struct rpc_xprt_switch *xps = rpc_sysfs_xprt_kobj_get_xprt_switch(kobj); + + if (!xprt) + return 0; + + if (!strncmp(buf, "offline", 7)) + offline = 1; + else if (!strncmp(buf, "online", 6)) + online = 1; + else + return -EINVAL; + + if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) { + count = -EINTR; + goto out_put; + } + if (xprt->main) { + count = -EINVAL; + goto release_tasks; + } + if (offline) { + set_bit(XPRT_OFFLINE, &xprt->state); + spin_lock(&xps->xps_lock); + xps->xps_nactive--; + spin_unlock(&xps->xps_lock); + } else if (online) { + clear_bit(XPRT_OFFLINE, &xprt->state); + spin_lock(&xps->xps_lock); + xps->xps_nactive++; + spin_unlock(&xps->xps_lock); + } + +release_tasks: + xprt_release_write(xprt, NULL); +out_put: + xprt_put(xprt); + xprt_switch_put(xps); + return count; +} + int rpc_sysfs_init(void) { rpc_sunrpc_kset = kset_create_and_add("sunrpc", NULL, kernel_kobj); @@ -303,7 +360,7 @@ static struct kobj_attribute rpc_sysfs_xprt_info = __ATTR(xprt_info, 0444, rpc_sysfs_xprt_info_show, NULL); static struct kobj_attribute rpc_sysfs_xprt_change_state = __ATTR(xprt_state, - 0644, rpc_sysfs_xprt_state_show, NULL); + 0644, rpc_sysfs_xprt_state_show, rpc_sysfs_xprt_state_change); static struct attribute *rpc_sysfs_xprt_attrs[] = { 
&rpc_sysfs_xprt_dstaddr.attr, @@ -466,6 +523,7 @@ void rpc_sysfs_xprt_setup(struct rpc_xprt_switch *xprt_switch, if (rpc_xprt) { xprt->xprt_sysfs = rpc_xprt; rpc_xprt->xprt = xprt; + rpc_xprt->xprt_switch = xprt_switch; kobject_uevent(&rpc_xprt->kobject, KOBJ_ADD); } } diff --git a/net/sunrpc/sysfs.h b/net/sunrpc/sysfs.h index ff10451de6fa..6620cebd1037 100644 --- a/net/sunrpc/sysfs.h +++ b/net/sunrpc/sysfs.h @@ -22,6 +22,7 @@ struct rpc_sysfs_xprt_switch { struct rpc_sysfs_xprt { struct kobject kobject; struct rpc_xprt *xprt; + struct rpc_xprt_switch *xprt_switch; }; int rpc_sysfs_init(void); diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c index 07e76ae1028a..5f4845d1e922 100644 --- a/net/sunrpc/xprtmultipath.c +++ b/net/sunrpc/xprtmultipath.c @@ -65,7 +65,8 @@ static void xprt_switch_remove_xprt_locked(struct rpc_xprt_switch *xps, { if (unlikely(xprt == NULL)) return; - xps->xps_nactive--; + if (!test_bit(XPRT_OFFLINE, &xprt->state)) + xps->xps_nactive--; xps->xps_nxprts--; if (xps->xps_nxprts == 0) xps->xps_net = NULL; @@ -230,7 +231,8 @@ void xprt_iter_default_rewind(struct rpc_xprt_iter *xpi) static bool xprt_is_active(const struct rpc_xprt *xprt) { - return kref_read(&xprt->kref) != 0; + return (kref_read(&xprt->kref) != 0 && + !test_bit(XPRT_OFFLINE, &xprt->state)); } static From 85e39feead948bdf8322c961d7a9bebc20d629f3 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 23 Jun 2021 23:28:51 -0400 Subject: [PATCH 6/8] NFSv4.1 identify and mark RPC tasks that can move between transports In preparation for when we can re-try a task on a different transport, identify and mark such RPC tasks as moveable. Only 4.1+ operations can be re-tried on a different transport.
Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 38 +++++++++++++++++++++++++++++++----- fs/nfs/pagelist.c | 8 ++++++-- fs/nfs/write.c | 6 +++++- include/linux/sunrpc/sched.h | 2 ++ 4 files changed, 46 insertions(+), 8 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e653654c10bc..d3ee3700c9dd 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1155,7 +1155,11 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res) { - return nfs4_do_call_sync(clnt, server, msg, args, res, 0); + unsigned short task_flags = 0; + + if (server->nfs_client->cl_minorversion) + task_flags = RPC_TASK_MOVEABLE; + return nfs4_do_call_sync(clnt, server, msg, args, res, task_flags); } @@ -2569,6 +2573,9 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, }; int status; + if (server->nfs_client->cl_minorversion) + task_setup_data.flags |= RPC_TASK_MOVEABLE; + kref_get(&data->kref); data->rpc_done = false; data->rpc_status = 0; @@ -3749,6 +3756,9 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) }; int status = -ENOMEM; + if (server->nfs_client->cl_minorversion) + task_setup_data.flags |= RPC_TASK_MOVEABLE; + nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_CLEANUP, &task_setup_data.rpc_client, &msg); @@ -4188,6 +4198,9 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, }; unsigned short task_flags = 0; + if (nfs4_has_session(server->nfs_client)) + task_flags = RPC_TASK_MOVEABLE; + /* Is this is an attribute revalidation, subject to softreval? 
*/ if (inode && (server->flags & NFS_MOUNT_SOFTREVAL)) task_flags |= RPC_TASK_TIMEOUT; @@ -4307,6 +4320,9 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, }; unsigned short task_flags = 0; + if (server->nfs_client->cl_minorversion) + task_flags = RPC_TASK_MOVEABLE; + /* Is this is an attribute revalidation, subject to softreval? */ if (nfs_lookup_is_soft_revalidate(dentry)) task_flags |= RPC_TASK_TIMEOUT; @@ -6538,7 +6554,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, .rpc_client = server->client, .rpc_message = &msg, .callback_ops = &nfs4_delegreturn_ops, - .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT, + .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT | RPC_TASK_MOVEABLE, }; int status = 0; @@ -6856,6 +6872,11 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, }; + struct nfs_client *client = + NFS_SERVER(lsp->ls_state->inode)->nfs_client; + + if (client->cl_minorversion) + task_setup_data.flags |= RPC_TASK_MOVEABLE; nfs4_state_protect(NFS_SERVER(lsp->ls_state->inode)->nfs_client, NFS_SP4_MACH_CRED_CLEANUP, &task_setup_data.rpc_client, &msg); @@ -7130,6 +7151,10 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF, }; int ret; + struct nfs_client *client = NFS_SERVER(state->inode)->nfs_client; + + if (client->cl_minorversion) + task_setup_data.flags |= RPC_TASK_MOVEABLE; dprintk("%s: begin!\n", __func__); data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file), @@ -9186,7 +9211,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, .rpc_client = clp->cl_rpcclient, .rpc_message = &msg, .callback_ops = &nfs41_sequence_ops, - .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT, + .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT | RPC_TASK_MOVEABLE, }; struct rpc_task *ret; @@ -9509,7 +9534,8 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout) 
.rpc_message = &msg, .callback_ops = &nfs4_layoutget_call_ops, .callback_data = lgp, - .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF, + .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF | + RPC_TASK_MOVEABLE, }; struct pnfs_layout_segment *lseg = NULL; struct nfs4_exception exception = { @@ -9650,6 +9676,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync) .rpc_message = &msg, .callback_ops = &nfs4_layoutreturn_call_ops, .callback_data = lrp, + .flags = RPC_TASK_MOVEABLE, }; int status = 0; @@ -9804,6 +9831,7 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync) .rpc_message = &msg, .callback_ops = &nfs4_layoutcommit_ops, .callback_data = data, + .flags = RPC_TASK_MOVEABLE, }; struct rpc_task *task; int status = 0; @@ -10131,7 +10159,7 @@ static int nfs41_free_stateid(struct nfs_server *server, .rpc_client = server->client, .rpc_message = &msg, .callback_ops = &nfs41_free_stateid_ops, - .flags = RPC_TASK_ASYNC, + .flags = RPC_TASK_ASYNC | RPC_TASK_MOVEABLE, }; struct nfs_free_stateid_data *data; struct rpc_task *task; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index cf9cc62ec48e..cc232d1f16f2 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -954,6 +954,7 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) { struct nfs_pgio_header *hdr; int ret; + unsigned short task_flags = 0; hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); if (!hdr) { @@ -962,14 +963,17 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) } nfs_pgheader_init(desc, hdr, nfs_pgio_header_free); ret = nfs_generic_pgio(desc, hdr); - if (ret == 0) + if (ret == 0) { + if (NFS_SERVER(hdr->inode)->nfs_client->cl_minorversion) + task_flags = RPC_TASK_MOVEABLE; ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode), hdr, hdr->cred, NFS_PROTO(hdr->inode), desc->pg_rpc_callops, desc->pg_ioflags, - RPC_TASK_CRED_NOREF); + RPC_TASK_CRED_NOREF | task_flags); + } return ret; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 
3bf82178166a..eae9bf114041 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1810,6 +1810,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, struct nfs_commit_info *cinfo) { struct nfs_commit_data *data; + unsigned short task_flags = 0; /* another commit raced with us */ if (list_empty(head)) @@ -1820,8 +1821,11 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, /* Set up the argument struct */ nfs_init_commit(data, head, NULL, cinfo); atomic_inc(&cinfo->mds->rpcs_out); + if (NFS_SERVER(inode)->nfs_client->cl_minorversion) + task_flags = RPC_TASK_MOVEABLE; return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode), - data->mds_ops, how, RPC_TASK_CRED_NOREF); + data->mds_ops, how, + RPC_TASK_CRED_NOREF | task_flags); } /* diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index df696efdd675..a237b8dbf608 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -121,6 +121,7 @@ struct rpc_task_setup { */ #define RPC_TASK_ASYNC 0x0001 /* is an async task */ #define RPC_TASK_SWAPPER 0x0002 /* is swapping in/out */ +#define RPC_TASK_MOVEABLE 0x0004 /* nfs4.1+ rpc tasks */ #define RPC_TASK_NULLCREDS 0x0010 /* Use AUTH_NULL credential */ #define RPC_CALL_MAJORSEEN 0x0020 /* major timeout seen */ #define RPC_TASK_ROOTCREDS 0x0040 /* force root creds */ @@ -139,6 +140,7 @@ struct rpc_task_setup { #define RPC_IS_SOFT(t) ((t)->tk_flags & (RPC_TASK_SOFT|RPC_TASK_TIMEOUT)) #define RPC_IS_SOFTCONN(t) ((t)->tk_flags & RPC_TASK_SOFTCONN) #define RPC_WAS_SENT(t) ((t)->tk_flags & RPC_TASK_SENT) +#define RPC_IS_MOVEABLE(t) ((t)->tk_flags & RPC_TASK_MOVEABLE) #define RPC_TASK_RUNNING 0 #define RPC_TASK_QUEUED 1 From 6a2840590b66e4914d583be61e40445386bb5835 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 23 Jun 2021 23:28:52 -0400 Subject: [PATCH 7/8] sunrpc: display xprt's queuelen of assigned tasks via sysfs Once a task grabs a transport it's reflected in the queuelen
of the rpc_xprt structure. Add display of that value in the xprt's info file in sysfs. Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- net/sunrpc/sysfs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c index a30ad18aa7dc..b576c7f06829 100644 --- a/net/sunrpc/sysfs.c +++ b/net/sunrpc/sysfs.c @@ -113,13 +113,15 @@ static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj, ret = sprintf(buf, "last_used=%lu\ncur_cong=%lu\ncong_win=%lu\n" "max_num_slots=%u\nmin_num_slots=%u\nnum_reqs=%u\n" "binding_q_len=%u\nsending_q_len=%u\npending_q_len=%u\n" - "backlog_q_len=%u\nmain_xprt=%d\nsrc_port=%u\n", + "backlog_q_len=%u\nmain_xprt=%d\nsrc_port=%u\n" + "tasks_queuelen=%ld\n", xprt->last_used, xprt->cong, xprt->cwnd, xprt->max_reqs, xprt->min_reqs, xprt->num_reqs, xprt->binding.qlen, xprt->sending.qlen, xprt->pending.qlen, xprt->backlog.qlen, xprt->main, (xprt->xprt_class->ident == XPRT_TRANSPORT_TCP) ? - get_srcport(xprt) : 0); + get_srcport(xprt) : 0, + atomic_long_read(&xprt->queuelen)); xprt_put(xprt); return ret + 1; } From 6f081693e7b2ba63422b735684b05a850a6351ba Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 23 Jun 2021 23:28:53 -0400 Subject: [PATCH 8/8] sunrpc: remove an offlined xprt using sysfs Once a transport has been put offline, this transport can be also removed from the list of transports. Any tasks that have been stuck on this transport would find the next available active transport and be re-tried. This transport would be removed from the xprt_switch list and freed. 
Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/clnt.c | 24 ++++++++++++++++++++++++ net/sunrpc/sysfs.c | 26 ++++++++++++++++++++++---- 3 files changed, 47 insertions(+), 4 deletions(-) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index b8ed7fa1b4ca..c8c39f22d3b1 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -428,6 +428,7 @@ void xprt_release_write(struct rpc_xprt *, struct rpc_task *); #define XPRT_BINDING (5) #define XPRT_CLOSING (6) #define XPRT_OFFLINE (7) +#define XPRT_REMOVE (8) #define XPRT_CONGESTED (9) #define XPRT_CWND_WAIT (10) #define XPRT_WRITE_SPACE (11) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 408618765aa5..8b4de70e8ead 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2106,6 +2106,30 @@ call_connect_status(struct rpc_task *task) case -ENOTCONN: case -EAGAIN: case -ETIMEDOUT: + if (!(task->tk_flags & RPC_TASK_NO_ROUND_ROBIN) && + (task->tk_flags & RPC_TASK_MOVEABLE) && + test_bit(XPRT_REMOVE, &xprt->state)) { + struct rpc_xprt *saved = task->tk_xprt; + struct rpc_xprt_switch *xps; + + rcu_read_lock(); + xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch)); + rcu_read_unlock(); + if (xps->xps_nxprts > 1) { + long value; + + xprt_release(task); + value = atomic_long_dec_return(&xprt->queuelen); + if (value == 0) + rpc_xprt_switch_remove_xprt(xps, saved); + xprt_put(saved); + task->tk_xprt = NULL; + task->tk_action = call_start; + } + xprt_switch_put(xps); + if (!task->tk_xprt) + return; + } goto out_retry; case -ENOBUFS: rpc_delay(task, HZ >> 2); diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c index b576c7f06829..64da3bfd28e6 100644 --- a/net/sunrpc/sysfs.c +++ b/net/sunrpc/sysfs.c @@ -133,7 +133,7 @@ static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj, struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); ssize_t ret; int locked, connected, connecting, close_wait, bound, 
binding, - closing, congested, cwnd_wait, write_space, offline; + closing, congested, cwnd_wait, write_space, offline, remove; if (!xprt) return 0; @@ -152,8 +152,9 @@ static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj, cwnd_wait = test_bit(XPRT_CWND_WAIT, &xprt->state); write_space = test_bit(XPRT_WRITE_SPACE, &xprt->state); offline = test_bit(XPRT_OFFLINE, &xprt->state); + remove = test_bit(XPRT_REMOVE, &xprt->state); - ret = sprintf(buf, "state=%s %s %s %s %s %s %s %s %s %s %s\n", + ret = sprintf(buf, "state=%s %s %s %s %s %s %s %s %s %s %s %s\n", locked ? "LOCKED" : "", connected ? "CONNECTED" : "", connecting ? "CONNECTING" : "", @@ -164,7 +165,8 @@ static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj, congested ? "CONGESTED" : "", cwnd_wait ? "CWND_WAIT" : "", write_space ? "WRITE_SPACE" : "", - offline ? "OFFLINE" : ""); + offline ? "OFFLINE" : "", + remove ? "REMOVE" : ""); } xprt_put(xprt); @@ -251,7 +253,7 @@ static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj, const char *buf, size_t count) { struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); - int offline = 0, online = 0; + int offline = 0, online = 0, remove = 0; struct rpc_xprt_switch *xps = rpc_sysfs_xprt_kobj_get_xprt_switch(kobj); if (!xprt) @@ -261,6 +263,8 @@ static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj, offline = 1; else if (!strncmp(buf, "online", 6)) online = 1; + else if (!strncmp(buf, "remove", 6)) + remove = 1; else return -EINVAL; @@ -282,6 +286,20 @@ static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj, spin_lock(&xps->xps_lock); xps->xps_nactive++; spin_unlock(&xps->xps_lock); + } else if (remove) { + if (test_bit(XPRT_OFFLINE, &xprt->state)) { + set_bit(XPRT_REMOVE, &xprt->state); + xprt_force_disconnect(xprt); + if (test_bit(XPRT_CONNECTED, &xprt->state)) { + if (!xprt->sending.qlen && + !xprt->pending.qlen && + !xprt->backlog.qlen && + !atomic_long_read(&xprt->queuelen)) + rpc_xprt_switch_remove_xprt(xps, xprt); + 
} + } else { + count = -EINVAL; + } } release_tasks: