mirror of
https://github.com/torvalds/linux.git
synced 2024-11-26 06:02:05 +00:00
rpcrdma: fix handling for RDMA_CM_EVENT_DEVICE_REMOVAL
Under the scenario of IB device bonding, when bringing down one of the
ports, or all ports, we saw xprtrdma entering a non-recoverable state
where it is not even possible to complete the disconnect and shut it
down the mount, requiring a reboot. Following debug, we saw that
transport connect never ended after receiving the
RDMA_CM_EVENT_DEVICE_REMOVAL callback.
The DEVICE_REMOVAL callback is irrespective of whether the CM_ID is
connected, and ESTABLISHED may not have happened. So need to work with
each of these states accordingly.
Fixes: 2acc5cae29
('xprtrdma: Prevent dereferencing r_xprt->rx_ep after it is freed')
Cc: Sagi Grimberg <sagi.grimberg@vastdata.com>
Signed-off-by: Dan Aloni <dan.aloni@vastdata.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
This commit is contained in:
parent
d1404e46ae
commit
4836da2197
@ -244,7 +244,11 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
|
||||
case RDMA_CM_EVENT_DEVICE_REMOVAL:
|
||||
pr_info("rpcrdma: removing device %s for %pISpc\n",
|
||||
ep->re_id->device->name, sap);
|
||||
fallthrough;
|
||||
switch (xchg(&ep->re_connect_status, -ENODEV)) {
|
||||
case 0: goto wake_connect_worker;
|
||||
case 1: goto disconnected;
|
||||
}
|
||||
return 0;
|
||||
case RDMA_CM_EVENT_ADDR_CHANGE:
|
||||
ep->re_connect_status = -ENODEV;
|
||||
goto disconnected;
|
||||
|
Loading…
Reference in New Issue
Block a user