IB/srp: Add periodic reconnect functionality
After a transport layer error occurred, periodically try to reconnect
to the target until the dev_loss timer expires. Protect the callback
functions that can be invoked from inside the SCSI EH against
concurrent invocation with srp_reconnect_rport() via the rport mutex.
Change the default dev_loss_tmo from 60s into 600s to give the
reconnect mechanism a chance to kick in.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Acked-by: David Dillow <dillowda@ornl.gov>
Signed-off-by: Roland Dreier <roland@purestorage.com>
commit a95cadb9da
parent 8c64e4531c
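The periodic reconnect loop itself lives on the transport class side (scsi_transport_srp) rather than in this diff; this patch wires ib_srp up to it and makes the timeouts configurable. As a rough sketch of the mechanism being enabled, assuming a delayed-work retry loop like the one in the companion scsi_transport_srp patches (the function name, the srp_rport fields, and the requeue policy below are illustrative, not the verbatim transport code):

/*
 * Illustrative sketch only: the real reconnect loop is implemented in
 * drivers/scsi/scsi_transport_srp.c, not in this patch.
 */
static void srp_reconnect_work_sketch(struct work_struct *work)
{
	struct srp_rport *rport = container_of(to_delayed_work(work),
					       struct srp_rport,
					       reconnect_work);

	if (srp_reconnect_rport(rport) != 0 && rport->reconnect_delay > 0)
		/* Retry until reconnect succeeds or dev_loss_tmo fires. */
		queue_delayed_work(system_long_wq, &rport->reconnect_work,
				   rport->reconnect_delay * HZ);
}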
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -88,6 +88,11 @@ MODULE_PARM_DESC(topspin_workarounds,
 
 static struct kernel_param_ops srp_tmo_ops;
 
+static int srp_reconnect_delay = 10;
+module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
+		S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
+
 static int srp_fast_io_fail_tmo = 15;
 module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
 		S_IRUGO | S_IWUSR);
@@ -96,7 +101,7 @@ MODULE_PARM_DESC(fast_io_fail_tmo,
 		" layer error and failing all I/O. \"off\" means that this"
 		" functionality is disabled.");
 
-static int srp_dev_loss_tmo = 60;
+static int srp_dev_loss_tmo = 600;
 module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
 		S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(dev_loss_tmo,
@@ -144,10 +149,14 @@ static int srp_tmo_set(const char *val, const struct kernel_param *kp)
 	} else {
 		tmo = -1;
 	}
-	if (kp->arg == &srp_fast_io_fail_tmo)
-		res = srp_tmo_valid(-1, tmo, srp_dev_loss_tmo);
+	if (kp->arg == &srp_reconnect_delay)
+		res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
+				    srp_dev_loss_tmo);
+	else if (kp->arg == &srp_fast_io_fail_tmo)
+		res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
 	else
-		res = srp_tmo_valid(-1, srp_fast_io_fail_tmo, tmo);
+		res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
+				    tmo);
 	if (res)
 		goto out;
 	*(int *)kp->arg = tmo;
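srp_tmo_set() now validates whichever of the three timeouts is being written against the current values of the other two before accepting it. srp_tmo_valid() is provided by the scsi_transport_srp companion patches; a hedged sketch of the kind of invariant it is expected to enforce (the exact checks live in scsi_transport_srp.c, this is not the verbatim implementation):

/* Sketch of the invariant, not the verbatim transport-class code. */
static int srp_tmo_valid_sketch(int reconnect_delay, int fast_io_fail_tmo,
				int dev_loss_tmo)
{
	/* At least one recovery mechanism must remain enabled. */
	if (reconnect_delay < 0 && fast_io_fail_tmo < 0 && dev_loss_tmo < 0)
		return -EINVAL;
	/* Fast I/O failure must not be scheduled after device removal. */
	if (fast_io_fail_tmo >= 0 && dev_loss_tmo >= 0 &&
	    fast_io_fail_tmo >= dev_loss_tmo)
		return -EINVAL;
	return 0;
}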
@@ -1426,18 +1435,29 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr)
 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
 {
 	struct srp_target_port *target = host_to_target(shost);
+	struct srp_rport *rport = target->rport;
 	struct srp_request *req;
 	struct srp_iu *iu;
 	struct srp_cmd *cmd;
 	struct ib_device *dev;
 	unsigned long flags;
 	int len, result;
+	const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
+
+	/*
+	 * The SCSI EH thread is the only context from which srp_queuecommand()
+	 * can get invoked for blocked devices (SDEV_BLOCK /
+	 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
+	 * locking the rport mutex if invoked from inside the SCSI EH.
+	 */
+	if (in_scsi_eh)
+		mutex_lock(&rport->mutex);
 
 	result = srp_chkready(target->rport);
 	if (unlikely(result)) {
 		scmnd->result = result;
 		scmnd->scsi_done(scmnd);
-		return 0;
+		goto unlock_rport;
 	}
 
 	spin_lock_irqsave(&target->lock, flags);
@@ -1482,6 +1502,10 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
 		goto err_unmap;
 	}
 
+unlock_rport:
+	if (in_scsi_eh)
+		mutex_unlock(&rport->mutex);
+
 	return 0;
 
 err_unmap:
@@ -1496,6 +1520,9 @@ err_iu:
 err_unlock:
 	spin_unlock_irqrestore(&target->lock, flags);
 
+	if (in_scsi_eh)
+		mutex_unlock(&rport->mutex);
+
 	return SCSI_MLQUEUE_HOST_BUSY;
 }
 
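The two hunks above complete the locking pattern introduced at the top of srp_queuecommand(): whichever path the function exits through, the rport mutex is dropped if and only if it was taken. Reduced to its skeleton (illustrative only; the command setup elided here is the surrounding driver code shown in the diff):

/* Skeleton of the conditional SCSI-EH locking pattern (illustrative). */
static int srp_queuecommand_skeleton(struct Scsi_Host *shost,
				     struct scsi_cmnd *scmnd)
{
	struct srp_rport *rport = host_to_target(shost)->rport;
	/* Only the EH thread may queue commands to a blocked sdev. */
	const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;

	if (in_scsi_eh)
		mutex_lock(&rport->mutex);	/* serialize vs. reconnect */

	/* ... validate rport state, build and post the SRP_CMD ... */

	if (in_scsi_eh)
		mutex_unlock(&rport->mutex);
	return 0;
}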
@@ -1780,6 +1807,7 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
 static int srp_send_tsk_mgmt(struct srp_target_port *target,
 			     u64 req_tag, unsigned int lun, u8 func)
 {
+	struct srp_rport *rport = target->rport;
 	struct ib_device *dev = target->srp_host->srp_dev->dev;
 	struct srp_iu *iu;
 	struct srp_tsk_mgmt *tsk_mgmt;
@@ -1789,12 +1817,20 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target,
 
 	init_completion(&target->tsk_mgmt_done);
 
+	/*
+	 * Lock the rport mutex to avoid that srp_create_target_ib() is
+	 * invoked while a task management function is being sent.
+	 */
+	mutex_lock(&rport->mutex);
 	spin_lock_irq(&target->lock);
 	iu = __srp_get_tx_iu(target, SRP_IU_TSK_MGMT);
 	spin_unlock_irq(&target->lock);
 
-	if (!iu)
+	if (!iu) {
+		mutex_unlock(&rport->mutex);
+
 		return -1;
+	}
 
 	ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
 				   DMA_TO_DEVICE);
@@ -1811,8 +1847,11 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target,
 				   DMA_TO_DEVICE);
 	if (srp_post_send(target, iu, sizeof *tsk_mgmt)) {
 		srp_put_tx_iu(target, iu, SRP_IU_TSK_MGMT);
+		mutex_unlock(&rport->mutex);
+
 		return -1;
 	}
+	mutex_unlock(&rport->mutex);
 
 	if (!wait_for_completion_timeout(&target->tsk_mgmt_done,
 					 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
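In srp_send_tsk_mgmt() the mutex is taken unconditionally, because task management functions are not issued from atomic context. Note that it is released before wait_for_completion_timeout(): holding it across a wait of up to SRP_ABORT_TIMEOUT_MS would stall srp_reconnect_rport() for that long. Purely as an illustration of what the three unlock calls above achieve, the same error paths structured around a single unlock site (a sketch, not the patch code):

/* Illustrative restructuring with one unlock site; not the patch code. */
static int send_tsk_mgmt_prologue_sketch(struct srp_target_port *target)
{
	struct srp_rport *rport = target->rport;
	struct srp_iu *iu;
	int ret = -1;

	mutex_lock(&rport->mutex);
	spin_lock_irq(&target->lock);
	iu = __srp_get_tx_iu(target, SRP_IU_TSK_MGMT);
	spin_unlock_irq(&target->lock);
	if (!iu)
		goto unlock;
	/* ... fill in the SRP_TSK_MGMT IU ... */
	if (srp_post_send(target, iu, sizeof(struct srp_tsk_mgmt))) {
		srp_put_tx_iu(target, iu, SRP_IU_TSK_MGMT);
		goto unlock;
	}
	ret = 0;
unlock:
	mutex_unlock(&rport->mutex);
	return ret;	/* caller waits for tsk_mgmt_done without the mutex */
}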
@@ -2713,6 +2752,7 @@ static void srp_remove_one(struct ib_device *device)
 static struct srp_function_template ib_srp_transport_functions = {
 	.has_rport_state	 = true,
 	.reset_timer_if_blocked	 = true,
+	.reconnect_delay	 = &srp_reconnect_delay,
 	.fast_io_fail_tmo	 = &srp_fast_io_fail_tmo,
 	.dev_loss_tmo		 = &srp_dev_loss_tmo,
 	.reconnect		 = srp_rport_reconnect,
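Exposing the timeouts to the transport class as pointers rather than values lets scsi_transport_srp pick up the module parameter defaults at rport creation time while the parameters stay writable through /sys/module/ib_srp/parameters/. A sketch of how the transport side can latch these defaults when adding an rport (illustrative; the real logic lives in scsi_transport_srp.c):

/* Sketch, not the verbatim srp_rport_add() code. */
static void srp_rport_set_defaults_sketch(struct srp_rport *rport,
					  const struct srp_function_template *f)
{
	if (f->reconnect_delay)
		rport->reconnect_delay = *f->reconnect_delay;
	if (f->fast_io_fail_tmo)
		rport->fast_io_fail_tmo = *f->fast_io_fail_tmo;
	if (f->dev_loss_tmo)
		rport->dev_loss_tmo = *f->dev_loss_tmo;
}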