linux/drivers/scsi/libsas/sas_ata.c

863 lines
22 KiB
C
Raw Normal View History

/*
* Support for SATA devices on Serial Attached SCSI (SAS) controllers
*
* Copyright (C) 2006 IBM Corporation
*
* Written by: Darrick J. Wong <djwong@us.ibm.com>, IBM Corporation
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*/
#include <linux/scatterlist.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 08:04:11 +00:00
#include <linux/slab.h>
#include <linux/async.h>
#include <linux/export.h>
#include <scsi/sas_ata.h>
#include "sas_internal.h"
#include <scsi/scsi_host.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_tcq.h>
#include <scsi/scsi.h>
#include <scsi/scsi_transport.h>
#include <scsi/scsi_transport_sas.h>
#include "../scsi_sas_internal.h"
#include "../scsi_transport_api.h"
#include <scsi/scsi_eh.h>
static enum ata_completion_errors sas_to_ata_err(struct task_status_struct *ts)
{
/* Cheesy attempt to translate SAS errors into ATA. Hah! */
/* transport error */
if (ts->resp == SAS_TASK_UNDELIVERED)
return AC_ERR_ATA_BUS;
/* ts->resp == SAS_TASK_COMPLETE */
/* task delivered, what happened afterwards? */
switch (ts->stat) {
case SAS_DEV_NO_RESPONSE:
return AC_ERR_TIMEOUT;
case SAS_INTERRUPTED:
case SAS_PHY_DOWN:
case SAS_NAK_R_ERR:
return AC_ERR_ATA_BUS;
case SAS_DATA_UNDERRUN:
/*
* Some programs that use the taskfile interface
* (smartctl in particular) can cause underrun
* problems. Ignore these errors, perhaps at our
* peril.
*/
return 0;
case SAS_DATA_OVERRUN:
case SAS_QUEUE_FULL:
case SAS_DEVICE_UNKNOWN:
case SAS_SG_ERR:
return AC_ERR_INVALID;
case SAS_OPEN_TO:
case SAS_OPEN_REJECT:
SAS_DPRINTK("%s: Saw error %d. What to do?\n",
__func__, ts->stat);
return AC_ERR_OTHER;
case SAM_STAT_CHECK_CONDITION:
case SAS_ABORTED_TASK:
return AC_ERR_DEV;
case SAS_PROTO_RESPONSE:
/* This means the ending_fis has the error
* value; return 0 here to collect it */
return 0;
default:
return 0;
}
}
static void sas_ata_task_done(struct sas_task *task)
{
struct ata_queued_cmd *qc = task->uldd_task;
struct domain_device *dev = task->dev;
struct task_status_struct *stat = &task->task_status;
struct ata_task_resp *resp = (struct ata_task_resp *)stat->buf;
struct sas_ha_struct *sas_ha = dev->port->ha;
enum ata_completion_errors ac;
unsigned long flags;
struct ata_link *link;
struct ata_port *ap;
spin_lock_irqsave(&dev->done_lock, flags);
if (test_bit(SAS_HA_FROZEN, &sas_ha->state))
task = NULL;
else if (qc && qc->scsicmd)
ASSIGN_SAS_TASK(qc->scsicmd, NULL);
spin_unlock_irqrestore(&dev->done_lock, flags);
/* check if libsas-eh got to the task before us */
if (unlikely(!task))
return;
if (!qc)
goto qc_already_gone;
ap = qc->ap;
link = &ap->link;
spin_lock_irqsave(ap->lock, flags);
/* check if we lost the race with libata/sas_ata_post_internal() */
if (unlikely(ap->pflags & ATA_PFLAG_FROZEN)) {
spin_unlock_irqrestore(ap->lock, flags);
if (qc->scsicmd)
goto qc_already_gone;
else {
/* if eh is not involved and the port is frozen then the
* ata internal abort process has taken responsibility
* for this sas_task
*/
return;
}
}
if (stat->stat == SAS_PROTO_RESPONSE || stat->stat == SAM_STAT_GOOD ||
((stat->stat == SAM_STAT_CHECK_CONDITION &&
dev->sata_dev.class == ATA_DEV_ATAPI))) {
memcpy(dev->sata_dev.fis, resp->ending_fis, ATA_RESP_FIS_SIZE);
if (!link->sactive) {
qc->err_mask |= ac_err_mask(dev->sata_dev.fis[2]);
} else {
link->eh_info.err_mask |= ac_err_mask(dev->sata_dev.fis[2]);
if (unlikely(link->eh_info.err_mask))
qc->flags |= ATA_QCFLAG_FAILED;
}
} else {
ac = sas_to_ata_err(stat);
if (ac) {
SAS_DPRINTK("%s: SAS error %x\n", __func__,
stat->stat);
/* We saw a SAS error. Send a vague error. */
if (!link->sactive) {
qc->err_mask = ac;
} else {
link->eh_info.err_mask |= AC_ERR_DEV;
qc->flags |= ATA_QCFLAG_FAILED;
}
dev->sata_dev.fis[3] = 0x04; /* status err */
dev->sata_dev.fis[2] = ATA_ERR;
}
}
qc->lldd_task = NULL;
ata_qc_complete(qc);
spin_unlock_irqrestore(ap->lock, flags);
qc_already_gone:
sas_free_task(task);
}
static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc)
{
unsigned long flags;
struct sas_task *task;
struct scatterlist *sg;
int ret = AC_ERR_SYSTEM;
unsigned int si, xfer = 0;
struct ata_port *ap = qc->ap;
struct domain_device *dev = ap->private_data;
struct sas_ha_struct *sas_ha = dev->port->ha;
struct Scsi_Host *host = sas_ha->core.shost;
struct sas_internal *i = to_sas_internal(host->transportt);
/* TODO: audit callers to ensure they are ready for qc_issue to
* unconditionally re-enable interrupts
*/
local_irq_save(flags);
spin_unlock(ap->lock);
/* If the device fell off, no sense in issuing commands */
if (test_bit(SAS_DEV_GONE, &dev->state))
goto out;
task = sas_alloc_task(GFP_ATOMIC);
if (!task)
goto out;
task->dev = dev;
task->task_proto = SAS_PROTOCOL_STP;
task->task_done = sas_ata_task_done;
if (qc->tf.command == ATA_CMD_FPDMA_WRITE ||
qc->tf.command == ATA_CMD_FPDMA_READ ||
qc->tf.command == ATA_CMD_FPDMA_RECV ||
qc->tf.command == ATA_CMD_FPDMA_SEND ||
qc->tf.command == ATA_CMD_NCQ_NON_DATA) {
/* Need to zero out the tag libata assigned us */
qc->tf.nsect = 0;
}
ata_tf_to_fis(&qc->tf, qc->dev->link->pmp, 1, (u8 *)&task->ata_task.fis);
task->uldd_task = qc;
if (ata_is_atapi(qc->tf.protocol)) {
memcpy(task->ata_task.atapi_packet, qc->cdb, qc->dev->cdb_len);
libata: eliminate the home grown dma padding in favour of that provided by the block layer ATA requires that all DMA transfers begin and end on word boundaries. Because of this, a large amount of machinery grew up in ide to adjust scatterlists on this basis. However, as of 2.5, the block layer has a dma_alignment variable which ensures both the beginning and length of a DMA transfer are aligned on the dma_alignment boundary. Although the block layer does adjust the beginning of the transfer to ensure this happens, it doesn't actually adjust the length, it merely makes sure that space is allocated for transfers beyond the declared length. The upshot of this is that scatterlists may be padded to any size between the actual length and the length adjusted to the dma_alignment safely knowing that memory is allocated in this region. Right at the moment, SCSI takes the default dma_aligment which is on a 512 byte boundary. Note that this aligment only applies to transfers coming in from user space. However, since all kernel allocations are automatically aligned on a minimum of 32 byte boundaries, it is safe to adjust them in this manner as well. tj: * Adjusting sg after padding is done in block layer. Make libata set queue alignment correctly for ATAPI devices and drop broken sg mangling from ata_sg_setup(). * Use request->raw_data_len for ATAPI transfer chunk size. * Killed qc->raw_nbytes. * Separated out killing qc->n_iter. Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com> Signed-off-by: Tejun Heo <htejun@gmail.com> Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
2008-02-19 10:36:56 +00:00
task->total_xfer_len = qc->nbytes;
task->num_scatter = qc->n_elem;
} else {
for_each_sg(qc->sg, sg, qc->n_elem, si)
xfer += sg_dma_len(sg);
task->total_xfer_len = xfer;
task->num_scatter = si;
}
task->data_dir = qc->dma_dir;
task->scatter = qc->sg;
task->ata_task.retry_count = 1;
task->task_state_flags = SAS_TASK_STATE_PENDING;
qc->lldd_task = task;
task->ata_task.use_ncq = ata_is_ncq(qc->tf.protocol);
task->ata_task.dma_xfer = ata_is_dma(qc->tf.protocol);
if (qc->scsicmd)
ASSIGN_SAS_TASK(qc->scsicmd, task);
ret = i->dft->lldd_execute_task(task, GFP_ATOMIC);
if (ret) {
SAS_DPRINTK("lldd_execute_task returned: %d\n", ret);
if (qc->scsicmd)
ASSIGN_SAS_TASK(qc->scsicmd, NULL);
sas_free_task(task);
qc->lldd_task = NULL;
ret = AC_ERR_SYSTEM;
}
out:
spin_lock(ap->lock);
local_irq_restore(flags);
return ret;
}
static bool sas_ata_qc_fill_rtf(struct ata_queued_cmd *qc)
{
struct domain_device *dev = qc->ap->private_data;
ata_tf_from_fis(dev->sata_dev.fis, &qc->result_tf);
return true;
}
static struct sas_internal *dev_to_sas_internal(struct domain_device *dev)
{
return to_sas_internal(dev->port->ha->core.shost->transportt);
}
static int sas_get_ata_command_set(struct domain_device *dev);
int sas_get_ata_info(struct domain_device *dev, struct ex_phy *phy)
{
if (phy->attached_tproto & SAS_PROTOCOL_STP)
dev->tproto = phy->attached_tproto;
if (phy->attached_sata_dev)
dev->tproto |= SAS_SATA_DEV;
if (phy->attached_dev_type == SAS_SATA_PENDING)
dev->dev_type = SAS_SATA_PENDING;
else {
int res;
dev->dev_type = SAS_SATA_DEV;
res = sas_get_report_phy_sata(dev->parent, phy->phy_id,
&dev->sata_dev.rps_resp);
if (res) {
SAS_DPRINTK("report phy sata to %016llx:0x%x returned "
"0x%x\n", SAS_ADDR(dev->parent->sas_addr),
phy->phy_id, res);
return res;
}
memcpy(dev->frame_rcvd, &dev->sata_dev.rps_resp.rps.fis,
sizeof(struct dev_to_host_fis));
dev->sata_dev.class = sas_get_ata_command_set(dev);
}
return 0;
}
static int sas_ata_clear_pending(struct domain_device *dev, struct ex_phy *phy)
{
int res;
/* we weren't pending, so successfully end the reset sequence now */
if (dev->dev_type != SAS_SATA_PENDING)
return 1;
/* hmmm, if this succeeds do we need to repost the domain_device to the
* lldd so it can pick up new parameters?
*/
res = sas_get_ata_info(dev, phy);
if (res)
return 0; /* retry */
else
return 1;
}
static int smp_ata_check_ready(struct ata_link *link)
{
int res;
struct ata_port *ap = link->ap;
struct domain_device *dev = ap->private_data;
struct domain_device *ex_dev = dev->parent;
struct sas_phy *phy = sas_get_local_phy(dev);
struct ex_phy *ex_phy = &ex_dev->ex_dev.ex_phy[phy->number];
res = sas_ex_phy_discover(ex_dev, phy->number);
sas_put_local_phy(phy);
/* break the wait early if the expander is unreachable,
* otherwise keep polling
*/
if (res == -ECOMM)
return res;
if (res != SMP_RESP_FUNC_ACC)
return 0;
switch (ex_phy->attached_dev_type) {
case SAS_SATA_PENDING:
return 0;
case SAS_END_DEVICE:
if (ex_phy->attached_sata_dev)
return sas_ata_clear_pending(dev, ex_phy);
default:
return -ENODEV;
}
}
static int local_ata_check_ready(struct ata_link *link)
{
struct ata_port *ap = link->ap;
struct domain_device *dev = ap->private_data;
struct sas_internal *i = dev_to_sas_internal(dev);
if (i->dft->lldd_ata_check_ready)
return i->dft->lldd_ata_check_ready(dev);
else {
/* lldd's that don't implement 'ready' checking get the
* old default behavior of not coordinating reset
* recovery with libata
*/
return 1;
}
}
static int sas_ata_printk(const char *level, const struct domain_device *ddev,
const char *fmt, ...)
{
struct ata_port *ap = ddev->sata_dev.ap;
struct device *dev = &ddev->rphy->dev;
struct va_format vaf;
va_list args;
int r;
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
r = printk("%ssas: ata%u: %s: %pV",
level, ap->print_id, dev_name(dev), &vaf);
va_end(args);
return r;
}
static int sas_ata_hard_reset(struct ata_link *link, unsigned int *class,
unsigned long deadline)
{
int ret = 0, res;
struct sas_phy *phy;
struct ata_port *ap = link->ap;
int (*check_ready)(struct ata_link *link);
struct domain_device *dev = ap->private_data;
struct sas_internal *i = dev_to_sas_internal(dev);
res = i->dft->lldd_I_T_nexus_reset(dev);
if (res == -ENODEV)
return res;
if (res != TMF_RESP_FUNC_COMPLETE)
sas_ata_printk(KERN_DEBUG, dev, "Unable to reset ata device?\n");
phy = sas_get_local_phy(dev);
if (scsi_is_sas_phy_local(phy))
check_ready = local_ata_check_ready;
else
check_ready = smp_ata_check_ready;
sas_put_local_phy(phy);
ret = ata_wait_after_reset(link, deadline, check_ready);
if (ret && ret != -EAGAIN)
sas_ata_printk(KERN_ERR, dev, "reset failed (errno=%d)\n", ret);
*class = dev->sata_dev.class;
ap->cbl = ATA_CBL_SATA;
return ret;
}
/*
* notify the lldd to forget the sas_task for this internal ata command
* that bypasses scsi-eh
*/
static void sas_ata_internal_abort(struct sas_task *task)
{
struct sas_internal *si = dev_to_sas_internal(task->dev);
unsigned long flags;
int res;
spin_lock_irqsave(&task->task_state_lock, flags);
if (task->task_state_flags & SAS_TASK_STATE_ABORTED ||
task->task_state_flags & SAS_TASK_STATE_DONE) {
spin_unlock_irqrestore(&task->task_state_lock, flags);
SAS_DPRINTK("%s: Task %p already finished.\n", __func__,
task);
goto out;
}
task->task_state_flags |= SAS_TASK_STATE_ABORTED;
spin_unlock_irqrestore(&task->task_state_lock, flags);
res = si->dft->lldd_abort_task(task);
spin_lock_irqsave(&task->task_state_lock, flags);
if (task->task_state_flags & SAS_TASK_STATE_DONE ||
res == TMF_RESP_FUNC_COMPLETE) {
spin_unlock_irqrestore(&task->task_state_lock, flags);
goto out;
}
/* XXX we are not prepared to deal with ->lldd_abort_task()
* failures. TODO: lldds need to unconditionally forget about
* aborted ata tasks, otherwise we (likely) leak the sas task
* here
*/
SAS_DPRINTK("%s: Task %p leaked.\n", __func__, task);
if (!(task->task_state_flags & SAS_TASK_STATE_DONE))
task->task_state_flags &= ~SAS_TASK_STATE_ABORTED;
spin_unlock_irqrestore(&task->task_state_lock, flags);
return;
out:
sas_free_task(task);
}
static void sas_ata_post_internal(struct ata_queued_cmd *qc)
{
if (qc->flags & ATA_QCFLAG_FAILED)
qc->err_mask |= AC_ERR_OTHER;
if (qc->err_mask) {
/*
* Find the sas_task and kill it. By this point, libata
* has decided to kill the qc and has frozen the port.
* In this state sas_ata_task_done() will no longer free
* the sas_task, so we need to notify the lldd (via
* ->lldd_abort_task) that the task is dead and free it
* ourselves.
*/
struct sas_task *task = qc->lldd_task;
qc->lldd_task = NULL;
if (!task)
return;
task->uldd_task = NULL;
sas_ata_internal_abort(task);
}
}
static void sas_ata_set_dmamode(struct ata_port *ap, struct ata_device *ata_dev)
{
struct domain_device *dev = ap->private_data;
struct sas_internal *i = dev_to_sas_internal(dev);
if (i->dft->lldd_ata_set_dmamode)
i->dft->lldd_ata_set_dmamode(dev);
}
static void sas_ata_sched_eh(struct ata_port *ap)
{
struct domain_device *dev = ap->private_data;
struct sas_ha_struct *ha = dev->port->ha;
unsigned long flags;
spin_lock_irqsave(&ha->lock, flags);
if (!test_and_set_bit(SAS_DEV_EH_PENDING, &dev->state))
ha->eh_active++;
ata_std_sched_eh(ap);
spin_unlock_irqrestore(&ha->lock, flags);
}
void sas_ata_end_eh(struct ata_port *ap)
{
struct domain_device *dev = ap->private_data;
struct sas_ha_struct *ha = dev->port->ha;
unsigned long flags;
spin_lock_irqsave(&ha->lock, flags);
if (test_and_clear_bit(SAS_DEV_EH_PENDING, &dev->state))
ha->eh_active--;
spin_unlock_irqrestore(&ha->lock, flags);
}
static struct ata_port_operations sas_sata_ops = {
.prereset = ata_std_prereset,
.hardreset = sas_ata_hard_reset,
.postreset = ata_std_postreset,
.error_handler = ata_std_error_handler,
.post_internal_cmd = sas_ata_post_internal,
.qc_defer = ata_std_qc_defer,
.qc_prep = ata_noop_qc_prep,
.qc_issue = sas_ata_qc_issue,
.qc_fill_rtf = sas_ata_qc_fill_rtf,
.port_start = ata_sas_port_start,
.port_stop = ata_sas_port_stop,
.set_dmamode = sas_ata_set_dmamode,
.sched_eh = sas_ata_sched_eh,
.end_eh = sas_ata_end_eh,
};
static struct ata_port_info sata_port_info = {
ata: Add a new flag to destinguish sas controller SAS controller has its own tag allocation, which doesn't directly match to ATA tag, so SAS and SATA have different code path for ata tags. Originally we use port->scsi_host (98bd4be1) to destinguish SAS controller, but libsas set ->scsi_host too, so we can't use it for the destinguish, we add a new flag for this purpose. Without this patch, the following oops can happen because scsi-mq uses a host-wide tag map shared among all devices with some integer tag values >= ATA_MAX_QUEUE. These unexpectedly high tag values cause __ata_qc_from_tag() to return NULL, which is then dereferenced in ata_qc_new_init(). BUG: unable to handle kernel NULL pointer dereference at 0000000000000058 IP: [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120 PGD 32adf0067 PUD 32adf1067 PMD 0 Oops: 0002 [#1] SMP DEBUG_PAGEALLOC Modules linked in: iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi igb i2c_algo_bit ptp pps_core pm80xx libsas scsi_transport_sas sg coretemp eeprom w83795 i2c_i801 CPU: 4 PID: 1450 Comm: cydiskbench Not tainted 4.0.0-rc3 #1 Hardware name: Supermicro X8DTH-i/6/iF/6F/X8DTH, BIOS 2.1b 05/04/12 task: ffff8800ba86d500 ti: ffff88032a064000 task.ti: ffff88032a064000 RIP: 0010:[<ffffffff804fd46e>] [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120 RSP: 0018:ffff88032a067858 EFLAGS: 00010046 RAX: 0000000000000000 RBX: ffff8800ba0d2230 RCX: 000000000000002a RDX: ffffffff80505ae0 RSI: 0000000000000020 RDI: ffff8800ba0d2230 RBP: ffff88032a067868 R08: 0000000000000201 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800ba0d0000 R13: ffff8800ba0d2230 R14: ffffffff80505ae0 R15: ffff8800ba0d0000 FS: 0000000041223950(0063) GS:ffff88033e480000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000058 CR3: 000000032a0a3000 CR4: 00000000000006e0 Stack: ffff880329eee758 ffff880329eee758 ffff88032a0678a8 ffffffff80502dad ffff8800ba167978 ffff880329eee758 ffff88032bf9c520 ffff8800ba167978 ffff88032bf9c520 ffff88032bf9a290 ffff88032a0678b8 ffffffff80506909 Call Trace: [<ffffffff80502dad>] ata_scsi_translate+0x3d/0x1b0 [<ffffffff80506909>] ata_sas_queuecmd+0x149/0x2a0 [<ffffffffa0046650>] sas_queuecommand+0xa0/0x1f0 [libsas] [<ffffffff804ea544>] scsi_dispatch_cmd+0xd4/0x1a0 [<ffffffff804eb50f>] scsi_queue_rq+0x66f/0x7f0 [<ffffffff803e5098>] __blk_mq_run_hw_queue+0x208/0x3f0 [<ffffffff803e54b8>] blk_mq_run_hw_queue+0x88/0xc0 [<ffffffff803e5c74>] blk_mq_insert_request+0xc4/0x130 [<ffffffff803e0b63>] blk_execute_rq_nowait+0x73/0x160 [<ffffffffa0023fca>] sg_common_write+0x3da/0x720 [sg] [<ffffffffa0025100>] sg_new_write+0x250/0x360 [sg] [<ffffffffa0025feb>] sg_write+0x13b/0x450 [sg] [<ffffffff8032ec91>] vfs_write+0xd1/0x1b0 [<ffffffff8032ee54>] SyS_write+0x54/0xc0 [<ffffffff80689932>] system_call_fastpath+0x12/0x17 tj: updated description. Fixes: 12cb5ce101ab ("libata: use blk taging") Reported-and-tested-by: Tony Battersby <tonyb@cybernetics.com> Signed-off-by: Shaohua Li <shli@fb.com> Signed-off-by: Tejun Heo <tj@kernel.org>
2015-03-12 17:32:18 +00:00
.flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ |
ATA_FLAG_SAS_HOST | ATA_FLAG_FPDMA_AUX,
.pio_mask = ATA_PIO4,
.mwdma_mask = ATA_MWDMA2,
.udma_mask = ATA_UDMA6,
.port_ops = &sas_sata_ops
};
int sas_ata_init(struct domain_device *found_dev)
{
struct sas_ha_struct *ha = found_dev->port->ha;
struct Scsi_Host *shost = ha->core.shost;
struct ata_port *ap;
int rc;
ata_host_init(&found_dev->sata_dev.ata_host, ha->dev, &sas_sata_ops);
ap = ata_sas_port_alloc(&found_dev->sata_dev.ata_host,
&sata_port_info,
shost);
if (!ap) {
SAS_DPRINTK("ata_sas_port_alloc failed.\n");
return -ENODEV;
}
ap->private_data = found_dev;
ap->cbl = ATA_CBL_SATA;
ap->scsi_host = shost;
rc = ata_sas_port_init(ap);
if (rc) {
ata_sas_port_destroy(ap);
return rc;
}
found_dev->sata_dev.ap = ap;
return 0;
}
void sas_ata_task_abort(struct sas_task *task)
{
struct ata_queued_cmd *qc = task->uldd_task;
struct completion *waiting;
/* Bounce SCSI-initiated commands to the SCSI EH */
if (qc->scsicmd) {
struct request_queue *q = qc->scsicmd->device->request_queue;
unsigned long flags;
spin_lock_irqsave(q->queue_lock, flags);
blk_abort_request(qc->scsicmd->request);
spin_unlock_irqrestore(q->queue_lock, flags);
return;
}
/* Internal command, fake a timeout and complete. */
qc->flags &= ~ATA_QCFLAG_ACTIVE;
qc->flags |= ATA_QCFLAG_FAILED;
qc->err_mask |= AC_ERR_TIMEOUT;
waiting = qc->private_data;
complete(waiting);
}
static int sas_get_ata_command_set(struct domain_device *dev)
{
struct dev_to_host_fis *fis =
(struct dev_to_host_fis *) dev->frame_rcvd;
struct ata_taskfile tf;
if (dev->dev_type == SAS_SATA_PENDING)
return ATA_DEV_UNKNOWN;
ata_tf_from_fis((const u8 *)fis, &tf);
return ata_dev_classify(&tf);
}
void sas_probe_sata(struct asd_sas_port *port)
{
struct domain_device *dev, *n;
mutex_lock(&port->ha->disco_mutex);
list_for_each_entry(dev, &port->disco_list, disco_list_node) {
if (!dev_is_sata(dev))
continue;
ata_sas_async_probe(dev->sata_dev.ap);
}
mutex_unlock(&port->ha->disco_mutex);
list_for_each_entry_safe(dev, n, &port->disco_list, disco_list_node) {
if (!dev_is_sata(dev))
continue;
sas_ata_wait_eh(dev);
/* if libata could not bring the link up, don't surface
* the device
*/
if (ata_dev_disabled(sas_to_ata_dev(dev)))
sas_fail_probe(dev, __func__, -ENODEV);
}
}
libata, libsas: kill pm_result and related cleanup Tejun says: "At least for libata, worrying about suspend/resume failures don't make whole lot of sense. If suspend failed, just proceed with suspend. If the device can't be woken up afterwards, that's that. There isn't anything we could have done differently anyway. The same for resume, if spinup fails, the device is dud and the following commands will invoke EH actions and will eventually fail. Again, there really isn't any *choice* to make. Just making sure the errors are handled gracefully (ie. don't crash) and the following commands are handled correctly should be enough." The only libata user that actually cares about the result from a suspend operation is libsas. However, it only cares about whether queuing a new operation collides with an in-flight one. All libsas does with the error is retry, but we can just let libata wait for the previous operation before continuing. Other cleanups include: 1/ Unifying all ata port pm operations on an ata_port_pm_ prefix 2/ Marking all ata port pm helper routines as returning void, only ata_port_pm_ entry points need to fake a 0 return value. 3/ Killing ata_port_{suspend|resume}_common() in favor of calling ata_port_request_pm() directly 4/ Killing the wrappers that just do a to_ata_port() conversion 5/ Clearly marking the entry points that do async operations with an _async suffix. Reference: http://marc.info/?l=linux-scsi&m=138995409532286&w=2 Cc: Phillip Susi <psusi@ubuntu.com> Cc: Alan Stern <stern@rowland.harvard.edu> Suggested-by: Tejun Heo <tj@kernel.org> Signed-off-by: Todd Brandt <todd.e.brandt@intel.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Tejun Heo <tj@kernel.org>
2014-03-14 20:52:48 +00:00
static void sas_ata_flush_pm_eh(struct asd_sas_port *port, const char *func)
{
struct domain_device *dev, *n;
list_for_each_entry_safe(dev, n, &port->dev_list, dev_list_node) {
if (!dev_is_sata(dev))
continue;
sas_ata_wait_eh(dev);
/* if libata failed to power manage the device, tear it down */
if (ata_dev_disabled(sas_to_ata_dev(dev)))
sas_fail_probe(dev, func, -ENODEV);
}
}
void sas_suspend_sata(struct asd_sas_port *port)
{
struct domain_device *dev;
mutex_lock(&port->ha->disco_mutex);
list_for_each_entry(dev, &port->dev_list, dev_list_node) {
struct sata_device *sata;
if (!dev_is_sata(dev))
continue;
sata = &dev->sata_dev;
if (sata->ap->pm_mesg.event == PM_EVENT_SUSPEND)
continue;
libata, libsas: kill pm_result and related cleanup Tejun says: "At least for libata, worrying about suspend/resume failures don't make whole lot of sense. If suspend failed, just proceed with suspend. If the device can't be woken up afterwards, that's that. There isn't anything we could have done differently anyway. The same for resume, if spinup fails, the device is dud and the following commands will invoke EH actions and will eventually fail. Again, there really isn't any *choice* to make. Just making sure the errors are handled gracefully (ie. don't crash) and the following commands are handled correctly should be enough." The only libata user that actually cares about the result from a suspend operation is libsas. However, it only cares about whether queuing a new operation collides with an in-flight one. All libsas does with the error is retry, but we can just let libata wait for the previous operation before continuing. Other cleanups include: 1/ Unifying all ata port pm operations on an ata_port_pm_ prefix 2/ Marking all ata port pm helper routines as returning void, only ata_port_pm_ entry points need to fake a 0 return value. 3/ Killing ata_port_{suspend|resume}_common() in favor of calling ata_port_request_pm() directly 4/ Killing the wrappers that just do a to_ata_port() conversion 5/ Clearly marking the entry points that do async operations with an _async suffix. Reference: http://marc.info/?l=linux-scsi&m=138995409532286&w=2 Cc: Phillip Susi <psusi@ubuntu.com> Cc: Alan Stern <stern@rowland.harvard.edu> Suggested-by: Tejun Heo <tj@kernel.org> Signed-off-by: Todd Brandt <todd.e.brandt@intel.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Tejun Heo <tj@kernel.org>
2014-03-14 20:52:48 +00:00
ata_sas_port_suspend(sata->ap);
}
mutex_unlock(&port->ha->disco_mutex);
libata, libsas: kill pm_result and related cleanup Tejun says: "At least for libata, worrying about suspend/resume failures don't make whole lot of sense. If suspend failed, just proceed with suspend. If the device can't be woken up afterwards, that's that. There isn't anything we could have done differently anyway. The same for resume, if spinup fails, the device is dud and the following commands will invoke EH actions and will eventually fail. Again, there really isn't any *choice* to make. Just making sure the errors are handled gracefully (ie. don't crash) and the following commands are handled correctly should be enough." The only libata user that actually cares about the result from a suspend operation is libsas. However, it only cares about whether queuing a new operation collides with an in-flight one. All libsas does with the error is retry, but we can just let libata wait for the previous operation before continuing. Other cleanups include: 1/ Unifying all ata port pm operations on an ata_port_pm_ prefix 2/ Marking all ata port pm helper routines as returning void, only ata_port_pm_ entry points need to fake a 0 return value. 3/ Killing ata_port_{suspend|resume}_common() in favor of calling ata_port_request_pm() directly 4/ Killing the wrappers that just do a to_ata_port() conversion 5/ Clearly marking the entry points that do async operations with an _async suffix. Reference: http://marc.info/?l=linux-scsi&m=138995409532286&w=2 Cc: Phillip Susi <psusi@ubuntu.com> Cc: Alan Stern <stern@rowland.harvard.edu> Suggested-by: Tejun Heo <tj@kernel.org> Signed-off-by: Todd Brandt <todd.e.brandt@intel.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Tejun Heo <tj@kernel.org>
2014-03-14 20:52:48 +00:00
sas_ata_flush_pm_eh(port, __func__);
}
void sas_resume_sata(struct asd_sas_port *port)
{
struct domain_device *dev;
mutex_lock(&port->ha->disco_mutex);
list_for_each_entry(dev, &port->dev_list, dev_list_node) {
struct sata_device *sata;
if (!dev_is_sata(dev))
continue;
sata = &dev->sata_dev;
if (sata->ap->pm_mesg.event == PM_EVENT_ON)
continue;
libata, libsas: kill pm_result and related cleanup Tejun says: "At least for libata, worrying about suspend/resume failures don't make whole lot of sense. If suspend failed, just proceed with suspend. If the device can't be woken up afterwards, that's that. There isn't anything we could have done differently anyway. The same for resume, if spinup fails, the device is dud and the following commands will invoke EH actions and will eventually fail. Again, there really isn't any *choice* to make. Just making sure the errors are handled gracefully (ie. don't crash) and the following commands are handled correctly should be enough." The only libata user that actually cares about the result from a suspend operation is libsas. However, it only cares about whether queuing a new operation collides with an in-flight one. All libsas does with the error is retry, but we can just let libata wait for the previous operation before continuing. Other cleanups include: 1/ Unifying all ata port pm operations on an ata_port_pm_ prefix 2/ Marking all ata port pm helper routines as returning void, only ata_port_pm_ entry points need to fake a 0 return value. 3/ Killing ata_port_{suspend|resume}_common() in favor of calling ata_port_request_pm() directly 4/ Killing the wrappers that just do a to_ata_port() conversion 5/ Clearly marking the entry points that do async operations with an _async suffix. Reference: http://marc.info/?l=linux-scsi&m=138995409532286&w=2 Cc: Phillip Susi <psusi@ubuntu.com> Cc: Alan Stern <stern@rowland.harvard.edu> Suggested-by: Tejun Heo <tj@kernel.org> Signed-off-by: Todd Brandt <todd.e.brandt@intel.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Tejun Heo <tj@kernel.org>
2014-03-14 20:52:48 +00:00
ata_sas_port_resume(sata->ap);
}
mutex_unlock(&port->ha->disco_mutex);
libata, libsas: kill pm_result and related cleanup Tejun says: "At least for libata, worrying about suspend/resume failures don't make whole lot of sense. If suspend failed, just proceed with suspend. If the device can't be woken up afterwards, that's that. There isn't anything we could have done differently anyway. The same for resume, if spinup fails, the device is dud and the following commands will invoke EH actions and will eventually fail. Again, there really isn't any *choice* to make. Just making sure the errors are handled gracefully (ie. don't crash) and the following commands are handled correctly should be enough." The only libata user that actually cares about the result from a suspend operation is libsas. However, it only cares about whether queuing a new operation collides with an in-flight one. All libsas does with the error is retry, but we can just let libata wait for the previous operation before continuing. Other cleanups include: 1/ Unifying all ata port pm operations on an ata_port_pm_ prefix 2/ Marking all ata port pm helper routines as returning void, only ata_port_pm_ entry points need to fake a 0 return value. 3/ Killing ata_port_{suspend|resume}_common() in favor of calling ata_port_request_pm() directly 4/ Killing the wrappers that just do a to_ata_port() conversion 5/ Clearly marking the entry points that do async operations with an _async suffix. Reference: http://marc.info/?l=linux-scsi&m=138995409532286&w=2 Cc: Phillip Susi <psusi@ubuntu.com> Cc: Alan Stern <stern@rowland.harvard.edu> Suggested-by: Tejun Heo <tj@kernel.org> Signed-off-by: Todd Brandt <todd.e.brandt@intel.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Tejun Heo <tj@kernel.org>
2014-03-14 20:52:48 +00:00
sas_ata_flush_pm_eh(port, __func__);
}
/**
* sas_discover_sata -- discover an STP/SATA domain device
* @dev: pointer to struct domain_device of interest
*
* Devices directly attached to a HA port, have no parents. All other
* devices do, and should have their "parent" pointer set appropriately
* before calling this function.
*/
int sas_discover_sata(struct domain_device *dev)
{
int res;
if (dev->dev_type == SAS_SATA_PM)
return -ENODEV;
dev->sata_dev.class = sas_get_ata_command_set(dev);
sas_fill_in_rphy(dev, dev->rphy);
res = sas_notify_lldd_dev_found(dev);
if (res)
return res;
sas_discover_event(dev->port, DISCE_PROBE);
return 0;
}
static void async_sas_ata_eh(void *data, async_cookie_t cookie)
{
struct domain_device *dev = data;
struct ata_port *ap = dev->sata_dev.ap;
struct sas_ha_struct *ha = dev->port->ha;
sas_ata_printk(KERN_DEBUG, dev, "dev error handler\n");
ata_scsi_port_error_handler(ha->core.shost, ap);
[SCSI] libsas: close scsi_remove_target() vs libata-eh race ata_port lifetime in libata follows the host. In libsas it follows the scsi_target. Once scsi_remove_device() has caused all commands to be completed it allows scsi_remove_target() to immediately proceed to freeing the ata_port causing bug reports like: [ 848.393333] BUG: spinlock bad magic on CPU#4, kworker/u:2/5107 [ 848.400262] general protection fault: 0000 [#1] SMP [ 848.406244] CPU 4 [ 848.408310] Modules linked in: nls_utf8 ipv6 uinput i2c_i801 i2c_core iTCO_wdt iTCO_vendor_support ioatdma dca sg sd_mod sr_mod cdrom ahci libahci isci libsas libata scsi_transport_sas [last unloaded: scsi_wait_scan] [ 848.432060] [ 848.434137] Pid: 5107, comm: kworker/u:2 Not tainted 3.2.0-isci+ #8 Intel Corporation S2600CP/S2600CP [ 848.445310] RIP: 0010:[<ffffffff8126a68c>] [<ffffffff8126a68c>] spin_dump+0x5e/0x8c [ 848.454787] RSP: 0018:ffff8807f868dca0 EFLAGS: 00010002 [ 848.461137] RAX: 0000000000000048 RBX: ffff8807fe86a630 RCX: ffffffff817d0be0 [ 848.469520] RDX: 0000000000000000 RSI: ffffffff814af1cf RDI: 0000000000000002 [ 848.477959] RBP: ffff8807f868dcb0 R08: 00000000ffffffff R09: 000000006b6b6b6b [ 848.486327] R10: 000000000003fb8c R11: ffffffff81a19448 R12: 6b6b6b6b6b6b6b6b [ 848.494699] R13: ffff8808027dc520 R14: 0000000000000000 R15: 000000000000001e [ 848.503067] FS: 0000000000000000(0000) GS:ffff88083fd00000(0000) knlGS:0000000000000000 [ 848.512899] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 848.519710] CR2: 00007ff77d001000 CR3: 00000007f7a5d000 CR4: 00000000000406e0 [ 848.528072] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 848.536446] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 848.544831] Process kworker/u:2 (pid: 5107, threadinfo ffff8807f868c000, task ffff8807ff348000) [ 848.555327] Stack: [ 848.557959] ffff8807fe86a630 ffff8807fe86a630 ffff8807f868dcd0 ffffffff8126a6e0 [ 848.567072] ffffffff817c142f ffff8807fe86a630 ffff8807f868dcf0 ffffffff8126a703 [ 848.576190] ffff8808027dc520 0000000000000286 ffff8807f868dd10 ffffffff814af1bb [ 848.585281] Call Trace: [ 848.588409] [<ffffffff8126a6e0>] spin_bug+0x26/0x28 [ 848.594357] [<ffffffff8126a703>] do_raw_spin_unlock+0x21/0x88 [ 848.601283] [<ffffffff814af1bb>] _raw_spin_unlock_irqrestore+0x2c/0x65 [ 848.609089] [<ffffffffa001c103>] ata_scsi_port_error_handler+0x548/0x557 [libata] [ 848.618331] [<ffffffff81061813>] ? async_schedule+0x17/0x17 [ 848.625060] [<ffffffffa004f30f>] async_sas_ata_eh+0x45/0x69 [libsas] [ 848.632655] [<ffffffff810618aa>] async_run_entry_fn+0x97/0x125 [ 848.639670] [<ffffffff81057439>] process_one_work+0x207/0x38d [ 848.646577] [<ffffffff8105738c>] ? process_one_work+0x15a/0x38d [ 848.653681] [<ffffffff810576f7>] worker_thread+0x138/0x21c [ 848.660305] [<ffffffff810575bf>] ? process_one_work+0x38d/0x38d [ 848.667493] [<ffffffff8105b098>] kthread+0x9d/0xa5 [ 848.673382] [<ffffffff8106e1bd>] ? trace_hardirqs_on_caller+0x12f/0x166 [ 848.681304] [<ffffffff814b7704>] kernel_thread_helper+0x4/0x10 [ 848.688324] [<ffffffff814af534>] ? retint_restore_args+0x13/0x13 [ 848.695530] [<ffffffff8105affb>] ? __init_kthread_worker+0x5b/0x5b [ 848.702929] [<ffffffff814b7700>] ? gs_change+0x13/0x13 [ 848.709155] Code: 00 00 48 8d 88 38 04 00 00 44 8b 80 84 02 00 00 31 c0 e8 cf 1b 24 00 41 83 c8 ff 44 8b 4b 08 48 c7 c1 e0 0b 7d 81 4d 85 e4 74 10 <45> 8b 84 24 84 02 00 00 49 8d 8c 24 38 04 00 00 8b 53 04 48 89 [ 848.732467] RIP [<ffffffff8126a68c>] spin_dump+0x5e/0x8c [ 848.738905] RSP <ffff8807f868dca0> [ 848.743743] ---[ end trace 143161646eee8caa ]--- ...so arrange for the ata_port to have the same end of life as the domain device. Reported-by: Marcin Tomczak <marcin.tomczak@intel.com> Acked-by: Jeff Garzik <jgarzik@redhat.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
2012-01-10 23:14:09 +00:00
sas_put_device(dev);
}
void sas_ata_strategy_handler(struct Scsi_Host *shost)
{
struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(shost);
ASYNC_DOMAIN_EXCLUSIVE(async);
int i;
/* it's ok to defer revalidation events during ata eh, these
* disks are in one of three states:
* 1/ present for initial domain discovery, and these
* resets will cause bcn flutters
* 2/ hot removed, we'll discover that after eh fails
* 3/ hot added after initial discovery, lost the race, and need
* to catch the next train.
*/
sas_disable_revalidation(sas_ha);
spin_lock_irq(&sas_ha->phy_port_lock);
for (i = 0; i < sas_ha->num_phys; i++) {
struct asd_sas_port *port = sas_ha->sas_port[i];
struct domain_device *dev;
spin_lock(&port->dev_list_lock);
list_for_each_entry(dev, &port->dev_list, dev_list_node) {
if (!dev_is_sata(dev))
continue;
/* hold a reference over eh since we may be
* racing with final remove once all commands
* are completed
*/
kref_get(&dev->kref);
async_schedule_domain(async_sas_ata_eh, dev, &async);
}
spin_unlock(&port->dev_list_lock);
}
spin_unlock_irq(&sas_ha->phy_port_lock);
async_synchronize_full_domain(&async);
sas_enable_revalidation(sas_ha);
}
void sas_ata_eh(struct Scsi_Host *shost, struct list_head *work_q,
struct list_head *done_q)
{
struct scsi_cmnd *cmd, *n;
struct domain_device *eh_dev;
do {
LIST_HEAD(sata_q);
eh_dev = NULL;
list_for_each_entry_safe(cmd, n, work_q, eh_entry) {
struct domain_device *ddev = cmd_to_domain_dev(cmd);
if (!dev_is_sata(ddev) || TO_SAS_TASK(cmd))
continue;
if (eh_dev && eh_dev != ddev)
continue;
eh_dev = ddev;
list_move(&cmd->eh_entry, &sata_q);
}
if (!list_empty(&sata_q)) {
struct ata_port *ap = eh_dev->sata_dev.ap;
sas_ata_printk(KERN_DEBUG, eh_dev, "cmd error handler\n");
ata_scsi_cmd_error_handler(shost, ap, &sata_q);
/*
* ata's error handler may leave the cmd on the list
* so make sure they don't remain on a stack list
* about to go out of scope.
*
* This looks strange, since the commands are
* now part of no list, but the next error
* action will be ata_port_error_handler()
* which takes no list and sweeps them up
* anyway from the ata tag array.
*/
while (!list_empty(&sata_q))
list_del_init(sata_q.next);
}
} while (eh_dev);
}
void sas_ata_schedule_reset(struct domain_device *dev)
{
struct ata_eh_info *ehi;
struct ata_port *ap;
unsigned long flags;
if (!dev_is_sata(dev))
return;
ap = dev->sata_dev.ap;
ehi = &ap->link.eh_info;
spin_lock_irqsave(ap->lock, flags);
ehi->err_mask |= AC_ERR_TIMEOUT;
ehi->action |= ATA_EH_RESET;
ata_port_schedule_eh(ap);
spin_unlock_irqrestore(ap->lock, flags);
}
EXPORT_SYMBOL_GPL(sas_ata_schedule_reset);
void sas_ata_wait_eh(struct domain_device *dev)
{
struct ata_port *ap;
if (!dev_is_sata(dev))
return;
ap = dev->sata_dev.ap;
ata_port_wait_eh(ap);
}