Merge branch 'nvme-4.14' of git://git.infradead.org/nvme into for-4.14/block-postmerge

Pull NVMe changes from Christoph:

"Below is the current set of NVMe updates for Linux 4.14, now against
 your postmerge branch, and with three more patches.

 The biggest bit comes from Sagi and refactors the RDMA driver to
 prepare for more code sharing in the setup and teardown path.  But we
 have various features and bug fixes from a lot of people as well."
This commit is contained in:
Jens Axboe 2017-08-29 09:09:11 -06:00
commit 2b76da9563
15 changed files with 679 additions and 446 deletions

View File

@ -76,6 +76,11 @@ static DEFINE_SPINLOCK(dev_list_lock);
static struct class *nvme_class;
/*
 * Build command dword 10 for a Get Log Page command.
 *
 * @lid:  log identifier, placed in the low bits
 * @size: transfer size in bytes; it is converted to a zero-based dword
 *        count ((size / 4) - 1) and placed from bit 16 upwards
 *
 * Returns the value in little-endian form.
 */
static __le32 nvme_get_log_dw10(u8 lid, size_t size)
{
	size_t numd = size / 4 - 1;

	return cpu_to_le32((numd << 16) | lid);
}
int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
{
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
@ -108,7 +113,16 @@ static blk_status_t nvme_error_status(struct request *req)
case NVME_SC_WRITE_FAULT:
case NVME_SC_READ_ERROR:
case NVME_SC_UNWRITTEN_BLOCK:
case NVME_SC_ACCESS_DENIED:
case NVME_SC_READ_ONLY:
return BLK_STS_MEDIUM;
case NVME_SC_GUARD_CHECK:
case NVME_SC_APPTAG_CHECK:
case NVME_SC_REFTAG_CHECK:
case NVME_SC_INVALID_PI:
return BLK_STS_PROTECTION;
case NVME_SC_RESERVATION_CONFLICT:
return BLK_STS_NEXUS;
default:
return BLK_STS_IOERR;
}
@ -162,9 +176,10 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
enum nvme_ctrl_state new_state)
{
enum nvme_ctrl_state old_state;
unsigned long flags;
bool changed = false;
spin_lock_irq(&ctrl->lock);
spin_lock_irqsave(&ctrl->lock, flags);
old_state = ctrl->state;
switch (new_state) {
@ -225,7 +240,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
if (changed)
ctrl->state = new_state;
spin_unlock_irq(&ctrl->lock);
spin_unlock_irqrestore(&ctrl->lock, flags);
return changed;
}
@ -307,7 +322,7 @@ static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable)
memset(&c, 0, sizeof(c));
c.directive.opcode = nvme_admin_directive_send;
c.directive.nsid = cpu_to_le32(0xffffffff);
c.directive.nsid = cpu_to_le32(NVME_NSID_ALL);
c.directive.doper = NVME_DIR_SND_ID_OP_ENABLE;
c.directive.dtype = NVME_DIR_IDENTIFY;
c.directive.tdtype = NVME_DIR_STREAMS;
@ -357,7 +372,7 @@ static int nvme_configure_directives(struct nvme_ctrl *ctrl)
if (ret)
return ret;
ret = nvme_get_stream_params(ctrl, &s, 0xffffffff);
ret = nvme_get_stream_params(ctrl, &s, NVME_NSID_ALL);
if (ret)
return ret;
@ -768,7 +783,8 @@ static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
return error;
}
static int nvme_identify_ns_descs(struct nvme_ns *ns, unsigned nsid)
static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
u8 *eui64, u8 *nguid, uuid_t *uuid)
{
struct nvme_command c = { };
int status;
@ -784,7 +800,7 @@ static int nvme_identify_ns_descs(struct nvme_ns *ns, unsigned nsid)
if (!data)
return -ENOMEM;
status = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, data,
status = nvme_submit_sync_cmd(ctrl->admin_q, &c, data,
NVME_IDENTIFY_DATA_SIZE);
if (status)
goto free_data;
@ -798,33 +814,33 @@ static int nvme_identify_ns_descs(struct nvme_ns *ns, unsigned nsid)
switch (cur->nidt) {
case NVME_NIDT_EUI64:
if (cur->nidl != NVME_NIDT_EUI64_LEN) {
dev_warn(ns->ctrl->device,
dev_warn(ctrl->device,
"ctrl returned bogus length: %d for NVME_NIDT_EUI64\n",
cur->nidl);
goto free_data;
}
len = NVME_NIDT_EUI64_LEN;
memcpy(ns->eui, data + pos + sizeof(*cur), len);
memcpy(eui64, data + pos + sizeof(*cur), len);
break;
case NVME_NIDT_NGUID:
if (cur->nidl != NVME_NIDT_NGUID_LEN) {
dev_warn(ns->ctrl->device,
dev_warn(ctrl->device,
"ctrl returned bogus length: %d for NVME_NIDT_NGUID\n",
cur->nidl);
goto free_data;
}
len = NVME_NIDT_NGUID_LEN;
memcpy(ns->nguid, data + pos + sizeof(*cur), len);
memcpy(nguid, data + pos + sizeof(*cur), len);
break;
case NVME_NIDT_UUID:
if (cur->nidl != NVME_NIDT_UUID_LEN) {
dev_warn(ns->ctrl->device,
dev_warn(ctrl->device,
"ctrl returned bogus length: %d for NVME_NIDT_UUID\n",
cur->nidl);
goto free_data;
}
len = NVME_NIDT_UUID_LEN;
uuid_copy(&ns->uuid, data + pos + sizeof(*cur));
uuid_copy(uuid, data + pos + sizeof(*cur));
break;
default:
/* Skip unknown types */
@ -849,9 +865,10 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n
return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
}
static int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
struct nvme_id_ns **id)
static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl,
unsigned nsid)
{
struct nvme_id_ns *id;
struct nvme_command c = { };
int error;
@ -860,15 +877,18 @@ static int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
c.identify.nsid = cpu_to_le32(nsid);
c.identify.cns = NVME_ID_CNS_NS;
*id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
if (!*id)
return -ENOMEM;
id = kmalloc(sizeof(*id), GFP_KERNEL);
if (!id)
return NULL;
error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
sizeof(struct nvme_id_ns));
if (error)
kfree(*id);
return error;
error = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id));
if (error) {
dev_warn(ctrl->device, "Identify namespace failed\n");
kfree(id);
return NULL;
}
return id;
}
static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
@ -1159,32 +1179,21 @@ static void nvme_config_discard(struct nvme_ns *ns)
blk_queue_max_write_zeroes_sectors(ns->queue, UINT_MAX);
}
static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id)
static void nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid,
struct nvme_id_ns *id, u8 *eui64, u8 *nguid, uuid_t *uuid)
{
if (nvme_identify_ns(ns->ctrl, ns->ns_id, id)) {
dev_warn(ns->ctrl->dev, "%s: Identify failure\n", __func__);
return -ENODEV;
}
if ((*id)->ncap == 0) {
kfree(*id);
return -ENODEV;
}
if (ns->ctrl->vs >= NVME_VS(1, 1, 0))
memcpy(ns->eui, (*id)->eui64, sizeof(ns->eui));
if (ns->ctrl->vs >= NVME_VS(1, 2, 0))
memcpy(ns->nguid, (*id)->nguid, sizeof(ns->nguid));
if (ns->ctrl->vs >= NVME_VS(1, 3, 0)) {
if (ctrl->vs >= NVME_VS(1, 1, 0))
memcpy(eui64, id->eui64, sizeof(id->eui64));
if (ctrl->vs >= NVME_VS(1, 2, 0))
memcpy(nguid, id->nguid, sizeof(id->nguid));
if (ctrl->vs >= NVME_VS(1, 3, 0)) {
/* Don't treat error as fatal, as we potentially
* already have a NGUID or EUI-64
*/
if (nvme_identify_ns_descs(ns, ns->ns_id))
dev_warn(ns->ctrl->device,
if (nvme_identify_ns_descs(ctrl, nsid, eui64, nguid, uuid))
dev_warn(ctrl->device,
"%s: Identify Descriptors failed\n", __func__);
}
return 0;
}
static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
@ -1225,22 +1234,38 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
static int nvme_revalidate_disk(struct gendisk *disk)
{
struct nvme_ns *ns = disk->private_data;
struct nvme_id_ns *id = NULL;
int ret;
struct nvme_ctrl *ctrl = ns->ctrl;
struct nvme_id_ns *id;
u8 eui64[8] = { 0 }, nguid[16] = { 0 };
uuid_t uuid = uuid_null;
int ret = 0;
if (test_bit(NVME_NS_DEAD, &ns->flags)) {
set_capacity(disk, 0);
return -ENODEV;
}
ret = nvme_revalidate_ns(ns, &id);
if (ret)
return ret;
id = nvme_identify_ns(ctrl, ns->ns_id);
if (!id)
return -ENODEV;
__nvme_revalidate_disk(disk, id);
if (id->ncap == 0) {
ret = -ENODEV;
goto out;
}
nvme_report_ns_ids(ctrl, ns->ns_id, id, eui64, nguid, &uuid);
if (!uuid_equal(&ns->uuid, &uuid) ||
memcmp(&ns->nguid, &nguid, sizeof(ns->nguid)) ||
memcmp(&ns->eui, &eui64, sizeof(ns->eui))) {
dev_err(ctrl->device,
"identifiers changed for nsid %d\n", ns->ns_id);
ret = -ENODEV;
}
out:
kfree(id);
return 0;
return ret;
}
static char nvme_pr_type(enum pr_type type)
@ -1440,7 +1465,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
ctrl->ctrl_config = NVME_CC_CSS_NVM;
ctrl->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
ctrl->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE;
ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
ctrl->ctrl_config |= NVME_CC_ENABLE;
@ -1453,7 +1478,7 @@ EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
{
unsigned long timeout = jiffies + (shutdown_timeout * HZ);
unsigned long timeout = jiffies + (ctrl->shutdown_timeout * HZ);
u32 csts;
int ret;
@ -1502,6 +1527,23 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
blk_queue_write_cache(q, vwc, vwc);
}
/*
 * Send the current wall-clock time, in milliseconds, to the controller
 * via the NVME_FEAT_TIMESTAMP feature.
 *
 * Controllers that do not advertise NVME_CTRL_ONCS_TIMESTAMP are left
 * alone and 0 is returned.  Otherwise the result of nvme_set_features()
 * is handed back, with a one-time warning when it is non-zero.
 */
static int nvme_configure_timestamp(struct nvme_ctrl *ctrl)
{
	__le64 now;
	int status;

	if (!(ctrl->oncs & NVME_CTRL_ONCS_TIMESTAMP))
		return 0;

	now = cpu_to_le64(ktime_to_ms(ktime_get_real()));
	status = nvme_set_features(ctrl, NVME_FEAT_TIMESTAMP, 0, &now,
			sizeof(now), NULL);
	if (!status)
		return 0;

	dev_warn_once(ctrl->device, "could not set timestamp (%d)\n", status);
	return status;
}
static int nvme_configure_apst(struct nvme_ctrl *ctrl)
{
/*
@ -1804,6 +1846,20 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->sgls = le32_to_cpu(id->sgls);
ctrl->kas = le16_to_cpu(id->kas);
if (id->rtd3e) {
/* us -> s */
u32 transition_time = le32_to_cpu(id->rtd3e) / 1000000;
ctrl->shutdown_timeout = clamp_t(unsigned int, transition_time,
shutdown_timeout, 60);
if (ctrl->shutdown_timeout != shutdown_timeout)
dev_warn(ctrl->device,
"Shutdown timeout set to %u seconds\n",
ctrl->shutdown_timeout);
} else
ctrl->shutdown_timeout = shutdown_timeout;
ctrl->npss = id->npss;
ctrl->apsta = id->apsta;
prev_apst_enabled = ctrl->apst_enabled;
@ -1856,6 +1912,10 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ret = nvme_configure_apst(ctrl);
if (ret < 0)
return ret;
ret = nvme_configure_timestamp(ctrl);
if (ret < 0)
return ret;
ret = nvme_configure_directives(ctrl);
if (ret < 0)
@ -2311,9 +2371,15 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->instance);
if (nvme_revalidate_ns(ns, &id))
id = nvme_identify_ns(ctrl, nsid);
if (!id)
goto out_free_queue;
if (id->ncap == 0)
goto out_free_id;
nvme_report_ns_ids(ctrl, ns->ns_id, id, ns->eui, ns->nguid, &ns->uuid);
if (nvme_nvm_ns_supported(ns, id) &&
nvme_nvm_register(ns, disk_name, node)) {
dev_warn(ctrl->device, "%s: LightNVM init failure\n", __func__);
@ -2534,6 +2600,71 @@ static void nvme_async_event_work(struct work_struct *work)
spin_unlock_irq(&ctrl->lock);
}
/*
 * Check the NVME_CSTS_PP bit of the controller status register.
 *
 * Returns true only when CSTS could be read, did not read back as all
 * ones, the cached controller configuration has NVME_CC_ENABLE set and
 * CSTS has NVME_CSTS_PP set.  Every other case yields false.
 */
static bool nvme_ctrl_pp_status(struct nvme_ctrl *ctrl)
{
	u32 csts;

	/* a failed read leaves csts unusable, so test it only afterwards */
	if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts) || csts == ~0)
		return false;

	if (!(ctrl->ctrl_config & NVME_CC_ENABLE))
		return false;

	return (csts & NVME_CSTS_PP) != 0;
}
/*
 * Issue a Get Log Page command for the firmware slot information log
 * (NVME_LOG_FW_SLOT) on the admin queue.  The log contents are not
 * examined; the buffer is freed as soon as the command completes.
 *
 * Failures are only reported with a warning: an allocation failure
 * returns silently and a failed command logs a message.
 */
static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl)
{
	struct nvme_command c = { };
	struct nvme_fw_slot_info_log *log;

	log = kmalloc(sizeof(*log), GFP_KERNEL);
	if (!log)
		return;

	c.common.opcode = nvme_admin_get_log_page;
	c.common.nsid = cpu_to_le32(NVME_NSID_ALL);
	c.common.cdw10[0] = nvme_get_log_dw10(NVME_LOG_FW_SLOT, sizeof(*log));

	/*
	 * nvme_submit_sync_cmd() returns non-zero on failure, so warn only
	 * in that case (the check used to be inverted and warned on success).
	 */
	if (nvme_submit_sync_cmd(ctrl->admin_q, &c, log, sizeof(*log)))
		dev_warn(ctrl->device,
			"Get FW SLOT INFO log error\n");
	kfree(log);
}
/*
 * Work item scheduled for the NVME_AER_NOTICE_FW_ACT_STARTING event.
 *
 * The queues are stopped and nvme_ctrl_pp_status() is polled every
 * 100ms until it returns false.  If the deadline passes first, the
 * controller is reset instead.  The deadline is ctrl->mtfa * 100
 * milliseconds when mtfa is non-zero, otherwise admin_timeout seconds.
 * The queues are restarted only if the controller is still in the
 * NVME_CTRL_LIVE state afterwards.
 */
static void nvme_fw_act_work(struct work_struct *work)
{
	struct nvme_ctrl *ctrl = container_of(work,
				struct nvme_ctrl, fw_act_work);
	unsigned long deadline;
	unsigned int wait_ms;

	if (ctrl->mtfa)
		wait_ms = ctrl->mtfa * 100;
	else
		wait_ms = admin_timeout * 1000;
	deadline = jiffies + msecs_to_jiffies(wait_ms);

	nvme_stop_queues(ctrl);
	for (;;) {
		if (!nvme_ctrl_pp_status(ctrl))
			break;

		if (time_after(jiffies, deadline)) {
			dev_warn(ctrl->device,
				"Fw activation timeout, reset controller\n");
			nvme_reset_ctrl(ctrl);
			break;
		}
		msleep(100);
	}

	if (ctrl->state == NVME_CTRL_LIVE) {
		nvme_start_queues(ctrl);
		/* read FW slot information to clear the AER */
		nvme_get_fw_slot_info(ctrl);
	}
}
void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
union nvme_result *res)
{
@ -2560,6 +2691,9 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
dev_info(ctrl->device, "rescanning\n");
nvme_queue_scan(ctrl);
break;
case NVME_AER_NOTICE_FW_ACT_STARTING:
schedule_work(&ctrl->fw_act_work);
break;
default:
dev_warn(ctrl->device, "async event result %08x\n", result);
}
@ -2607,6 +2741,7 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
nvme_stop_keep_alive(ctrl);
flush_work(&ctrl->async_event_work);
flush_work(&ctrl->scan_work);
cancel_work_sync(&ctrl->fw_act_work);
}
EXPORT_SYMBOL_GPL(nvme_stop_ctrl);
@ -2670,6 +2805,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
ctrl->quirks = quirks;
INIT_WORK(&ctrl->scan_work, nvme_scan_work);
INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
ret = nvme_set_instance(ctrl);
if (ret)

View File

@ -735,6 +735,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
goto out;
}
if (uuid_parse(p, &hostid)) {
pr_err("Invalid hostid %s\n", p);
ret = -EINVAL;
goto out;
}

View File

@ -220,6 +220,90 @@ static int __nvme_fc_del_ctrl(struct nvme_fc_ctrl *);
static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *,
struct nvme_fc_queue *, unsigned int);
/*
 * kref release callback for a local port (see nvme_fc_lport_put()).
 *
 * Warns if the port is not in the FC_OBJSTATE_DELETED state or still has
 * entries on its endpoint list, then unlinks it from the transport list,
 * notifies the LLDD, releases the port number and the endpoint IDA,
 * drops the device reference and frees the structure.
 */
static void
nvme_fc_free_lport(struct kref *ref)
{
	struct nvme_fc_lport *port;
	unsigned long irqflags;

	port = container_of(ref, struct nvme_fc_lport, ref);

	WARN_ON(port->localport.port_state != FC_OBJSTATE_DELETED);
	WARN_ON(!list_empty(&port->endp_list));

	/* unlink from the transport's list of local ports */
	spin_lock_irqsave(&nvme_fc_lock, irqflags);
	list_del(&port->port_list);
	spin_unlock_irqrestore(&nvme_fc_lock, irqflags);

	/* tell the LLDD that the teardown has completed */
	port->ops->localport_delete(&port->localport);

	ida_simple_remove(&nvme_fc_local_port_cnt, port->localport.port_num);
	ida_destroy(&port->endp_cnt);

	put_device(port->dev);

	kfree(port);
}
/*
 * Drop one reference on @lport.  nvme_fc_free_lport() is registered as
 * the kref release callback.
 */
static void
nvme_fc_lport_put(struct nvme_fc_lport *lport)
{
kref_put(&lport->ref, nvme_fc_free_lport);
}
/*
 * Try to take a reference on @lport.  Returns the result of
 * kref_get_unless_zero(): zero means the reference count had already
 * dropped to zero and no reference was taken.
 */
static int
nvme_fc_lport_get(struct nvme_fc_lport *lport)
{
return kref_get_unless_zero(&lport->ref);
}
static struct nvme_fc_lport *
nvme_fc_attach_to_unreg_lport(struct nvme_fc_port_info *pinfo)
{
struct nvme_fc_lport *lport;
unsigned long flags;
spin_lock_irqsave(&nvme_fc_lock, flags);
list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
if (lport->localport.node_name != pinfo->node_name ||
lport->localport.port_name != pinfo->port_name)
continue;
if (lport->localport.port_state != FC_OBJSTATE_DELETED) {
lport = ERR_PTR(-EEXIST);
goto out_done;
}
if (!nvme_fc_lport_get(lport)) {
/*
* fails if ref cnt already 0. If so,
* act as if lport already deleted
*/
lport = NULL;
goto out_done;
}
/* resume the lport */
lport->localport.port_role = pinfo->port_role;
lport->localport.port_id = pinfo->port_id;
lport->localport.port_state = FC_OBJSTATE_ONLINE;
spin_unlock_irqrestore(&nvme_fc_lock, flags);
return lport;
}
lport = NULL;
out_done:
spin_unlock_irqrestore(&nvme_fc_lock, flags);
return lport;
}
/**
* nvme_fc_register_localport - transport entry point called by an
@ -257,6 +341,28 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo,
goto out_reghost_failed;
}
/*
* look to see if there is already a localport that had been
* deregistered and in the process of waiting for all the
* references to fully be removed. If the references haven't
* expired, we can simply re-enable the localport. Remoteports
* and controller reconnections should resume naturally.
*/
newrec = nvme_fc_attach_to_unreg_lport(pinfo);
/* found an lport, but something about its state is bad */
if (IS_ERR(newrec)) {
ret = PTR_ERR(newrec);
goto out_reghost_failed;
/* found existing lport, which was resumed */
} else if (newrec) {
*portptr = &newrec->localport;
return 0;
}
/* nothing found - allocate a new localport struct */
newrec = kmalloc((sizeof(*newrec) + template->local_priv_sz),
GFP_KERNEL);
if (!newrec) {
@ -310,44 +416,6 @@ out_reghost_failed:
}
EXPORT_SYMBOL_GPL(nvme_fc_register_localport);
static void
nvme_fc_free_lport(struct kref *ref)
{
struct nvme_fc_lport *lport =
container_of(ref, struct nvme_fc_lport, ref);
unsigned long flags;
WARN_ON(lport->localport.port_state != FC_OBJSTATE_DELETED);
WARN_ON(!list_empty(&lport->endp_list));
/* remove from transport list */
spin_lock_irqsave(&nvme_fc_lock, flags);
list_del(&lport->port_list);
spin_unlock_irqrestore(&nvme_fc_lock, flags);
/* let the LLDD know we've finished tearing it down */
lport->ops->localport_delete(&lport->localport);
ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num);
ida_destroy(&lport->endp_cnt);
put_device(lport->dev);
kfree(lport);
}
static void
nvme_fc_lport_put(struct nvme_fc_lport *lport)
{
kref_put(&lport->ref, nvme_fc_free_lport);
}
static int
nvme_fc_lport_get(struct nvme_fc_lport *lport)
{
return kref_get_unless_zero(&lport->ref);
}
/**
* nvme_fc_unregister_localport - transport entry point called by an
* LLDD to deregister/remove a previously
@ -2731,6 +2799,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);
if (ret)
goto out_free_queues;
ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set;
ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
if (IS_ERR(ctrl->ctrl.admin_q)) {

View File

@ -125,6 +125,7 @@ struct nvme_ctrl {
struct kref kref;
int instance;
struct blk_mq_tag_set *tagset;
struct blk_mq_tag_set *admin_tagset;
struct list_head namespaces;
struct mutex namespaces_mutex;
struct device *device; /* char device */
@ -142,6 +143,7 @@ struct nvme_ctrl {
u16 cntlid;
u32 ctrl_config;
u16 mtfa;
u32 queue_count;
u64 cap;
@ -160,6 +162,7 @@ struct nvme_ctrl {
u16 kas;
u8 npss;
u8 apsta;
unsigned int shutdown_timeout;
unsigned int kato;
bool subsystem;
unsigned long quirks;
@ -167,6 +170,7 @@ struct nvme_ctrl {
struct work_struct scan_work;
struct work_struct async_event_work;
struct delayed_work ka_work;
struct work_struct fw_act_work;
/* Power saving configuration */
u64 ps_max_latency_us;
@ -207,13 +211,9 @@ struct nvme_ns {
bool ext;
u8 pi_type;
unsigned long flags;
u16 noiob;
#define NVME_NS_REMOVING 0
#define NVME_NS_DEAD 1
u64 mode_select_num_blocks;
u32 mode_select_block_len;
u16 noiob;
};
struct nvme_ctrl_ops {

View File

@ -555,8 +555,10 @@ static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req)
int nprps, i;
length -= (page_size - offset);
if (length <= 0)
if (length <= 0) {
iod->first_dma = 0;
return BLK_STS_OK;
}
dma_len -= (page_size - offset);
if (dma_len) {
@ -1376,6 +1378,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
if (blk_mq_alloc_tag_set(&dev->admin_tagset))
return -ENOMEM;
dev->ctrl.admin_tagset = &dev->admin_tagset;
dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset);
if (IS_ERR(dev->ctrl.admin_q)) {

View File

@ -36,8 +36,6 @@
#define NVME_RDMA_CONNECT_TIMEOUT_MS 3000 /* 3 second */
#define NVME_RDMA_MAX_SEGMENT_SIZE 0xffffff /* 24-bit SGL field */
#define NVME_RDMA_MAX_SEGMENTS 256
#define NVME_RDMA_MAX_INLINE_SEGMENTS 1
@ -151,6 +149,9 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event);
static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc);
static const struct blk_mq_ops nvme_rdma_mq_ops;
static const struct blk_mq_ops nvme_rdma_admin_mq_ops;
/* XXX: really should move to a generic header sooner or later.. */
static inline void put_unaligned_le24(u32 val, u8 *p)
{
@ -503,7 +504,7 @@ out_put_dev:
return ret;
}
static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl,
static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
int idx, size_t queue_size)
{
struct nvme_rdma_queue *queue;
@ -561,22 +562,20 @@ out_destroy_cm_id:
static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
{
if (!test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags))
return;
rdma_disconnect(queue->cm_id);
ib_drain_qp(queue->qp);
}
static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
{
nvme_rdma_destroy_queue_ib(queue);
rdma_destroy_id(queue->cm_id);
}
static void nvme_rdma_stop_and_free_queue(struct nvme_rdma_queue *queue)
{
if (test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags))
return;
nvme_rdma_stop_queue(queue);
nvme_rdma_free_queue(queue);
nvme_rdma_destroy_queue_ib(queue);
rdma_destroy_id(queue->cm_id);
}
static void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl)
@ -584,31 +583,53 @@ static void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl)
int i;
for (i = 1; i < ctrl->ctrl.queue_count; i++)
nvme_rdma_stop_and_free_queue(&ctrl->queues[i]);
nvme_rdma_free_queue(&ctrl->queues[i]);
}
static int nvme_rdma_connect_io_queues(struct nvme_rdma_ctrl *ctrl)
/*
 * Call nvme_rdma_stop_queue() on every I/O queue, i.e. every queue index
 * from 1 up to ctrl.queue_count - 1.  Index 0 is not touched here.
 */
static void nvme_rdma_stop_io_queues(struct nvme_rdma_ctrl *ctrl)
{
	int qid = 1;

	while (qid < ctrl->ctrl.queue_count) {
		nvme_rdma_stop_queue(&ctrl->queues[qid]);
		qid++;
	}
}
/*
 * Issue the fabrics connect for queue @idx and mark it live on success.
 *
 * Index 0 uses nvmf_connect_admin_queue(); any other index uses
 * nvmf_connect_io_queue().  On success NVME_RDMA_Q_LIVE is set in the
 * queue flags and 0 is returned.  On failure the error is logged and
 * returned unchanged.
 */
static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx)
{
	int ret = idx ? nvmf_connect_io_queue(&ctrl->ctrl, idx) :
			nvmf_connect_admin_queue(&ctrl->ctrl);

	if (ret) {
		dev_info(ctrl->ctrl.device,
			"failed to connect queue: %d ret=%d\n", idx, ret);
		return ret;
	}

	set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[idx].flags);
	return 0;
}
static int nvme_rdma_start_io_queues(struct nvme_rdma_ctrl *ctrl)
{
int i, ret = 0;
for (i = 1; i < ctrl->ctrl.queue_count; i++) {
ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
if (ret) {
dev_info(ctrl->ctrl.device,
"failed to connect i/o queue: %d\n", ret);
goto out_free_queues;
}
set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags);
ret = nvme_rdma_start_queue(ctrl, i);
if (ret)
goto out_stop_queues;
}
return 0;
out_free_queues:
nvme_rdma_free_io_queues(ctrl);
out_stop_queues:
for (i--; i >= 1; i--)
nvme_rdma_stop_queue(&ctrl->queues[i]);
return ret;
}
static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl)
static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
{
struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
unsigned int nr_io_queues;
@ -627,32 +648,230 @@ static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl)
"creating %d I/O queues.\n", nr_io_queues);
for (i = 1; i < ctrl->ctrl.queue_count; i++) {
ret = nvme_rdma_init_queue(ctrl, i,
ctrl->ctrl.opts->queue_size);
if (ret) {
dev_info(ctrl->ctrl.device,
"failed to initialize i/o queue: %d\n", ret);
ret = nvme_rdma_alloc_queue(ctrl, i,
ctrl->ctrl.sqsize + 1);
if (ret)
goto out_free_queues;
}
}
return 0;
out_free_queues:
for (i--; i >= 1; i--)
nvme_rdma_stop_and_free_queue(&ctrl->queues[i]);
nvme_rdma_free_queue(&ctrl->queues[i]);
return ret;
}
static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl)
/*
 * Free the admin tag set (@admin true) or the I/O tag set (@admin false)
 * and drop the device reference that nvme_rdma_alloc_tagset() took for it.
 */
static void nvme_rdma_free_tagset(struct nvme_ctrl *nctrl, bool admin)
{
	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
	struct blk_mq_tag_set *set;

	if (admin)
		set = &ctrl->admin_tag_set;
	else
		set = &ctrl->tag_set;

	blk_mq_free_tag_set(set);
	nvme_rdma_dev_put(ctrl->device);
}
static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
bool admin)
{
struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
struct blk_mq_tag_set *set;
int ret;
if (admin) {
set = &ctrl->admin_tag_set;
memset(set, 0, sizeof(*set));
set->ops = &nvme_rdma_admin_mq_ops;
set->queue_depth = NVME_RDMA_AQ_BLKMQ_DEPTH;
set->reserved_tags = 2; /* connect + keep-alive */
set->numa_node = NUMA_NO_NODE;
set->cmd_size = sizeof(struct nvme_rdma_request) +
SG_CHUNK_SIZE * sizeof(struct scatterlist);
set->driver_data = ctrl;
set->nr_hw_queues = 1;
set->timeout = ADMIN_TIMEOUT;
} else {
set = &ctrl->tag_set;
memset(set, 0, sizeof(*set));
set->ops = &nvme_rdma_mq_ops;
set->queue_depth = nctrl->opts->queue_size;
set->reserved_tags = 1; /* fabric connect */
set->numa_node = NUMA_NO_NODE;
set->flags = BLK_MQ_F_SHOULD_MERGE;
set->cmd_size = sizeof(struct nvme_rdma_request) +
SG_CHUNK_SIZE * sizeof(struct scatterlist);
set->driver_data = ctrl;
set->nr_hw_queues = nctrl->queue_count - 1;
set->timeout = NVME_IO_TIMEOUT;
}
ret = blk_mq_alloc_tag_set(set);
if (ret)
goto out;
/*
* We need a reference on the device as long as the tag_set is alive,
* as the MRs in the request structures need a valid ib_device.
*/
ret = nvme_rdma_dev_get(ctrl->device);
if (!ret) {
ret = -EINVAL;
goto out_free_tagset;
}
return set;
out_free_tagset:
blk_mq_free_tag_set(set);
out:
return ERR_PTR(ret);
}
static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
bool remove)
{
nvme_rdma_free_qe(ctrl->queues[0].device->dev, &ctrl->async_event_sqe,
sizeof(struct nvme_command), DMA_TO_DEVICE);
nvme_rdma_stop_and_free_queue(&ctrl->queues[0]);
blk_cleanup_queue(ctrl->ctrl.admin_q);
blk_mq_free_tag_set(&ctrl->admin_tag_set);
nvme_rdma_dev_put(ctrl->device);
nvme_rdma_stop_queue(&ctrl->queues[0]);
if (remove) {
blk_cleanup_queue(ctrl->ctrl.admin_q);
nvme_rdma_free_tagset(&ctrl->ctrl, true);
}
nvme_rdma_free_queue(&ctrl->queues[0]);
}
static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
bool new)
{
int error;
error = nvme_rdma_alloc_queue(ctrl, 0, NVME_AQ_DEPTH);
if (error)
return error;
ctrl->device = ctrl->queues[0].device;
ctrl->max_fr_pages = min_t(u32, NVME_RDMA_MAX_SEGMENTS,
ctrl->device->dev->attrs.max_fast_reg_page_list_len);
if (new) {
ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true);
if (IS_ERR(ctrl->ctrl.admin_tagset))
goto out_free_queue;
ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
if (IS_ERR(ctrl->ctrl.admin_q)) {
error = PTR_ERR(ctrl->ctrl.admin_q);
goto out_free_tagset;
}
} else {
error = blk_mq_reinit_tagset(&ctrl->admin_tag_set,
nvme_rdma_reinit_request);
if (error)
goto out_free_queue;
}
error = nvme_rdma_start_queue(ctrl, 0);
if (error)
goto out_cleanup_queue;
error = ctrl->ctrl.ops->reg_read64(&ctrl->ctrl, NVME_REG_CAP,
&ctrl->ctrl.cap);
if (error) {
dev_err(ctrl->ctrl.device,
"prop_get NVME_REG_CAP failed\n");
goto out_cleanup_queue;
}
ctrl->ctrl.sqsize =
min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize);
error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
if (error)
goto out_cleanup_queue;
ctrl->ctrl.max_hw_sectors =
(ctrl->max_fr_pages - 1) << (PAGE_SHIFT - 9);
error = nvme_init_identify(&ctrl->ctrl);
if (error)
goto out_cleanup_queue;
error = nvme_rdma_alloc_qe(ctrl->queues[0].device->dev,
&ctrl->async_event_sqe, sizeof(struct nvme_command),
DMA_TO_DEVICE);
if (error)
goto out_cleanup_queue;
return 0;
out_cleanup_queue:
if (new)
blk_cleanup_queue(ctrl->ctrl.admin_q);
out_free_tagset:
if (new)
nvme_rdma_free_tagset(&ctrl->ctrl, true);
out_free_queue:
nvme_rdma_free_queue(&ctrl->queues[0]);
return error;
}
/*
 * Tear down the I/O queues of an RDMA controller.
 *
 * The I/O queues are stopped first and freed last.  When @remove is true
 * the connect request queue and the I/O tag set are released in between;
 * when it is false they are left in place (the reconnect path in this
 * file passes false).
 */
static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl,
bool remove)
{
nvme_rdma_stop_io_queues(ctrl);
if (remove) {
blk_cleanup_queue(ctrl->ctrl.connect_q);
nvme_rdma_free_tagset(&ctrl->ctrl, false);
}
nvme_rdma_free_io_queues(ctrl);
}
/*
 * Bring up the I/O queues of an RDMA controller.
 *
 * @new is true on first-time setup, when the I/O tag set and the connect
 * request queue are allocated.  It is false on reconnect, when the
 * existing tag set is re-initialised and its hardware queue count is
 * updated.  The queues are then connected via nvme_rdma_start_io_queues().
 *
 * Returns 0 on success or a non-zero error.  On failure the I/O queues
 * are freed again, together with the tag set and connect queue when
 * @new is set.
 */
static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
{
	int ret;

	ret = nvme_rdma_alloc_io_queues(ctrl);
	if (ret)
		return ret;

	if (new) {
		ctrl->ctrl.tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, false);
		if (IS_ERR(ctrl->ctrl.tagset)) {
			/*
			 * ret is still 0 from the successful queue
			 * allocation; without this the failure would be
			 * reported to the caller as success.
			 */
			ret = PTR_ERR(ctrl->ctrl.tagset);
			goto out_free_io_queues;
		}

		ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set);
		if (IS_ERR(ctrl->ctrl.connect_q)) {
			ret = PTR_ERR(ctrl->ctrl.connect_q);
			goto out_free_tag_set;
		}
	} else {
		ret = blk_mq_reinit_tagset(&ctrl->tag_set,
					   nvme_rdma_reinit_request);
		if (ret)
			goto out_free_io_queues;

		blk_mq_update_nr_hw_queues(&ctrl->tag_set,
			ctrl->ctrl.queue_count - 1);
	}

	ret = nvme_rdma_start_io_queues(ctrl);
	if (ret)
		goto out_cleanup_connect_q;

	return 0;

out_cleanup_connect_q:
	if (new)
		blk_cleanup_queue(ctrl->ctrl.connect_q);
out_free_tag_set:
	if (new)
		nvme_rdma_free_tagset(&ctrl->ctrl, false);
out_free_io_queues:
	nvme_rdma_free_io_queues(ctrl);
	return ret;
}
static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
@ -701,47 +920,18 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
++ctrl->ctrl.nr_reconnects;
if (ctrl->ctrl.queue_count > 1) {
nvme_rdma_free_io_queues(ctrl);
if (ctrl->ctrl.queue_count > 1)
nvme_rdma_destroy_io_queues(ctrl, false);
ret = blk_mq_reinit_tagset(&ctrl->tag_set,
nvme_rdma_reinit_request);
if (ret)
goto requeue;
}
nvme_rdma_stop_and_free_queue(&ctrl->queues[0]);
ret = blk_mq_reinit_tagset(&ctrl->admin_tag_set,
nvme_rdma_reinit_request);
if (ret)
goto requeue;
ret = nvme_rdma_init_queue(ctrl, 0, NVME_AQ_DEPTH);
if (ret)
goto requeue;
ret = nvmf_connect_admin_queue(&ctrl->ctrl);
if (ret)
goto requeue;
set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags);
ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
nvme_rdma_destroy_admin_queue(ctrl, false);
ret = nvme_rdma_configure_admin_queue(ctrl, false);
if (ret)
goto requeue;
if (ctrl->ctrl.queue_count > 1) {
ret = nvme_rdma_init_io_queues(ctrl);
ret = nvme_rdma_configure_io_queues(ctrl, false);
if (ret)
goto requeue;
ret = nvme_rdma_connect_io_queues(ctrl);
if (ret)
goto requeue;
blk_mq_update_nr_hw_queues(&ctrl->tag_set,
ctrl->ctrl.queue_count - 1);
}
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
@ -764,16 +954,15 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
{
struct nvme_rdma_ctrl *ctrl = container_of(work,
struct nvme_rdma_ctrl, err_work);
int i;
nvme_stop_ctrl(&ctrl->ctrl);
for (i = 0; i < ctrl->ctrl.queue_count; i++)
clear_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags);
if (ctrl->ctrl.queue_count > 1)
if (ctrl->ctrl.queue_count > 1) {
nvme_stop_queues(&ctrl->ctrl);
nvme_rdma_stop_io_queues(ctrl);
}
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
nvme_rdma_stop_queue(&ctrl->queues[0]);
/* We must take care of fastfail/requeue all our inflight requests */
if (ctrl->ctrl.queue_count > 1)
@ -858,7 +1047,7 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
if (req->mr->need_inval) {
res = nvme_rdma_inv_rkey(queue, req);
if (res < 0) {
if (unlikely(res < 0)) {
dev_err(ctrl->ctrl.device,
"Queueing INV WR for rkey %#x failed (%d)\n",
req->mr->rkey, res);
@ -923,7 +1112,7 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
int nr;
nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, PAGE_SIZE);
if (nr < count) {
if (unlikely(nr < count)) {
if (nr < 0)
return nr;
return -EINVAL;
@ -1059,7 +1248,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
first = &wr;
ret = ib_post_send(queue->qp, first, &bad_wr);
if (ret) {
if (unlikely(ret)) {
dev_err(queue->ctrl->ctrl.device,
"%s failed with error code %d\n", __func__, ret);
}
@ -1085,7 +1274,7 @@ static int nvme_rdma_post_recv(struct nvme_rdma_queue *queue,
wr.num_sge = 1;
ret = ib_post_recv(queue->qp, &wr, &bad_wr);
if (ret) {
if (unlikely(ret)) {
dev_err(queue->ctrl->ctrl.device,
"%s failed with error code %d\n", __func__, ret);
}
@ -1445,7 +1634,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_mq_start_request(rq);
err = nvme_rdma_map_data(queue, rq, c);
if (err < 0) {
if (unlikely(err < 0)) {
dev_err(queue->ctrl->ctrl.device,
"Failed to map data (%d)\n", err);
nvme_cleanup_cmd(rq);
@ -1459,7 +1648,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
flush = true;
err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
req->mr->need_inval ? &req->reg_wr.wr : NULL, flush);
if (err) {
if (unlikely(err)) {
nvme_rdma_unmap_data(queue, rq);
goto err;
}
@ -1519,98 +1708,7 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
.timeout = nvme_rdma_timeout,
};
/*
 * Bring up the RDMA admin queue (queue 0) and the controller behind it.
 *
 * Steps, in order: initialise queue 0, take a device reference, allocate the
 * admin blk-mq tag set and request queue, issue the fabrics connect, read
 * CAP, enable the controller, run identify, and allocate the async event SQE.
 *
 * Returns 0 on success, or the error of the first step that failed. On
 * failure the labels at the end release what was acquired, in reverse order.
 */
static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
{
int error;
/* Index 0 of ctrl->queues is used as the admin queue throughout. */
error = nvme_rdma_init_queue(ctrl, 0, NVME_AQ_DEPTH);
if (error)
return error;
ctrl->device = ctrl->queues[0].device;
/*
* We need a reference on the device as long as the tag_set is alive,
* as the MRs in the request structures need a valid ib_device.
*/
error = -EINVAL;
if (!nvme_rdma_dev_get(ctrl->device))
goto out_free_queue;
/* Bound the fast-registration page count by the device-reported limit. */
ctrl->max_fr_pages = min_t(u32, NVME_RDMA_MAX_SEGMENTS,
ctrl->device->dev->attrs.max_fast_reg_page_list_len);
/* Start from a zeroed tag set so fields not assigned below are 0. */
memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set));
ctrl->admin_tag_set.ops = &nvme_rdma_admin_mq_ops;
ctrl->admin_tag_set.queue_depth = NVME_RDMA_AQ_BLKMQ_DEPTH;
ctrl->admin_tag_set.reserved_tags = 2; /* connect + keep-alive */
ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
/* Per-request payload: the driver request plus an inline scatterlist. */
ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_rdma_request) +
SG_CHUNK_SIZE * sizeof(struct scatterlist);
ctrl->admin_tag_set.driver_data = ctrl;
ctrl->admin_tag_set.nr_hw_queues = 1;
ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT;
error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);
if (error)
goto out_put_dev;
ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
if (IS_ERR(ctrl->ctrl.admin_q)) {
error = PTR_ERR(ctrl->ctrl.admin_q);
goto out_free_tagset;
}
error = nvmf_connect_admin_queue(&ctrl->ctrl);
if (error)
goto out_cleanup_queue;
/* Mark queue 0 live only after the connect has succeeded. */
set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags);
/*
 * NOTE(review): the error paths below do not clear NVME_RDMA_Q_LIVE
 * themselves; presumably nvme_rdma_stop_queue()/nvme_rdma_free_queue()
 * deal with it. Confirm.
 */
error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP,
&ctrl->ctrl.cap);
if (error) {
dev_err(ctrl->ctrl.device,
"prop_get NVME_REG_CAP failed\n");
goto out_cleanup_queue;
}
/* Never use a larger sqsize than the MQES value taken from CAP. */
ctrl->ctrl.sqsize =
min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize);
error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
if (error)
goto out_cleanup_queue;
/*
 * Shifting by (PAGE_SHIFT - 9) converts a page count into 512-byte units.
 * The "- 1" presumably leaves room for a buffer that is not page aligned.
 * TODO confirm.
 */
ctrl->ctrl.max_hw_sectors =
(ctrl->max_fr_pages - 1) << (PAGE_SHIFT - 9);
error = nvme_init_identify(&ctrl->ctrl);
if (error)
goto out_cleanup_queue;
error = nvme_rdma_alloc_qe(ctrl->queues[0].device->dev,
&ctrl->async_event_sqe, sizeof(struct nvme_command),
DMA_TO_DEVICE);
if (error)
goto out_cleanup_queue;
return 0;
/* Unwind: each label falls through into the ones below it. */
out_cleanup_queue:
blk_cleanup_queue(ctrl->ctrl.admin_q);
out_free_tagset:
/* disconnect and drain the queue before freeing the tagset */
nvme_rdma_stop_queue(&ctrl->queues[0]);
blk_mq_free_tag_set(&ctrl->admin_tag_set);
out_put_dev:
nvme_rdma_dev_put(ctrl->device);
out_free_queue:
nvme_rdma_free_queue(&ctrl->queues[0]);
return error;
}
static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
{
cancel_work_sync(&ctrl->err_work);
cancel_delayed_work_sync(&ctrl->reconnect_work);
@ -1619,33 +1717,26 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
nvme_stop_queues(&ctrl->ctrl);
blk_mq_tagset_busy_iter(&ctrl->tag_set,
nvme_cancel_request, &ctrl->ctrl);
nvme_rdma_free_io_queues(ctrl);
nvme_rdma_destroy_io_queues(ctrl, shutdown);
}
if (test_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags))
if (shutdown)
nvme_shutdown_ctrl(&ctrl->ctrl);
else
nvme_disable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
nvme_cancel_request, &ctrl->ctrl);
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
nvme_rdma_destroy_admin_queue(ctrl);
nvme_rdma_destroy_admin_queue(ctrl, shutdown);
}
static void __nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
static void nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl)
{
nvme_stop_ctrl(&ctrl->ctrl);
nvme_remove_namespaces(&ctrl->ctrl);
if (shutdown)
nvme_rdma_shutdown_ctrl(ctrl);
nvme_rdma_shutdown_ctrl(ctrl, true);
nvme_uninit_ctrl(&ctrl->ctrl);
if (ctrl->ctrl.tagset) {
blk_cleanup_queue(ctrl->ctrl.connect_q);
blk_mq_free_tag_set(&ctrl->tag_set);
nvme_rdma_dev_put(ctrl->device);
}
nvme_put_ctrl(&ctrl->ctrl);
}
@ -1654,7 +1745,8 @@ static void nvme_rdma_del_ctrl_work(struct work_struct *work)
struct nvme_rdma_ctrl *ctrl = container_of(work,
struct nvme_rdma_ctrl, delete_work);
__nvme_rdma_remove_ctrl(ctrl, true);
nvme_stop_ctrl(&ctrl->ctrl);
nvme_rdma_remove_ctrl(ctrl);
}
static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl)
@ -1686,14 +1778,6 @@ static int nvme_rdma_del_ctrl(struct nvme_ctrl *nctrl)
return ret;
}
/*
 * Work handler for a controller's delete_work item: recovers the owning
 * nvme_rdma_ctrl from @work and removes it with shutdown == false.
 */
static void nvme_rdma_remove_ctrl_work(struct work_struct *work)
{
	struct nvme_rdma_ctrl *rdma_ctrl;

	rdma_ctrl = container_of(work, struct nvme_rdma_ctrl, delete_work);
	__nvme_rdma_remove_ctrl(rdma_ctrl, false);
}
static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
{
struct nvme_rdma_ctrl *ctrl =
@ -1702,31 +1786,16 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
bool changed;
nvme_stop_ctrl(&ctrl->ctrl);
nvme_rdma_shutdown_ctrl(ctrl);
nvme_rdma_shutdown_ctrl(ctrl, false);
ret = nvme_rdma_configure_admin_queue(ctrl);
if (ret) {
/* ctrl is already shutdown, just remove the ctrl */
INIT_WORK(&ctrl->delete_work, nvme_rdma_remove_ctrl_work);
goto del_dead_ctrl;
}
ret = nvme_rdma_configure_admin_queue(ctrl, false);
if (ret)
goto out_fail;
if (ctrl->ctrl.queue_count > 1) {
ret = blk_mq_reinit_tagset(&ctrl->tag_set,
nvme_rdma_reinit_request);
ret = nvme_rdma_configure_io_queues(ctrl, false);
if (ret)
goto del_dead_ctrl;
ret = nvme_rdma_init_io_queues(ctrl);
if (ret)
goto del_dead_ctrl;
ret = nvme_rdma_connect_io_queues(ctrl);
if (ret)
goto del_dead_ctrl;
blk_mq_update_nr_hw_queues(&ctrl->tag_set,
ctrl->ctrl.queue_count - 1);
goto out_fail;
}
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
@ -1736,10 +1805,9 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
return;
del_dead_ctrl:
/* Deleting this dead controller... */
out_fail:
dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
WARN_ON(!queue_work(nvme_wq, &ctrl->delete_work));
nvme_rdma_remove_ctrl(ctrl);
}
static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
@ -1755,62 +1823,6 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
.get_address = nvmf_get_address,
};
/*
 * Create and connect the I/O queues of @ctrl.
 *
 * Steps, in order: initialise the I/O queues, take a device reference,
 * allocate the I/O blk-mq tag set, create the connect request queue, and
 * connect every I/O queue.
 *
 * Returns 0 on success, or the error of the first step that failed. On
 * failure the labels at the end release what was acquired, in reverse order.
 */
static int nvme_rdma_create_io_queues(struct nvme_rdma_ctrl *ctrl)
{
int ret;
ret = nvme_rdma_init_io_queues(ctrl);
if (ret)
return ret;
/*
* We need a reference on the device as long as the tag_set is alive,
* as the MRs in the request structures need a valid ib_device.
*/
ret = -EINVAL;
if (!nvme_rdma_dev_get(ctrl->device))
goto out_free_io_queues;
/* Start from a zeroed tag set so fields not assigned below are 0. */
memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
ctrl->tag_set.ops = &nvme_rdma_mq_ops;
ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
ctrl->tag_set.reserved_tags = 1; /* fabric connect */
ctrl->tag_set.numa_node = NUMA_NO_NODE;
ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
/* Per-request payload: the driver request plus an inline scatterlist. */
ctrl->tag_set.cmd_size = sizeof(struct nvme_rdma_request) +
SG_CHUNK_SIZE * sizeof(struct scatterlist);
ctrl->tag_set.driver_data = ctrl;
/* presumably queue_count includes the admin queue, hence the "- 1". Confirm. */
ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1;
ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
ret = blk_mq_alloc_tag_set(&ctrl->tag_set);
if (ret)
goto out_put_dev;
/* Publish the tag set on the generic controller before creating connect_q. */
ctrl->ctrl.tagset = &ctrl->tag_set;
ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set);
if (IS_ERR(ctrl->ctrl.connect_q)) {
ret = PTR_ERR(ctrl->ctrl.connect_q);
goto out_free_tag_set;
}
ret = nvme_rdma_connect_io_queues(ctrl);
if (ret)
goto out_cleanup_connect_q;
return 0;
/* Unwind: each label falls through into the ones below it. */
out_cleanup_connect_q:
blk_cleanup_queue(ctrl->ctrl.connect_q);
out_free_tag_set:
blk_mq_free_tag_set(&ctrl->tag_set);
out_put_dev:
nvme_rdma_dev_put(ctrl->device);
out_free_io_queues:
nvme_rdma_free_io_queues(ctrl);
return ret;
}
static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
struct nvmf_ctrl_options *opts)
{
@ -1868,7 +1880,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
if (!ctrl->queues)
goto out_uninit_ctrl;
ret = nvme_rdma_configure_admin_queue(ctrl);
ret = nvme_rdma_configure_admin_queue(ctrl, true);
if (ret)
goto out_kfree_queues;
@ -1903,7 +1915,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
}
if (opts->nr_io_queues) {
ret = nvme_rdma_create_io_queues(ctrl);
ret = nvme_rdma_configure_io_queues(ctrl, true);
if (ret)
goto out_remove_admin_queue;
}
@ -1925,7 +1937,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
return &ctrl->ctrl;
out_remove_admin_queue:
nvme_rdma_destroy_admin_queue(ctrl);
nvme_rdma_destroy_admin_queue(ctrl, true);
out_kfree_queues:
kfree(ctrl->queues);
out_uninit_ctrl:

View File

@ -100,7 +100,7 @@ static u16 nvmet_get_smart_log(struct nvmet_req *req,
u16 status;
WARN_ON(req == NULL || slog == NULL);
if (req->cmd->get_log_page.nsid == cpu_to_le32(0xFFFFFFFF))
if (req->cmd->get_log_page.nsid == cpu_to_le32(NVME_NSID_ALL))
status = nvmet_get_smart_log_all(req, slog);
else
status = nvmet_get_smart_log_nsid(req, slog);
@ -168,15 +168,6 @@ out:
nvmet_req_complete(req, status);
}
/*
 * copy_and_pad - fill a fixed-width field from a buffer, padding with spaces
 * @dst:     destination field
 * @dst_len: size of @dst in bytes
 * @src:     source bytes
 * @src_len: number of bytes available at @src
 *
 * Writes exactly @dst_len bytes to @dst: the first min(@src_len, @dst_len)
 * bytes of @src, followed by ' ' characters for any space that is left.
 * No NUL terminator is written. When @src_len exceeds @dst_len the copy is
 * silently truncated.
 *
 * The lengths are signed, so guard against negative values: a non-positive
 * @dst_len writes nothing, and a negative @src_len is treated as 0. Without
 * the guards a negative length would be converted to a huge size_t by
 * memcpy()/memset().
 */
static void copy_and_pad(char *dst, int dst_len, const char *src, int src_len)
{
	int len;

	if (dst_len <= 0)
		return;
	if (src_len < 0)
		src_len = 0;

	len = src_len < dst_len ? src_len : dst_len;
	memcpy(dst, src, len);
	if (dst_len > len)
		memset(dst + len, ' ', dst_len - len);
}
static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
@ -196,8 +187,9 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
bin2hex(id->sn, &ctrl->subsys->serial,
min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2));
copy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1);
copy_and_pad(id->fr, sizeof(id->fr), UTS_RELEASE, strlen(UTS_RELEASE));
memcpy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1, ' ');
memcpy_and_pad(id->fr, sizeof(id->fr),
UTS_RELEASE, strlen(UTS_RELEASE), ' ');
id->rab = 6;

View File

@ -444,7 +444,7 @@ static struct config_group *nvmet_ns_make(struct config_group *group,
goto out;
ret = -EINVAL;
if (nsid == 0 || nsid == 0xffffffff)
if (nsid == 0 || nsid == NVME_NSID_ALL)
goto out;
ret = -ENOMEM;

View File

@ -538,37 +538,37 @@ EXPORT_SYMBOL_GPL(nvmet_req_uninit);
/*
 * Return true when the 1-bit EN field, located at bit NVME_CC_EN_SHIFT of
 * @cc, is set.
 */
static inline bool nvmet_cc_en(u32 cc)
{
	return (cc >> NVME_CC_EN_SHIFT) & 0x1;
}
/*
 * Return the 3-bit CSS field of @cc, whose least significant bit is at
 * NVME_CC_CSS_SHIFT.
 */
static inline u8 nvmet_cc_css(u32 cc)
{
	return (cc >> NVME_CC_CSS_SHIFT) & 0x7;
}
/*
 * Return the 4-bit MPS field of @cc, whose least significant bit is at
 * NVME_CC_MPS_SHIFT.
 */
static inline u8 nvmet_cc_mps(u32 cc)
{
	return (cc >> NVME_CC_MPS_SHIFT) & 0xf;
}
/*
 * Return the 3-bit AMS field of @cc, whose least significant bit is at
 * NVME_CC_AMS_SHIFT.
 */
static inline u8 nvmet_cc_ams(u32 cc)
{
	return (cc >> NVME_CC_AMS_SHIFT) & 0x7;
}
/*
 * Return the 2-bit SHN field of @cc, whose least significant bit is at
 * NVME_CC_SHN_SHIFT.
 */
static inline u8 nvmet_cc_shn(u32 cc)
{
	return (cc >> NVME_CC_SHN_SHIFT) & 0x3;
}
/*
 * Return the 4-bit IOSQES field of @cc, whose least significant bit is at
 * NVME_CC_IOSQES_SHIFT.
 */
static inline u8 nvmet_cc_iosqes(u32 cc)
{
	return (cc >> NVME_CC_IOSQES_SHIFT) & 0xf;
}
/*
 * Return the 4-bit IOCQES field of @cc, whose least significant bit is at
 * NVME_CC_IOCQES_SHIFT.
 */
static inline u8 nvmet_cc_iocqes(u32 cc)
{
	return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf;
}
static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
@ -749,6 +749,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
hostnqn, subsysnqn);
req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
up_read(&nvmet_config_sem);
status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR;
goto out_put_subsystem;
}
up_read(&nvmet_config_sem);

View File

@ -58,7 +58,8 @@ struct nvmet_fc_ls_iod {
struct work_struct work;
} __aligned(sizeof(unsigned long long));
#define NVMET_FC_MAX_KB_PER_XFR 256
#define NVMET_FC_MAX_SEQ_LENGTH (256 * 1024)
#define NVMET_FC_MAX_XFR_SGENTS (NVMET_FC_MAX_SEQ_LENGTH / PAGE_SIZE)
enum nvmet_fcp_datadir {
NVMET_FCP_NODATA,
@ -74,9 +75,7 @@ struct nvmet_fc_fcp_iod {
struct nvme_fc_ersp_iu rspiubuf;
dma_addr_t rspdma;
struct scatterlist *data_sg;
struct scatterlist *next_sg;
int data_sg_cnt;
u32 next_sg_offset;
u32 total_length;
u32 offset;
enum nvmet_fcp_datadir io_dir;
@ -112,6 +111,7 @@ struct nvmet_fc_tgtport {
struct ida assoc_cnt;
struct nvmet_port *port;
struct kref ref;
u32 max_sg_cnt;
};
struct nvmet_fc_defer_fcp_req {
@ -994,6 +994,8 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo,
INIT_LIST_HEAD(&newrec->assoc_list);
kref_init(&newrec->ref);
ida_init(&newrec->assoc_cnt);
newrec->max_sg_cnt = min_t(u32, NVMET_FC_MAX_XFR_SGENTS,
template->max_sgl_segments);
ret = nvmet_fc_alloc_ls_iodlist(newrec);
if (ret) {
@ -1866,51 +1868,23 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport,
struct nvmet_fc_fcp_iod *fod, u8 op)
{
struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq;
struct scatterlist *sg, *datasg;
unsigned long flags;
u32 tlen, sg_off;
u32 tlen;
int ret;
fcpreq->op = op;
fcpreq->offset = fod->offset;
fcpreq->timeout = NVME_FC_TGTOP_TIMEOUT_SEC;
tlen = min_t(u32, (NVMET_FC_MAX_KB_PER_XFR * 1024),
tlen = min_t(u32, tgtport->max_sg_cnt * PAGE_SIZE,
(fod->total_length - fod->offset));
tlen = min_t(u32, tlen, NVME_FC_MAX_SEGMENTS * PAGE_SIZE);
tlen = min_t(u32, tlen, fod->tgtport->ops->max_sgl_segments
* PAGE_SIZE);
fcpreq->transfer_length = tlen;
fcpreq->transferred_length = 0;
fcpreq->fcp_error = 0;
fcpreq->rsplen = 0;
fcpreq->sg_cnt = 0;
datasg = fod->next_sg;
sg_off = fod->next_sg_offset;
for (sg = fcpreq->sg ; tlen; sg++) {
*sg = *datasg;
if (sg_off) {
sg->offset += sg_off;
sg->length -= sg_off;
sg->dma_address += sg_off;
sg_off = 0;
}
if (tlen < sg->length) {
sg->length = tlen;
fod->next_sg = datasg;
fod->next_sg_offset += tlen;
} else if (tlen == sg->length) {
fod->next_sg_offset = 0;
fod->next_sg = sg_next(datasg);
} else {
fod->next_sg_offset = 0;
datasg = sg_next(datasg);
}
tlen -= sg->length;
fcpreq->sg_cnt++;
}
fcpreq->sg = &fod->data_sg[fod->offset / PAGE_SIZE];
fcpreq->sg_cnt = DIV_ROUND_UP(tlen, PAGE_SIZE);
/*
* If the last READDATA request: check if LLDD supports
@ -2225,8 +2199,6 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
fod->req.sg = fod->data_sg;
fod->req.sg_cnt = fod->data_sg_cnt;
fod->offset = 0;
fod->next_sg = fod->data_sg;
fod->next_sg_offset = 0;
if (fod->io_dir == NVMET_FCP_WRITE) {
/* pull the data over before invoking nvmet layer */

View File

@ -193,9 +193,6 @@ out_free_options:
#define TGTPORT_OPTS (NVMF_OPT_WWNN | NVMF_OPT_WWPN)
#define ALL_OPTS (NVMF_OPT_WWNN | NVMF_OPT_WWPN | NVMF_OPT_ROLES | \
NVMF_OPT_FCADDR | NVMF_OPT_LPWWNN | NVMF_OPT_LPWWPN)
static DEFINE_SPINLOCK(fcloop_lock);
static LIST_HEAD(fcloop_lports);

View File

@ -375,6 +375,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);
if (error)
goto out_free_sq;
ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set;
ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
if (IS_ERR(ctrl->ctrl.admin_q)) {

View File

@ -624,7 +624,7 @@ struct nvmefc_tgt_fcp_req {
u32 timeout;
u32 transfer_length;
struct fc_ba_rjt ba_rjt;
struct scatterlist sg[NVME_FC_MAX_SEGMENTS];
struct scatterlist *sg;
int sg_cnt;
void *rspaddr;
dma_addr_t rspdma;

View File

@ -32,6 +32,8 @@
#define NVME_RDMA_IP_PORT 4420
#define NVME_NSID_ALL 0xffffffff
enum nvme_subsys_type {
NVME_NQN_DISC = 1, /* Discovery type target subsystem */
NVME_NQN_NVME = 2, /* NVME type target subsystem */
@ -133,19 +135,26 @@ enum {
enum {
NVME_CC_ENABLE = 1 << 0,
NVME_CC_CSS_NVM = 0 << 4,
NVME_CC_EN_SHIFT = 0,
NVME_CC_CSS_SHIFT = 4,
NVME_CC_MPS_SHIFT = 7,
NVME_CC_ARB_RR = 0 << 11,
NVME_CC_ARB_WRRU = 1 << 11,
NVME_CC_ARB_VS = 7 << 11,
NVME_CC_SHN_NONE = 0 << 14,
NVME_CC_SHN_NORMAL = 1 << 14,
NVME_CC_SHN_ABRUPT = 2 << 14,
NVME_CC_SHN_MASK = 3 << 14,
NVME_CC_IOSQES = NVME_NVM_IOSQES << 16,
NVME_CC_IOCQES = NVME_NVM_IOCQES << 20,
NVME_CC_AMS_SHIFT = 11,
NVME_CC_SHN_SHIFT = 14,
NVME_CC_IOSQES_SHIFT = 16,
NVME_CC_IOCQES_SHIFT = 20,
NVME_CC_AMS_RR = 0 << NVME_CC_AMS_SHIFT,
NVME_CC_AMS_WRRU = 1 << NVME_CC_AMS_SHIFT,
NVME_CC_AMS_VS = 7 << NVME_CC_AMS_SHIFT,
NVME_CC_SHN_NONE = 0 << NVME_CC_SHN_SHIFT,
NVME_CC_SHN_NORMAL = 1 << NVME_CC_SHN_SHIFT,
NVME_CC_SHN_ABRUPT = 2 << NVME_CC_SHN_SHIFT,
NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT,
NVME_CC_IOSQES = NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT,
NVME_CC_IOCQES = NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT,
NVME_CSTS_RDY = 1 << 0,
NVME_CSTS_CFS = 1 << 1,
NVME_CSTS_NSSRO = 1 << 4,
NVME_CSTS_PP = 1 << 5,
NVME_CSTS_SHST_NORMAL = 0 << 2,
NVME_CSTS_SHST_OCCUR = 1 << 2,
NVME_CSTS_SHST_CMPLT = 2 << 2,
@ -251,6 +260,7 @@ enum {
NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1,
NVME_CTRL_ONCS_DSM = 1 << 2,
NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3,
NVME_CTRL_ONCS_TIMESTAMP = 1 << 6,
NVME_CTRL_VWC_PRESENT = 1 << 0,
NVME_CTRL_OACS_SEC_SUPP = 1 << 0,
NVME_CTRL_OACS_DIRECTIVES = 1 << 5,
@ -376,6 +386,13 @@ struct nvme_smart_log {
__u8 rsvd216[296];
};
/*
 * Layout of the firmware slot information log.
 * Field sizes add up to 1 + 7 + 7 * 8 + 448 = 512 bytes (taking __le64 as
 * 8 bytes wide).
 */
struct nvme_fw_slot_info_log {
__u8 afi; /* byte 0; presumably "Active Firmware Info", confirm against the NVMe spec */
__u8 rsvd1[7]; /* bytes 1-7, reserved; places frs[] at byte offset 8 */
__le64 frs[7]; /* bytes 8-63, seven little-endian 64-bit entries; presumably one firmware revision per slot, confirm */
__u8 rsvd64[448]; /* bytes 64-511, reserved */
};
enum {
NVME_SMART_CRIT_SPARE = 1 << 0,
NVME_SMART_CRIT_TEMPERATURE = 1 << 1,
@ -386,6 +403,7 @@ enum {
enum {
NVME_AER_NOTICE_NS_CHANGED = 0x0002,
NVME_AER_NOTICE_FW_ACT_STARTING = 0x0102,
};
struct nvme_lba_range_type {
@ -677,6 +695,7 @@ enum {
NVME_FEAT_ASYNC_EVENT = 0x0b,
NVME_FEAT_AUTO_PST = 0x0c,
NVME_FEAT_HOST_MEM_BUF = 0x0d,
NVME_FEAT_TIMESTAMP = 0x0e,
NVME_FEAT_KATO = 0x0f,
NVME_FEAT_SW_PROGRESS = 0x80,
NVME_FEAT_HOST_ID = 0x81,

View File

@ -200,6 +200,7 @@ static inline const char *kbasename(const char *path)
void fortify_panic(const char *name) __noreturn __cold;
void __read_overflow(void) __compiletime_error("detected read beyond size of object passed as 1st parameter");
void __read_overflow2(void) __compiletime_error("detected read beyond size of object passed as 2nd parameter");
void __read_overflow3(void) __compiletime_error("detected read beyond size of object passed as 3rd parameter");
void __write_overflow(void) __compiletime_error("detected write beyond size of object passed as 1st parameter");
#if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE)
@ -395,4 +396,33 @@ __FORTIFY_INLINE char *strcpy(char *p, const char *q)
#endif
/**
 * memcpy_and_pad - Copy one buffer to another with padding
 * @dest: Where to copy to
 * @dest_len: The destination buffer size
 * @src: Where to copy from
 * @count: The number of bytes available to copy from @src
 * @pad: Character to use for padding if space is left in destination.
 *
 * Exactly @dest_len bytes of @dest are written: the first
 * min(@count, @dest_len) bytes of @src, followed by @pad bytes when
 * @count is smaller than @dest_len. When @count is larger than @dest_len
 * the copy is truncated to @dest_len bytes; no terminator is appended.
 */
__FORTIFY_INLINE void memcpy_and_pad(void *dest, size_t dest_len,
				     const void *src, size_t count, int pad)
{
	size_t dest_size = __builtin_object_size(dest, 0);
	size_t src_size = __builtin_object_size(src, 0);

	/*
	 * The write always covers dest_len bytes regardless of count, so
	 * dest_len alone decides whether the destination overflows.
	 */
	if (__builtin_constant_p(dest_len) && dest_size < dest_len)
		__write_overflow();

	/* The read covers min(dest_len, count) bytes of src. */
	if (__builtin_constant_p(dest_len) && __builtin_constant_p(count) &&
	    src_size < dest_len && src_size < count)
		__read_overflow3();

	/* Run-time counterpart of the two compile-time checks above. */
	if (dest_size < dest_len ||
	    (src_size < dest_len && src_size < count))
		fortify_panic(__func__);

	if (dest_len > count) {
		memcpy(dest, src, count);
		/* Cast so the offset is computed in bytes without relying on void * arithmetic. */
		memset((char *)dest + count, pad, dest_len - count);
	} else {
		memcpy(dest, src, dest_len);
	}
}
#endif /* _LINUX_STRING_H_ */