IB/srpt: Change default behavior from using SRQ to using RC

Although the RC mode needs more resources, that mode has three
advantages over the SRQ mode:
- It works with all RDMA adapters, even those that do not support
  SRQ.
- Posting WRs and polling WCs does not trigger lock contention
  because only one thread at a time accesses a WR or WC queue in
  non-SRQ mode.
- The end-to-end flow control mechanism is used.

From the IB spec:

    C9-150.2.1: For QPs that are not associated with an SRQ, each HCA
    receive queue shall generate end-to-end flow control credits. If
    a QP is associated with an SRQ, the HCA receive queue shall not
    generate end-to-end flow control credits.

Add new configfs attributes that allow configuring which mode to use
(/sys/kernel/config/target/srpt/$GUID/$GUID/attrib/use_srq). Note:
only the attribute for port 1 is relevant on multi-port adapters.

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
This commit is contained in:
Bart Van Assche 2017-10-11 10:27:25 -07:00 committed by Doug Ledford
parent 74333f1223
commit dea262094c
2 changed files with 123 additions and 35 deletions

View File

@ -295,6 +295,7 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot,
{ {
struct srpt_device *sdev = sport->sdev; struct srpt_device *sdev = sport->sdev;
struct ib_dm_ioc_profile *iocp; struct ib_dm_ioc_profile *iocp;
int send_queue_depth;
iocp = (struct ib_dm_ioc_profile *)mad->data; iocp = (struct ib_dm_ioc_profile *)mad->data;
@ -310,6 +311,12 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot,
return; return;
} }
if (sdev->use_srq)
send_queue_depth = sdev->srq_size;
else
send_queue_depth = min(SRPT_RQ_SIZE,
sdev->device->attrs.max_qp_wr);
memset(iocp, 0, sizeof(*iocp)); memset(iocp, 0, sizeof(*iocp));
strcpy(iocp->id_string, SRPT_ID_STRING); strcpy(iocp->id_string, SRPT_ID_STRING);
iocp->guid = cpu_to_be64(srpt_service_guid); iocp->guid = cpu_to_be64(srpt_service_guid);
@ -322,7 +329,7 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot,
iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS); iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS);
iocp->protocol = cpu_to_be16(SRP_PROTOCOL); iocp->protocol = cpu_to_be16(SRP_PROTOCOL);
iocp->protocol_version = cpu_to_be16(SRP_PROTOCOL_VERSION); iocp->protocol_version = cpu_to_be16(SRP_PROTOCOL_VERSION);
iocp->send_queue_depth = cpu_to_be16(sdev->srq_size); iocp->send_queue_depth = cpu_to_be16(send_queue_depth);
iocp->rdma_read_depth = 4; iocp->rdma_read_depth = 4;
iocp->send_size = cpu_to_be32(srp_max_req_size); iocp->send_size = cpu_to_be32(srp_max_req_size);
iocp->rdma_size = cpu_to_be32(min(sport->port_attrib.srp_max_rdma_size, iocp->rdma_size = cpu_to_be32(min(sport->port_attrib.srp_max_rdma_size,
@ -686,6 +693,9 @@ static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring,
{ {
int i; int i;
if (!ioctx_ring)
return;
for (i = 0; i < ring_size; ++i) for (i = 0; i < ring_size; ++i)
srpt_free_ioctx(sdev, ioctx_ring[i], dma_size, dir); srpt_free_ioctx(sdev, ioctx_ring[i], dma_size, dir);
kfree(ioctx_ring); kfree(ioctx_ring);
@ -757,7 +767,7 @@ static bool srpt_test_and_set_cmd_state(struct srpt_send_ioctx *ioctx,
/** /**
* srpt_post_recv() - Post an IB receive request. * srpt_post_recv() - Post an IB receive request.
*/ */
static int srpt_post_recv(struct srpt_device *sdev, static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch,
struct srpt_recv_ioctx *ioctx) struct srpt_recv_ioctx *ioctx)
{ {
struct ib_sge list; struct ib_sge list;
@ -774,7 +784,10 @@ static int srpt_post_recv(struct srpt_device *sdev,
wr.sg_list = &list; wr.sg_list = &list;
wr.num_sge = 1; wr.num_sge = 1;
if (sdev->use_srq)
return ib_post_srq_recv(sdev->srq, &wr, &bad_wr); return ib_post_srq_recv(sdev->srq, &wr, &bad_wr);
else
return ib_post_recv(ch->qp, &wr, &bad_wr);
} }
/** /**
@ -1517,7 +1530,7 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
break; break;
} }
srpt_post_recv(ch->sport->sdev, recv_ioctx); srpt_post_recv(ch->sport->sdev, ch, recv_ioctx);
return; return;
out_wait: out_wait:
@ -1616,7 +1629,7 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
struct srpt_device *sdev = sport->sdev; struct srpt_device *sdev = sport->sdev;
const struct ib_device_attr *attrs = &sdev->device->attrs; const struct ib_device_attr *attrs = &sdev->device->attrs;
u32 srp_sq_size = sport->port_attrib.srp_sq_size; u32 srp_sq_size = sport->port_attrib.srp_sq_size;
int ret; int i, ret;
WARN_ON(ch->rq_size < 1); WARN_ON(ch->rq_size < 1);
@ -1640,7 +1653,6 @@ retry:
= (void(*)(struct ib_event *, void*))srpt_qp_event; = (void(*)(struct ib_event *, void*))srpt_qp_event;
qp_init->send_cq = ch->cq; qp_init->send_cq = ch->cq;
qp_init->recv_cq = ch->cq; qp_init->recv_cq = ch->cq;
qp_init->srq = sdev->srq;
qp_init->sq_sig_type = IB_SIGNAL_REQ_WR; qp_init->sq_sig_type = IB_SIGNAL_REQ_WR;
qp_init->qp_type = IB_QPT_RC; qp_init->qp_type = IB_QPT_RC;
/* /*
@ -1654,6 +1666,12 @@ retry:
qp_init->cap.max_rdma_ctxs = srp_sq_size / 2; qp_init->cap.max_rdma_ctxs = srp_sq_size / 2;
qp_init->cap.max_send_sge = min(attrs->max_sge, SRPT_MAX_SG_PER_WQE); qp_init->cap.max_send_sge = min(attrs->max_sge, SRPT_MAX_SG_PER_WQE);
qp_init->port_num = ch->sport->port; qp_init->port_num = ch->sport->port;
if (sdev->use_srq) {
qp_init->srq = sdev->srq;
} else {
qp_init->cap.max_recv_wr = ch->rq_size;
qp_init->cap.max_recv_sge = qp_init->cap.max_send_sge;
}
ch->qp = ib_create_qp(sdev->pd, qp_init); ch->qp = ib_create_qp(sdev->pd, qp_init);
if (IS_ERR(ch->qp)) { if (IS_ERR(ch->qp)) {
@ -1669,6 +1687,10 @@ retry:
goto err_destroy_cq; goto err_destroy_cq;
} }
if (!sdev->use_srq)
for (i = 0; i < ch->rq_size; i++)
srpt_post_recv(sdev, ch, ch->ioctx_recv_ring[i]);
atomic_set(&ch->sq_wr_avail, qp_init->cap.max_send_wr); atomic_set(&ch->sq_wr_avail, qp_init->cap.max_send_wr);
pr_debug("%s: max_cqe= %d max_sge= %d sq_size = %d cm_id= %p\n", pr_debug("%s: max_cqe= %d max_sge= %d sq_size = %d cm_id= %p\n",
@ -1818,6 +1840,10 @@ static void srpt_release_channel_work(struct work_struct *w)
ch->sport->sdev, ch->rq_size, ch->sport->sdev, ch->rq_size,
ch->rsp_size, DMA_TO_DEVICE); ch->rsp_size, DMA_TO_DEVICE);
srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_recv_ring,
sdev, ch->rq_size,
srp_max_req_size, DMA_FROM_DEVICE);
mutex_lock(&sdev->mutex); mutex_lock(&sdev->mutex);
list_del_init(&ch->list); list_del_init(&ch->list);
if (ch->release_done) if (ch->release_done)
@ -1975,6 +2001,19 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
ch->ioctx_ring[i]->ch = ch; ch->ioctx_ring[i]->ch = ch;
list_add_tail(&ch->ioctx_ring[i]->free_list, &ch->free_list); list_add_tail(&ch->ioctx_ring[i]->free_list, &ch->free_list);
} }
if (!sdev->use_srq) {
ch->ioctx_recv_ring = (struct srpt_recv_ioctx **)
srpt_alloc_ioctx_ring(ch->sport->sdev, ch->rq_size,
sizeof(*ch->ioctx_recv_ring[0]),
srp_max_req_size,
DMA_FROM_DEVICE);
if (!ch->ioctx_recv_ring) {
pr_err("rejected SRP_LOGIN_REQ because creating a new QP RQ ring failed.\n");
rej->reason =
cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
goto free_ring;
}
}
ret = srpt_create_ch_ib(ch); ret = srpt_create_ch_ib(ch);
if (ret) { if (ret) {
@ -1982,7 +2021,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
pr_err("rejected SRP_LOGIN_REQ because creating" pr_err("rejected SRP_LOGIN_REQ because creating"
" a new RDMA channel failed.\n"); " a new RDMA channel failed.\n");
goto free_ring; goto free_recv_ring;
} }
ret = srpt_ch_qp_rtr(ch, ch->qp); ret = srpt_ch_qp_rtr(ch, ch->qp);
@ -2073,6 +2112,11 @@ release_channel:
destroy_ib: destroy_ib:
srpt_destroy_ch_ib(ch); srpt_destroy_ch_ib(ch);
free_recv_ring:
srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_recv_ring,
ch->sport->sdev, ch->rq_size,
srp_max_req_size, DMA_FROM_DEVICE);
free_ring: free_ring:
srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring, srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring,
ch->sport->sdev, ch->rq_size, ch->sport->sdev, ch->rq_size,
@ -2502,20 +2546,38 @@ static void srpt_add_one(struct ib_device *device)
srq_attr.attr.srq_limit = 0; srq_attr.attr.srq_limit = 0;
srq_attr.srq_type = IB_SRQT_BASIC; srq_attr.srq_type = IB_SRQT_BASIC;
sdev->srq = ib_create_srq(sdev->pd, &srq_attr); sdev->srq = sdev->port[0].port_attrib.use_srq ?
if (IS_ERR(sdev->srq)) ib_create_srq(sdev->pd, &srq_attr) : ERR_PTR(-ENOTSUPP);
if (IS_ERR(sdev->srq)) {
pr_debug("ib_create_srq() failed: %ld\n", PTR_ERR(sdev->srq));
/* SRQ not supported. */
sdev->use_srq = false;
} else {
pr_debug("create SRQ #wr= %d max_allow=%d dev= %s\n",
sdev->srq_size, sdev->device->attrs.max_srq_wr,
device->name);
sdev->use_srq = true;
sdev->ioctx_ring = (struct srpt_recv_ioctx **)
srpt_alloc_ioctx_ring(sdev, sdev->srq_size,
sizeof(*sdev->ioctx_ring[0]),
srp_max_req_size,
DMA_FROM_DEVICE);
if (!sdev->ioctx_ring)
goto err_pd; goto err_pd;
pr_debug("%s: create SRQ #wr= %d max_allow=%d dev= %s\n", for (i = 0; i < sdev->srq_size; ++i)
__func__, sdev->srq_size, sdev->device->attrs.max_srq_wr, srpt_post_recv(sdev, NULL, sdev->ioctx_ring[i]);
device->name); }
if (!srpt_service_guid) if (!srpt_service_guid)
srpt_service_guid = be64_to_cpu(device->node_guid); srpt_service_guid = be64_to_cpu(device->node_guid);
sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev); sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev);
if (IS_ERR(sdev->cm_id)) if (IS_ERR(sdev->cm_id))
goto err_srq; goto err_ring;
/* print out target login information */ /* print out target login information */
pr_debug("Target login info: id_ext=%016llx,ioc_guid=%016llx," pr_debug("Target login info: id_ext=%016llx,ioc_guid=%016llx,"
@ -2535,16 +2597,6 @@ static void srpt_add_one(struct ib_device *device)
srpt_event_handler); srpt_event_handler);
ib_register_event_handler(&sdev->event_handler); ib_register_event_handler(&sdev->event_handler);
sdev->ioctx_ring = (struct srpt_recv_ioctx **)
srpt_alloc_ioctx_ring(sdev, sdev->srq_size,
sizeof(*sdev->ioctx_ring[0]),
srp_max_req_size, DMA_FROM_DEVICE);
if (!sdev->ioctx_ring)
goto err_event;
for (i = 0; i < sdev->srq_size; ++i)
srpt_post_recv(sdev, sdev->ioctx_ring[i]);
WARN_ON(sdev->device->phys_port_cnt > ARRAY_SIZE(sdev->port)); WARN_ON(sdev->device->phys_port_cnt > ARRAY_SIZE(sdev->port));
for (i = 1; i <= sdev->device->phys_port_cnt; i++) { for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
@ -2554,12 +2606,13 @@ static void srpt_add_one(struct ib_device *device)
sport->port_attrib.srp_max_rdma_size = DEFAULT_MAX_RDMA_SIZE; sport->port_attrib.srp_max_rdma_size = DEFAULT_MAX_RDMA_SIZE;
sport->port_attrib.srp_max_rsp_size = DEFAULT_MAX_RSP_SIZE; sport->port_attrib.srp_max_rsp_size = DEFAULT_MAX_RSP_SIZE;
sport->port_attrib.srp_sq_size = DEF_SRPT_SQ_SIZE; sport->port_attrib.srp_sq_size = DEF_SRPT_SQ_SIZE;
sport->port_attrib.use_srq = false;
INIT_WORK(&sport->work, srpt_refresh_port_work); INIT_WORK(&sport->work, srpt_refresh_port_work);
if (srpt_refresh_port(sport)) { if (srpt_refresh_port(sport)) {
pr_err("MAD registration failed for %s-%d.\n", pr_err("MAD registration failed for %s-%d.\n",
sdev->device->name, i); sdev->device->name, i);
goto err_ring; goto err_event;
} }
} }
@ -2572,16 +2625,16 @@ out:
pr_debug("added %s.\n", device->name); pr_debug("added %s.\n", device->name);
return; return;
err_ring:
srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev,
sdev->srq_size, srp_max_req_size,
DMA_FROM_DEVICE);
err_event: err_event:
ib_unregister_event_handler(&sdev->event_handler); ib_unregister_event_handler(&sdev->event_handler);
err_cm: err_cm:
ib_destroy_cm_id(sdev->cm_id); ib_destroy_cm_id(sdev->cm_id);
err_srq: err_ring:
if (sdev->use_srq)
ib_destroy_srq(sdev->srq); ib_destroy_srq(sdev->srq);
srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev,
sdev->srq_size, srp_max_req_size,
DMA_FROM_DEVICE);
err_pd: err_pd:
ib_dealloc_pd(sdev->pd); ib_dealloc_pd(sdev->pd);
free_dev: free_dev:
@ -2625,12 +2678,12 @@ static void srpt_remove_one(struct ib_device *device, void *client_data)
spin_unlock(&srpt_dev_lock); spin_unlock(&srpt_dev_lock);
srpt_release_sdev(sdev); srpt_release_sdev(sdev);
if (sdev->use_srq)
ib_destroy_srq(sdev->srq); ib_destroy_srq(sdev->srq);
ib_dealloc_pd(sdev->pd);
srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev, srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev,
sdev->srq_size, srp_max_req_size, DMA_FROM_DEVICE); sdev->srq_size, srp_max_req_size, DMA_FROM_DEVICE);
sdev->ioctx_ring = NULL; ib_dealloc_pd(sdev->pd);
kfree(sdev); kfree(sdev);
} }
@ -2928,14 +2981,43 @@ static ssize_t srpt_tpg_attrib_srp_sq_size_store(struct config_item *item,
return count; return count;
} }
static ssize_t srpt_tpg_attrib_use_srq_show(struct config_item *item,
char *page)
{
struct se_portal_group *se_tpg = attrib_to_tpg(item);
struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
return sprintf(page, "%d\n", sport->port_attrib.use_srq);
}
static ssize_t srpt_tpg_attrib_use_srq_store(struct config_item *item,
const char *page, size_t count)
{
struct se_portal_group *se_tpg = attrib_to_tpg(item);
struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
unsigned long val;
int ret;
ret = kstrtoul(page, 0, &val);
if (ret < 0)
return ret;
if (val != !!val)
return -EINVAL;
sport->port_attrib.use_srq = val;
return count;
}
CONFIGFS_ATTR(srpt_tpg_attrib_, srp_max_rdma_size); CONFIGFS_ATTR(srpt_tpg_attrib_, srp_max_rdma_size);
CONFIGFS_ATTR(srpt_tpg_attrib_, srp_max_rsp_size); CONFIGFS_ATTR(srpt_tpg_attrib_, srp_max_rsp_size);
CONFIGFS_ATTR(srpt_tpg_attrib_, srp_sq_size); CONFIGFS_ATTR(srpt_tpg_attrib_, srp_sq_size);
CONFIGFS_ATTR(srpt_tpg_attrib_, use_srq);
static struct configfs_attribute *srpt_tpg_attrib_attrs[] = { static struct configfs_attribute *srpt_tpg_attrib_attrs[] = {
&srpt_tpg_attrib_attr_srp_max_rdma_size, &srpt_tpg_attrib_attr_srp_max_rdma_size,
&srpt_tpg_attrib_attr_srp_max_rsp_size, &srpt_tpg_attrib_attr_srp_max_rsp_size,
&srpt_tpg_attrib_attr_srp_sq_size, &srpt_tpg_attrib_attr_srp_sq_size,
&srpt_tpg_attrib_attr_use_srq,
NULL, NULL,
}; };

View File

@ -252,6 +252,7 @@ enum rdma_ch_state {
* @free_list: Head of list with free send I/O contexts. * @free_list: Head of list with free send I/O contexts.
* @state: channel state. See also enum rdma_ch_state. * @state: channel state. See also enum rdma_ch_state.
* @ioctx_ring: Send ring. * @ioctx_ring: Send ring.
* @ioctx_recv_ring: Receive I/O context ring.
* @list: Node for insertion in the srpt_device.rch_list list. * @list: Node for insertion in the srpt_device.rch_list list.
* @cmd_wait_list: List of SCSI commands that arrived before the RTU event. This * @cmd_wait_list: List of SCSI commands that arrived before the RTU event. This
* list contains struct srpt_ioctx elements and is protected * list contains struct srpt_ioctx elements and is protected
@ -281,6 +282,7 @@ struct srpt_rdma_ch {
struct list_head free_list; struct list_head free_list;
enum rdma_ch_state state; enum rdma_ch_state state;
struct srpt_send_ioctx **ioctx_ring; struct srpt_send_ioctx **ioctx_ring;
struct srpt_recv_ioctx **ioctx_recv_ring;
struct list_head list; struct list_head list;
struct list_head cmd_wait_list; struct list_head cmd_wait_list;
struct se_session *sess; struct se_session *sess;
@ -295,11 +297,13 @@ struct srpt_rdma_ch {
* @srp_max_rdma_size: Maximum size of SRP RDMA transfers for new connections. * @srp_max_rdma_size: Maximum size of SRP RDMA transfers for new connections.
* @srp_max_rsp_size: Maximum size of SRP response messages in bytes. * @srp_max_rsp_size: Maximum size of SRP response messages in bytes.
* @srp_sq_size: Shared receive queue (SRQ) size. * @srp_sq_size: Shared receive queue (SRQ) size.
* @use_srq: Whether or not to use SRQ.
*/ */
struct srpt_port_attrib { struct srpt_port_attrib {
u32 srp_max_rdma_size; u32 srp_max_rdma_size;
u32 srp_max_rsp_size; u32 srp_max_rsp_size;
u32 srp_sq_size; u32 srp_sq_size;
bool use_srq;
}; };
/** /**
@ -347,6 +351,7 @@ struct srpt_port {
* @srq: Per-HCA SRQ (shared receive queue). * @srq: Per-HCA SRQ (shared receive queue).
* @cm_id: Connection identifier. * @cm_id: Connection identifier.
* @srq_size: SRQ size. * @srq_size: SRQ size.
* @use_srq: Whether or not to use SRQ.
* @ioctx_ring: Per-HCA SRQ. * @ioctx_ring: Per-HCA SRQ.
* @rch_list: Per-device channel list -- see also srpt_rdma_ch.list. * @rch_list: Per-device channel list -- see also srpt_rdma_ch.list.
* @ch_releaseQ: Enables waiting for removal from rch_list. * @ch_releaseQ: Enables waiting for removal from rch_list.
@ -362,6 +367,7 @@ struct srpt_device {
struct ib_srq *srq; struct ib_srq *srq;
struct ib_cm_id *cm_id; struct ib_cm_id *cm_id;
int srq_size; int srq_size;
bool use_srq;
struct srpt_recv_ioctx **ioctx_ring; struct srpt_recv_ioctx **ioctx_ring;
struct list_head rch_list; struct list_head rch_list;
wait_queue_head_t ch_releaseQ; wait_queue_head_t ch_releaseQ;