net/rds: Use DMA memory pool allocation for rds_header

Currently, RDS calls ib_dma_alloc_coherent() to allocate a large piece
of contiguous DMA coherent memory to store struct rds_header for
sending/receiving packets.  The memory allocated is then partitioned
into struct rds_header.  This is not necessary and can be costly at
times when memory is fragmented.  Instead, RDS should use the DMA
memory pool interface to handle this.  The DMA addresses of the pre-
allocated headers are stored in an array.  At send/receive ring
initialization and refill time, this array is dereferenced to get
the DMA addresses.  This array is not accessed during send/receive
packet processing.

Suggested-by: Håkon Bugge <haakon.bugge@oracle.com>
Signed-off-by: Ka-Cheong Poon <ka-cheong.poon@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Ka-Cheong Poon 2019-10-02 21:11:08 -07:00 committed by David S. Miller
parent df1025fc27
commit 9b17f5884b
5 changed files with 151 additions and 59 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@ -107,6 +107,8 @@ static void rds_ib_dev_free(struct work_struct *work)
rds_ib_destroy_mr_pool(rds_ibdev->mr_1m_pool);
if (rds_ibdev->pd)
ib_dealloc_pd(rds_ibdev->pd);
if (rds_ibdev->rid_hdrs_pool)
dma_pool_destroy(rds_ibdev->rid_hdrs_pool);
list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) {
list_del(&i_ipaddr->list);
@ -179,6 +181,12 @@ static void rds_ib_add_one(struct ib_device *device)
rds_ibdev->pd = NULL;
goto put_dev;
}
rds_ibdev->rid_hdrs_pool = dma_pool_create(device->name,
device->dma_device,
sizeof(struct rds_header),
L1_CACHE_BYTES, 0);
if (!rds_ibdev->rid_hdrs_pool)
goto put_dev;
rds_ibdev->mr_1m_pool =
rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_1M_POOL);

View File

@ -165,8 +165,8 @@ struct rds_ib_connection {
/* tx */
struct rds_ib_work_ring i_send_ring;
struct rm_data_op *i_data_op;
struct rds_header *i_send_hdrs;
dma_addr_t i_send_hdrs_dma;
struct rds_header **i_send_hdrs;
dma_addr_t *i_send_hdrs_dma;
struct rds_ib_send_work *i_sends;
atomic_t i_signaled_sends;
@ -175,8 +175,8 @@ struct rds_ib_connection {
struct rds_ib_work_ring i_recv_ring;
struct rds_ib_incoming *i_ibinc;
u32 i_recv_data_rem;
struct rds_header *i_recv_hdrs;
dma_addr_t i_recv_hdrs_dma;
struct rds_header **i_recv_hdrs;
dma_addr_t *i_recv_hdrs_dma;
struct rds_ib_recv_work *i_recvs;
u64 i_ack_recv; /* last ACK received */
struct rds_ib_refill_cache i_cache_incs;
@ -246,6 +246,7 @@ struct rds_ib_device {
struct list_head conn_list;
struct ib_device *dev;
struct ib_pd *pd;
struct dma_pool *rid_hdrs_pool; /* RDS headers DMA pool */
bool use_fastreg;
unsigned int max_mrs;
@ -381,7 +382,11 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6);
void rds_ib_cm_connect_complete(struct rds_connection *conn,
struct rdma_cm_event *event);
struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev,
struct dma_pool *pool,
dma_addr_t **dma_addrs, u32 num_hdrs);
void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs,
dma_addr_t *dma_addrs, u32 num_hdrs);
#define rds_ib_conn_error(conn, fmt...) \
__rds_ib_conn_error(conn, KERN_WARNING "RDS/IB: " fmt)

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@ -439,6 +439,68 @@ static inline void ibdev_put_vector(struct rds_ib_device *rds_ibdev, int index)
rds_ibdev->vector_load[index]--;
}
/* Set up the struct rds_header buffers used for sending/receiving packets.
 * Each header is taken individually from a DMA pool.  The pointers to the
 * headers and the matching DMA addresses are kept in two parallel arrays,
 * each holding @num_hdrs entries.
 *
 * @ibdev: the IB device; ibdev_to_node() of it is passed to kvmalloc_node()
 *         when allocating the two arrays
 * @pool: the DMA memory pool the headers are allocated from
 * @dma_addrs: on success, set to the array of DMA addresses; not written
 *             on failure
 * @num_hdrs: number of headers to allocate
 *
 * Returns the array of header pointers.  On any allocation failure,
 * everything obtained so far is released and NULL is returned.  A
 * successful result is released with rds_dma_hdrs_free().
 */
struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev,
				       struct dma_pool *pool,
				       dma_addr_t **dma_addrs, u32 num_hdrs)
{
	struct rds_header **hdr_ptrs;
	dma_addr_t *daddrs;
	u32 n;

	hdr_ptrs = kvmalloc_node(sizeof(*hdr_ptrs) * num_hdrs, GFP_KERNEL,
				 ibdev_to_node(ibdev));
	if (!hdr_ptrs)
		goto fail;

	daddrs = kvmalloc_node(sizeof(*daddrs) * num_hdrs, GFP_KERNEL,
			       ibdev_to_node(ibdev));
	if (!daddrs)
		goto free_ptrs;

	for (n = 0; n < num_hdrs; n++) {
		hdr_ptrs[n] = dma_pool_zalloc(pool, GFP_KERNEL, &daddrs[n]);
		if (hdr_ptrs[n])
			continue;

		/* Only the first n entries are valid; the helper returns
		 * those to the pool and frees both arrays as well.
		 */
		rds_dma_hdrs_free(pool, hdr_ptrs, daddrs, n);
		return NULL;
	}

	*dma_addrs = daddrs;
	return hdr_ptrs;

free_ptrs:
	kvfree(hdr_ptrs);
fail:
	return NULL;
}
/* Release the struct rds_header buffers and the two arrays describing them.
 *
 * @pool: the DMA memory pool the headers were allocated from
 * @hdrs: pointer to the array storing DMA memory pointers
 * @dma_addrs: pointer to the array storing DMA addresses
 * @num_hdrs: number of headers to free; only the first @num_hdrs entries
 *            of @hdrs and @dma_addrs are handed back to @pool
 *
 * Both arrays themselves are always freed with kvfree(), regardless of
 * @num_hdrs.
 */
void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs,
		       dma_addr_t *dma_addrs, u32 num_hdrs)
{
	u32 idx = 0;

	while (idx < num_hdrs) {
		dma_pool_free(pool, hdrs[idx], dma_addrs[idx]);
		idx++;
	}

	kvfree(hdrs);
	kvfree(dma_addrs);
}
/*
* This needs to be very careful to not leave IS_ERR pointers around for
* cleanup to trip over.
@ -451,6 +513,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
struct ib_cq_init_attr cq_attr = {};
struct rds_ib_device *rds_ibdev;
int ret, fr_queue_space;
struct dma_pool *pool;
/*
* It's normal to see a null device if an incoming connection races
@ -541,31 +604,28 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
goto recv_cq_out;
}
ic->i_send_hdrs = ib_dma_alloc_coherent(dev,
ic->i_send_ring.w_nr *
sizeof(struct rds_header),
&ic->i_send_hdrs_dma, GFP_KERNEL);
pool = rds_ibdev->rid_hdrs_pool;
ic->i_send_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_send_hdrs_dma,
ic->i_send_ring.w_nr);
if (!ic->i_send_hdrs) {
ret = -ENOMEM;
rdsdebug("ib_dma_alloc_coherent send failed\n");
rdsdebug("DMA send hdrs alloc failed\n");
goto qp_out;
}
ic->i_recv_hdrs = ib_dma_alloc_coherent(dev,
ic->i_recv_ring.w_nr *
sizeof(struct rds_header),
&ic->i_recv_hdrs_dma, GFP_KERNEL);
ic->i_recv_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_recv_hdrs_dma,
ic->i_recv_ring.w_nr);
if (!ic->i_recv_hdrs) {
ret = -ENOMEM;
rdsdebug("ib_dma_alloc_coherent recv failed\n");
rdsdebug("DMA recv hdrs alloc failed\n");
goto send_hdrs_dma_out;
}
ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
&ic->i_ack_dma, GFP_KERNEL);
ic->i_ack = dma_pool_zalloc(pool, GFP_KERNEL,
&ic->i_ack_dma);
if (!ic->i_ack) {
ret = -ENOMEM;
rdsdebug("ib_dma_alloc_coherent ack failed\n");
rdsdebug("DMA ack header alloc failed\n");
goto recv_hdrs_dma_out;
}
@ -596,17 +656,23 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
sends_out:
vfree(ic->i_sends);
ack_dma_out:
ib_dma_free_coherent(dev, sizeof(struct rds_header),
ic->i_ack, ic->i_ack_dma);
dma_pool_free(pool, ic->i_ack, ic->i_ack_dma);
ic->i_ack = NULL;
recv_hdrs_dma_out:
ib_dma_free_coherent(dev, ic->i_recv_ring.w_nr *
sizeof(struct rds_header),
ic->i_recv_hdrs, ic->i_recv_hdrs_dma);
rds_dma_hdrs_free(pool, ic->i_recv_hdrs, ic->i_recv_hdrs_dma,
ic->i_recv_ring.w_nr);
ic->i_recv_hdrs = NULL;
ic->i_recv_hdrs_dma = NULL;
send_hdrs_dma_out:
ib_dma_free_coherent(dev, ic->i_send_ring.w_nr *
sizeof(struct rds_header),
ic->i_send_hdrs, ic->i_send_hdrs_dma);
rds_dma_hdrs_free(pool, ic->i_send_hdrs, ic->i_send_hdrs_dma,
ic->i_send_ring.w_nr);
ic->i_send_hdrs = NULL;
ic->i_send_hdrs_dma = NULL;
qp_out:
rdma_destroy_qp(ic->i_cm_id);
recv_cq_out:
@ -984,8 +1050,6 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
ic->i_cm_id ? ic->i_cm_id->qp : NULL);
if (ic->i_cm_id) {
struct ib_device *dev = ic->i_cm_id->device;
rdsdebug("disconnecting cm %p\n", ic->i_cm_id);
err = rdma_disconnect(ic->i_cm_id);
if (err) {
@ -1035,24 +1099,39 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
ib_destroy_cq(ic->i_recv_cq);
}
/* then free the resources that ib callbacks use */
if (ic->i_send_hdrs)
ib_dma_free_coherent(dev,
ic->i_send_ring.w_nr *
sizeof(struct rds_header),
ic->i_send_hdrs,
ic->i_send_hdrs_dma);
if (ic->rds_ibdev) {
struct dma_pool *pool;
if (ic->i_recv_hdrs)
ib_dma_free_coherent(dev,
ic->i_recv_ring.w_nr *
sizeof(struct rds_header),
ic->i_recv_hdrs,
ic->i_recv_hdrs_dma);
pool = ic->rds_ibdev->rid_hdrs_pool;
if (ic->i_ack)
ib_dma_free_coherent(dev, sizeof(struct rds_header),
ic->i_ack, ic->i_ack_dma);
/* then free the resources that ib callbacks use */
if (ic->i_send_hdrs) {
rds_dma_hdrs_free(pool, ic->i_send_hdrs,
ic->i_send_hdrs_dma,
ic->i_send_ring.w_nr);
ic->i_send_hdrs = NULL;
ic->i_send_hdrs_dma = NULL;
}
if (ic->i_recv_hdrs) {
rds_dma_hdrs_free(pool, ic->i_recv_hdrs,
ic->i_recv_hdrs_dma,
ic->i_recv_ring.w_nr);
ic->i_recv_hdrs = NULL;
ic->i_recv_hdrs_dma = NULL;
}
if (ic->i_ack) {
dma_pool_free(pool, ic->i_ack, ic->i_ack_dma);
ic->i_ack = NULL;
}
} else {
WARN_ON(ic->i_send_hdrs);
WARN_ON(ic->i_send_hdrs_dma);
WARN_ON(ic->i_recv_hdrs);
WARN_ON(ic->i_recv_hdrs_dma);
WARN_ON(ic->i_ack);
}
if (ic->i_sends)
rds_ib_send_clear_ring(ic);
@ -1071,9 +1150,6 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
ic->i_pd = NULL;
ic->i_send_cq = NULL;
ic->i_recv_cq = NULL;
ic->i_send_hdrs = NULL;
ic->i_recv_hdrs = NULL;
ic->i_ack = NULL;
}
BUG_ON(ic->rds_ibdev);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@ -61,7 +61,7 @@ void rds_ib_recv_init_ring(struct rds_ib_connection *ic)
recv->r_wr.num_sge = RDS_IB_RECV_SGE;
sge = &recv->r_sge[0];
sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header));
sge->addr = ic->i_recv_hdrs_dma[i];
sge->length = sizeof(struct rds_header);
sge->lkey = ic->i_pd->local_dma_lkey;
@ -343,7 +343,7 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn,
WARN_ON(ret != 1);
sge = &recv->r_sge[0];
sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header);
sge->addr = ic->i_recv_hdrs_dma[recv - ic->i_recvs];
sge->length = sizeof(struct rds_header);
sge = &recv->r_sge[1];
@ -861,7 +861,7 @@ static void rds_ib_process_recv(struct rds_connection *conn,
}
data_len -= sizeof(struct rds_header);
ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs];
ihdr = ic->i_recv_hdrs[recv - ic->i_recvs];
/* Validate the checksum. */
if (!rds_message_verify_checksum(ihdr)) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@ -201,7 +201,8 @@ void rds_ib_send_init_ring(struct rds_ib_connection *ic)
send->s_wr.ex.imm_data = 0;
sge = &send->s_sge[0];
sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header));
sge->addr = ic->i_send_hdrs_dma[i];
sge->length = sizeof(struct rds_header);
sge->lkey = ic->i_pd->local_dma_lkey;
@ -631,11 +632,13 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
send->s_queued = jiffies;
send->s_op = NULL;
send->s_sge[0].addr = ic->i_send_hdrs_dma
+ (pos * sizeof(struct rds_header));
send->s_sge[0].addr = ic->i_send_hdrs_dma[pos];
send->s_sge[0].length = sizeof(struct rds_header);
memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header));
memcpy(ic->i_send_hdrs[pos], &rm->m_inc.i_hdr,
sizeof(struct rds_header));
/* Set up the data, if present */
if (i < work_alloc
@ -674,7 +677,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
&send->s_wr, send->s_wr.num_sge, send->s_wr.next);
if (ic->i_flowctl && adv_credits) {
struct rds_header *hdr = &ic->i_send_hdrs[pos];
struct rds_header *hdr = ic->i_send_hdrs[pos];
/* add credit and redo the header checksum */
hdr->h_credit = adv_credits;