From 68cdba068d6b7ecb63106151b3b15be245c78f07 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Mon, 18 May 2015 15:27:10 -0500 Subject: [PATCH 1/6] RDMA/iw_cm: Export tos field to iwarp providers rdma-cma/iw_cm: Export tos field to iwarp providers Signed-off-by: Steve Wise Signed-off-by: Tatyana Nikolova Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 2 ++ include/rdma/iw_cm.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 06441a43c3aa..248019df5332 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1600,6 +1600,7 @@ static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) if (IS_ERR(id)) return PTR_ERR(id); + id->tos = id_priv->tos; id_priv->cm_id.iw = id; memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv), @@ -2847,6 +2848,7 @@ static int cma_connect_iw(struct rdma_id_private *id_priv, if (IS_ERR(cm_id)) return PTR_ERR(cm_id); + cm_id->tos = id_priv->tos; id_priv->cm_id.iw = cm_id; memcpy(&cm_id->local_addr, cma_src_addr(id_priv), diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h index 1017e0bdf8ba..036bd2772662 100644 --- a/include/rdma/iw_cm.h +++ b/include/rdma/iw_cm.h @@ -91,6 +91,7 @@ struct iw_cm_id { /* Used by provider to add and remove refs on IW cm_id */ void (*add_ref)(struct iw_cm_id *); void (*rem_ref)(struct iw_cm_id *); + u8 tos; }; struct iw_cm_conn_param { From 854ace98e71b68e6e98742f8035315d04288cefe Mon Sep 17 00:00:00 2001 From: Faisal Latif Date: Mon, 18 May 2015 15:28:14 -0500 Subject: [PATCH 2/6] RDMA/nes: Enable the use of the tos field in the nes driver RDMA/nes: Enable the use of the tos field in the nes driver Signed-off-by: Faisal Latif Signed-off-by: Tatyana Nikolova Signed-off-by: Doug Ledford --- drivers/infiniband/hw/nes/nes_cm.c | 7 +++++++ drivers/infiniband/hw/nes/nes_cm.h | 2 ++ 2 files changed, 9 insertions(+) diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 72b43417cbe3..9047af429906 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -1616,6 +1616,8 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, &cm_node->loc_addr, cm_node->loc_port, &cm_node->rem_addr, cm_node->rem_port); cm_node->listener = listener; + if (listener) + cm_node->tos = listener->tos; cm_node->netdev = nesvnic->netdev; cm_node->cm_id = cm_info->cm_id; memcpy(cm_node->loc_mac, nesvnic->netdev->dev_addr, ETH_ALEN); @@ -2938,6 +2940,9 @@ static int nes_cm_init_tsa_conn(struct nes_qp *nesqp, struct nes_cm_node *cm_nod nesqp->nesqp_context->misc2 |= cpu_to_le32(64 << NES_QPCONTEXT_MISC2_TTL_SHIFT); + nesqp->nesqp_context->misc2 |= cpu_to_le32( + cm_node->tos << NES_QPCONTEXT_MISC2_TOS_SHIFT); + nesqp->nesqp_context->mss |= cpu_to_le32(((u32)cm_node->tcp_cntxt.mss) << 16); nesqp->nesqp_context->tcp_state_flow_label |= cpu_to_le32( @@ -3612,6 +3617,7 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_node->ord_size = 1; cm_node->apbvt_set = apbvt_set; + cm_node->tos = cm_id->tos; nesqp->cm_node = cm_node; cm_node->nesqp = nesqp; nes_add_ref(&nesqp->ibqp); @@ -3666,6 +3672,7 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog) } cm_id->provider_data = cm_node; + cm_node->tos = cm_id->tos; if (!cm_node->reused_node) { if (nes_create_mapinfo(&cm_info)) diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h index f522cf639789..32a6420c2940 100644 --- a/drivers/infiniband/hw/nes/nes_cm.h +++ b/drivers/infiniband/hw/nes/nes_cm.h @@ -303,6 +303,7 @@ struct nes_cm_listener { int backlog; enum nes_cm_listener_state listener_state; u32 reused_node; + u8 tos; }; /* per connection node and node state information */ @@ -352,6 +353,7 @@ struct nes_cm_node { struct list_head reset_entry; struct nes_qp *nesqp; atomic_t passive_state; + u8 tos; }; /* structure for client or CM to fill when making CM api calls. */ From 523749678145e932014394e1fe44759ccedec576 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 26 May 2015 15:03:48 +0200 Subject: [PATCH 3/6] IB/ipoib: Fix RCU annotations in ipoib_neigh_hash_init() Avoid that sparse complains about ipoib_neigh_hash_init(). This patch does not change any functionality. See also patch "IPoIB: Fix memory leak in the neigh table deletion flow" (commit ID 66172c09938b). Signed-off-by: Bart Van Assche Cc: Shlomo Pongratz Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 9e1b203d756d..fec8207df371 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1128,7 +1128,7 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) { struct ipoib_neigh_table *ntbl = &priv->ntbl; struct ipoib_neigh_hash *htbl; - struct ipoib_neigh **buckets; + struct ipoib_neigh __rcu **buckets; u32 size; clear_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags); @@ -1146,7 +1146,7 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) htbl->size = size; htbl->mask = (size - 1); htbl->buckets = buckets; - ntbl->htbl = htbl; + RCU_INIT_POINTER(ntbl->htbl, htbl); htbl->ntbl = ntbl; atomic_set(&ntbl->entries, 0); From d655a9fbc8a51ac8d92db7ff5a599aab17dce3ca Mon Sep 17 00:00:00 2001 From: Wengang Wang Date: Thu, 21 May 2015 13:11:40 +0800 Subject: [PATCH 4/6] rds: re-entry of rds_ib_xmit/rds_iw_xmit The BUG_ON at line 452/453 is triggered in function rds_send_xmit. 441 while (ret) { 442 tmp = min_t(int, ret, sg->length - 443 conn->c_xmit_data_off); 444 conn->c_xmit_data_off += tmp; 445 ret -= tmp; 446 if (conn->c_xmit_data_off == sg->length) { 447 conn->c_xmit_data_off = 0; 448 sg++; 449 conn->c_xmit_sg++; 450 if (ret != 0 && conn->c_xmit_sg == rm->data.op_nents) 451 printk(KERN_ERR "conn %p rm %p sg %p ret %d\n", conn, rm, sg, ret); 452 BUG_ON(ret != 0 && 453 conn->c_xmit_sg == rm->data.op_nents); 454 } 455 } it is complaining the total sent length is bigger that we want to send. rds_ib_xmit() is wrong for the second entry for the same rds_message returning wrong value. the sg and off passed by rds_send_xmit to rds_ib_xmit is based on scatterlist.offset/length, but the rds_ib_xmit action is based on scatterlist.dma_address/dma_length. in case dma_length is larger than length there is problem. for the 2nd and later entries of rds_ib_xmit for same rds_message, at least one of the following two is wrong: 1) the scatterlist to start with, the choosen one can far beyond the correct one. 2) the offset to start with within the scatterlist. fix: add op_dmasg and op_dmaoff to rm_data_op structure indicating the scatterlist and offset within the it to start with for rds_ib_xmit respectively. op_dmasg and op_dmaoff are initialized to zero when doing dma mapping for the first see of the message and are changed when filling send slots. the same applies to rds_iw_xmit too. Signed-off-by: Wengang Wang Signed-off-by: Doug Ledford --- net/rds/ib_send.c | 17 +++++++++++------ net/rds/iw_send.c | 18 +++++++++++------- net/rds/rds.h | 2 ++ 3 files changed, 24 insertions(+), 13 deletions(-) diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index bd3825d38abc..1df6c84cb945 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -605,6 +605,8 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, } rds_message_addref(rm); + rm->data.op_dmasg = 0; + rm->data.op_dmaoff = 0; ic->i_data_op = &rm->data; /* Finalize the header */ @@ -658,7 +660,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, send = &ic->i_sends[pos]; first = send; prev = NULL; - scat = &ic->i_data_op->op_sg[sg]; + scat = &ic->i_data_op->op_sg[rm->data.op_dmasg]; i = 0; do { unsigned int len = 0; @@ -680,17 +682,20 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, /* Set up the data, if present */ if (i < work_alloc && scat != &rm->data.op_sg[rm->data.op_count]) { - len = min(RDS_FRAG_SIZE, ib_sg_dma_len(dev, scat) - off); + len = min(RDS_FRAG_SIZE, + ib_sg_dma_len(dev, scat) - rm->data.op_dmaoff); send->s_wr.num_sge = 2; - send->s_sge[1].addr = ib_sg_dma_address(dev, scat) + off; + send->s_sge[1].addr = ib_sg_dma_address(dev, scat); + send->s_sge[1].addr += rm->data.op_dmaoff; send->s_sge[1].length = len; bytes_sent += len; - off += len; - if (off == ib_sg_dma_len(dev, scat)) { + rm->data.op_dmaoff += len; + if (rm->data.op_dmaoff == ib_sg_dma_len(dev, scat)) { scat++; - off = 0; + rm->data.op_dmasg++; + rm->data.op_dmaoff = 0; } } diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c index 13834780a308..334fe98c5084 100644 --- a/net/rds/iw_send.c +++ b/net/rds/iw_send.c @@ -581,6 +581,8 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs; ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes; rds_message_addref(rm); + rm->data.op_dmasg = 0; + rm->data.op_dmaoff = 0; ic->i_rm = rm; /* Finalize the header */ @@ -622,7 +624,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, send = &ic->i_sends[pos]; first = send; prev = NULL; - scat = &rm->data.op_sg[sg]; + scat = &rm->data.op_sg[rm->data.op_dmasg]; sent = 0; i = 0; @@ -656,10 +658,11 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, send = &ic->i_sends[pos]; - len = min(RDS_FRAG_SIZE, ib_sg_dma_len(dev, scat) - off); + len = min(RDS_FRAG_SIZE, + ib_sg_dma_len(dev, scat) - rm->data.op_dmaoff); rds_iw_xmit_populate_wr(ic, send, pos, - ib_sg_dma_address(dev, scat) + off, len, - send_flags); + ib_sg_dma_address(dev, scat) + rm->data.op_dmaoff, len, + send_flags); /* * We want to delay signaling completions just enough to get @@ -687,10 +690,11 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, &send->s_wr, send->s_wr.num_sge, send->s_wr.next); sent += len; - off += len; - if (off == ib_sg_dma_len(dev, scat)) { + rm->data.op_dmaoff += len; + if (rm->data.op_dmaoff == ib_sg_dma_len(dev, scat)) { scat++; - off = 0; + rm->data.op_dmaoff = 0; + rm->data.op_dmasg++; } add_header: diff --git a/net/rds/rds.h b/net/rds/rds.h index 0d41155a2258..d2c60097ddeb 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -363,6 +363,8 @@ struct rds_message { unsigned int op_active:1; unsigned int op_nents; unsigned int op_count; + unsigned int op_dmasg; + unsigned int op_dmaoff; struct scatterlist *op_sg; } data; }; From 18eaf1f1959add9bbbfa5442fc5d9b59f7d4e1bd Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 29 May 2015 23:10:31 -0700 Subject: [PATCH 5/6] RDMA/ocrdma: Fix memory leak in _ocrdma_alloc_pd() If ocrdma_get_pd_num() fails, then we need to free the pd struct we allocated. This was detected by Coverity (CID 1271245). Signed-off-by: Roland Dreier Acked-By: Devesh Sharma Signed-off-by: Doug Ledford --- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 877175563634..cf1f515efda8 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -375,7 +375,12 @@ static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev, if (dev->pd_mgr->pd_prealloc_valid) { status = ocrdma_get_pd_num(dev, pd); - return (status == 0) ? pd : ERR_PTR(status); + if (status == 0) { + return pd; + } else { + kfree(pd); + return ERR_PTR(status); + } } retry: From 1156256811e42908fd13c4cfcf32965d8b93f2a8 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 29 May 2015 23:11:27 -0700 Subject: [PATCH 6/6] IB/mlx4: Fix error paths in mlx4_ib_create_flow() The unwinding clean up code are err_create_flow starts at the current index i. That means we shouldn't increment i until we're really sure we won't have to destroy the current flow; otherwise we might increment the index, fail inside an is_bonded block, and end up accessing off the end of the reg_id[] array. This was detected by Coverity (CID 1271229). Signed-off-by: Roland Dreier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index cc64400d41ac..8191e176c5b7 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1185,7 +1185,6 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, &mflow->reg_id[i].id); if (err) goto err_create_flow; - i++; if (is_bonded) { /* Application always sees one port so the mirror rule * must be on port #2 @@ -1200,6 +1199,7 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, j++; } + i++; } if (i < ARRAY_SIZE(type) && flow_attr->type == IB_FLOW_ATTR_NORMAL) { @@ -1207,7 +1207,7 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, &mflow->reg_id[i].id); if (err) goto err_create_flow; - i++; + if (is_bonded) { flow_attr->port = 2; err = mlx4_ib_tunnel_steer_add(qp, flow_attr, @@ -1218,6 +1218,7 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, j++; } /* function to create mirror rule */ + i++; } return &mflow->ibflow;