From e7d80c830489f67b1d0257e6919840100085dea9 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 22 Jun 2017 15:01:10 -0400 Subject: [PATCH 01/27] IB/iser: Handle lack of memory management extentions correctly max_fast_reg_page_list_len is only valid when the memory management extentions are signaled by the underlying driver. Fix by adjusting iser_calc_scsi_params() to use ISCSI_ISER_MAX_SG_TABLESIZE when the extentions are not indicated. Reported-by: Thomas Rosenstein Fixes: Commit df749cdc45d9 ("IB/iser: Support up to 8MB data transfer in a single command") Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Acked-by: Sagi Grimberg Tested-by: Thomas Rosenstein Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/iser/iser_verbs.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index c538a38c91ce..26a004e97ae0 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -708,8 +708,14 @@ iser_calc_scsi_params(struct iser_conn *iser_conn, unsigned short sg_tablesize, sup_sg_tablesize; sg_tablesize = DIV_ROUND_UP(max_sectors * 512, SIZE_4K); - sup_sg_tablesize = min_t(unsigned, ISCSI_ISER_MAX_SG_TABLESIZE, - device->ib_device->attrs.max_fast_reg_page_list_len); + if (device->ib_device->attrs.device_cap_flags & + IB_DEVICE_MEM_MGT_EXTENSIONS) + sup_sg_tablesize = + min_t( + uint, ISCSI_ISER_MAX_SG_TABLESIZE, + device->ib_device->attrs.max_fast_reg_page_list_len); + else + sup_sg_tablesize = ISCSI_ISER_MAX_SG_TABLESIZE; iser_conn->scsi_sg_tablesize = min(sg_tablesize, sup_sg_tablesize); } From b6ea01ba3fe9dcb5e08bbd9ed482845582a1e80b Mon Sep 17 00:00:00 2001 From: "Amrani, Ram" Date: Mon, 29 May 2017 11:18:54 +0300 Subject: [PATCH 02/27] RDMA/qedr: Add qedr to MAINTAINERS file Signed-off-by: Ram Amrani Signed-off-by: Ariel Elior Signed-off-by: Doug Ledford --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 053c3bdd1fe5..53437d46a39d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10531,6 +10531,14 @@ L: linux-scsi@vger.kernel.org S: Supported F: drivers/scsi/qedf/ +QLOGIC QL4xxx RDMA DRIVER +M: Ram Amrani +M: Ariel Elior +L: linux-rdma@vger.kernel.org +S: Supported +F: drivers/infiniband/hw/qedr/ +F: include/uapi/rdma/qedr-abi.h + QNX4 FILESYSTEM M: Anders Larsen W: http://www.alarsen.net/linux/qnx4fs/ From 91647f4c2d66e16b30524613410a638c2c4532bf Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Mon, 29 May 2017 17:18:14 -0700 Subject: [PATCH 03/27] IB/hfi1: Ensure dd->gi_mask can not be overflowed As the code stands today the array access in remap_intr() is OK. To future proof the code though we should explicitly check to ensure the index value is not outside of the valid range. This is not a straight forward calculation so err on the side of caution. Reviewed-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 2ba00b89df6a..94b54850ec75 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -12847,7 +12847,12 @@ static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr) /* clear from the handled mask of the general interrupt */ m = isrc / 64; n = isrc % 64; - dd->gi_mask[m] &= ~((u64)1 << n); + if (likely(m < CCE_NUM_INT_CSRS)) { + dd->gi_mask[m] &= ~((u64)1 << n); + } else { + dd_dev_err(dd, "remap interrupt err\n"); + return; + } /* direct the chip source to the given MSI-X interrupt */ m = isrc / 8; From 99975cd4fda52974a767aa44fe0b1a8f74950d9d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 24 Apr 2017 15:15:28 -0700 Subject: [PATCH 04/27] mlx5: Avoid that mlx5_ib_sg_to_klms() overflows the klms[] array ib_map_mr_sg() can pass an SG-list to .map_mr_sg() that is larger than what fits into a single MR. .map_mr_sg() must not attempt to map more SG-list elements than what fits into a single MR. Hence make sure that mlx5_ib_sg_to_klms() does not write outside the MR klms[] array. Fixes: b005d3164713 ("mlx5: Add arbitrary sg list support") Signed-off-by: Bart Van Assche Reviewed-by: Max Gurtovoy Cc: Sagi Grimberg Cc: Leon Romanovsky Cc: Israel Rukshin Cc: Acked-by: Leon Romanovsky Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 763bb5b36144..2046a6987453 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1779,7 +1779,7 @@ mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, mr->ndescs = sg_nents; for_each_sg(sgl, sg, sg_nents, i) { - if (unlikely(i > mr->max_descs)) + if (unlikely(i >= mr->max_descs)) break; klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset); klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset); From 28b5b3a23ba67970f4f534b15c4e4d687136605a Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 4 May 2017 20:38:20 -0500 Subject: [PATCH 05/27] RDMA/core: Document confusing code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While looking into Coverity ID 1351047 I ran into the following piece of code at drivers/infiniband/core/verbs.c:496: ret = rdma_addr_find_l2_eth_by_grh(&dgid, &sgid,                                    ah_attr->dmac,                                    wc->wc_flags & IB_WC_WITH_VLAN ?                                    NULL : &vlan_id,                                    &if_index, &hoplimit); The issue here is that the position of arguments in the call to rdma_addr_find_l2_eth_by_grh() function do not match the order of the parameters: &dgid is passed to sgid &sgid is passed to dgid This is the function prototype: int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,  const union ib_gid *dgid,  u8 *dmac, u16 *vlan_id, int *if_index,  int *hoplimit) My question here is if this is intentional? Answer: Yes. ib_init_ah_from_wc() creates ah from the incoming packet. Incoming packet has dgid of the receiver node on which this code is getting executed and sgid contains the GID of the sender. When resolving mac address of destination, you use arrived dgid as sgid and use sgid as dgid because sgid contains destinations GID whom to respond to. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index c973a83c898b..47ee1f83c9a9 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -452,6 +452,19 @@ int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr, } EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr); +/* + * This function creates ah from the incoming packet. + * Incoming packet has dgid of the receiver node on which this code is + * getting executed and, sgid contains the GID of the sender. + * + * When resolving mac address of destination, the arrived dgid is used + * as sgid and, sgid is used as dgid because sgid contains destinations + * GID whom to respond to. + * + * This is why when calling rdma_addr_find_l2_eth_by_grh() function, the + * position of arguments dgid and sgid do not match the order of the + * parameters. + */ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, const struct ib_wc *wc, const struct ib_grh *grh, struct rdma_ah_attr *ah_attr) From c8c16d3bae967f1c7af541e8d016e5c51e4f010a Mon Sep 17 00:00:00 2001 From: Vladimir Neyelov Date: Sun, 21 May 2017 19:17:31 +0300 Subject: [PATCH 06/27] IB/iser: Fix connection teardown race condition Under heavy iser target(scst) start/stop stress during login/logout on iser intitiator side happened trace call provided below. The function iscsi_iser_slave_alloc iser_conn pointer could be NULL, due to the fact that function iscsi_iser_conn_stop can be called before and free iser connection. Let's protect that flow by introducing global mutex. BUG: unable to handle kernel paging request at 0000000000001018 IP: [] iscsi_iser_slave_alloc+0x1e/0x50 [ib_iser] Call Trace: ? scsi_alloc_sdev+0x242/0x300 scsi_probe_and_add_lun+0x9e1/0xea0 ? kfree_const+0x21/0x30 ? kobject_set_name_vargs+0x76/0x90 ? __pm_runtime_resume+0x5b/0x70 __scsi_scan_target+0xf6/0x250 scsi_scan_target+0xea/0x100 iscsi_user_scan_session.part.13+0x101/0x130 [scsi_transport_iscsi] ? iscsi_user_scan_session.part.13+0x130/0x130 [scsi_transport_iscsi] iscsi_user_scan_session+0x1e/0x30 [scsi_transport_iscsi] device_for_each_child+0x50/0x90 iscsi_user_scan+0x44/0x60 [scsi_transport_iscsi] store_scan+0xa8/0x100 ? common_file_perm+0x5d/0x1c0 dev_attr_store+0x18/0x30 sysfs_kf_write+0x37/0x40 kernfs_fop_write+0x12c/0x1c0 __vfs_write+0x18/0x40 vfs_write+0xb5/0x1a0 SyS_write+0x55/0xc0 Fixes: 318d311e8f01 ("iser: Accept arbitrary sg lists mapping if the device supports it") Cc: # v4.5+ Signed-off-by: Vladimir Neyelov Signed-off-by: Leon Romanovsky Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/iser/iscsi_iser.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 5a887efb4bdf..37b33d708c2d 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -83,6 +83,7 @@ static struct scsi_host_template iscsi_iser_sht; static struct iscsi_transport iscsi_iser_transport; static struct scsi_transport_template *iscsi_iser_scsi_transport; static struct workqueue_struct *release_wq; +static DEFINE_MUTEX(unbind_iser_conn_mutex); struct iser_global ig; int iser_debug_level = 0; @@ -550,12 +551,14 @@ iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) */ if (iser_conn) { mutex_lock(&iser_conn->state_mutex); + mutex_lock(&unbind_iser_conn_mutex); iser_conn_terminate(iser_conn); iscsi_conn_stop(cls_conn, flag); /* unbind */ iser_conn->iscsi_conn = NULL; conn->dd_data = NULL; + mutex_unlock(&unbind_iser_conn_mutex); complete(&iser_conn->stop_completion); mutex_unlock(&iser_conn->state_mutex); @@ -977,13 +980,21 @@ static int iscsi_iser_slave_alloc(struct scsi_device *sdev) struct iser_conn *iser_conn; struct ib_device *ib_dev; + mutex_lock(&unbind_iser_conn_mutex); + session = starget_to_session(scsi_target(sdev))->dd_data; iser_conn = session->leadconn->dd_data; + if (!iser_conn) { + mutex_unlock(&unbind_iser_conn_mutex); + return -ENOTCONN; + } ib_dev = iser_conn->ib_conn.device->ib_device; if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) blk_queue_virt_boundary(sdev->request_queue, ~MASK_4K); + mutex_unlock(&unbind_iser_conn_mutex); + return 0; } From bebb2a473a43c8f84a8210687d1cbdde503046d7 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 23 May 2017 10:48:44 +0300 Subject: [PATCH 07/27] IB/core: Namespace is mandatory input for address resolution In function addr_resolve() the namespace is a required input parameter and not an output. It is passed later for searching the routing table and device addresses. Also, it shouldn't be copied back to the caller. Fixes: 565edd1d5555 ('IB/addr: Pass network namespace as a parameter') Cc: # v4.3+ Signed-off-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/addr.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index a6cb379a4ebc..d78bc74bc9a9 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -509,6 +509,11 @@ static int addr_resolve(struct sockaddr *src_in, struct dst_entry *dst; int ret; + if (!addr->net) { + pr_warn_ratelimited("%s: missing namespace\n", __func__); + return -EINVAL; + } + if (src_in->sa_family == AF_INET) { struct rtable *rt = NULL; const struct sockaddr_in *dst_in4 = @@ -546,7 +551,6 @@ static int addr_resolve(struct sockaddr *src_in, } addr->bound_dev_if = ndev->ifindex; - addr->net = dev_net(ndev); dev_put(ndev); return ret; From cbd09aebc2d62095b05797af5c9a315e3a71dcea Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 23 May 2017 10:48:45 +0300 Subject: [PATCH 08/27] IB/core: Don't resolve IP address to the loopback device When resolving an IP address that is on the host of the caller the result from querying the routing table is the loopback device. This is not a valid response, because it doesn't represent the RDMA device and the port. Therefore, callers need to check the resolved device and if it is a loopback device find an alternative way to resolve it. To avoid this we make sure that the response from rdma_resolve_ip() will not be the loopback device. While that, we fix an static checker warning about dereferencing an unintitialized pointer using the same solution as in commit abeffce90c7f ("net/mlx5e: Fix a -Wmaybe-uninitialized warning") as a reference. Signed-off-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/addr.c | 40 ++++++++++++++++++++++++--------- drivers/infiniband/core/cma.c | 32 +++----------------------- drivers/infiniband/core/verbs.c | 5 ----- 3 files changed, 32 insertions(+), 45 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index d78bc74bc9a9..01236cef7bfb 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -268,6 +268,7 @@ int rdma_translate_ip(const struct sockaddr *addr, return ret; ret = rdma_copy_addr(dev_addr, dev, NULL); + dev_addr->bound_dev_if = dev->ifindex; if (vlan_id) *vlan_id = rdma_vlan_dev_vlan_id(dev); dev_put(dev); @@ -280,6 +281,7 @@ int rdma_translate_ip(const struct sockaddr *addr, &((const struct sockaddr_in6 *)addr)->sin6_addr, dev, 1)) { ret = rdma_copy_addr(dev_addr, dev, NULL); + dev_addr->bound_dev_if = dev->ifindex; if (vlan_id) *vlan_id = rdma_vlan_dev_vlan_id(dev); break; @@ -405,10 +407,10 @@ static int addr4_resolve(struct sockaddr_in *src_in, fl4.saddr = src_ip; fl4.flowi4_oif = addr->bound_dev_if; rt = ip_route_output_key(addr->net, &fl4); - if (IS_ERR(rt)) { - ret = PTR_ERR(rt); - goto out; - } + ret = PTR_ERR_OR_ZERO(rt); + if (ret) + return ret; + src_in->sin_family = AF_INET; src_in->sin_addr.s_addr = fl4.saddr; @@ -423,8 +425,6 @@ static int addr4_resolve(struct sockaddr_in *src_in, *prt = rt; return 0; -out: - return ret; } #if IS_ENABLED(CONFIG_IPV6) @@ -527,8 +527,12 @@ static int addr_resolve(struct sockaddr *src_in, if (resolve_neigh) ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq); - ndev = rt->dst.dev; - dev_hold(ndev); + if (addr->bound_dev_if) { + ndev = dev_get_by_index(addr->net, addr->bound_dev_if); + } else { + ndev = rt->dst.dev; + dev_hold(ndev); + } ip_rt_put(rt); } else { @@ -544,13 +548,27 @@ static int addr_resolve(struct sockaddr *src_in, if (resolve_neigh) ret = addr_resolve_neigh(dst, dst_in, addr, seq); - ndev = dst->dev; - dev_hold(ndev); + if (addr->bound_dev_if) { + ndev = dev_get_by_index(addr->net, addr->bound_dev_if); + } else { + ndev = dst->dev; + dev_hold(ndev); + } dst_release(dst); } - addr->bound_dev_if = ndev->ifindex; + if (ndev->flags & IFF_LOOPBACK) { + ret = rdma_translate_ip(dst_in, addr, NULL); + /* + * Put the loopback device and get the translated + * device instead. + */ + dev_put(ndev); + ndev = dev_get_by_index(addr->net, addr->bound_dev_if); + } else { + addr->bound_dev_if = ndev->ifindex; + } dev_put(ndev); return ret; diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 31bb82d8ecd7..11aff923b633 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -623,22 +623,11 @@ static inline int cma_validate_port(struct ib_device *device, u8 port, if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port)) return ret; - if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) { + if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) ndev = dev_get_by_index(&init_net, bound_if_index); - if (ndev && ndev->flags & IFF_LOOPBACK) { - pr_info("detected loopback device\n"); - dev_put(ndev); - - if (!device->get_netdev) - return -EOPNOTSUPP; - - ndev = device->get_netdev(device, port); - if (!ndev) - return -ENODEV; - } - } else { + else gid_type = IB_GID_TYPE_IB; - } + ret = ib_find_cached_gid_by_port(device, gid, gid_type, port, ndev, NULL); @@ -2569,21 +2558,6 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) goto err2; } - if (ndev->flags & IFF_LOOPBACK) { - dev_put(ndev); - if (!id_priv->id.device->get_netdev) { - ret = -EOPNOTSUPP; - goto err2; - } - - ndev = id_priv->id.device->get_netdev(id_priv->id.device, - id_priv->id.port_num); - if (!ndev) { - ret = -ENODEV; - goto err2; - } - } - supported_gids = roce_gid_type_mask_support(id_priv->id.device, id_priv->id.port_num); gid_type = cma_route_gid_type(addr->dev_addr.network, diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 47ee1f83c9a9..644fa0d13f02 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -520,11 +520,6 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, } resolved_dev = dev_get_by_index(&init_net, if_index); - if (resolved_dev->flags & IFF_LOOPBACK) { - dev_put(resolved_dev); - resolved_dev = idev; - dev_hold(resolved_dev); - } rcu_read_lock(); if (resolved_dev != idev && !rdma_is_upper_dev_rcu(idev, resolved_dev)) From a512c2fbef9c700ee1ee0e045b75e140fef8f5ee Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 23 May 2017 11:26:08 +0300 Subject: [PATCH 09/27] IB/core: Introduce modify QP operation with udata This patch adds new function ib_modify_qp_with_udata so that uverbs layer can avoid handling L2 mac address at verbs layer and depend on the core layer to resolve the mac address consistently for all required QPs. Signed-off-by: Parav Pandit Reviewed-by: Eli Cohen Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 36 ++++++++++++++++++++++++--------- include/rdma/ib_verbs.h | 16 +++++++++++++++ 2 files changed, 42 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 644fa0d13f02..7f8fe443df46 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1276,20 +1276,36 @@ out: } EXPORT_SYMBOL(ib_resolve_eth_dmac); +/** + * ib_modify_qp_with_udata - Modifies the attributes for the specified QP. + * @qp: The QP to modify. + * @attr: On input, specifies the QP attributes to modify. On output, + * the current values of selected QP attributes are returned. + * @attr_mask: A bit-mask used to specify which attributes of the QP + * are being modified. + * @udata: pointer to user's input output buffer information + * are being modified. + * It returns 0 on success and returns appropriate error code on error. + */ +int ib_modify_qp_with_udata(struct ib_qp *qp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + int ret; + + if (attr_mask & IB_QP_AV) { + ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr); + if (ret) + return ret; + } + return ib_security_modify_qp(qp, attr, attr_mask, udata); +} +EXPORT_SYMBOL(ib_modify_qp_with_udata); + int ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask) { - - if (qp_attr_mask & IB_QP_AV) { - int ret; - - ret = ib_resolve_eth_dmac(qp->device, &qp_attr->ah_attr); - if (ret) - return ret; - } - - return ib_security_modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL); + return ib_modify_qp_with_udata(qp, qp_attr, qp_attr_mask, NULL); } EXPORT_SYMBOL(ib_modify_qp); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 356953d3dbd1..6e62c9e2c971 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2947,6 +2947,22 @@ static inline int ib_post_srq_recv(struct ib_srq *srq, struct ib_qp *ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr); +/** + * ib_modify_qp_with_udata - Modifies the attributes for the specified QP. + * @qp: The QP to modify. + * @attr: On input, specifies the QP attributes to modify. On output, + * the current values of selected QP attributes are returned. + * @attr_mask: A bit-mask used to specify which attributes of the QP + * are being modified. + * @udata: pointer to user's input output buffer information + * are being modified. + * It returns 0 on success and returns appropriate error code on error. + */ +int ib_modify_qp_with_udata(struct ib_qp *qp, + struct ib_qp_attr *attr, + int attr_mask, + struct ib_udata *udata); + /** * ib_modify_qp - Modifies the attributes for the specified QP and then * transitions the QP to the given state. From f7c8f2e9ddc71db0ae344f3ffb19df03ef32b719 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 23 May 2017 11:26:09 +0300 Subject: [PATCH 10/27] IB/uverbs: Make use of ib_modify_qp variant to avoid resolving DMAC This patch makes use of IB core's ib_modify_qp_with_udata function that also resolves the DMAC and handles udata. Signed-off-by: Parav Pandit Reviewed-by: Eli Cohen Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 8ba9bfb073d1..3f55d18a3791 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2005,28 +2005,13 @@ static int modify_qp(struct ib_uverbs_file *file, rdma_ah_set_port_num(&attr->alt_ah_attr, cmd->base.alt_dest.port_num); - if (qp->real_qp == qp) { - if (cmd->base.attr_mask & IB_QP_AV) { - ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr); - if (ret) - goto release_qp; - } - ret = ib_security_modify_qp(qp, - attr, - modify_qp_mask(qp->qp_type, - cmd->base.attr_mask), - udata); - } else { - ret = ib_security_modify_qp(qp, - attr, - modify_qp_mask(qp->qp_type, - cmd->base.attr_mask), - NULL); - } + ret = ib_modify_qp_with_udata(qp, attr, + modify_qp_mask(qp->qp_type, + cmd->base.attr_mask), + udata); release_qp: uobj_put_obj_read(qp); - out: kfree(attr); From 98e77d9fd7dff05019436370e78c3ec0f9894e25 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 23 May 2017 11:29:42 +0300 Subject: [PATCH 11/27] IB: Convert msleep below 20ms to usleep_range The msleep(1) may do not sleep 1 ms as expected and will sleep longer. The simple conversion from msleep to usleep_range between 1ms and 2ms can solve an issue. The full and comprehensive explanation can be found at [1] and [2]. [1] https://lkml.org/lkml/2007/8/3/250 [2] Documentation/timers/timers-howto.txt Signed-off-by: Leon Romanovsky Reviewed-by: Erez Shitrit Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 3 ++- drivers/infiniband/hw/mlx4/main.c | 2 +- drivers/infiniband/hw/mlx4/mcg.c | 2 +- drivers/infiniband/hw/nes/nes_hw.c | 4 ++-- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 4 ++-- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 2 +- 6 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 37d5d29597a4..729f8cc8738b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -995,7 +995,8 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work) goto free_work; } ne -= ret; - msleep(HNS_ROCE_V1_FREE_MR_WAIT_VALUE); + usleep_range(HNS_ROCE_V1_FREE_MR_WAIT_VALUE * 1000, + (1 + HNS_ROCE_V1_FREE_MR_WAIT_VALUE) * 1000); } while (ne && time_before_eq(jiffies, end)); if (ne != 0) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 75b2f7d4cd95..d1b43cbbfea7 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1155,7 +1155,7 @@ static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) * call to mlx4_ib_vma_close. */ put_task_struct(owning_process); - msleep(1); + usleep_range(1000, 2000); owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID); if (!owning_process || diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index 3405e947dc1e..b73f89700ef9 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -1091,7 +1091,7 @@ static void _mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy if (!count) break; - msleep(1); + usleep_range(1000, 2000); } while (time_after(end, jiffies)); flush_workqueue(ctx->mcg_wq); diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 8f9d8b4ad583..b0adf65e4bdb 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -551,7 +551,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { if ((0x0F000100 == (pcs_control_status0 & 0x0F000100)) || (0x0F000100 == (pcs_control_status1 & 0x0F000100))) int_cnt++; - msleep(1); + usleep_range(1000, 2000); } if (int_cnt > 1) { spin_lock_irqsave(&nesadapter->phy_lock, flags); @@ -592,7 +592,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { break; } } - msleep(1); + usleep_range(1000, 2000); } } } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 7cbcfdac6529..d574d41bdf61 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -954,7 +954,7 @@ void ipoib_cm_dev_stop(struct net_device *dev) break; } spin_unlock_irq(&priv->lock); - msleep(1); + usleep_range(1000, 2000); ipoib_drain_cq(dev); spin_lock_irq(&priv->lock); } @@ -1206,7 +1206,7 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p) goto timeout; } - msleep(1); + usleep_range(1000, 2000); } } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index efe7402f4885..57a9655e844d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -770,7 +770,7 @@ int ipoib_ib_dev_stop_default(struct net_device *dev) ipoib_drain_cq(dev); - msleep(1); + usleep_range(1000, 2000); } ipoib_dbg(priv, "All sends and receives done.\n"); From ed7b521d8a98c3371e3c9300df8bf3cb774d8ea6 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Tue, 23 May 2017 11:42:52 +0300 Subject: [PATCH 12/27] IB/IPoIB: Forward MTU change to driver below This patch checks if there is a driver below that needs to be updated on the new MTU and calls it accordingly. Signed-off-by: Erez Shitrit Reviewed by: Alex Vesker Signed-off-by: Leon Romanovsky Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 6e86eeee370e..3e2b7988ead8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -233,6 +233,7 @@ static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_featu static int ipoib_change_mtu(struct net_device *dev, int new_mtu) { struct ipoib_dev_priv *priv = ipoib_priv(dev); + int ret = 0; /* dev->mtu > 2K ==> connected mode */ if (ipoib_cm_admin_enabled(dev)) { @@ -256,9 +257,23 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) ipoib_dbg(priv, "MTU must be smaller than the underlying " "link layer MTU - 4 (%u)\n", priv->mcast_mtu); - dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); + new_mtu = min(priv->mcast_mtu, priv->admin_mtu); - return 0; + if (priv->rn_ops->ndo_change_mtu) { + bool carrier_status = netif_carrier_ok(dev); + + netif_carrier_off(dev); + + /* notify lower level on the real mtu */ + ret = priv->rn_ops->ndo_change_mtu(dev, new_mtu); + + if (carrier_status) + netif_carrier_on(dev); + } else { + dev->mtu = new_mtu; + } + + return ret; } /* Called with an RCU read lock taken */ From d83187dda9b930dc268ab05da265f3d5d7eca451 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 23 May 2017 14:38:13 +0300 Subject: [PATCH 13/27] IB/IPoIB: Convert IPoIB to memalloc_noio_* calls Commit 21caf2fc1931 ("mm: teach mm by current context info to not do I/O during memory allocation") added the memalloc_noio_(save|restore) functions to enable people to modify the MM behavior by disabling I/O during memory allocation. This was further extended in Fixes: 934f3072c17c ("mm: clear __GFP_FS when PF_MEMALLOC_NOIO is set"). memalloc_noio_* functions prevent allocation paths recursing back into the filesystem without explicitly changing the flags for every allocation site. However the IPoIB hasn't been keeping up with the changes and missed completely these memalloc_noio_* calls. This led to update of allocation site with special QP creation flag, see commit 09b93088d750 ("IB: Add a QP creation flag to use GFP_NOIO allocations"), while this flag is supported by small number of drivers in IB stack. Let's change it by updating to memalloc_noio_* calls and allow for every driver underneath enjoy NOIO allocations. Signed-off-by: Leon Romanovsky Signed-off-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index d574d41bdf61..f87d104837dc 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "ipoib.h" @@ -1047,9 +1048,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ .sq_sig_type = IB_SIGNAL_ALL_WR, .qp_type = IB_QPT_RC, .qp_context = tx, - .create_flags = IB_QP_CREATE_USE_GFP_NOIO + .create_flags = 0 }; - struct ib_qp *tx_qp; if (dev->features & NETIF_F_SG) @@ -1057,10 +1057,6 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1); tx_qp = ib_create_qp(priv->pd, &attr); - if (PTR_ERR(tx_qp) == -EINVAL) { - attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO; - tx_qp = ib_create_qp(priv->pd, &attr); - } tx->max_send_sge = attr.cap.max_send_sge; return tx_qp; } @@ -1131,10 +1127,11 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, struct sa_path_rec *pathrec) { struct ipoib_dev_priv *priv = ipoib_priv(p->dev); + unsigned int noio_flag; int ret; - p->tx_ring = __vmalloc(ipoib_sendq_size * sizeof *p->tx_ring, - GFP_NOIO, PAGE_KERNEL); + noio_flag = memalloc_noio_save(); + p->tx_ring = vzalloc(ipoib_sendq_size * sizeof(*p->tx_ring)); if (!p->tx_ring) { ret = -ENOMEM; goto err_tx; @@ -1142,9 +1139,10 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, memset(p->tx_ring, 0, ipoib_sendq_size * sizeof *p->tx_ring); p->qp = ipoib_cm_create_tx_qp(p->dev, p); + memalloc_noio_restore(noio_flag); if (IS_ERR(p->qp)) { ret = PTR_ERR(p->qp); - ipoib_warn(priv, "failed to allocate tx qp: %d\n", ret); + ipoib_warn(priv, "failed to create tx qp: %d\n", ret); goto err_qp; } From 0f4d027c3b4240ecb314daa948238d459fdc3a00 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 23 May 2017 14:38:14 +0300 Subject: [PATCH 14/27] IB/{rdmavt, qib, hfi1}: Remove gfp flags argument The caller to the driver marks GFP_NOIO allocations with help of memalloc_noio-* calls now. This makes redundant to pass down to the driver gfp flags, which can be GFP_KERNEL only. The patch removes the gfp flags argument and updates all driver paths. Signed-off-by: Leon Romanovsky Signed-off-by: Leon Romanovsky Acked-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/qp.c | 7 ++-- drivers/infiniband/hw/hfi1/qp.h | 3 +- drivers/infiniband/hw/qib/qib_qp.c | 15 ++++----- drivers/infiniband/hw/qib/qib_verbs.h | 4 +-- drivers/infiniband/sw/rdmavt/qp.c | 48 ++++++++------------------- include/rdma/rdma_vt.h | 5 ++- 6 files changed, 28 insertions(+), 54 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 650305cc0373..1a7af9f60c13 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -647,18 +647,17 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) qp->pid); } -void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, - gfp_t gfp) +void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp) { struct hfi1_qp_priv *priv; - priv = kzalloc_node(sizeof(*priv), gfp, rdi->dparms.node); + priv = kzalloc_node(sizeof(*priv), GFP_KERNEL, rdi->dparms.node); if (!priv) return ERR_PTR(-ENOMEM); priv->owner = qp; - priv->s_ahg = kzalloc_node(sizeof(*priv->s_ahg), gfp, + priv->s_ahg = kzalloc_node(sizeof(*priv->s_ahg), GFP_KERNEL, rdi->dparms.node); if (!priv->s_ahg) { kfree(priv); diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h index 1eb9cd7b8c19..6fe542b6a927 100644 --- a/drivers/infiniband/hw/hfi1/qp.h +++ b/drivers/infiniband/hw/hfi1/qp.h @@ -123,8 +123,7 @@ void hfi1_migrate_qp(struct rvt_qp *qp); /* * Functions provided by hfi1 driver for rdmavt to use */ -void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, - gfp_t gfp); +void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp); void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp); unsigned free_all_qps(struct rvt_dev_info *rdi); void notify_qp_reset(struct rvt_qp *qp); diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 5984981e7dd4..a343e3b5d4cb 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -104,10 +104,9 @@ const struct rvt_operation_params qib_post_parms[RVT_OPERATION_MAX] = { }; -static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, - gfp_t gfp) +static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map) { - unsigned long page = get_zeroed_page(gfp); + unsigned long page = get_zeroed_page(GFP_KERNEL); /* * Free the page if someone raced with us installing it. @@ -126,7 +125,7 @@ static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI. */ int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, - enum ib_qp_type type, u8 port, gfp_t gfp) + enum ib_qp_type type, u8 port) { u32 i, offset, max_scan, qpn; struct rvt_qpn_map *map; @@ -160,7 +159,7 @@ int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, max_scan = qpt->nmaps - !offset; for (i = 0;;) { if (unlikely(!map->page)) { - get_map_page(qpt, map, gfp); + get_map_page(qpt, map); if (unlikely(!map->page)) break; } @@ -317,16 +316,16 @@ u32 qib_mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu) return ib_mtu_enum_to_int(pmtu); } -void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp) +void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp) { struct qib_qp_priv *priv; - priv = kzalloc(sizeof(*priv), gfp); + priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return ERR_PTR(-ENOMEM); priv->owner = qp; - priv->s_hdr = kzalloc(sizeof(*priv->s_hdr), gfp); + priv->s_hdr = kzalloc(sizeof(*priv->s_hdr), GFP_KERNEL); if (!priv->s_hdr) { kfree(priv); return ERR_PTR(-ENOMEM); diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index da0db5485ddc..a52fc67b40d7 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -274,11 +274,11 @@ int qib_get_counters(struct qib_pportdata *ppd, * Functions provided by qib driver for rdmavt to use */ unsigned qib_free_all_qps(struct rvt_dev_info *rdi); -void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp); +void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp); void qib_qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp); void qib_notify_qp_reset(struct rvt_qp *qp); int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, - enum ib_qp_type type, u8 port, gfp_t gfp); + enum ib_qp_type type, u8 port); void qib_restart_rc(struct rvt_qp *qp, u32 psn, int wait); #ifdef CONFIG_DEBUG_FS diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 727e81cc2c8f..459865439a0b 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -118,10 +118,9 @@ const int ib_rvt_state_ops[IB_QPS_ERR + 1] = { EXPORT_SYMBOL(ib_rvt_state_ops); static void get_map_page(struct rvt_qpn_table *qpt, - struct rvt_qpn_map *map, - gfp_t gfp) + struct rvt_qpn_map *map) { - unsigned long page = get_zeroed_page(gfp); + unsigned long page = get_zeroed_page(GFP_KERNEL); /* * Free the page if someone raced with us installing it. @@ -173,7 +172,7 @@ static int init_qpn_table(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt) rdi->dparms.qpn_res_start, rdi->dparms.qpn_res_end); for (i = rdi->dparms.qpn_res_start; i <= rdi->dparms.qpn_res_end; i++) { if (!map->page) { - get_map_page(qpt, map, GFP_KERNEL); + get_map_page(qpt, map); if (!map->page) { ret = -ENOMEM; break; @@ -342,14 +341,14 @@ static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, * Return: The queue pair number */ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, - enum ib_qp_type type, u8 port_num, gfp_t gfp) + enum ib_qp_type type, u8 port_num) { u32 i, offset, max_scan, qpn; struct rvt_qpn_map *map; u32 ret; if (rdi->driver_f.alloc_qpn) - return rdi->driver_f.alloc_qpn(rdi, qpt, type, port_num, gfp); + return rdi->driver_f.alloc_qpn(rdi, qpt, type, port_num); if (type == IB_QPT_SMI || type == IB_QPT_GSI) { unsigned n; @@ -374,7 +373,7 @@ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, max_scan = qpt->nmaps - !offset; for (i = 0;;) { if (unlikely(!map->page)) { - get_map_page(qpt, map, gfp); + get_map_page(qpt, map); if (unlikely(!map->page)) break; } @@ -672,7 +671,6 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, struct ib_qp *ret = ERR_PTR(-ENOMEM); struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device); void *priv = NULL; - gfp_t gfp; size_t sqsize; if (!rdi) @@ -680,18 +678,9 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge || init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr || - init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO)) + init_attr->create_flags) return ERR_PTR(-EINVAL); - /* GFP_NOIO is applicable to RC QP's only */ - - if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO && - init_attr->qp_type != IB_QPT_RC) - return ERR_PTR(-EINVAL); - - gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ? - GFP_NOIO : GFP_KERNEL; - /* Check receive queue parameters if no SRQ is specified. */ if (!init_attr->srq) { if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge || @@ -719,14 +708,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, sz = sizeof(struct rvt_sge) * init_attr->cap.max_send_sge + sizeof(struct rvt_swqe); - if (gfp == GFP_NOIO) - swq = __vmalloc( - sqsize * sz, - gfp | __GFP_ZERO, PAGE_KERNEL); - else - swq = vzalloc_node( - sqsize * sz, - rdi->dparms.node); + swq = vzalloc_node(sqsize * sz, rdi->dparms.node); if (!swq) return ERR_PTR(-ENOMEM); @@ -741,7 +723,8 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, } else if (init_attr->cap.max_recv_sge > 1) sg_list_sz = sizeof(*qp->r_sg_list) * (init_attr->cap.max_recv_sge - 1); - qp = kzalloc_node(sz + sg_list_sz, gfp, rdi->dparms.node); + qp = kzalloc_node(sz + sg_list_sz, GFP_KERNEL, + rdi->dparms.node); if (!qp) goto bail_swq; @@ -751,7 +734,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, kzalloc_node( sizeof(*qp->s_ack_queue) * rvt_max_atomic(rdi), - gfp, + GFP_KERNEL, rdi->dparms.node); if (!qp->s_ack_queue) goto bail_qp; @@ -766,7 +749,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, * Driver needs to set up it's private QP structure and do any * initialization that is needed. */ - priv = rdi->driver_f.qp_priv_alloc(rdi, qp, gfp); + priv = rdi->driver_f.qp_priv_alloc(rdi, qp); if (IS_ERR(priv)) { ret = priv; goto bail_qp; @@ -786,11 +769,6 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, qp->r_rq.wq = vmalloc_user( sizeof(struct rvt_rwq) + qp->r_rq.size * sz); - else if (gfp == GFP_NOIO) - qp->r_rq.wq = __vmalloc( - sizeof(struct rvt_rwq) + - qp->r_rq.size * sz, - gfp | __GFP_ZERO, PAGE_KERNEL); else qp->r_rq.wq = vzalloc_node( sizeof(struct rvt_rwq) + @@ -824,7 +802,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table, init_attr->qp_type, - init_attr->port_num, gfp); + init_attr->port_num); if (err < 0) { ret = ERR_PTR(err); goto bail_rq_wq; diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 4878aaf7bdff..55af69271053 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -229,8 +229,7 @@ struct rvt_driver_provided { * ERR_PTR(err). The driver is free to return NULL or a valid * pointer. */ - void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp, - gfp_t gfp); + void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp); /* * Free the driver's private qp structure. @@ -319,7 +318,7 @@ struct rvt_driver_provided { /* Let the driver pick the next queue pair number*/ int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, - enum ib_qp_type type, u8 port_num, gfp_t gfp); + enum ib_qp_type type, u8 port_num); /* Determine if its safe or allowed to modify the qp */ int (*check_modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, From 8900b894e769dd88b53e519e3502e0e3c349fe95 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 23 May 2017 14:38:15 +0300 Subject: [PATCH 15/27] {net, IB}/mlx4: Remove gfp flags argument The caller to the driver marks GFP_NOIO allocations with help of memalloc_noio-* calls now. This makes redundant to pass down to the driver gfp flags, which can be GFP_KERNEL only. The patch removes the gfp flags argument and updates all driver paths. Signed-off-by: Leon Romanovsky Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/cq.c | 6 +-- drivers/infiniband/hw/mlx4/mlx4_ib.h | 1 - drivers/infiniband/hw/mlx4/qp.c | 40 ++++++++----------- drivers/infiniband/hw/mlx4/srq.c | 8 ++-- drivers/net/ethernet/mellanox/mlx4/alloc.c | 29 +++++++------- drivers/net/ethernet/mellanox/mlx4/cq.c | 4 +- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 7 ++-- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 2 +- drivers/net/ethernet/mellanox/mlx4/icm.c | 7 ++-- drivers/net/ethernet/mellanox/mlx4/icm.h | 3 +- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 4 +- drivers/net/ethernet/mellanox/mlx4/mr.c | 17 ++++---- drivers/net/ethernet/mellanox/mlx4/qp.c | 20 +++++----- .../ethernet/mellanox/mlx4/resource_tracker.c | 4 +- drivers/net/ethernet/mellanox/mlx4/srq.c | 4 +- include/linux/mlx4/device.h | 10 ++--- 16 files changed, 76 insertions(+), 90 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 4f5a143fc0a7..ff931c580557 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -102,7 +102,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf * int err; err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size, - PAGE_SIZE * 2, &buf->buf, GFP_KERNEL); + PAGE_SIZE * 2, &buf->buf); if (err) goto out; @@ -113,7 +113,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf * if (err) goto err_buf; - err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf, GFP_KERNEL); + err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf); if (err) goto err_mtt; @@ -219,7 +219,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, uar = &to_mucontext(context)->uar; } else { - err = mlx4_db_alloc(dev->dev, &cq->db, 1, GFP_KERNEL); + err = mlx4_db_alloc(dev->dev, &cq->db, 1); if (err) goto err_cq; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index c2b9cbf4da05..9db82e67e959 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -185,7 +185,6 @@ enum mlx4_ib_qp_flags { MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO, MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK, MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP, - MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO, /* Mellanox specific flags start from IB_QP_CREATE_RESERVED_START */ MLX4_IB_ROCE_V2_GSI_QP = MLX4_IB_QP_CREATE_ROCE_V2_GSI, diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 996e9058e515..75c0e6c5dd56 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -634,8 +634,8 @@ static void mlx4_ib_free_qp_counter(struct mlx4_ib_dev *dev, static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, struct ib_qp_init_attr *init_attr, - struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp, - gfp_t gfp) + struct ib_udata *udata, int sqpn, + struct mlx4_ib_qp **caller_qp) { int qpn; int err; @@ -691,14 +691,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI || (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) { - sqp = kzalloc(sizeof (struct mlx4_ib_sqp), gfp); + sqp = kzalloc(sizeof(struct mlx4_ib_sqp), GFP_KERNEL); if (!sqp) return -ENOMEM; qp = &sqp->qp; qp->pri.vid = 0xFFFF; qp->alt.vid = 0xFFFF; } else { - qp = kzalloc(sizeof (struct mlx4_ib_qp), gfp); + qp = kzalloc(sizeof(struct mlx4_ib_qp), GFP_KERNEL); if (!qp) return -ENOMEM; qp->pri.vid = 0xFFFF; @@ -780,7 +780,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err; if (qp_has_rq(init_attr)) { - err = mlx4_db_alloc(dev->dev, &qp->db, 0, gfp); + err = mlx4_db_alloc(dev->dev, &qp->db, 0); if (err) goto err; @@ -788,7 +788,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, } if (mlx4_buf_alloc(dev->dev, qp->buf_size, qp->buf_size, - &qp->buf, gfp)) { + &qp->buf)) { memcpy(&init_attr->cap, &backup_cap, sizeof(backup_cap)); err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, @@ -797,7 +797,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err_db; if (mlx4_buf_alloc(dev->dev, qp->buf_size, - PAGE_SIZE * 2, &qp->buf, gfp)) { + PAGE_SIZE * 2, &qp->buf)) { err = -ENOMEM; goto err_db; } @@ -808,20 +808,20 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (err) goto err_buf; - err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf, gfp); + err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf); if (err) goto err_mtt; qp->sq.wrid = kmalloc_array(qp->sq.wqe_cnt, sizeof(u64), - gfp | __GFP_NOWARN); + GFP_KERNEL | __GFP_NOWARN); if (!qp->sq.wrid) qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64), - gfp, PAGE_KERNEL); + GFP_KERNEL, PAGE_KERNEL); qp->rq.wrid = kmalloc_array(qp->rq.wqe_cnt, sizeof(u64), - gfp | __GFP_NOWARN); + GFP_KERNEL | __GFP_NOWARN); if (!qp->rq.wrid) qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64), - gfp, PAGE_KERNEL); + GFP_KERNEL, PAGE_KERNEL); if (!qp->sq.wrid || !qp->rq.wrid) { err = -ENOMEM; goto err_wrid; @@ -859,7 +859,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; - err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp, gfp); + err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp); if (err) goto err_qpn; @@ -1127,10 +1127,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, int err; int sup_u_create_flags = MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; u16 xrcdn = 0; - gfp_t gfp; - gfp = (init_attr->create_flags & MLX4_IB_QP_CREATE_USE_GFP_NOIO) ? - GFP_NOIO : GFP_KERNEL; /* * We only support LSO, vendor flag1, and multicast loopback blocking, * and only for kernel UD QPs. @@ -1140,8 +1137,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, MLX4_IB_SRIOV_TUNNEL_QP | MLX4_IB_SRIOV_SQP | MLX4_IB_QP_NETIF | - MLX4_IB_QP_CREATE_ROCE_V2_GSI | - MLX4_IB_QP_CREATE_USE_GFP_NOIO)) + MLX4_IB_QP_CREATE_ROCE_V2_GSI)) return ERR_PTR(-EINVAL); if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) { @@ -1154,7 +1150,6 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, return ERR_PTR(-EINVAL); if ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | - MLX4_IB_QP_CREATE_USE_GFP_NOIO | MLX4_IB_QP_CREATE_ROCE_V2_GSI | MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) && init_attr->qp_type != IB_QPT_UD) || @@ -1179,7 +1174,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, case IB_QPT_RC: case IB_QPT_UC: case IB_QPT_RAW_PACKET: - qp = kzalloc(sizeof *qp, gfp); + qp = kzalloc(sizeof(*qp), GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); qp->pri.vid = 0xFFFF; @@ -1188,7 +1183,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, case IB_QPT_UD: { err = create_qp_common(to_mdev(pd->device), pd, init_attr, - udata, 0, &qp, gfp); + udata, 0, &qp); if (err) { kfree(qp); return ERR_PTR(err); @@ -1217,8 +1212,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, } err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, - sqpn, - &qp, gfp); + sqpn, &qp); if (err) return ERR_PTR(err); diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index e32dd58937a8..0facaf5f6d23 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -135,14 +135,14 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, if (err) goto err_mtt; } else { - err = mlx4_db_alloc(dev->dev, &srq->db, 0, GFP_KERNEL); + err = mlx4_db_alloc(dev->dev, &srq->db, 0); if (err) goto err_srq; *srq->db.db = 0; - if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf, - GFP_KERNEL)) { + if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, + &srq->buf)) { err = -ENOMEM; goto err_db; } @@ -167,7 +167,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, if (err) goto err_buf; - err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf, GFP_KERNEL); + err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf); if (err) goto err_mtt; diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c index 249a4584401a..d94b3744a5b9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c @@ -578,7 +578,7 @@ out: } static int mlx4_buf_direct_alloc(struct mlx4_dev *dev, int size, - struct mlx4_buf *buf, gfp_t gfp) + struct mlx4_buf *buf) { dma_addr_t t; @@ -587,7 +587,7 @@ static int mlx4_buf_direct_alloc(struct mlx4_dev *dev, int size, buf->page_shift = get_order(size) + PAGE_SHIFT; buf->direct.buf = dma_zalloc_coherent(&dev->persist->pdev->dev, - size, &t, gfp); + size, &t, GFP_KERNEL); if (!buf->direct.buf) return -ENOMEM; @@ -607,10 +607,10 @@ static int mlx4_buf_direct_alloc(struct mlx4_dev *dev, int size, * multiple pages, so we don't require too much contiguous memory. */ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, - struct mlx4_buf *buf, gfp_t gfp) + struct mlx4_buf *buf) { if (size <= max_direct) { - return mlx4_buf_direct_alloc(dev, size, buf, gfp); + return mlx4_buf_direct_alloc(dev, size, buf); } else { dma_addr_t t; int i; @@ -620,14 +620,14 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, buf->npages = buf->nbufs; buf->page_shift = PAGE_SHIFT; buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list), - gfp); + GFP_KERNEL); if (!buf->page_list) return -ENOMEM; for (i = 0; i < buf->nbufs; ++i) { buf->page_list[i].buf = dma_zalloc_coherent(&dev->persist->pdev->dev, - PAGE_SIZE, &t, gfp); + PAGE_SIZE, &t, GFP_KERNEL); if (!buf->page_list[i].buf) goto err_free; @@ -663,12 +663,11 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf) } EXPORT_SYMBOL_GPL(mlx4_buf_free); -static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device, - gfp_t gfp) +static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device) { struct mlx4_db_pgdir *pgdir; - pgdir = kzalloc(sizeof *pgdir, gfp); + pgdir = kzalloc(sizeof(*pgdir), GFP_KERNEL); if (!pgdir) return NULL; @@ -676,7 +675,7 @@ static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device, pgdir->bits[0] = pgdir->order0; pgdir->bits[1] = pgdir->order1; pgdir->db_page = dma_alloc_coherent(dma_device, PAGE_SIZE, - &pgdir->db_dma, gfp); + &pgdir->db_dma, GFP_KERNEL); if (!pgdir->db_page) { kfree(pgdir); return NULL; @@ -716,7 +715,7 @@ found: return 0; } -int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, gfp_t gfp) +int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_db_pgdir *pgdir; @@ -728,7 +727,7 @@ int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, gfp_t gfp if (!mlx4_alloc_db_from_pgdir(pgdir, db, order)) goto out; - pgdir = mlx4_alloc_db_pgdir(&dev->persist->pdev->dev, gfp); + pgdir = mlx4_alloc_db_pgdir(&dev->persist->pdev->dev); if (!pgdir) { ret = -ENOMEM; goto out; @@ -780,13 +779,13 @@ int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, { int err; - err = mlx4_db_alloc(dev, &wqres->db, 1, GFP_KERNEL); + err = mlx4_db_alloc(dev, &wqres->db, 1); if (err) return err; *wqres->db.db = 0; - err = mlx4_buf_direct_alloc(dev, size, &wqres->buf, GFP_KERNEL); + err = mlx4_buf_direct_alloc(dev, size, &wqres->buf); if (err) goto err_db; @@ -795,7 +794,7 @@ int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, if (err) goto err_buf; - err = mlx4_buf_write_mtt(dev, &wqres->mtt, &wqres->buf, GFP_KERNEL); + err = mlx4_buf_write_mtt(dev, &wqres->mtt, &wqres->buf); if (err) goto err_mtt; diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index fa6d2354a0e9..c56a511b918e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -224,11 +224,11 @@ int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn) if (*cqn == -1) return -ENOMEM; - err = mlx4_table_get(dev, &cq_table->table, *cqn, GFP_KERNEL); + err = mlx4_table_get(dev, &cq_table->table, *cqn); if (err) goto err_out; - err = mlx4_table_get(dev, &cq_table->cmpt_table, *cqn, GFP_KERNEL); + err = mlx4_table_get(dev, &cq_table->cmpt_table, *cqn); if (err) goto err_put; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index e5fb89505a13..436f7689a032 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -1042,7 +1042,7 @@ static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn, if (!context) return -ENOMEM; - err = mlx4_qp_alloc(mdev->dev, qpn, qp, GFP_KERNEL); + err = mlx4_qp_alloc(mdev->dev, qpn, qp); if (err) { en_err(priv, "Failed to allocate qp #%x\n", qpn); goto out; @@ -1086,7 +1086,7 @@ int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv) en_err(priv, "Failed reserving drop qpn\n"); return err; } - err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp, GFP_KERNEL); + err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp); if (err) { en_err(priv, "Failed allocating drop qp\n"); mlx4_qp_release_range(priv->mdev->dev, qpn, 1); @@ -1158,8 +1158,7 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) } /* Configure RSS indirection qp */ - err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, rss_map->indir_qp, - GFP_KERNEL); + err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, rss_map->indir_qp); if (err) { en_err(priv, "Failed to allocate RSS indirection QP\n"); goto rss_err; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 4f3a9b27ce4a..73faa3d77921 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -111,7 +111,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, goto err_hwq_res; } - err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->sp_qp, GFP_KERNEL); + err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->sp_qp); if (err) { en_err(priv, "Failed allocating qp %d\n", ring->qpn); goto err_reserve; diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c index e1f9e7cebf8f..5a7816e7c7b4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.c +++ b/drivers/net/ethernet/mellanox/mlx4/icm.c @@ -251,8 +251,7 @@ int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev) MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); } -int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj, - gfp_t gfp) +int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj) { u32 i = (obj & (table->num_obj - 1)) / (MLX4_TABLE_CHUNK_SIZE / table->obj_size); @@ -266,7 +265,7 @@ int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj, } table->icm[i] = mlx4_alloc_icm(dev, MLX4_TABLE_CHUNK_SIZE >> PAGE_SHIFT, - (table->lowmem ? gfp : GFP_HIGHUSER) | + (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) | __GFP_NOWARN, table->coherent); if (!table->icm[i]) { ret = -ENOMEM; @@ -363,7 +362,7 @@ int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 i; for (i = start; i <= end; i += inc) { - err = mlx4_table_get(dev, table, i, GFP_KERNEL); + err = mlx4_table_get(dev, table, i); if (err) goto fail; } diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.h b/drivers/net/ethernet/mellanox/mlx4/icm.h index 0c7364550150..dee67fa39107 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.h +++ b/drivers/net/ethernet/mellanox/mlx4/icm.h @@ -71,8 +71,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, gfp_t gfp_mask, int coherent); void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent); -int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj, - gfp_t gfp); +int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj); void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj); int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 start, u32 end); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 30616cd0140d..706d7f21ac5c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -969,7 +969,7 @@ void mlx4_cleanup_cq_table(struct mlx4_dev *dev); void mlx4_cleanup_qp_table(struct mlx4_dev *dev); void mlx4_cleanup_srq_table(struct mlx4_dev *dev); void mlx4_cleanup_mcg_table(struct mlx4_dev *dev); -int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp); +int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn); void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn); int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn); void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn); @@ -977,7 +977,7 @@ int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn); void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn); int __mlx4_mpt_reserve(struct mlx4_dev *dev); void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index); -int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp); +int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index); void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index); u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order); void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 first_seg, int order); diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index ce852ca22a96..24282cd017d3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -479,14 +479,14 @@ static void mlx4_mpt_release(struct mlx4_dev *dev, u32 index) __mlx4_mpt_release(dev, index); } -int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp) +int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index) { struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table; - return mlx4_table_get(dev, &mr_table->dmpt_table, index, gfp); + return mlx4_table_get(dev, &mr_table->dmpt_table, index); } -static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp) +static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index) { u64 param = 0; @@ -497,7 +497,7 @@ static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp) MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } - return __mlx4_mpt_alloc_icm(dev, index, gfp); + return __mlx4_mpt_alloc_icm(dev, index); } void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index) @@ -629,7 +629,7 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr) struct mlx4_mpt_entry *mpt_entry; int err; - err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mr->key), GFP_KERNEL); + err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mr->key)); if (err) return err; @@ -787,14 +787,13 @@ int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, EXPORT_SYMBOL_GPL(mlx4_write_mtt); int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, - struct mlx4_buf *buf, gfp_t gfp) + struct mlx4_buf *buf) { u64 *page_list; int err; int i; - page_list = kmalloc(buf->npages * sizeof *page_list, - gfp); + page_list = kcalloc(buf->npages, sizeof(*page_list), GFP_KERNEL); if (!page_list) return -ENOMEM; @@ -841,7 +840,7 @@ int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw) struct mlx4_mpt_entry *mpt_entry; int err; - err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key), GFP_KERNEL); + err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key)); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 5a310d313e94..26747212526b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -301,29 +301,29 @@ void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt) } EXPORT_SYMBOL_GPL(mlx4_qp_release_range); -int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp) +int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_qp_table *qp_table = &priv->qp_table; int err; - err = mlx4_table_get(dev, &qp_table->qp_table, qpn, gfp); + err = mlx4_table_get(dev, &qp_table->qp_table, qpn); if (err) goto err_out; - err = mlx4_table_get(dev, &qp_table->auxc_table, qpn, gfp); + err = mlx4_table_get(dev, &qp_table->auxc_table, qpn); if (err) goto err_put_qp; - err = mlx4_table_get(dev, &qp_table->altc_table, qpn, gfp); + err = mlx4_table_get(dev, &qp_table->altc_table, qpn); if (err) goto err_put_auxc; - err = mlx4_table_get(dev, &qp_table->rdmarc_table, qpn, gfp); + err = mlx4_table_get(dev, &qp_table->rdmarc_table, qpn); if (err) goto err_put_altc; - err = mlx4_table_get(dev, &qp_table->cmpt_table, qpn, gfp); + err = mlx4_table_get(dev, &qp_table->cmpt_table, qpn); if (err) goto err_put_rdmarc; @@ -345,7 +345,7 @@ err_out: return err; } -static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp) +static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn) { u64 param = 0; @@ -355,7 +355,7 @@ static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp) MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } - return __mlx4_qp_alloc_icm(dev, qpn, gfp); + return __mlx4_qp_alloc_icm(dev, qpn); } void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn) @@ -397,7 +397,7 @@ struct mlx4_qp *mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn) return qp; } -int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp, gfp_t gfp) +int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_qp_table *qp_table = &priv->qp_table; @@ -408,7 +408,7 @@ int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp, gfp_t gfp) qp->qpn = qpn; - err = mlx4_qp_alloc_icm(dev, qpn, gfp); + err = mlx4_qp_alloc_icm(dev, qpn); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 812783865205..215e21c3dc8a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -1822,7 +1822,7 @@ static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, return err; if (!fw_reserved(dev, qpn)) { - err = __mlx4_qp_alloc_icm(dev, qpn, GFP_KERNEL); + err = __mlx4_qp_alloc_icm(dev, qpn); if (err) { res_abort_move(dev, slave, RES_QP, qpn); return err; @@ -1909,7 +1909,7 @@ static int mpt_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, if (err) return err; - err = __mlx4_mpt_alloc_icm(dev, mpt->key, GFP_KERNEL); + err = __mlx4_mpt_alloc_icm(dev, mpt->key); if (err) { res_abort_move(dev, slave, RES_MPT, id); return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c index f44d089e2ca6..bedf52126824 100644 --- a/drivers/net/ethernet/mellanox/mlx4/srq.c +++ b/drivers/net/ethernet/mellanox/mlx4/srq.c @@ -100,11 +100,11 @@ int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn) if (*srqn == -1) return -ENOMEM; - err = mlx4_table_get(dev, &srq_table->table, *srqn, GFP_KERNEL); + err = mlx4_table_get(dev, &srq_table->table, *srqn); if (err) goto err_out; - err = mlx4_table_get(dev, &srq_table->cmpt_table, *srqn, GFP_KERNEL); + err = mlx4_table_get(dev, &srq_table->cmpt_table, *srqn); if (err) goto err_put; return 0; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index d5bed0875d30..aad5d81dfb44 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1068,7 +1068,7 @@ static inline int mlx4_is_eth(struct mlx4_dev *dev, int port) } int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, - struct mlx4_buf *buf, gfp_t gfp); + struct mlx4_buf *buf); void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf); static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset) { @@ -1105,10 +1105,9 @@ int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw); int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, int start_index, int npages, u64 *page_list); int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, - struct mlx4_buf *buf, gfp_t gfp); + struct mlx4_buf *buf); -int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, - gfp_t gfp); +int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order); void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db); int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, @@ -1124,8 +1123,7 @@ int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base, u8 flags); void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt); -int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp, - gfp_t gfp); +int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp); void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp); int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcdn, From 7855f5842741e5835b9be073079780c444dca898 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 23 May 2017 14:38:16 +0300 Subject: [PATCH 16/27] IB/core: Remove NOIO QP create flag There are no users for IB_QP_CREATE_USE_GFP_NOIO flag, so let's remove it. Signed-off-by: Leon Romanovsky Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 6e62c9e2c971..b5732432bb29 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1056,7 +1056,7 @@ enum ib_qp_create_flags { IB_QP_CREATE_MANAGED_RECV = 1 << 4, IB_QP_CREATE_NETIF_QP = 1 << 5, IB_QP_CREATE_SIGNATURE_EN = 1 << 6, - IB_QP_CREATE_USE_GFP_NOIO = 1 << 7, + /* FREE = 1 << 7, */ IB_QP_CREATE_SCATTER_FCS = 1 << 8, IB_QP_CREATE_CVLAN_STRIPPING = 1 << 9, /* reserve bits 26-31 for low level drivers' internal use */ From 12cc1a027341338f54d8d3fcf5d188ae2b39c30d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 30 May 2017 09:44:48 +0300 Subject: [PATCH 17/27] IB/mlx5: Clean mr_cache debugfs in case of failure The failure in creation of debugfs entries for mr_cache left entries, which were already created. It caused to mismatch and misguiding for the end users. The solution is to clean mr_cache debugfs root, so no leftovers will be in the system. In addition, let's document why the error is not needed to be forwarded to user in case of failure. Signed-off-by: Leon Romanovsky Reviewed-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mr.c | 34 +++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 2046a6987453..8ab2f1360a45 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -582,6 +582,15 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) } } +static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) +{ + if (!mlx5_debugfs_root) + return; + + debugfs_remove_recursive(dev->cache.root); + dev->cache.root = NULL; +} + static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev) { struct mlx5_mr_cache *cache = &dev->cache; @@ -600,38 +609,34 @@ static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev) sprintf(ent->name, "%d", ent->order); ent->dir = debugfs_create_dir(ent->name, cache->root); if (!ent->dir) - return -ENOMEM; + goto err; ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent, &size_fops); if (!ent->fsize) - return -ENOMEM; + goto err; ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent, &limit_fops); if (!ent->flimit) - return -ENOMEM; + goto err; ent->fcur = debugfs_create_u32("cur", 0400, ent->dir, &ent->cur); if (!ent->fcur) - return -ENOMEM; + goto err; ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir, &ent->miss); if (!ent->fmiss) - return -ENOMEM; + goto err; } return 0; -} +err: + mlx5_mr_cache_debugfs_cleanup(dev); -static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) -{ - if (!mlx5_debugfs_root) - return; - - debugfs_remove_recursive(dev->cache.root); + return -ENOMEM; } static void delay_time_func(unsigned long ctx) @@ -692,6 +697,11 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) if (err) mlx5_ib_warn(dev, "cache debugfs failure\n"); + /* + * We don't want to fail driver if debugfs failed to initialize, + * so we are not forwarding error to the user. + */ + return 0; } From 8fe8bacb92f249c91a1407b48aa1cb98067fe19d Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Tue, 30 May 2017 09:58:06 +0300 Subject: [PATCH 18/27] IB/core: Add ordered workqueue for RoCE GID management Currently the RoCE GID management uses the ib_wq to do add and delete new GIDs according to the netdev events. The ib_wq isn't an ordered workqueue and thus two work elements can be executed concurrently which will result in unexpected behavior and inconsistency of the GIDs cache content. Example: ifconfig eth1 11.11.11.11/16 up This command will invoke the following netdev events in the following order: 1. NETDEV_UP 2. NETDEV_DOWN 3. NETDEV_UP If (2) and (3) will be executed concurrently or in reverse order, instead of having a new GID with 11.11.11.11 IP, we will end up without any new GIDs. Signed-off-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/core/roce_gid_mgmt.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index db958d3207ef..94a9eefb3cfc 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -42,6 +42,8 @@ #include #include +static struct workqueue_struct *gid_cache_wq; + enum gid_op_type { GID_DEL = 0, GID_ADD @@ -560,7 +562,7 @@ static int netdevice_queue_work(struct netdev_event_work_cmd *cmds, } INIT_WORK(&ndev_work->work, netdevice_event_work_handler); - queue_work(ib_wq, &ndev_work->work); + queue_work(gid_cache_wq, &ndev_work->work); return NOTIFY_DONE; } @@ -693,7 +695,7 @@ static int addr_event(struct notifier_block *this, unsigned long event, dev_hold(ndev); work->gid_attr.ndev = ndev; - queue_work(ib_wq, &work->work); + queue_work(gid_cache_wq, &work->work); return NOTIFY_DONE; } @@ -740,6 +742,10 @@ static struct notifier_block nb_inet6addr = { int __init roce_gid_mgmt_init(void) { + gid_cache_wq = alloc_ordered_workqueue("gid-cache-wq", 0); + if (!gid_cache_wq) + return -ENOMEM; + register_inetaddr_notifier(&nb_inetaddr); if (IS_ENABLED(CONFIG_IPV6)) register_inet6addr_notifier(&nb_inet6addr); @@ -764,4 +770,5 @@ void __exit roce_gid_mgmt_cleanup(void) * ib-core is removed, all physical devices have been removed, * so no issue with remaining hardware contexts. */ + destroy_workqueue(gid_cache_wq); } From b6c871e5875798e5ed3744c725622dcd3c92be92 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Mon, 12 Jun 2017 10:45:21 +0300 Subject: [PATCH 19/27] IB/ipoib: Let lower driver handle get_stats64 call The driver checks if the lower level driver supports get_stats, and if so calls it to get the updated statistics, otherwise takes from the current netdevice stats object. Signed-off-by: Erez Shitrit Reviewed-by: Alex Vesker Signed-off-by: Leon Romanovsky Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 3e2b7988ead8..70dacaf9044e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -276,6 +276,17 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) return ret; } +static void ipoib_get_stats(struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + + if (priv->rn_ops->ndo_get_stats64) + priv->rn_ops->ndo_get_stats64(dev, stats); + else + netdev_stats_to_stats64(stats, &dev->stats); +} + /* Called with an RCU read lock taken */ static bool ipoib_is_dev_match_addr_rcu(const struct sockaddr *addr, struct net_device *dev) @@ -1823,6 +1834,7 @@ static const struct net_device_ops ipoib_netdev_ops_pf = { .ndo_get_vf_stats = ipoib_get_vf_stats, .ndo_set_vf_guid = ipoib_set_vf_guid, .ndo_set_mac_address = ipoib_set_mac, + .ndo_get_stats64 = ipoib_get_stats, }; static const struct net_device_ops ipoib_netdev_ops_vf = { From fda85ce912401750e1e80757627af2784c7cc5a7 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Thu, 22 Jun 2017 17:09:59 +0300 Subject: [PATCH 20/27] IB/rxe: Fix kernel panic from skb destructor In the time between rxe_send has finished and skb destructor called, the QP's ref count might be 0, leading to a possible QP destruction. This will lead to a kernel panic when the destructor dereferences the QP. The operation of incrementing QP ref count at rxe_send and decrementing from skb destructor will prevent this crash. BUG: unable to handle kernel NULL pointer dereference at 000000000000072c IP: [] rxe_skb_tx_dtor+0x15/0x50 [rdma_rxe] PGD 0 [16240.211178] Oops: 0002 [#1] SMP CPU: 3 PID: 0 Comm: swapper/3 Tainted: G OE 4.9.0-mlnx #1 Hardware name: Red Hat KVM, BIOS Bochs 01/01/2011 task: ffff88042d6b1480 task.stack: ffffc90001904000 RIP: 0010:[] [] rxe_skb_tx_dtor+0x15/0x50 [rdma_rxe] RSP: 0018:ffff88043fcc3df0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff880429684700 RCX: ffff88042d248200 RDX: 00000000ffffffff RSI: 00000000fffffe01 RDI: ffff880429684700 RBP: ffff88043fcc3e00 R08: ffff88043fcda240 R09: 00000000ff2d1de6 R10: 0000000000000000 R11: 00000000f49cf6fe R12: ffff880429684700 R13: ffffffff81893f96 R14: ffffffff817d66f0 R15: ffff880427f74200 FS: 0000000000000000(0000) GS:ffff88043fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000000072c CR3: 000000041d3df000 CR4: 00000000000006e0 Stack: ffffffff817b29cf ffff880429684700 ffff88043fcc3e18 ffffffff817b42c2 ffff880429684700 ffff88043fcc3e40 ffffffff817b4332 ffff880429684700 ffff880427f74238 ffff880427f74228 ffff88043fcc3e58 ffffffff81893f96 Call Trace: [16240.336345] [] ? skb_release_head_state+0x4f/0xb0 [] skb_release_all+0x12/0x30 [] kfree_skb+0x32/0x90 [] ndisc_error_report+0x36/0x40 [] neigh_invalidate+0x81/0xf0 [] neigh_timer_handler+0x207/0x2b0 [] call_timer_fn+0x35/0x120 [] run_timer_softirq+0x1d7/0x460 [] ? kvm_sched_clock_read+0x1e/0x30 [] ? sched_clock+0x9/0x10 [] ? sched_clock_cpu+0x72/0xa0 [] __do_softirq+0xd7/0x289 [] irq_exit+0xb5/0xc0 [] smp_apic_timer_interrupt+0x42/0x50 [] apic_timer_interrupt+0x82/0x90 [16240.395776] [] ? native_safe_halt+0x6/0x10 [] default_idle+0x1e/0xd0 [] arch_cpu_idle+0xf/0x20 [] default_idle_call+0x35/0x40 [] cpu_startup_entry+0x185/0x210 [] start_secondary+0x103/0x130 RIP [] rxe_skb_tx_dtor+0x15/0x50 [rdma_rxe] Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Reviewed-by: Johannes Thumshirn Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_net.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index c3a140ed4df2..08f3f90d2912 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -441,6 +441,8 @@ static void rxe_skb_tx_dtor(struct sk_buff *skb) if (unlikely(qp->need_req_skb && skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW)) rxe_run_task(&qp->req.task, 1); + + rxe_drop_ref(qp); } int rxe_send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct sk_buff *skb) @@ -473,6 +475,7 @@ int rxe_send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct sk_buff *skb) return -EAGAIN; } + rxe_add_ref(pkt->qp); atomic_inc(&pkt->qp->skb_out); kfree_skb(skb); From 56012e1cada54460f9e456cd77276e765e06ce6c Mon Sep 17 00:00:00 2001 From: yonatanc Date: Thu, 22 Jun 2017 17:10:00 +0300 Subject: [PATCH 21/27] IB/rxe: Set dma_mask and coherent_dma_mask The RXE coupled with dummy device causes to the kernel panic attached below. The panic happens when ib_register_device tries to set dma_mask by accessing a NULLed parent device. The RXE does not actually use DMA, so we can set the dma_mask to architecture value. [16240.199689] RIP: 0010:ib_register_device+0x468/0x5a0 [ib_core] [16240.205289] RSP: 0018:ffffc9000220fc10 EFLAGS: 00010246 [16240.209909] RAX: 0000000000000024 RBX: ffff880220d1a2a8 RCX: 0000000000000000 [16240.212244] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000009 [16240.214385] RBP: ffffc9000220fcb0 R08: 0000000000000000 R09: 000000000000023f [16240.254465] R10: 0000000000000007 R11: 0000000000000000 R12: 0000000000000000 [16240.259467] R13: 0000000000000000 R14: 0000000000000000 R15: ffff880220d1a2a8 [16240.263314] FS: 00007fd8ecca0740(0000) GS:ffff8802364c0000(0000) knlGS:0000000000000000 [16240.267292] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [16240.273503] CR2: 0000000000000218 CR3: 00000002253ba000 CR4: 00000000000006e0 [16240.277066] Call Trace: [16240.281836] ? __kmalloc+0x26f/0x280 [16240.286596] rxe_register_device+0x297/0x300 [rdma_rxe] [16240.291377] rxe_add+0x535/0x5b0 [rdma_rxe] [16240.297586] rxe_net_add+0x3e/0xc0 [rdma_rxe] [16240.302375] rxe_param_set_add+0x65/0x144 [rdma_rxe] [16240.307769] param_attr_store+0x68/0xd0 [16240.311640] module_attr_store+0x1d/0x30 [16240.316421] sysfs_kf_write+0x3a/0x50 [16240.317802] kernfs_fop_write+0xff/0x180 [16240.322989] __vfs_write+0x37/0x140 [16240.328164] ? handle_mm_fault+0xce/0x240 [16240.333340] vfs_write+0xb2/0x1b0 [16240.335013] SyS_write+0x55/0xc0 [16240.340632] entry_SYSCALL_64_fastpath+0x1a/0xa9 Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Reviewed-by: Johannes Thumshirn Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_verbs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 073e66783f1d..07511718d98d 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1240,6 +1240,8 @@ int rxe_register_device(struct rxe_dev *rxe) addrconf_addr_eui48((unsigned char *)&dev->node_guid, rxe->ndev->dev_addr); dev->dev.dma_ops = &dma_virt_ops; + dma_coerce_mask_and_coherent(&dev->dev, + dma_get_required_mask(dev->dev.parent)); dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION; dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) From 5802883d4b7c544012a1857660f78af41f6c183a Mon Sep 17 00:00:00 2001 From: oulijun Date: Sat, 10 Jun 2017 18:49:21 +0800 Subject: [PATCH 22/27] IB/hns: Fix the bug of polling cq failed for loopback Qps In hip06 SoC, RoCE driver creates 8 reserved loopback QPs to ensure zero wqe when free mr. However, if the enabled phy port number is less than 6, it will fail in polling cqe with 8 reserved loopback QPs. In order to solve this problem, the number of loopback Qps will be adjusted based on the number of enabled phy port. Signed-off-by: Shaobo Xu Signed-off-by: Lijun Ou Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 53 ++++++++++++++-------- 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 729f8cc8738b..c291b2a4c7cf 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -661,9 +661,11 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) union ib_gid dgid; u64 subnet_prefix; int attr_mask = 0; - int i; + int i, j; int ret; + u8 queue_en[HNS_ROCE_V1_RESV_QP] = { 0 }; u8 phy_port; + u8 port = 0; u8 sl; priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; @@ -709,11 +711,27 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) attr.rnr_retry = 7; attr.timeout = 0x12; attr.path_mtu = IB_MTU_256; + attr.ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; rdma_ah_set_grh(&attr.ah_attr, NULL, 0, 0, 1, 0); rdma_ah_set_static_rate(&attr.ah_attr, 3); subnet_prefix = cpu_to_be64(0xfe80000000000000LL); for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) { + phy_port = (i >= HNS_ROCE_MAX_PORTS) ? (i - 2) : + (i % HNS_ROCE_MAX_PORTS); + sl = i / HNS_ROCE_MAX_PORTS; + + for (j = 0; j < caps->num_ports; j++) { + if (hr_dev->iboe.phy_port[j] == phy_port) { + queue_en[i] = 1; + port = j; + break; + } + } + + if (!queue_en[i]) + continue; + free_mr->mr_free_qp[i] = hns_roce_v1_create_lp_qp(hr_dev, pd); if (IS_ERR(free_mr->mr_free_qp[i])) { dev_err(dev, "Create loop qp failed!\n"); @@ -721,15 +739,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) } hr_qp = free_mr->mr_free_qp[i]; - sl = i / caps->num_ports; - - if (caps->num_ports == HNS_ROCE_MAX_PORTS) - phy_port = (i >= HNS_ROCE_MAX_PORTS) ? (i - 2) : - (i % caps->num_ports); - else - phy_port = i % caps->num_ports; - - hr_qp->port = phy_port + 1; + hr_qp->port = port; hr_qp->phy_port = phy_port; hr_qp->ibqp.qp_type = IB_QPT_RC; hr_qp->ibqp.device = &hr_dev->ib_dev; @@ -739,23 +749,22 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) hr_qp->ibqp.recv_cq = cq; hr_qp->ibqp.send_cq = cq; - rdma_ah_set_port_num(&attr.ah_attr, phy_port + 1); - rdma_ah_set_sl(&attr.ah_attr, phy_port + 1); - attr.port_num = phy_port + 1; + rdma_ah_set_port_num(&attr.ah_attr, port + 1); + rdma_ah_set_sl(&attr.ah_attr, sl); + attr.port_num = port + 1; attr.dest_qp_num = hr_qp->qpn; memcpy(rdma_ah_retrieve_dmac(&attr.ah_attr), - hr_dev->dev_addr[phy_port], + hr_dev->dev_addr[port], MAC_ADDR_OCTET_NUM); memcpy(&dgid.raw, &subnet_prefix, sizeof(u64)); - memcpy(&dgid.raw[8], hr_dev->dev_addr[phy_port], 3); - memcpy(&dgid.raw[13], hr_dev->dev_addr[phy_port] + 3, 3); + memcpy(&dgid.raw[8], hr_dev->dev_addr[port], 3); + memcpy(&dgid.raw[13], hr_dev->dev_addr[port] + 3, 3); dgid.raw[11] = 0xff; dgid.raw[12] = 0xfe; dgid.raw[8] ^= 2; rdma_ah_set_dgid_raw(&attr.ah_attr, dgid.raw); - attr_mask |= IB_QP_PORT; ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, attr_mask, IB_QPS_RESET, IB_QPS_INIT); @@ -812,6 +821,9 @@ static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev) for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) { hr_qp = free_mr->mr_free_qp[i]; + if (!hr_qp) + continue; + ret = hns_roce_v1_destroy_qp(&hr_qp->ibqp); if (ret) dev_err(dev, "Destroy qp %d for mr free failed(%d)!\n", @@ -963,7 +975,7 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work) msecs_to_jiffies(HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS) + jiffies; int i; int ret; - int ne; + int ne = 0; mr_work = container_of(work, struct hns_roce_mr_free_work, work); hr_mr = (struct hns_roce_mr *)mr_work->mr; @@ -976,6 +988,10 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work) for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) { hr_qp = free_mr->mr_free_qp[i]; + if (!hr_qp) + continue; + ne++; + ret = hns_roce_v1_send_lp_wqe(hr_qp); if (ret) { dev_err(dev, @@ -985,7 +1001,6 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work) } } - ne = HNS_ROCE_V1_RESV_QP; do { ret = hns_roce_v1_poll_cq(&mr_free_cq->ib_cq, ne, wc); if (ret < 0) { From 58c4f0d85f59c458074f016c13991c0a81105180 Mon Sep 17 00:00:00 2001 From: oulijun Date: Sat, 10 Jun 2017 18:49:22 +0800 Subject: [PATCH 23/27] IB/hns: Fix the bug with wild pointer when destroy rc qp When destroyed rc qp, the hr_qp will be used after freed. This patch will fix it. Signed-off-by: Lijun Ou Reported-by: Dan Carpenter Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index c291b2a4c7cf..2fe353001b04 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -3657,6 +3657,7 @@ static void hns_roce_v1_destroy_qp_work_fn(struct work_struct *work) struct hns_roce_dev *hr_dev; struct hns_roce_qp *hr_qp; struct device *dev; + unsigned long qpn; int ret; qp_work_entry = container_of(work, struct hns_roce_qp_work, work); @@ -3664,8 +3665,9 @@ static void hns_roce_v1_destroy_qp_work_fn(struct work_struct *work) dev = &hr_dev->pdev->dev; priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; hr_qp = qp_work_entry->qp; + qpn = hr_qp->qpn; - dev_dbg(dev, "Schedule destroy QP(0x%lx) work.\n", hr_qp->qpn); + dev_dbg(dev, "Schedule destroy QP(0x%lx) work.\n", qpn); qp_work_entry->sche_cnt++; @@ -3676,7 +3678,7 @@ static void hns_roce_v1_destroy_qp_work_fn(struct work_struct *work) &qp_work_entry->db_wait_stage); if (ret) { dev_err(dev, "Check QP(0x%lx) db process status failed!\n", - hr_qp->qpn); + qpn); return; } @@ -3690,7 +3692,7 @@ static void hns_roce_v1_destroy_qp_work_fn(struct work_struct *work) ret = hns_roce_v1_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, IB_QPS_RESET); if (ret) { - dev_err(dev, "Modify QP(0x%lx) to RST failed!\n", hr_qp->qpn); + dev_err(dev, "Modify QP(0x%lx) to RST failed!\n", qpn); return; } @@ -3699,14 +3701,14 @@ static void hns_roce_v1_destroy_qp_work_fn(struct work_struct *work) if (hr_qp->ibqp.qp_type == IB_QPT_RC) { /* RC QP, release QPN */ - hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1); + hns_roce_release_range_qp(hr_dev, qpn, 1); kfree(hr_qp); } else kfree(hr_to_hr_sqp(hr_qp)); kfree(qp_work_entry); - dev_dbg(dev, "Accomplished destroy QP(0x%lx) work.\n", hr_qp->qpn); + dev_dbg(dev, "Accomplished destroy QP(0x%lx) work.\n", qpn); } int hns_roce_v1_destroy_qp(struct ib_qp *ibqp) From 9de61d3fcdde06087f65b4022a1a966c10ab5803 Mon Sep 17 00:00:00 2001 From: oulijun Date: Sat, 10 Jun 2017 18:49:23 +0800 Subject: [PATCH 24/27] IB/hns: Fix the bug with rdma operation When opcode of work request is RDMA read and write, it should use rdma_wr to get remote_addr and rkey. This patch fixes it. Signed-off-by: Lijun Ou Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 2fe353001b04..c42e883a18a8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -228,14 +228,14 @@ int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_RDMA_READ: ps_opcode = HNS_ROCE_WQE_OPCODE_RDMA_READ; - set_raddr_seg(wqe, atomic_wr(wr)->remote_addr, - atomic_wr(wr)->rkey); + set_raddr_seg(wqe, rdma_wr(wr)->remote_addr, + rdma_wr(wr)->rkey); break; case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: ps_opcode = HNS_ROCE_WQE_OPCODE_RDMA_WRITE; - set_raddr_seg(wqe, atomic_wr(wr)->remote_addr, - atomic_wr(wr)->rkey); + set_raddr_seg(wqe, rdma_wr(wr)->remote_addr, + rdma_wr(wr)->rkey); break; case IB_WR_SEND: case IB_WR_SEND_WITH_INV: From d322f004aaa647a5dc9dcddfe5ab1bff1e92f634 Mon Sep 17 00:00:00 2001 From: oulijun Date: Sat, 10 Jun 2017 18:49:24 +0800 Subject: [PATCH 25/27] IB/hns: Fix the bug with modifying the MAC address without removing the driver When modified the MAC address used hns_roce_mac function, we release and create reserved qp again, It is not necessary to use spin_lock_bh and spin_unlock_bh in handle_en_event, Otherwise, it will occur a error. This patch mainly fixes it. Signed-off-by: Lijun Ou Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_main.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index c3b41f95e70a..d9777b662eba 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -125,8 +125,6 @@ static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port, return -ENODEV; } - spin_lock_bh(&hr_dev->iboe.lock); - switch (event) { case NETDEV_UP: case NETDEV_CHANGE: @@ -144,7 +142,6 @@ static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port, break; } - spin_unlock_bh(&hr_dev->iboe.lock); return 0; } From 5f110ac4bed8693adb21146067149a48c2b9bd07 Mon Sep 17 00:00:00 2001 From: oulijun Date: Sat, 10 Jun 2017 18:49:25 +0800 Subject: [PATCH 26/27] IB/hns: Fix for checkpatch.pl comment style warnings This patch correct the comment style warnings caught by checkpatch.pl script. Signed-off-by: Lijun Ou Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index c42e883a18a8..23fad6d96944 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -2197,7 +2197,7 @@ static int hns_roce_v1_poll_one(struct hns_roce_cq *hr_cq, } wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; ++wq->tail; - } else { + } else { /* RQ conrespond to CQE */ wc->byte_len = le32_to_cpu(cqe->byte_cnt); opcode = roce_get_field(cqe->cqe_byte_4, @@ -3549,10 +3549,12 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev, old_cnt = roce_get_field(old_send, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S); - if (cur_cnt - old_cnt > SDB_ST_CMP_VAL) + if (cur_cnt - old_cnt > + SDB_ST_CMP_VAL) { success_flags = 1; - else { - send_ptr = roce_get_field(old_send, + } else { + send_ptr = + roce_get_field(old_send, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + roce_get_field(sdb_retry_cnt, From ebc9ca43e1d52a85c72fc2d343f353386ed6c188 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Mon, 29 May 2017 17:20:53 -0700 Subject: [PATCH 27/27] IB/core: Allow QP state transition from reset to error Playing with IP-O-IB interface can trigger a warning message: "ib0: Failed to modify QP to ERROR state" to be logged. This happens when the QP is in IB_QPS_RESET state and the stack is trying to transition it to IB_QPS_ERR state in ipoib_ib_dev_stop(). According to the IB spec, Table 91 - "QP State Transition Properties" it looks like the transition from reset to error is valid: Transition: Any State to Error Required Attributes: None Optional Attributes: None allowed Actions: Queue processing is stopped. Work Requests pending or in process are completed in error, when possible. This patch allows the transition and quiets the message. Reviewed-by: Dennis Dalessandro Signed-off-by: Tadeusz Struk Signed-off-by: Dennis Dalessandro Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 7f8fe443df46..fb98ed67d5bc 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -895,6 +895,7 @@ static const struct { } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { [IB_QPS_RESET] = { [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, [IB_QPS_INIT] = { .valid = 1, .req_param = {