942c9b6ca8
Hitting the following hardlockup due to a race condition in error CQE processing. [26146.879798] bnxt_en 0000:04:00.0: QPLIB: FP: CQ Processed Req [26146.886346] bnxt_en 0000:04:00.0: QPLIB: wr_id[1251] = 0x0 with status 0xa [26156.350935] NMI watchdog: Watchdog detected hard LOCKUP on cpu 4 [26156.357470] Modules linked in: nfsd auth_rpcgss nfs_acl lockd grace [26156.447957] CPU: 4 PID: 3413 Comm: kworker/4:1H Kdump: loaded [26156.457994] Hardware name: Dell Inc. PowerEdge R430/0CN7X8, [26156.466390] Workqueue: ib-comp-wq ib_cq_poll_work [ib_core] [26156.472639] Call Trace: [26156.475379] <NMI> [<ffffffff98d0d722>] dump_stack+0x19/0x1b [26156.481833] [<ffffffff9873f775>] watchdog_overflow_callback+0x135/0x140 [26156.489341] [<ffffffff9877f237>] __perf_event_overflow+0x57/0x100 [26156.496256] [<ffffffff98787c24>] perf_event_overflow+0x14/0x20 [26156.502887] [<ffffffff9860a580>] intel_pmu_handle_irq+0x220/0x510 [26156.509813] [<ffffffff98d16031>] perf_event_nmi_handler+0x31/0x50 [26156.516738] [<ffffffff98d1790c>] nmi_handle.isra.0+0x8c/0x150 [26156.523273] [<ffffffff98d17be8>] do_nmi+0x218/0x460 [26156.528834] [<ffffffff98d16d79>] end_repeat_nmi+0x1e/0x7e [26156.534980] [<ffffffff987089c0>] ? native_queued_spin_lock_slowpath+0x1d0/0x200 [26156.543268] [<ffffffff987089c0>] ? native_queued_spin_lock_slowpath+0x1d0/0x200 [26156.551556] [<ffffffff987089c0>] ? native_queued_spin_lock_slowpath+0x1d0/0x200 [26156.559842] <EOE> [<ffffffff98d083e4>] queued_spin_lock_slowpath+0xb/0xf [26156.567555] [<ffffffff98d15690>] _raw_spin_lock+0x20/0x30 [26156.573696] [<ffffffffc08381a1>] bnxt_qplib_lock_buddy_cq+0x31/0x40 [bnxt_re] [26156.581789] [<ffffffffc083bbaa>] bnxt_qplib_poll_cq+0x43a/0xf10 [bnxt_re] [26156.589493] [<ffffffffc083239b>] bnxt_re_poll_cq+0x9b/0x760 [bnxt_re] The issue happens if RQ poll_cq or SQ poll_cq or Async error event tries to put the error QP in flush list. 
Since the SQ and RQ of each error QP are added to two different flush lists, we need to protect them using the locks of the corresponding CQs. A difference in the order of acquiring the locks in SQ poll_cq and RQ poll_cq can cause a hard lockup. This patch revisits the locking strategy and removes the usage of qplib_cq.hwq.lock. Instead of this lock, it introduces qplib_cq.flush_lock to handle addition/deletion of QPs in the flush list. Also, always acquire the flush_lock in order (SQ CQ lock first and then RQ CQ lock) to avoid any potential deadlock. Other than the poll_cq context, the movement of a QP to/from the flush list can be done in the modify_qp context or from an async error event from HW. Synchronize these operations using the bnxt_re verbs layer CQ locks. To achieve this, add a callback from the HW abstraction layer (qplib) to the bnxt_re ib_verbs layer in case of an async error event. Also, remove the buddy CQ functions as they are no longer required. Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com> Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com> Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com> Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com> Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
229 lines
7.9 KiB
C
229 lines
7.9 KiB
C
/*
 * Broadcom NetXtreme-E RoCE driver.
 *
 * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
 * Broadcom refers to Broadcom Limited and/or its subsidiaries.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * BSD license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
 * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Description: IB Verbs interpreter (header)
 */

/* Include guard: the matching #endif closes the header at the end of file. */
#ifndef __BNXT_RE_IB_VERBS_H__
#define __BNXT_RE_IB_VERBS_H__

struct bnxt_re_gid_ctx {
|
|
u32 idx;
|
|
u32 refcnt;
|
|
};
|
|
|
|
#define BNXT_RE_FENCE_BYTES 64
|
|
struct bnxt_re_fence_data {
|
|
u32 size;
|
|
u8 va[BNXT_RE_FENCE_BYTES];
|
|
dma_addr_t dma_addr;
|
|
struct bnxt_re_mr *mr;
|
|
struct ib_mw *mw;
|
|
struct bnxt_qplib_swqe bind_wqe;
|
|
u32 bind_rkey;
|
|
};
|
|
|
|
struct bnxt_re_pd {
|
|
struct bnxt_re_dev *rdev;
|
|
struct ib_pd ib_pd;
|
|
struct bnxt_qplib_pd qplib_pd;
|
|
struct bnxt_re_fence_data fence;
|
|
};
|
|
|
|
struct bnxt_re_ah {
|
|
struct bnxt_re_dev *rdev;
|
|
struct ib_ah ib_ah;
|
|
struct bnxt_qplib_ah qplib_ah;
|
|
};
|
|
|
|
struct bnxt_re_srq {
|
|
struct bnxt_re_dev *rdev;
|
|
u32 srq_limit;
|
|
struct ib_srq ib_srq;
|
|
struct bnxt_qplib_srq qplib_srq;
|
|
struct ib_umem *umem;
|
|
spinlock_t lock; /* protect srq */
|
|
};
|
|
|
|
struct bnxt_re_qp {
|
|
struct list_head list;
|
|
struct bnxt_re_dev *rdev;
|
|
struct ib_qp ib_qp;
|
|
spinlock_t sq_lock; /* protect sq */
|
|
spinlock_t rq_lock; /* protect rq */
|
|
struct bnxt_qplib_qp qplib_qp;
|
|
struct ib_umem *sumem;
|
|
struct ib_umem *rumem;
|
|
/* QP1 */
|
|
u32 send_psn;
|
|
struct ib_ud_header qp1_hdr;
|
|
struct bnxt_re_cq *scq;
|
|
struct bnxt_re_cq *rcq;
|
|
};
|
|
|
|
struct bnxt_re_cq {
|
|
struct bnxt_re_dev *rdev;
|
|
spinlock_t cq_lock; /* protect cq */
|
|
u16 cq_count;
|
|
u16 cq_period;
|
|
struct ib_cq ib_cq;
|
|
struct bnxt_qplib_cq qplib_cq;
|
|
struct bnxt_qplib_cqe *cql;
|
|
#define MAX_CQL_PER_POLL 1024
|
|
u32 max_cql;
|
|
struct ib_umem *umem;
|
|
};
|
|
|
|
struct bnxt_re_mr {
|
|
struct bnxt_re_dev *rdev;
|
|
struct ib_mr ib_mr;
|
|
struct ib_umem *ib_umem;
|
|
struct bnxt_qplib_mrw qplib_mr;
|
|
u32 npages;
|
|
u64 *pages;
|
|
struct bnxt_qplib_frpl qplib_frpl;
|
|
};
|
|
|
|
struct bnxt_re_frpl {
|
|
struct bnxt_re_dev *rdev;
|
|
struct bnxt_qplib_frpl qplib_frpl;
|
|
u64 *page_list;
|
|
};
|
|
|
|
struct bnxt_re_fmr {
|
|
struct bnxt_re_dev *rdev;
|
|
struct ib_fmr ib_fmr;
|
|
struct bnxt_qplib_mrw qplib_fmr;
|
|
};
|
|
|
|
struct bnxt_re_mw {
|
|
struct bnxt_re_dev *rdev;
|
|
struct ib_mw ib_mw;
|
|
struct bnxt_qplib_mrw qplib_mw;
|
|
};
|
|
|
|
struct bnxt_re_ucontext {
|
|
struct bnxt_re_dev *rdev;
|
|
struct ib_ucontext ib_uctx;
|
|
struct bnxt_qplib_dpi dpi;
|
|
void *shpg;
|
|
spinlock_t sh_lock; /* protect shpg */
|
|
};
|
|
|
|
struct net_device *bnxt_re_get_netdev(struct ib_device *ibdev, u8 port_num);
|
|
|
|
int bnxt_re_query_device(struct ib_device *ibdev,
|
|
struct ib_device_attr *ib_attr,
|
|
struct ib_udata *udata);
|
|
int bnxt_re_modify_device(struct ib_device *ibdev,
|
|
int device_modify_mask,
|
|
struct ib_device_modify *device_modify);
|
|
int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num,
|
|
struct ib_port_attr *port_attr);
|
|
int bnxt_re_get_port_immutable(struct ib_device *ibdev, u8 port_num,
|
|
struct ib_port_immutable *immutable);
|
|
void bnxt_re_query_fw_str(struct ib_device *ibdev, char *str);
|
|
int bnxt_re_query_pkey(struct ib_device *ibdev, u8 port_num,
|
|
u16 index, u16 *pkey);
|
|
int bnxt_re_del_gid(struct ib_device *ibdev, u8 port_num,
|
|
unsigned int index, void **context);
|
|
int bnxt_re_add_gid(struct ib_device *ibdev, u8 port_num,
|
|
unsigned int index, const union ib_gid *gid,
|
|
const struct ib_gid_attr *attr, void **context);
|
|
int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num,
|
|
int index, union ib_gid *gid);
|
|
enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev,
|
|
u8 port_num);
|
|
/* Protection domain (PD) and address handle (AH) entry points. */
struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev,
			       struct ib_ucontext *context,
			       struct ib_udata *udata);
int bnxt_re_dealloc_pd(struct ib_pd *pd);
struct ib_ah *bnxt_re_create_ah(struct ib_pd *pd,
				struct rdma_ah_attr *ah_attr,
				struct ib_udata *udata);
int bnxt_re_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
int bnxt_re_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
int bnxt_re_destroy_ah(struct ib_ah *ah);
/* Shared receive queue (SRQ) entry points. */
struct ib_srq *bnxt_re_create_srq(struct ib_pd *pd,
				  struct ib_srq_init_attr *srq_init_attr,
				  struct ib_udata *udata);
int bnxt_re_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr,
		       enum ib_srq_attr_mask srq_attr_mask,
		       struct ib_udata *udata);
int bnxt_re_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
int bnxt_re_destroy_srq(struct ib_srq *srq);
/*
 * NOTE(review): bad_recv_wr presumably reports the first work request that
 * could not be posted - confirm against the definition.
 */
int bnxt_re_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *recv_wr,
			  struct ib_recv_wr **bad_recv_wr);
/* Queue pair (QP) lifecycle and work-request posting entry points. */
struct ib_qp *bnxt_re_create_qp(struct ib_pd *pd,
				struct ib_qp_init_attr *qp_init_attr,
				struct ib_udata *udata);
int bnxt_re_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
		      int qp_attr_mask, struct ib_udata *udata);
int bnxt_re_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
		     int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
int bnxt_re_destroy_qp(struct ib_qp *qp);
/*
 * NOTE(review): bad_send_wr / bad_recv_wr presumably report the first work
 * request that could not be posted - confirm against the definitions.
 */
int bnxt_re_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr,
		      struct ib_send_wr **bad_send_wr);
int bnxt_re_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr,
		      struct ib_recv_wr **bad_recv_wr);
/* Completion queue (CQ) entry points. */
struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev,
				const struct ib_cq_init_attr *attr,
				struct ib_ucontext *context,
				struct ib_udata *udata);
int bnxt_re_destroy_cq(struct ib_cq *cq);
/*
 * NOTE(review): presumably fills up to num_entries elements of wc and
 * returns how many were written - confirm against the definition.
 */
int bnxt_re_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
int bnxt_re_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
|
|
|
|
int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents,
|
|
unsigned int *sg_offset);
|
|
struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type mr_type,
|
|
u32 max_num_sg);
|
|
int bnxt_re_dereg_mr(struct ib_mr *mr);
|
|
struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
|
|
struct ib_udata *udata);
|
|
int bnxt_re_dealloc_mw(struct ib_mw *mw);
|
|
struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
|
|
u64 virt_addr, int mr_access_flags,
|
|
struct ib_udata *udata);
|
|
/* User context allocation/teardown and mmap entry points. */
struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev,
					   struct ib_udata *udata);
int bnxt_re_dealloc_ucontext(struct ib_ucontext *context);
int bnxt_re_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);

/*
 * Paired CQ lock helpers for a QP. The unsigned long returned by
 * bnxt_re_lock_cqs() is meant to be handed back as 'flags' to
 * bnxt_re_unlock_cqs().
 * NOTE(review): presumably these take/release the cq_lock of qp->scq and
 * qp->rcq, with the saved IRQ state carried in the return value - confirm
 * against ib_verbs.c.
 */
unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp);
void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp, unsigned long flags);
#endif /* __BNXT_RE_IB_VERBS_H__ */