From a3c8ab4fe8f006d742c24be677518bfa9862e732 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 30 Nov 2005 09:55:22 -0800 Subject: [PATCH 01/26] IB/mthca: fix QP size limits for mem-free HCAs Unlike tavor, the max work queue size is an exact power of 2 for arbel mode, despite what the documentation (of the QUERY_DEV_LIM firmware command) says. Without this patch, on Arbel, we can start with a QP of a valid size and get above the reported limit after rounding to the next power of two. Signed-off-by: Jack Morgenstein Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_cmd.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 9ed34587fc5c..22ac72bc20c3 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -937,10 +937,6 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, if (err) goto out; - MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET); - dev_lim->max_srq_sz = (1 << field) - 1; - MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET); - dev_lim->max_qp_sz = (1 << field) - 1; MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_QP_OFFSET); dev_lim->reserved_qps = 1 << (field & 0xf); MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_OFFSET); @@ -1056,6 +1052,10 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, mthca_dbg(dev, "Flags: %08x\n", dev_lim->flags); if (mthca_is_memfree(dev)) { + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET); + dev_lim->max_srq_sz = 1 << field; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET); + dev_lim->max_qp_sz = 1 << field; MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSZ_SRQ_OFFSET); dev_lim->hca.arbel.resize_srq = field & 1; MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SG_RQ_OFFSET); @@ -1087,6 +1087,10 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, mthca_dbg(dev, "Max ICM size %lld MB\n", (unsigned long long) dev_lim->hca.arbel.max_icm_sz >> 20); } else { + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET); + dev_lim->max_srq_sz = (1 << field) - 1; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET); + dev_lim->max_qp_sz = (1 << field) - 1; MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_AV_OFFSET); dev_lim->hca.tavor.max_avs = 1 << (field & 0x3f); dev_lim->mpt_entry_sz = MTHCA_MPT_ENTRY_SIZE; From 227eca83690da7dcbd698d3268e29402e0571723 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Wed, 30 Nov 2005 10:00:25 -0800 Subject: [PATCH 02/26] IB/cm: correct reported reject code Change reject code from TIMEOUT to CONSUMER_REJECT when destroying a cm_id in the process of connecting. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/cm.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 02110e00d145..1fe21865d1f2 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -684,6 +684,13 @@ retest: cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT); break; case IB_CM_REQ_SENT: + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT, + &cm_id_priv->av.port->cm_dev->ca_guid, + sizeof cm_id_priv->av.port->cm_dev->ca_guid, + NULL, 0); + break; case IB_CM_MRA_REQ_RCVD: case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: @@ -694,10 +701,8 @@ retest: case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT, - &cm_id_priv->av.port->cm_dev->ca_guid, - sizeof cm_id_priv->av.port->cm_dev->ca_guid, - NULL, 0); + ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, + NULL, 0, NULL, 0); break; case IB_CM_ESTABLISHED: spin_unlock_irqrestore(&cm_id_priv->lock, flags); From de1bb1a64c29bae4f5330c70bd1dc6a62954c9f4 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Wed, 30 Nov 2005 10:01:13 -0800 Subject: [PATCH 03/26] IB/cm: avoid reusing local ID Use an increasing local ID to avoid re-using identifiers while messages may still be outstanding on the old ID. Without this, a quick connect-disconnect-connect sequence can fail by matching messages for the new connection with the old connection. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/cm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 1fe21865d1f2..3a611fe5497e 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -308,10 +308,11 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv) { unsigned long flags; int ret; + static int next_id; do { spin_lock_irqsave(&cm.lock, flags); - ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, 1, + ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, next_id++, (__force int *) &cm_id_priv->id.local_id); spin_unlock_irqrestore(&cm.lock, flags); } while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) ); From 0efc4883a6b3de12476cd7a35e638c0a9f5fd75f Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 9 Dec 2005 13:46:32 -0800 Subject: [PATCH 04/26] IB/umad: fix memory leaks Don't leak packet if it had a timeout, and don't leak timeout struct if queue_packet() fails. Signed-off-by: Jack Morgenstein Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/core/user_mad.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index eb7f52537ccc..c908de8db5a9 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -197,8 +197,8 @@ static void send_handler(struct ib_mad_agent *agent, memcpy(timeout->mad.data, packet->mad.data, sizeof (struct ib_mad_hdr)); - if (!queue_packet(file, agent, timeout)) - return; + if (queue_packet(file, agent, timeout)) + kfree(timeout); } out: kfree(packet); From 52d0df153c987e4ad57d15f5df91848f65858e5d Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 9 Dec 2005 13:48:50 -0800 Subject: [PATCH 05/26] IB/mthca: fix memory user DB table leak Free the memory allocated in mthca_init_user_db_tab() when releasing the db_tab in mthca_cleanup_user_db_tab(). Signed-off-by: Jack Morgenstein Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_memfree.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index d72fe95cba08..5798ed00d83d 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -485,6 +485,8 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, put_page(db_tab->page[i].mem.page); } } + + kfree(db_tab); } int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type, From 94361cf74a6fca1973d2fed5338d5fb4bcd902fa Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 9 Dec 2005 16:32:21 -0800 Subject: [PATCH 06/26] IB/mthca: check RDMA limits Add limit checking on rd_atomic and dest_rd_atomic attributes: especially for max_dest_rd_atomic, a value that is larger than HCA capability can cause RDB overflow and corruption of another QP. Signed-off-by: Jack Morgenstein Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_qp.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 7450550db736..c5c3d0edbbf5 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -591,6 +591,20 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) return -EINVAL; } + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && + attr->max_rd_atomic > dev->limits.max_qp_init_rdma) { + mthca_dbg(dev, "Max rdma_atomic as initiator %u too large (max is %d)\n", + attr->max_rd_atomic, dev->limits.max_qp_init_rdma); + return -EINVAL; + } + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && + attr->max_dest_rd_atomic > 1 << dev->qp_table.rdb_shift) { + mthca_dbg(dev, "Max rdma_atomic as responder %u too large (max %d)\n", + attr->max_dest_rd_atomic, 1 << dev->qp_table.rdb_shift); + return -EINVAL; + } + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); From 6aa2e4e8063114bd7cea8616dd5848d3c64b4c36 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 9 Dec 2005 16:38:04 -0800 Subject: [PATCH 07/26] IB/mthca: correct log2 calculation Fix thinko in rd_atomic calculation: ffs(x) - 1 does not find the next power of 2 -- it should be fls(x - 1). Signed-off-by: Jack Morgenstein Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_qp.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index c5c3d0edbbf5..84056a8b794e 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -728,9 +728,9 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) } if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { - qp_context->params1 |= cpu_to_be32(min(attr->max_rd_atomic ? - ffs(attr->max_rd_atomic) - 1 : 0, - 7) << 21); + if (attr->max_rd_atomic) + qp_context->params1 |= + cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21); qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_SRA_MAX); } @@ -769,8 +769,6 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) } if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { - u8 rra_max; - if (qp->resp_depth && !attr->max_dest_rd_atomic) { /* * Lowering our responder resources to zero. @@ -798,13 +796,10 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) MTHCA_QP_OPTPAR_RAE); } - for (rra_max = 0; - 1 << rra_max < attr->max_dest_rd_atomic && - rra_max < dev->qp_table.rdb_shift; - ++rra_max) - ; /* nothing */ + if (attr->max_dest_rd_atomic) + qp_context->params2 |= + cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21); - qp_context->params2 |= cpu_to_be32(rra_max << 21); qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRA_MAX); qp->resp_depth = attr->max_dest_rd_atomic; From 44b5b0303327cfb23f135b95b2fe5436c81ed27c Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 9 Dec 2005 16:40:14 -0800 Subject: [PATCH 08/26] IB/mthca: don't change driver's copy of attributes if modify QP fails Only change the driver's copy of the QP attributes in modify QP after checking the modify QP command completed successfully. Signed-off-by: Jack Morgenstein Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_qp.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 84056a8b794e..3543299ecb15 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -764,8 +764,6 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE | MTHCA_QP_OPTPAR_RRE | MTHCA_QP_OPTPAR_RAE); - - qp->atomic_rd_en = attr->qp_access_flags; } if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { @@ -801,8 +799,6 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21); qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRA_MAX); - - qp->resp_depth = attr->max_dest_rd_atomic; } qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RSC); @@ -844,8 +840,13 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) err = -EINVAL; } - if (!err) + if (!err) { qp->state = new_state; + if (attr_mask & IB_QP_ACCESS_FLAGS) + qp->atomic_rd_en = attr->qp_access_flags; + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + qp->resp_depth = attr->max_dest_rd_atomic; + } mthca_free_mailbox(dev, mailbox); From 6c7d2a75b512c64c910b69adf32dbaddb461910b Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 15 Dec 2005 13:55:50 -0800 Subject: [PATCH 09/26] IB/mthca: Fix thinko in mthca_table_find() break only escapes from the innermost loop, and we want to escape both loops and return an answer. Noticed by Ishai Rabinovitch. Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_memfree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index 5798ed00d83d..9fb985a016e9 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -233,7 +233,7 @@ void *mthca_table_find(struct mthca_icm_table *table, int obj) for (i = 0; i < chunk->npages; ++i) { if (chunk->mem[i].length >= offset) { page = chunk->mem[i].page; - break; + goto out; } offset -= chunk->mem[i].length; } From 576d2e4e40315e8140c04be99cd057720d8a3817 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 15 Dec 2005 14:20:23 -0800 Subject: [PATCH 10/26] IB/mthca: Fix SRQ cleanup during QP destroy When cleaning up a CQ for a QP attached to SRQ, need to free an SRQ WQE only if the CQE is a receive completion. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_cq.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index 4a8adcef2079..fcef8dc2c121 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -253,6 +253,15 @@ void mthca_cq_event(struct mthca_dev *dev, u32 cqn, wake_up(&cq->wait); } +static inline int is_recv_cqe(struct mthca_cqe *cqe) +{ + if ((cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) == + MTHCA_ERROR_CQE_OPCODE_MASK) + return !(cqe->opcode & 0x01); + else + return !(cqe->is_send & 0x80); +} + void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, struct mthca_srq *srq) { @@ -296,7 +305,7 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, while ((int) --prod_index - (int) cq->cons_index >= 0) { cqe = get_cqe(cq, prod_index & cq->ibcq.cqe); if (cqe->my_qpn == cpu_to_be32(qpn)) { - if (srq) + if (srq && is_recv_cqe(cqe)) mthca_free_srq_wqe(srq, be32_to_cpu(cqe->wqe)); ++nfreed; } else if (nfreed) From d1646f86a2a05a956adbb163c81a81bd621f055e Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 15 Dec 2005 14:36:24 -0800 Subject: [PATCH 11/26] IB/mthca: Fix IB_QP_ACCESS_FLAGS handling. This patch corrects some corner cases in managing the RAE/RRE bits in the mthca qp context. These bits need to be zero if the user requests max_dest_rd_atomic of zero. The bits need to be restored to the value implied by the qp access flags attribute in a previous (or the current) modify-qp command if the dest_rd_atomic variable is changed to non-zero. In the current implementation, the following scenario will not work: RESET-to-INIT set QP access flags to all disabled (zeroes) INIT-to-RTR set max_dest_rd_atomic=10, AND set qp_access_flags = IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_ATOMIC The current code will incorrectly take the access-flags value set in the RESET-to-INIT transition. We can simplify, and correct, this IB_QP_ACCESS_FLAGS handling: it is always safe to set qp access flags in the firmware command if either of IB_QP_MAX_DEST_RD_ATOMIC or IB_QP_ACCESS_FLAGS is set, so let's just set it to the correct value, always. Signed-off-by: Jack Morgenstein Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_qp.c | 87 +++++++++++--------------- 1 file changed, 37 insertions(+), 50 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 3543299ecb15..e826c9ff5d70 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -522,6 +522,36 @@ static void init_port(struct mthca_dev *dev, int port) mthca_warn(dev, "INIT_IB returned status %02x.\n", status); } +static __be32 get_hw_access_flags(struct mthca_qp *qp, struct ib_qp_attr *attr, + int attr_mask) +{ + u8 dest_rd_atomic; + u32 access_flags; + u32 hw_access_flags = 0; + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + dest_rd_atomic = attr->max_dest_rd_atomic; + else + dest_rd_atomic = qp->resp_depth; + + if (attr_mask & IB_QP_ACCESS_FLAGS) + access_flags = attr->qp_access_flags; + else + access_flags = qp->atomic_rd_en; + + if (!dest_rd_atomic) + access_flags &= IB_ACCESS_REMOTE_WRITE; + + if (access_flags & IB_ACCESS_REMOTE_READ) + hw_access_flags |= MTHCA_QP_BIT_RRE; + if (access_flags & IB_ACCESS_REMOTE_ATOMIC) + hw_access_flags |= MTHCA_QP_BIT_RAE; + if (access_flags & IB_ACCESS_REMOTE_WRITE) + hw_access_flags |= MTHCA_QP_BIT_RWE; + + return cpu_to_be32(hw_access_flags); +} + int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) { struct mthca_dev *dev = to_mdev(ibqp->device); @@ -743,57 +773,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) qp_context->snd_db_index = cpu_to_be32(qp->sq.db_index); } - if (attr_mask & IB_QP_ACCESS_FLAGS) { - qp_context->params2 |= - cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE ? - MTHCA_QP_BIT_RWE : 0); - - /* - * Only enable RDMA reads and atomics if we have - * responder resources set to a non-zero value. - */ - if (qp->resp_depth) { - qp_context->params2 |= - cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_READ ? - MTHCA_QP_BIT_RRE : 0); - qp_context->params2 |= - cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC ? - MTHCA_QP_BIT_RAE : 0); - } - - qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE | - MTHCA_QP_OPTPAR_RRE | - MTHCA_QP_OPTPAR_RAE); - } - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { - if (qp->resp_depth && !attr->max_dest_rd_atomic) { - /* - * Lowering our responder resources to zero. - * Turn off reads RDMA and atomics as responder. - * (RRE/RAE in params2 already zero) - */ - qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRE | - MTHCA_QP_OPTPAR_RAE); - } - - if (!qp->resp_depth && attr->max_dest_rd_atomic) { - /* - * Increasing our responder resources from - * zero. Turn on RDMA reads and atomics as - * appropriate. - */ - qp_context->params2 |= - cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_READ ? - MTHCA_QP_BIT_RRE : 0); - qp_context->params2 |= - cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_ATOMIC ? - MTHCA_QP_BIT_RAE : 0); - - qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRE | - MTHCA_QP_OPTPAR_RAE); - } - if (attr->max_dest_rd_atomic) qp_context->params2 |= cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21); @@ -801,6 +781,13 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRA_MAX); } + if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) { + qp_context->params2 |= get_hw_access_flags(qp, attr, attr_mask); + qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE | + MTHCA_QP_OPTPAR_RRE | + MTHCA_QP_OPTPAR_RAE); + } + qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RSC); if (ibqp->srq) From c4342d8a4d95e18b957b898dbf5bfce28fca2780 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 15 Dec 2005 19:59:01 -0800 Subject: [PATCH 12/26] IB/mthca: Fix corner cases in max_rd_atomic value handling in modify QP sae and sre bits should only be set when setting sra_max. Further, in the old code, if the caller specifies max_rd_atomic = 0, the sre and sae bits are still set, with the result that the QP ends up with max_rd_atomic = 1 in effect. Signed-off-by: Jack Morgenstein Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_qp.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index e826c9ff5d70..d786ef443614 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -747,9 +747,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) qp_context->wqe_lkey = cpu_to_be32(qp->mr.ibmr.lkey); qp_context->params1 = cpu_to_be32((MTHCA_ACK_REQ_FREQ << 28) | (MTHCA_FLIGHT_LIMIT << 24) | - MTHCA_QP_BIT_SRE | - MTHCA_QP_BIT_SWE | - MTHCA_QP_BIT_SAE); + MTHCA_QP_BIT_SWE); if (qp->sq_policy == IB_SIGNAL_ALL_WR) qp_context->params1 |= cpu_to_be32(MTHCA_QP_BIT_SSC); if (attr_mask & IB_QP_RETRY_CNT) { @@ -758,9 +756,13 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) } if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { - if (attr->max_rd_atomic) + if (attr->max_rd_atomic) { + qp_context->params1 |= + cpu_to_be32(MTHCA_QP_BIT_SRE | + MTHCA_QP_BIT_SAE); qp_context->params1 |= cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21); + } qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_SRA_MAX); } From 1d7d2f6f476cf7aa65f9f740a6c932fb75608110 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 4 Jan 2006 14:42:39 -0800 Subject: [PATCH 13/26] IB/mthca: fix WQE size calculation in create-srq Thinko: 64 bytes is the minimum SRQ WQE size (not the maximum). Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_srq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index f7d234295efe..e7e153d9c4c6 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -201,7 +201,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, if (mthca_is_memfree(dev)) srq->max = roundup_pow_of_two(srq->max + 1); - ds = min(64UL, + ds = max(64UL, roundup_pow_of_two(sizeof (struct mthca_next_seg) + srq->max_gs * sizeof (struct mthca_data_seg))); srq->wqe_shift = long_log2(ds); From aa2f9367790ad81ef51d3f667124227ca3003d3b Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 5 Jan 2006 16:12:01 -0800 Subject: [PATCH 14/26] IB/mthca: check return value in mthca_dev_lim call Check error return on call to mthca_dev_lim for Tavor (as is done for memfree). Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 6f94b25f3acd..8b00d9a0f6f4 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -261,6 +261,10 @@ static int __devinit mthca_init_tavor(struct mthca_dev *mdev) } err = mthca_dev_lim(mdev, &dev_lim); + if (err) { + mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n"); + goto err_disable; + } profile = default_profile; profile.num_uar = dev_lim.uar_size / PAGE_SIZE; From 38d1e793471d95728219f500bbb8bd25658d73b0 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 5 Jan 2006 16:13:46 -0800 Subject: [PATCH 15/26] IB/mthca: check port validity in modify_qp Modify_qp should check that the physical port number provided is a legal value. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_qp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index d786ef443614..ea45fa400fab 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -621,6 +621,12 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) return -EINVAL; } + if ((attr_mask & IB_QP_PORT) && + (attr->port_num == 0 || attr->port_num > dev->limits.num_ports)) { + mthca_dbg(dev, "Port number (%u) is invalid\n", attr->port_num); + return -EINVAL; + } + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && attr->max_rd_atomic > dev->limits.max_qp_init_rdma) { mthca_dbg(dev, "Max rdma_atomic as initiator %u too large (max is %d)\n", From 466200562ccd80f728f7ef602d2b97b4fdedd566 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 5 Jan 2006 16:17:38 -0800 Subject: [PATCH 16/26] IB/mthca: create_eq with size not a power of 2 Fix mthca_create_eq for when the EQ size is not a power of 2. Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_eq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index 34d68e5a72d8..e8a948f087c0 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -484,8 +484,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, u8 intr, struct mthca_eq *eq) { - int npages = (nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) / - PAGE_SIZE; + int npages; u64 *dma_list = NULL; dma_addr_t t; struct mthca_mailbox *mailbox; @@ -496,6 +495,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, eq->dev = dev; eq->nent = roundup_pow_of_two(max(nent, 2)); + npages = ALIGN(eq->nent * MTHCA_EQ_ENTRY_SIZE, PAGE_SIZE) / PAGE_SIZE; eq->page_list = kmalloc(npages * sizeof *eq->page_list, GFP_KERNEL); From 5b3bc7a68171138d52b1b62012c37ac888895460 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 6 Jan 2006 12:57:30 -0800 Subject: [PATCH 17/26] IB/mthca: max_inline_data handling tweaks Fix a case where copying max_inline_data from a successful create_qp capabilities output to create_qp input could cause EINVAL error: mthca_set_qp_size must check max_inline_data directly against max_desc_sz; checking qp->sq.max_gs is wrong since max_inline_data depends on the qp type and does not involve max_sg. Signed-off-by: Jack Morgenstein Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_qp.c | 62 +++++++++++++++----------- 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index ea45fa400fab..fd60cf3a5ba3 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -890,18 +890,13 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) return err; } -static void mthca_adjust_qp_caps(struct mthca_dev *dev, - struct mthca_pd *pd, - struct mthca_qp *qp) +static int mthca_max_data_size(struct mthca_dev *dev, struct mthca_qp *qp, int desc_sz) { - int max_data_size; - /* * Calculate the maximum size of WQE s/g segments, excluding * the next segment and other non-data segments. */ - max_data_size = min(dev->limits.max_desc_sz, 1 << qp->sq.wqe_shift) - - sizeof (struct mthca_next_seg); + int max_data_size = desc_sz - sizeof (struct mthca_next_seg); switch (qp->transport) { case MLX: @@ -920,11 +915,24 @@ static void mthca_adjust_qp_caps(struct mthca_dev *dev, break; } + return max_data_size; +} + +static inline int mthca_max_inline_data(struct mthca_pd *pd, int max_data_size) +{ /* We don't support inline data for kernel QPs (yet). */ - if (!pd->ibpd.uobject) - qp->max_inline_data = 0; - else - qp->max_inline_data = max_data_size - MTHCA_INLINE_HEADER_SIZE; + return pd->ibpd.uobject ? max_data_size - MTHCA_INLINE_HEADER_SIZE : 0; +} + +static void mthca_adjust_qp_caps(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_qp *qp) +{ + int max_data_size = mthca_max_data_size(dev, qp, + min(dev->limits.max_desc_sz, + 1 << qp->sq.wqe_shift)); + + qp->max_inline_data = mthca_max_inline_data(pd, max_data_size); qp->sq.max_gs = min_t(int, dev->limits.max_sg, max_data_size / sizeof (struct mthca_data_seg)); @@ -1191,13 +1199,23 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev, } static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap, - struct mthca_qp *qp) + struct mthca_pd *pd, struct mthca_qp *qp) { + int max_data_size = mthca_max_data_size(dev, qp, dev->limits.max_desc_sz); + /* Sanity check QP size before proceeding */ - if (cap->max_send_wr > dev->limits.max_wqes || - cap->max_recv_wr > dev->limits.max_wqes || - cap->max_send_sge > dev->limits.max_sg || - cap->max_recv_sge > dev->limits.max_sg) + if (cap->max_send_wr > dev->limits.max_wqes || + cap->max_recv_wr > dev->limits.max_wqes || + cap->max_send_sge > dev->limits.max_sg || + cap->max_recv_sge > dev->limits.max_sg || + cap->max_inline_data > mthca_max_inline_data(pd, max_data_size)) + return -EINVAL; + + /* + * For MLX transport we need 2 extra S/G entries: + * one for the header and one for the checksum at the end + */ + if (qp->transport == MLX && cap->max_recv_sge + 2 > dev->limits.max_sg) return -EINVAL; if (mthca_is_memfree(dev)) { @@ -1216,14 +1234,6 @@ static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap, MTHCA_INLINE_CHUNK_SIZE) / sizeof (struct mthca_data_seg)); - /* - * For MLX transport we need 2 extra S/G entries: - * one for the header and one for the checksum at the end - */ - if ((qp->transport == MLX && qp->sq.max_gs + 2 > dev->limits.max_sg) || - qp->sq.max_gs > dev->limits.max_sg || qp->rq.max_gs > dev->limits.max_sg) - return -EINVAL; - return 0; } @@ -1238,7 +1248,7 @@ int mthca_alloc_qp(struct mthca_dev *dev, { int err; - err = mthca_set_qp_size(dev, cap, qp); + err = mthca_set_qp_size(dev, cap, pd, qp); if (err) return err; @@ -1281,7 +1291,7 @@ int mthca_alloc_sqp(struct mthca_dev *dev, u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1; int err; - err = mthca_set_qp_size(dev, cap, &sqp->qp); + err = mthca_set_qp_size(dev, cap, pd, &sqp->qp); if (err) return err; From 0364ffc3e8c441d4185e3eb41ecc61dbb09614e4 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 6 Jan 2006 13:01:27 -0800 Subject: [PATCH 18/26] IB/mthca: fix for SQEr-to-RTS transition in modify QP Fixes to SQEr->RTS transition in modify_qp: 1. The flag IB_QP_ACCESS_FLAGS is optional for UC qps 2. The SQEr state is not supported for RC qps Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_qp.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index fd60cf3a5ba3..623f5144eae2 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -476,9 +476,8 @@ static const struct { .opt_param = { [UD] = (IB_QP_CUR_STATE | IB_QP_QKEY), - [UC] = IB_QP_CUR_STATE, - [RC] = (IB_QP_CUR_STATE | - IB_QP_MIN_RNR_TIMER), + [UC] = (IB_QP_CUR_STATE | + IB_QP_ACCESS_FLAGS), [MLX] = (IB_QP_CUR_STATE | IB_QP_QKEY), } From 0d3b525fff40475e58dab9176740d2efc5f37838 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 6 Jan 2006 13:03:43 -0800 Subject: [PATCH 19/26] IB/mthca: fix for RTR-to-RTS transition in modify QP PKEY_INDEX is not a legal parameter in the RTR->RTS transition. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_qp.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 623f5144eae2..ff2def3e9dd1 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -383,12 +383,10 @@ static const struct { [UC] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | - IB_QP_PKEY_INDEX | IB_QP_PATH_MIG_STATE), [RC] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | - IB_QP_PKEY_INDEX | IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE), [MLX] = (IB_QP_CUR_STATE | From 5ceb74557c71465cf8f6fda050aac00e53f9ad3d Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 6 Jan 2006 13:11:07 -0800 Subject: [PATCH 20/26] IB/mthca: multiple fixes for multicast group handling Multicast group management fixes: . Fix leak of mailbox memory in error handling on multicast group operations. . Free AMGM indices at detach and in attach error handling. . Fix amount to shift for aligning next_gid_index in mailbox: it starts at bit 6, not bit 5. . Allocate AMGM index after end of MGM table, in the range num_mgms to multicast table size - 1. Add some BUG_ON checks to catch cases where the index falls in the MGM hash area. . Initialize the list of QPs in a newly-allocated group from AMGM to 0 This is necessary since when a group is moved from AMGM to MGM (in the case where the MGM entry has been emptied of QPs), the AMGM entry is not reset to 0 (and we don't want an extra command to do that). Signed-off-by: Jack Morgenstein Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_mcg.c | 54 ++++++++++++++++--------- 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c index 2fc449da418d..77bc6c746f43 100644 --- a/drivers/infiniband/hw/mthca/mthca_mcg.c +++ b/drivers/infiniband/hw/mthca/mthca_mcg.c @@ -111,7 +111,8 @@ static int find_mgm(struct mthca_dev *dev, goto out; if (status) { mthca_err(dev, "READ_MGM returned status %02x\n", status); - return -EINVAL; + err = -EINVAL; + goto out; } if (!memcmp(mgm->gid, zero_gid, 16)) { @@ -126,7 +127,7 @@ static int find_mgm(struct mthca_dev *dev, goto out; *prev = *index; - *index = be32_to_cpu(mgm->next_gid_index) >> 5; + *index = be32_to_cpu(mgm->next_gid_index) >> 6; } while (*index); *index = -1; @@ -153,8 +154,10 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) return PTR_ERR(mailbox); mgm = mailbox->buf; - if (down_interruptible(&dev->mcg_table.sem)) - return -EINTR; + if (down_interruptible(&dev->mcg_table.sem)) { + err = -EINTR; + goto err_sem; + } err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index); if (err) @@ -181,9 +184,8 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) err = -EINVAL; goto out; } - + memset(mgm, 0, sizeof *mgm); memcpy(mgm->gid, gid->raw, 16); - mgm->next_gid_index = 0; } for (i = 0; i < MTHCA_QP_PER_MGM; ++i) @@ -209,6 +211,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) if (status) { mthca_err(dev, "WRITE_MGM returned status %02x\n", status); err = -EINVAL; + goto out; } if (!link) @@ -223,7 +226,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) goto out; } - mgm->next_gid_index = cpu_to_be32(index << 5); + mgm->next_gid_index = cpu_to_be32(index << 6); err = mthca_WRITE_MGM(dev, prev, mailbox, &status); if (err) @@ -234,7 +237,12 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) } out: + if (err && link && index != -1) { + BUG_ON(index < dev->limits.num_mgms); + mthca_free(&dev->mcg_table.alloc, index); + } up(&dev->mcg_table.sem); + err_sem: mthca_free_mailbox(dev, mailbox); return err; } @@ -255,8 +263,10 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) return PTR_ERR(mailbox); mgm = mailbox->buf; - if (down_interruptible(&dev->mcg_table.sem)) - return -EINTR; + if (down_interruptible(&dev->mcg_table.sem)) { + err = -EINTR; + goto err_sem; + } err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index); if (err) @@ -305,13 +315,11 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) if (i != 1) goto out; - goto out; - if (prev == -1) { /* Remove entry from MGM */ - if (be32_to_cpu(mgm->next_gid_index) >> 5) { - err = mthca_READ_MGM(dev, - be32_to_cpu(mgm->next_gid_index) >> 5, + int amgm_index_to_free = be32_to_cpu(mgm->next_gid_index) >> 6; + if (amgm_index_to_free) { + err = mthca_READ_MGM(dev, amgm_index_to_free, mailbox, &status); if (err) goto out; @@ -332,9 +340,13 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) err = -EINVAL; goto out; } + if (amgm_index_to_free) { + BUG_ON(amgm_index_to_free < dev->limits.num_mgms); + mthca_free(&dev->mcg_table.alloc, amgm_index_to_free); + } } else { /* Remove entry from AMGM */ - index = be32_to_cpu(mgm->next_gid_index) >> 5; + int curr_next_index = be32_to_cpu(mgm->next_gid_index) >> 6; err = mthca_READ_MGM(dev, prev, mailbox, &status); if (err) goto out; @@ -344,7 +356,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) goto out; } - mgm->next_gid_index = cpu_to_be32(index << 5); + mgm->next_gid_index = cpu_to_be32(curr_next_index << 6); err = mthca_WRITE_MGM(dev, prev, mailbox, &status); if (err) @@ -354,10 +366,13 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) err = -EINVAL; goto out; } + BUG_ON(index < dev->limits.num_mgms); + mthca_free(&dev->mcg_table.alloc, index); } out: up(&dev->mcg_table.sem); + err_sem: mthca_free_mailbox(dev, mailbox); return err; } @@ -365,11 +380,12 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) int __devinit mthca_init_mcg_table(struct mthca_dev *dev) { int err; + int table_size = dev->limits.num_mgms + dev->limits.num_amgms; err = mthca_alloc_init(&dev->mcg_table.alloc, - dev->limits.num_amgms, - dev->limits.num_amgms - 1, - 0); + table_size, + table_size - 1, + dev->limits.num_mgms); if (err) return err; From 0f8e8f9607d77ffc1f9820446dfcf781e96fdfd4 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 6 Jan 2006 13:13:32 -0800 Subject: [PATCH 21/26] IB/mthca: Fill in vendor_err field in completion with error Fill vendor_err field in completion with error. Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_cq.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index fcef8dc2c121..96f1a86bf049 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -128,12 +128,12 @@ struct mthca_err_cqe { __be32 my_qpn; u32 reserved1[3]; u8 syndrome; - u8 reserved2; + u8 vendor_err; __be16 db_cnt; - u32 reserved3; + u32 reserved2; __be32 wqe; u8 opcode; - u8 reserved4[2]; + u8 reserved3[2]; u8 owner; }; @@ -342,8 +342,8 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq, } /* - * For completions in error, only work request ID, status (and - * freed resource count for RD) have to be set. + * For completions in error, only work request ID, status, vendor error + * (and freed resource count for RD) have to be set. */ switch (cqe->syndrome) { case SYNDROME_LOCAL_LENGTH_ERR: @@ -405,6 +405,8 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq, break; } + entry->vendor_err = cqe->vendor_err; + /* * Mem-free HCAs always generate one CQE per WQE, even in the * error case, so we don't have to check the doorbell count, etc. From 4de144bf721e46e7ccc8fed45b20a640cc364904 Mon Sep 17 00:00:00 2001 From: Dotan Barak Date: Fri, 6 Jan 2006 13:23:58 -0800 Subject: [PATCH 22/26] IB/mthca: Add support for automatic path migration (APM) Add code to modify QP operation to handle setting alternate paths for connected QPs. Signed-off-by: Dotan Barak Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_qp.c | 57 +++++++++++++++++--------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index ff2def3e9dd1..564b6d51c394 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -549,6 +549,25 @@ static __be32 get_hw_access_flags(struct mthca_qp *qp, struct ib_qp_attr *attr, return cpu_to_be32(hw_access_flags); } +static void mthca_path_set(struct ib_ah_attr *ah, struct mthca_qp_path *path) +{ + path->g_mylmc = ah->src_path_bits & 0x7f; + path->rlid = cpu_to_be16(ah->dlid); + path->static_rate = !!ah->static_rate; + + if (ah->ah_flags & IB_AH_GRH) { + path->g_mylmc |= 1 << 7; + path->mgid_index = ah->grh.sgid_index; + path->hop_limit = ah->grh.hop_limit; + path->sl_tclass_flowlabel = + cpu_to_be32((ah->sl << 28) | + (ah->grh.traffic_class << 20) | + (ah->grh.flow_label)); + memcpy(path->rgid, ah->grh.dgid.raw, 16); + } else + path->sl_tclass_flowlabel = cpu_to_be32(ah->sl << 28); +} + int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) { struct mthca_dev *dev = to_mdev(ibqp->device); @@ -712,28 +731,14 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) } if (attr_mask & IB_QP_RNR_RETRY) { - qp_context->pri_path.rnr_retry = attr->rnr_retry << 5; - qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_RETRY); + qp_context->alt_path.rnr_retry = qp_context->pri_path.rnr_retry = + attr->rnr_retry << 5; + qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_RETRY | + MTHCA_QP_OPTPAR_ALT_RNR_RETRY); } if (attr_mask & IB_QP_AV) { - qp_context->pri_path.g_mylmc = attr->ah_attr.src_path_bits & 0x7f; - qp_context->pri_path.rlid = cpu_to_be16(attr->ah_attr.dlid); - qp_context->pri_path.static_rate = !!attr->ah_attr.static_rate; - if (attr->ah_attr.ah_flags & IB_AH_GRH) { - qp_context->pri_path.g_mylmc |= 1 << 7; - qp_context->pri_path.mgid_index = attr->ah_attr.grh.sgid_index; - qp_context->pri_path.hop_limit = attr->ah_attr.grh.hop_limit; - qp_context->pri_path.sl_tclass_flowlabel = - cpu_to_be32((attr->ah_attr.sl << 28) | - (attr->ah_attr.grh.traffic_class << 20) | - (attr->ah_attr.grh.flow_label)); - memcpy(qp_context->pri_path.rgid, - attr->ah_attr.grh.dgid.raw, 16); - } else { - qp_context->pri_path.sl_tclass_flowlabel = - cpu_to_be32(attr->ah_attr.sl << 28); - } + mthca_path_set(&attr->ah_attr, &qp_context->pri_path); qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH); } @@ -742,7 +747,19 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ACK_TIMEOUT); } - /* XXX alt_path */ + if (attr_mask & IB_QP_ALT_PATH) { + if (attr->alt_port_num == 0 || attr->alt_port_num > dev->limits.num_ports) { + mthca_dbg(dev, "Alternate port number (%u) is invalid\n", + attr->alt_port_num); + return -EINVAL; + } + + mthca_path_set(&attr->alt_ah_attr, &qp_context->alt_path); + qp_context->alt_path.port_pkey |= cpu_to_be32(attr->alt_pkey_index | + attr->alt_port_num << 24); + qp_context->alt_path.ackto = attr->alt_timeout << 3; + qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ALT_ADDR_PATH); + } /* leave rdd as 0 */ qp_context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pd_num); From b4ca1a3f8ca24033d7b7ef595faef97d9f8b2326 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 6 Jan 2006 16:21:19 -0800 Subject: [PATCH 23/26] IB/uverbs: Fix reference counting on error paths If an operation fails after incrementing an object's reference count, then it should decrement the reference count on the error path. Signed-off-by: Jack Morgenstein Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs_cmd.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index a57d021d435a..6985a57fa6ae 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -489,6 +489,7 @@ err_idr: err_unreg: ib_dereg_mr(mr); + atomic_dec(&pd->usecnt); err_up: up(&ib_uverbs_idr_mutex); @@ -935,6 +936,11 @@ err_idr: err_destroy: ib_destroy_qp(qp); + atomic_dec(&pd->usecnt); + atomic_dec(&attr.send_cq->usecnt); + atomic_dec(&attr.recv_cq->usecnt); + if (attr.srq) + atomic_dec(&attr.srq->usecnt); err_up: up(&ib_uverbs_idr_mutex); @@ -1729,6 +1735,7 @@ err_idr: err_destroy: ib_destroy_srq(srq); + atomic_dec(&pd->usecnt); err_up: up(&ib_uverbs_idr_mutex); From ea5d4a6ad2bfd1006790666981645cab43d3afbd Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Fri, 6 Jan 2006 16:24:45 -0800 Subject: [PATCH 24/26] IB/uverbs: set ah_flags when creating address handle AH attribute's ah_flags need to be set according to the is_global flag passed in from userspace. Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs_cmd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 6985a57fa6ae..12d6cc0a7f80 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1454,6 +1454,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, attr.sl = cmd.attr.sl; attr.src_path_bits = cmd.attr.src_path_bits; attr.static_rate = cmd.attr.static_rate; + attr.ah_flags = cmd.attr.is_global ? IB_AH_GRH : 0; attr.port_num = cmd.attr.port_num; attr.grh.flow_label = cmd.attr.grh.flow_label; attr.grh.sgid_index = cmd.attr.grh.sgid_index; From ac4e7b35579de55db50d602a472858867808a9c3 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 6 Jan 2006 16:43:14 -0800 Subject: [PATCH 25/26] IB/uverbs: Release event file reference on ib_uverbs_create_cq() error ib_uverbs_create_cq() should release the completion channel event file if an error occurs after it looks it up. Also, if userspace asks for a completion channel and we don't find it, an error should be returned instead of silently creating a CQ without a completion channel. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs_cmd.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 12d6cc0a7f80..a02c5a05c984 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -594,13 +594,18 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, if (cmd.comp_vector >= file->device->num_comp_vectors) return -EINVAL; - if (cmd.comp_channel >= 0) - ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel); - uobj = kmalloc(sizeof *uobj, GFP_KERNEL); if (!uobj) return -ENOMEM; + if (cmd.comp_channel >= 0) { + ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel); + if (!ev_file) { + ret = -EINVAL; + goto err; + } + } + uobj->uobject.user_handle = cmd.user_handle; uobj->uobject.context = file->ucontext; uobj->uverbs_file = file; @@ -664,6 +669,8 @@ err_up: ib_destroy_cq(cq); err: + if (ev_file) + ib_uverbs_release_ucq(file, ev_file, uobj); kfree(uobj); return ret; } From 4f8448dfe8d3804fadad90c9b77494238b4a4eae Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Fri, 6 Jan 2006 16:43:47 -0800 Subject: [PATCH 26/26] IB: Set GIDs correctly in ib_create_ah_from_wc() ib_create_ah_from_wc() doesn't create the correct return address (AH) when there is a GRH present (source & dest GIDs need to be swapped). Signed-off-by: Ralph Campbell Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/verbs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 4c15e112736c..c857361be449 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -107,9 +107,9 @@ struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc, if (wc->wc_flags & IB_WC_GRH) { ah_attr.ah_flags = IB_AH_GRH; - ah_attr.grh.dgid = grh->dgid; + ah_attr.grh.dgid = grh->sgid; - ret = ib_find_cached_gid(pd->device, &grh->sgid, &port_num, + ret = ib_find_cached_gid(pd->device, &grh->dgid, &port_num, &gid_index); if (ret) return ERR_PTR(ret);