From 8a095030f7551d07860fd54890d1bcdc77630c29 Mon Sep 17 00:00:00 2001
From: Doug Ledford <dledford@redhat.com>
Date: Wed, 17 Oct 2012 03:23:55 +0000
Subject: [PATCH 1/5] IB/mlx4: Fix build error on platforms where UL is not 64
 bits

Line 110 uses UL as a compiler cast for the 0x constant, but it's not
large enough to hold a 64-bit value on a 32-bit arch.

Signed-off-by: Doug Ledford <dledford@redhat.com>

[ Use "-1" instead of "FFFFFFFFFFFFFFFFULL".  - Roland ]

Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/infiniband/hw/mlx4/alias_GUID.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c
index d2fb38d43571..2f215b93db6b 100644
--- a/drivers/infiniband/hw/mlx4/alias_GUID.c
+++ b/drivers/infiniband/hw/mlx4/alias_GUID.c
@@ -107,7 +107,7 @@ static __be64 get_cached_alias_guid(struct mlx4_ib_dev *dev, int port, int index
 {
 	if (index >= NUM_ALIAS_GUID_PER_PORT) {
 		pr_err("%s: ERROR: asked for index:%d\n", __func__, index);
-		return  (__force __be64) ((u64) 0xFFFFFFFFFFFFFFFFUL);
+		return (__force __be64) -1;
 	}
 	return *(__be64 *)&dev->sriov.demux[port - 1].guid_cache[index];
 }

From 2c75d2ccb6e5ffb96ce8624ef4c1f7ba5bd96499 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Wed, 17 Oct 2012 16:42:58 +0000
Subject: [PATCH 2/5] IB/mlx4: Fix QP1 P_Key processing in the Primary Physical
 Function (PPF)

In the MAD paravirtualization code, one of the checks performed when
forwarding QP1 (GSI) packets from wire to slave was a P_Key check: the
P_Key received in the MAD must be present in the guest's paravirtualized
P_Key table, and at least one of the (packet P_Key, guest P_Key) must
be a full-membership P_Key.

However, if everyone involved has only limited membership in the
default P_Key, then packets sent by full-member remote hosts arrive at
the PPF but are not passed on to the VFs with the current P_Key1 check.

Fix this as follows:

1. Don't care if P_Key received over wire is full or not. If it
   successfully passed HW checks on the real QP1, then simply pass it
   to guest regardless of whether the guest has full or limited
   membership in its P_Key table.

2. If the guest (including paravirtualized master) has both full and
   limited P_Key forms in its table, preferentially pass the
   paravirtualized P_Key index of the full P_Key form in the tunnel
   header.

3. In the multicast join flow (mlx4/mcg.c), use the index for the
   default P_Key (wherever it is located) in replies generated from
   within the mcg module (previously, P_Key index 0 was used in all
   cases).

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/infiniband/hw/mlx4/mad.c | 85 +++++++++++++++-----------------
 drivers/infiniband/hw/mlx4/mcg.c |  3 +-
 2 files changed, 42 insertions(+), 46 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 21a794152d15..0a903c129f0a 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -409,38 +409,45 @@ int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid)
 }
 
 
-static int get_pkey_phys_indices(struct mlx4_ib_dev *ibdev, u8 port, u8 ph_pkey_ix,
-				 u8 *full_pk_ix, u8 *partial_pk_ix,
-				 int *is_full_member)
+static int find_slave_port_pkey_ix(struct mlx4_ib_dev *dev, int slave,
+				   u8 port, u16 pkey, u16 *ix)
 {
-	u16 search_pkey;
-	int fm;
-	int err = 0;
-	u16 pk;
+	int i, ret;
+	u8 unassigned_pkey_ix, pkey_ix, partial_ix = 0xFF;
+	u16 slot_pkey;
 
-	err = ib_get_cached_pkey(&ibdev->ib_dev, port, ph_pkey_ix, &search_pkey);
-	if (err)
-		return err;
+	if (slave == mlx4_master_func_num(dev->dev))
+		return ib_find_cached_pkey(&dev->ib_dev, port, pkey, ix);
 
-	fm = (search_pkey & 0x8000) ? 1 : 0;
-	if (fm) {
-		*full_pk_ix = ph_pkey_ix;
-		search_pkey &= 0x7FFF;
-	} else {
-		*partial_pk_ix = ph_pkey_ix;
-		search_pkey |= 0x8000;
+	unassigned_pkey_ix = dev->dev->phys_caps.pkey_phys_table_len[port] - 1;
+
+	for (i = 0; i < dev->dev->caps.pkey_table_len[port]; i++) {
+		if (dev->pkeys.virt2phys_pkey[slave][port - 1][i] == unassigned_pkey_ix)
+			continue;
+
+		pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][i];
+
+		ret = ib_get_cached_pkey(&dev->ib_dev, port, pkey_ix, &slot_pkey);
+		if (ret)
+			continue;
+		if ((slot_pkey & 0x7FFF) == (pkey & 0x7FFF)) {
+			if (slot_pkey & 0x8000) {
+				*ix = (u16) pkey_ix;
+				return 0;
+			} else {
+				/* take first partial pkey index found */
+				if (partial_ix == 0xFF)
+					partial_ix = pkey_ix;
+			}
+		}
 	}
 
-	if (ib_find_exact_cached_pkey(&ibdev->ib_dev, port, search_pkey, &pk))
-		pk = 0xFFFF;
+	if (partial_ix < 0xFF) {
+		*ix = (u16) partial_ix;
+		return 0;
+	}
 
-	if (fm)
-		*partial_pk_ix = (pk & 0xFF);
-	else
-		*full_pk_ix = (pk & 0xFF);
-
-	*is_full_member = fm;
-	return err;
+	return -EINVAL;
 }
 
 int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
@@ -458,10 +465,8 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
 	unsigned tun_tx_ix = 0;
 	int dqpn;
 	int ret = 0;
-	int i;
-	int is_full_member = 0;
 	u16 tun_pkey_ix;
-	u8 ph_pkey_ix, full_pk_ix = 0, partial_pk_ix = 0;
+	u16 cached_pkey;
 
 	if (dest_qpt > IB_QPT_GSI)
 		return -EINVAL;
@@ -481,27 +486,17 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
 	else
 		tun_qp = &tun_ctx->qp[1];
 
-	/* compute pkey index for slave */
-	/* get physical pkey -- virtualized Dom0 pkey to phys*/
+	/* compute P_Key index to put in tunnel header for slave */
 	if (dest_qpt) {
-		ph_pkey_ix =
-			dev->pkeys.virt2phys_pkey[mlx4_master_func_num(dev->dev)][port - 1][wc->pkey_index];
-
-		/* now, translate this to the slave pkey index */
-		ret = get_pkey_phys_indices(dev, port, ph_pkey_ix, &full_pk_ix,
-					    &partial_pk_ix, &is_full_member);
+		u16 pkey_ix;
+		ret = ib_get_cached_pkey(&dev->ib_dev, port, wc->pkey_index, &cached_pkey);
 		if (ret)
 			return -EINVAL;
 
-		for (i = 0; i < dev->dev->caps.pkey_table_len[port]; i++) {
-			if ((dev->pkeys.virt2phys_pkey[slave][port - 1][i] == full_pk_ix) ||
-			    (is_full_member &&
-			     (dev->pkeys.virt2phys_pkey[slave][port - 1][i] == partial_pk_ix)))
-				break;
-		}
-		if (i == dev->dev->caps.pkey_table_len[port])
+		ret = find_slave_port_pkey_ix(dev, slave, port, cached_pkey, &pkey_ix);
+		if (ret)
 			return -EINVAL;
-		tun_pkey_ix = i;
+		tun_pkey_ix = pkey_ix;
 	} else
 		tun_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0];
 
diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
index 3c3b54c3fdd9..44ff480f79f5 100644
--- a/drivers/infiniband/hw/mlx4/mcg.c
+++ b/drivers/infiniband/hw/mlx4/mcg.c
@@ -233,7 +233,8 @@ static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx,
 
 	ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);
 
-	wc.pkey_index = 0;
+	if (ib_find_cached_pkey(&dev->ib_dev, ctx->port, IB_DEFAULT_PKEY_FULL, &wc.pkey_index))
+		return -EINVAL;
 	wc.sl = 0;
 	wc.dlid_path_bits = 0;
 	wc.port_num = ctx->port;

From bef83ed92caca45a394344a14b0234aea977da6e Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Wed, 17 Oct 2012 16:42:59 +0000
Subject: [PATCH 3/5] IB/mlx4: Synchronize cleanup of MCGs in MCG
 paravirtualization

A client re-register event invokes cleanup of all MCGs.  This is
required to protect against misbehaved guests leading to corruption of
join/leave database.  However, since cleaning up the MCGs is a heavy
operation, it is pushed to a work queue for further processing.
Client re-register is also propagated to ULPs (e.g IPoIB).

However, since the cleanup is performed in a workqueue, the ULP could
leave and re-join groups before the cleanup occurs.  In this case,
when the cleanup takes place, it prunes the (newly-joined) MCGs and
the ULP is left without actual MCGs while believing it joined them.

Fix this by setting the flushing flag before invoking the cleanup task
and clearing it after flushing is complete.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/infiniband/hw/mlx4/mcg.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
index 44ff480f79f5..25b2cdff00f8 100644
--- a/drivers/infiniband/hw/mlx4/mcg.c
+++ b/drivers/infiniband/hw/mlx4/mcg.c
@@ -1075,10 +1075,6 @@ static void _mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy
 	unsigned long end;
 	int count;
 
-	if (ctx->flushing)
-		return;
-
-	ctx->flushing = 1;
 	for (i = 0; i < MAX_VFS; ++i)
 		clean_vf_mcast(ctx, i);
 
@@ -1108,9 +1104,6 @@ static void _mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy
 		force_clean_group(group);
 	}
 	mutex_unlock(&ctx->mcg_table_lock);
-
-	if (!destroy_wq)
-		ctx->flushing = 0;
 }
 
 struct clean_work {
@@ -1124,6 +1117,7 @@ static void mcg_clean_task(struct work_struct *work)
 	struct clean_work *cw = container_of(work, struct clean_work, work);
 
 	_mlx4_ib_mcg_port_cleanup(cw->ctx, cw->destroy_wq);
+	cw->ctx->flushing = 0;
 	kfree(cw);
 }
 
@@ -1131,13 +1125,20 @@ void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq)
 {
 	struct clean_work *work;
 
+	if (ctx->flushing)
+		return;
+
+	ctx->flushing = 1;
+
 	if (destroy_wq) {
 		_mlx4_ib_mcg_port_cleanup(ctx, destroy_wq);
+		ctx->flushing = 0;
 		return;
 	}
 
 	work = kmalloc(sizeof *work, GFP_KERNEL);
 	if (!work) {
+		ctx->flushing = 0;
 		mcg_warn("failed allocating work for cleanup\n");
 		return;
 	}

From 3cf164c8dee834cef9dac4e7325f89a207339ac8 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Sun, 21 Oct 2012 14:59:22 +0000
Subject: [PATCH 4/5] mlx4_core: Remove annoying debug messages from SR-IOV
 flow

These debug prints left behind by commits c82e9aa0a8bc ("mlx4_core:
resource tracking for HCA resources used by guests"), 54679e148287
("mlx4: Implement QP paravirtualization and maintain phys_pkey_cache
for smp_snoop") and 993c401e2079 ("mlx4_core: Add IB port-state
machine and port mgmt event propagation") make it pretty hard to
actually use the mlx4_core debug messages when running in SRIOV/IB
mode -- for example, the module load sequence of a device with one VF
yielded 631 debug prints, with 408 of them being from this set.  Let's
just remove them.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/net/ethernet/mellanox/mlx4/eq.c               | 6 ------
 drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 6 ------
 2 files changed, 12 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index 51c764901ad2..194221b75b90 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -329,9 +329,6 @@ int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave,
 	ctx = &priv->mfunc.master.slave_state[slave];
 	spin_lock_irqsave(&ctx->lock, flags);
 
-	mlx4_dbg(dev, "%s: slave: %d, current state: %d new event :%d\n",
-		 __func__, slave, cur_state, event);
-
 	switch (cur_state) {
 	case SLAVE_PORT_DOWN:
 		if (MLX4_PORT_STATE_DEV_EVENT_PORT_UP == event)
@@ -366,9 +363,6 @@ int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave,
 			goto out;
 	}
 	ret = mlx4_get_slave_port_state(dev, slave, port);
-	mlx4_dbg(dev, "%s: slave: %d, current state: %d new event"
-		 " :%d gen_event: %d\n",
-		 __func__, slave, cur_state, event, *gen_event);
 
 out:
 	spin_unlock_irqrestore(&ctx->lock, flags);
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 926c911c0ac4..b05705f50f0f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -330,9 +330,6 @@ static void update_pkey_index(struct mlx4_dev *dev, int slave,
 
 	new_index = priv->virt2phys_pkey[slave][port - 1][orig_index];
 	*(u8 *)(inbox->buf + 35) = new_index;
-
-	mlx4_dbg(dev, "port = %d, orig pkey index = %d, "
-		 "new pkey index = %d\n", port, orig_index, new_index);
 }
 
 static void update_gid(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox,
@@ -351,9 +348,6 @@ static void update_gid(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox,
 		if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH)
 			qp_ctx->alt_path.mgid_index = slave & 0x7F;
 	}
-
-	mlx4_dbg(dev, "slave %d, new gid index: 0x%x ",
-		slave, qp_ctx->pri_path.mgid_index);
 }
 
 static int mpt_mask(struct mlx4_dev *dev)

From 41929ed2656bab8af1734bf5d0088385e72e294f Mon Sep 17 00:00:00 2001
From: Dotan Barak <dotanb@dev.mellanox.co.il>
Date: Sun, 21 Oct 2012 14:59:23 +0000
Subject: [PATCH 5/5] mlx4_core: Perform correct resource cleanup if
 mlx4_QUERY_ADAPTER() fails

Fixed the resource cleanup to act correctly and prevent a kernel oops when
mlx4_QUERY_ADAPTER() fails.

Signed-off-by: Dotan Barak <dotanb@dev.mellanox.co.il>
Reviewed-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/net/ethernet/mellanox/mlx4/main.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 80df2ab0177c..2aa80afd98d2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -1405,7 +1405,10 @@ unmap_bf:
 	unmap_bf_area(dev);
 
 err_close:
-	mlx4_close_hca(dev);
+	if (mlx4_is_slave(dev))
+		mlx4_slave_exit(dev);
+	else
+		mlx4_CLOSE_HCA(dev, 0);
 
 err_free_icm:
 	if (!mlx4_is_slave(dev))