From 8c2a4c8efb0c22cb68fcecf84f3ac7af25ebf542 Mon Sep 17 00:00:00 2001
From: Tobias Regnery <tobias.regnery@gmail.com>
Date: Tue, 15 Nov 2016 12:43:08 +0100
Subject: [PATCH 1/9] alx: refactor descriptor allocation

Split the allocation of descriptor memory and the buffer allocation into a
tx and rx function. This is in preparation for multiple queues where we
need to iterate over the new functions.

While at it drop the unneeded casting on the rx side.

Signed-off-by: Tobias Regnery <tobias.regnery@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/alx/main.c | 97 ++++++++++++++-----------
 1 file changed, 53 insertions(+), 44 deletions(-)

diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index eccbacd96201..8935766829b1 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -573,19 +573,41 @@ static int alx_set_mac_address(struct net_device *netdev, void *data)
 	return 0;
 }
 
-static int alx_alloc_descriptors(struct alx_priv *alx)
+static int alx_alloc_tx_ring(struct alx_priv *alx, struct alx_tx_queue *txq,
+			     int offset)
 {
-	alx->txq.bufs = kcalloc(alx->tx_ringsz,
-				sizeof(struct alx_buffer),
-				GFP_KERNEL);
-	if (!alx->txq.bufs)
+	txq->bufs = kcalloc(alx->tx_ringsz, sizeof(struct alx_buffer), GFP_KERNEL);
+	if (!txq->bufs)
 		return -ENOMEM;
 
-	alx->rxq.bufs = kcalloc(alx->rx_ringsz,
-				sizeof(struct alx_buffer),
-				GFP_KERNEL);
-	if (!alx->rxq.bufs)
-		goto out_free;
+	txq->tpd = alx->descmem.virt + offset;
+	txq->tpd_dma = alx->descmem.dma + offset;
+	offset += sizeof(struct alx_txd) * alx->tx_ringsz;
+
+	return offset;
+}
+
+static int alx_alloc_rx_ring(struct alx_priv *alx, struct alx_rx_queue *rxq,
+			     int offset)
+{
+	rxq->bufs = kcalloc(alx->rx_ringsz, sizeof(struct alx_buffer), GFP_KERNEL);
+	if (!rxq->bufs)
+		return -ENOMEM;
+
+	rxq->rrd = alx->descmem.virt + offset;
+	rxq->rrd_dma = alx->descmem.dma + offset;
+	offset += sizeof(struct alx_rrd) * alx->rx_ringsz;
+
+	rxq->rfd = alx->descmem.virt + offset;
+	rxq->rfd_dma = alx->descmem.dma + offset;
+	offset += sizeof(struct alx_rfd) * alx->rx_ringsz;
+
+	return offset;
+}
+
+static int alx_alloc_rings(struct alx_priv *alx)
+{
+	int offset = 0;
 
 	/* physical tx/rx ring descriptors
 	 *
@@ -601,45 +623,23 @@ static int alx_alloc_descriptors(struct alx_priv *alx)
 						&alx->descmem.dma,
 						GFP_KERNEL);
 	if (!alx->descmem.virt)
-		goto out_free;
+		return -ENOMEM;
 
-	alx->txq.tpd = alx->descmem.virt;
-	alx->txq.tpd_dma = alx->descmem.dma;
-
-	/* alignment requirement for next block */
+	/* alignment requirements */
 	BUILD_BUG_ON(sizeof(struct alx_txd) % 8);
-
-	alx->rxq.rrd =
-		(void *)((u8 *)alx->descmem.virt +
-			 sizeof(struct alx_txd) * alx->tx_ringsz);
-	alx->rxq.rrd_dma = alx->descmem.dma +
-			   sizeof(struct alx_txd) * alx->tx_ringsz;
-
-	/* alignment requirement for next block */
 	BUILD_BUG_ON(sizeof(struct alx_rrd) % 8);
 
-	alx->rxq.rfd =
-		(void *)((u8 *)alx->descmem.virt +
-			 sizeof(struct alx_txd) * alx->tx_ringsz +
-			 sizeof(struct alx_rrd) * alx->rx_ringsz);
-	alx->rxq.rfd_dma = alx->descmem.dma +
-			   sizeof(struct alx_txd) * alx->tx_ringsz +
-			   sizeof(struct alx_rrd) * alx->rx_ringsz;
+	offset = alx_alloc_tx_ring(alx, &alx->txq, offset);
+	if (offset < 0) {
+		netdev_err(alx->dev, "Allocation of tx buffer failed!\n");
+		goto out_free;
+	}
 
-	return 0;
-out_free:
-	kfree(alx->txq.bufs);
-	kfree(alx->rxq.bufs);
-	return -ENOMEM;
-}
-
-static int alx_alloc_rings(struct alx_priv *alx)
-{
-	int err;
-
-	err = alx_alloc_descriptors(alx);
-	if (err)
-		return err;
+	offset = alx_alloc_rx_ring(alx, &alx->rxq, offset);
+	if (offset < 0) {
+		netdev_err(alx->dev, "Allocation of rx buffer failed!\n");
+		goto out_free;
+	}
 
 	alx->int_mask &= ~ALX_ISR_ALL_QUEUES;
 	alx->int_mask |= ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0;
@@ -647,7 +647,16 @@ static int alx_alloc_rings(struct alx_priv *alx)
 	netif_napi_add(alx->dev, &alx->napi, alx_poll, 64);
 
 	alx_reinit_rings(alx);
+
 	return 0;
+out_free:
+	kfree(alx->txq.bufs);
+	kfree(alx->rxq.bufs);
+	dma_free_coherent(&alx->hw.pdev->dev,
+			  alx->descmem.size,
+			  alx->descmem.virt,
+			  alx->descmem.dma);
+	return -ENOMEM;
 }
 
 static void alx_free_rings(struct alx_priv *alx)

From bccffcf7154420c1cbd9e3c2702e2ec4ff0bc319 Mon Sep 17 00:00:00 2001
From: Tobias Regnery <tobias.regnery@gmail.com>
Date: Tue, 15 Nov 2016 12:43:09 +0100
Subject: [PATCH 2/9] alx: extend data structures for multi queue support

Extend the driver data structures to be able to handle multiple queues.

Based on the downstream driver at github.com/qca/alx

Signed-off-by: Tobias Regnery <tobias.regnery@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/alx/alx.h | 31 ++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/drivers/net/ethernet/atheros/alx/alx.h b/drivers/net/ethernet/atheros/alx/alx.h
index 6cac919272ea..0859053525de 100644
--- a/drivers/net/ethernet/atheros/alx/alx.h
+++ b/drivers/net/ethernet/atheros/alx/alx.h
@@ -50,6 +50,10 @@ struct alx_buffer {
 };
 
 struct alx_rx_queue {
+	struct net_device *netdev;
+	struct device *dev;
+	struct alx_napi *np;
+
 	struct alx_rrd *rrd;
 	dma_addr_t rrd_dma;
 
@@ -58,16 +62,26 @@ struct alx_rx_queue {
 
 	struct alx_buffer *bufs;
 
+	u16 count;
 	u16 write_idx, read_idx;
 	u16 rrd_read_idx;
+	u16 queue_idx;
 };
 #define ALX_RX_ALLOC_THRESH	32
 
 struct alx_tx_queue {
+	struct net_device *netdev;
+	struct device *dev;
+
 	struct alx_txd *tpd;
 	dma_addr_t tpd_dma;
+
 	struct alx_buffer *bufs;
+
+	u16 count;
 	u16 write_idx, read_idx;
+	u16 queue_idx;
+	u16 p_reg, c_reg;
 };
 
 #define ALX_DEFAULT_TX_WORK 128
@@ -76,6 +90,18 @@ enum alx_device_quirks {
 	ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG = BIT(0),
 };
 
+struct alx_napi {
+	struct napi_struct	napi;
+	struct alx_priv		*alx;
+	struct alx_rx_queue	*rxq;
+	struct alx_tx_queue	*txq;
+	int			vec_idx;
+	u32			vec_mask;
+	char			irq_lbl[IFNAMSIZ + 8];
+};
+
+#define ALX_MAX_NAPIS 8
+
 #define ALX_FLAG_USING_MSIX	BIT(0)
 #define ALX_FLAG_USING_MSI	BIT(1)
 
@@ -96,6 +122,11 @@ struct alx_priv {
 		unsigned int size;
 	} descmem;
 
+	struct alx_napi *qnapi[ALX_MAX_NAPIS];
+	int num_txq;
+	int num_rxq;
+	int num_napi;
+
 	/* protect int_mask updates */
 	spinlock_t irq_lock;
 	u32 int_mask;

From b0999223f224187318c67f1de653e34fb1f04f6b Mon Sep 17 00:00:00 2001
From: Tobias Regnery <tobias.regnery@gmail.com>
Date: Tue, 15 Nov 2016 12:43:10 +0100
Subject: [PATCH 3/9] alx: add ability to allocate and free alx_napi structures

Add new functions to allocate and free the alx_napi structures and use them
in __alx_open and __alx_stop. We only allocate one of these structures for
now, as the rest of the driver is not yet ready for multiple queues.

We switch over the setup of the interrupt mask and the call to netif_napi_add
to the new function because we must adjust these later on a per queue basis.

Based on the downstream driver at github.com/qca/alx

Signed-off-by: Tobias Regnery <tobias.regnery@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/alx/main.c | 99 +++++++++++++++++++------
 1 file changed, 78 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index 8935766829b1..193da6799979 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -632,45 +632,96 @@ static int alx_alloc_rings(struct alx_priv *alx)
 	offset = alx_alloc_tx_ring(alx, &alx->txq, offset);
 	if (offset < 0) {
 		netdev_err(alx->dev, "Allocation of tx buffer failed!\n");
-		goto out_free;
+		return -ENOMEM;
 	}
 
 	offset = alx_alloc_rx_ring(alx, &alx->rxq, offset);
 	if (offset < 0) {
 		netdev_err(alx->dev, "Allocation of rx buffer failed!\n");
-		goto out_free;
+		return -ENOMEM;
 	}
 
-	alx->int_mask &= ~ALX_ISR_ALL_QUEUES;
-	alx->int_mask |= ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0;
-
-	netif_napi_add(alx->dev, &alx->napi, alx_poll, 64);
-
 	alx_reinit_rings(alx);
 
 	return 0;
-out_free:
-	kfree(alx->txq.bufs);
-	kfree(alx->rxq.bufs);
-	dma_free_coherent(&alx->hw.pdev->dev,
-			  alx->descmem.size,
-			  alx->descmem.virt,
-			  alx->descmem.dma);
-	return -ENOMEM;
 }
 
 static void alx_free_rings(struct alx_priv *alx)
 {
-	netif_napi_del(&alx->napi);
 	alx_free_buffers(alx);
 
 	kfree(alx->txq.bufs);
 	kfree(alx->rxq.bufs);
 
-	dma_free_coherent(&alx->hw.pdev->dev,
-			  alx->descmem.size,
-			  alx->descmem.virt,
-			  alx->descmem.dma);
+	if (!alx->descmem.virt)
+		dma_free_coherent(&alx->hw.pdev->dev,
+				  alx->descmem.size,
+				  alx->descmem.virt,
+				  alx->descmem.dma);
+}
+
+static void alx_free_napis(struct alx_priv *alx)
+{
+	struct alx_napi *np;
+
+	np = alx->qnapi[0];
+	if (!np)
+		return;
+
+	netif_napi_del(&alx->napi);
+	kfree(np->txq);
+	kfree(np->rxq);
+	kfree(np);
+	alx->qnapi[0] = NULL;
+}
+
+static int alx_alloc_napis(struct alx_priv *alx)
+{
+	struct alx_napi *np;
+	struct alx_rx_queue *rxq;
+	struct alx_tx_queue *txq;
+
+	alx->int_mask &= ~ALX_ISR_ALL_QUEUES;
+	alx->int_mask |= ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0;
+
+	/* allocate alx_napi structures */
+	np = kzalloc(sizeof(struct alx_napi), GFP_KERNEL);
+	if (!np)
+		goto err_out;
+
+	np->alx = alx;
+	netif_napi_add(alx->dev, &alx->napi, alx_poll, 64);
+	alx->qnapi[0] = np;
+
+	/* allocate tx queues */
+	np = alx->qnapi[0];
+	txq = kzalloc(sizeof(*txq), GFP_KERNEL);
+	if (!txq)
+		goto err_out;
+
+	np->txq = txq;
+	txq->count = alx->tx_ringsz;
+	txq->netdev = alx->dev;
+	txq->dev = &alx->hw.pdev->dev;
+
+	/* allocate rx queues */
+	np = alx->qnapi[0];
+	rxq = kzalloc(sizeof(*rxq), GFP_KERNEL);
+	if (!rxq)
+		goto err_out;
+
+	np->rxq = rxq;
+	rxq->np = alx->qnapi[0];
+	rxq->count = alx->rx_ringsz;
+	rxq->netdev = alx->dev;
+	rxq->dev = &alx->hw.pdev->dev;
+
+	return 0;
+
+err_out:
+	netdev_err(alx->dev, "error allocating internal structures\n");
+	alx_free_napis(alx);
+	return -ENOMEM;
 }
 
 static void alx_config_vector_mapping(struct alx_priv *alx)
@@ -1031,10 +1082,14 @@ static int __alx_open(struct alx_priv *alx, bool resume)
 	if (!resume)
 		netif_carrier_off(alx->dev);
 
-	err = alx_alloc_rings(alx);
+	err = alx_alloc_napis(alx);
 	if (err)
 		goto out_disable_adv_intr;
 
+	err = alx_alloc_rings(alx);
+	if (err)
+		goto out_free_rings;
+
 	alx_configure(alx);
 
 	err = alx_request_irq(alx);
@@ -1054,6 +1109,7 @@ static int __alx_open(struct alx_priv *alx, bool resume)
 
 out_free_rings:
 	alx_free_rings(alx);
+	alx_free_napis(alx);
 out_disable_adv_intr:
 	alx_disable_advanced_intr(alx);
 	return err;
@@ -1064,6 +1120,7 @@ static void __alx_stop(struct alx_priv *alx)
 	alx_halt(alx);
 	alx_free_irq(alx);
 	alx_free_rings(alx);
+	alx_free_napis(alx);
 }
 
 static const char *alx_speed_desc(struct alx_hw *hw)

From 702e84185f472457912c641d8c0cc0cc786310eb Mon Sep 17 00:00:00 2001
From: Tobias Regnery <tobias.regnery@gmail.com>
Date: Tue, 15 Nov 2016 12:43:11 +0100
Subject: [PATCH 4/9] alx: switch to per queue data structures

Remove the tx and rx queue structures from the alx_priv structure and switch
everything over to the queue pointers in the alx_napi structure.

Based on the downstream driver at github.com/qca/alx

Signed-off-by: Tobias Regnery <tobias.regnery@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/alx/alx.h  |   5 -
 drivers/net/ethernet/atheros/alx/main.c | 183 ++++++++++++------------
 2 files changed, 93 insertions(+), 95 deletions(-)

diff --git a/drivers/net/ethernet/atheros/alx/alx.h b/drivers/net/ethernet/atheros/alx/alx.h
index 0859053525de..d4a409139ea2 100644
--- a/drivers/net/ethernet/atheros/alx/alx.h
+++ b/drivers/net/ethernet/atheros/alx/alx.h
@@ -113,7 +113,6 @@ struct alx_priv {
 	/* msi-x vectors */
 	int num_vec;
 	struct msix_entry *msix_entries;
-	char irq_lbl[IFNAMSIZ + 8];
 
 	/* all descriptor memory */
 	struct {
@@ -135,10 +134,6 @@ struct alx_priv {
 	unsigned int rx_ringsz;
 	unsigned int rxbuf_size;
 
-	struct napi_struct napi;
-	struct alx_tx_queue txq;
-	struct alx_rx_queue rxq;
-
 	struct work_struct link_check_wk;
 	struct work_struct reset_wk;
 
diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index 193da6799979..a9fc44611b8a 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -55,12 +55,12 @@ static bool msix = false;
 module_param(msix, bool, 0);
 MODULE_PARM_DESC(msix, "Enable msi-x interrupt support");
 
-static void alx_free_txbuf(struct alx_priv *alx, int entry)
+static void alx_free_txbuf(struct alx_tx_queue *txq, int entry)
 {
-	struct alx_buffer *txb = &alx->txq.bufs[entry];
+	struct alx_buffer *txb = &txq->bufs[entry];
 
 	if (dma_unmap_len(txb, size)) {
-		dma_unmap_single(&alx->hw.pdev->dev,
+		dma_unmap_single(txq->dev,
 				 dma_unmap_addr(txb, dma),
 				 dma_unmap_len(txb, size),
 				 DMA_TO_DEVICE);
@@ -75,7 +75,7 @@ static void alx_free_txbuf(struct alx_priv *alx, int entry)
 
 static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp)
 {
-	struct alx_rx_queue *rxq = &alx->rxq;
+	struct alx_rx_queue *rxq = alx->qnapi[0]->rxq;
 	struct sk_buff *skb;
 	struct alx_buffer *cur_buf;
 	dma_addr_t dma;
@@ -143,22 +143,22 @@ static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp)
 	return count;
 }
 
-static inline int alx_tpd_avail(struct alx_priv *alx)
+static inline int alx_tpd_avail(struct alx_tx_queue *txq)
 {
-	struct alx_tx_queue *txq = &alx->txq;
-
 	if (txq->write_idx >= txq->read_idx)
-		return alx->tx_ringsz + txq->read_idx - txq->write_idx - 1;
+		return txq->count + txq->read_idx - txq->write_idx - 1;
 	return txq->read_idx - txq->write_idx - 1;
 }
 
-static bool alx_clean_tx_irq(struct alx_priv *alx)
+static bool alx_clean_tx_irq(struct alx_tx_queue *txq)
 {
-	struct alx_tx_queue *txq = &alx->txq;
+	struct alx_priv *alx;
 	u16 hw_read_idx, sw_read_idx;
 	unsigned int total_bytes = 0, total_packets = 0;
 	int budget = ALX_DEFAULT_TX_WORK;
 
+	alx = netdev_priv(txq->netdev);
+
 	sw_read_idx = txq->read_idx;
 	hw_read_idx = alx_read_mem16(&alx->hw, ALX_TPD_PRI0_CIDX);
 
@@ -173,19 +173,19 @@ static bool alx_clean_tx_irq(struct alx_priv *alx)
 				budget--;
 			}
 
-			alx_free_txbuf(alx, sw_read_idx);
+			alx_free_txbuf(txq, sw_read_idx);
 
-			if (++sw_read_idx == alx->tx_ringsz)
+			if (++sw_read_idx == txq->count)
 				sw_read_idx = 0;
 		}
 		txq->read_idx = sw_read_idx;
 
-		netdev_completed_queue(alx->dev, total_packets, total_bytes);
+		netdev_completed_queue(txq->netdev, total_packets, total_bytes);
 	}
 
-	if (netif_queue_stopped(alx->dev) && netif_carrier_ok(alx->dev) &&
-	    alx_tpd_avail(alx) > alx->tx_ringsz/4)
-		netif_wake_queue(alx->dev);
+	if (netif_queue_stopped(txq->netdev) && netif_carrier_ok(txq->netdev) &&
+	    alx_tpd_avail(txq) > txq->count / 4)
+		netif_wake_queue(txq->netdev);
 
 	return sw_read_idx == hw_read_idx;
 }
@@ -200,15 +200,17 @@ static void alx_schedule_reset(struct alx_priv *alx)
 	schedule_work(&alx->reset_wk);
 }
 
-static int alx_clean_rx_irq(struct alx_priv *alx, int budget)
+static int alx_clean_rx_irq(struct alx_rx_queue *rxq, int budget)
 {
-	struct alx_rx_queue *rxq = &alx->rxq;
+	struct alx_priv *alx;
 	struct alx_rrd *rrd;
 	struct alx_buffer *rxb;
 	struct sk_buff *skb;
 	u16 length, rfd_cleaned = 0;
 	int work = 0;
 
+	alx = netdev_priv(rxq->netdev);
+
 	while (work < budget) {
 		rrd = &rxq->rrd[rxq->rrd_read_idx];
 		if (!(rrd->word3 & cpu_to_le32(1 << RRD_UPDATED_SHIFT)))
@@ -224,7 +226,7 @@ static int alx_clean_rx_irq(struct alx_priv *alx, int budget)
 		}
 
 		rxb = &rxq->bufs[rxq->read_idx];
-		dma_unmap_single(&alx->hw.pdev->dev,
+		dma_unmap_single(rxq->dev,
 				 dma_unmap_addr(rxb, dma),
 				 dma_unmap_len(rxb, size),
 				 DMA_FROM_DEVICE);
@@ -242,7 +244,7 @@ static int alx_clean_rx_irq(struct alx_priv *alx, int budget)
 		length = ALX_GET_FIELD(le32_to_cpu(rrd->word3),
 				       RRD_PKTLEN) - ETH_FCS_LEN;
 		skb_put(skb, length);
-		skb->protocol = eth_type_trans(skb, alx->dev);
+		skb->protocol = eth_type_trans(skb, rxq->netdev);
 
 		skb_checksum_none_assert(skb);
 		if (alx->dev->features & NETIF_F_RXCSUM &&
@@ -259,13 +261,13 @@ static int alx_clean_rx_irq(struct alx_priv *alx, int budget)
 			}
 		}
 
-		napi_gro_receive(&alx->napi, skb);
+		napi_gro_receive(&rxq->np->napi, skb);
 		work++;
 
 next_pkt:
-		if (++rxq->read_idx == alx->rx_ringsz)
+		if (++rxq->read_idx == rxq->count)
 			rxq->read_idx = 0;
-		if (++rxq->rrd_read_idx == alx->rx_ringsz)
+		if (++rxq->rrd_read_idx == rxq->count)
 			rxq->rrd_read_idx = 0;
 
 		if (++rfd_cleaned > ALX_RX_ALLOC_THRESH)
@@ -280,19 +282,20 @@ next_pkt:
 
 static int alx_poll(struct napi_struct *napi, int budget)
 {
-	struct alx_priv *alx = container_of(napi, struct alx_priv, napi);
+	struct alx_napi *np = container_of(napi, struct alx_napi, napi);
+	struct alx_priv *alx = np->alx;
 	struct alx_hw *hw = &alx->hw;
 	unsigned long flags;
 	bool tx_complete;
 	int work;
 
-	tx_complete = alx_clean_tx_irq(alx);
-	work = alx_clean_rx_irq(alx, budget);
+	tx_complete = alx_clean_tx_irq(np->txq);
+	work = alx_clean_rx_irq(np->rxq, budget);
 
 	if (!tx_complete || work == budget)
 		return budget;
 
-	napi_complete(&alx->napi);
+	napi_complete(&np->napi);
 
 	/* enable interrupt */
 	if (alx->flags & ALX_FLAG_USING_MSIX) {
@@ -350,7 +353,7 @@ static irqreturn_t alx_intr_handle(struct alx_priv *alx, u32 intr)
 		goto out;
 
 	if (intr & (ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0)) {
-		napi_schedule(&alx->napi);
+		napi_schedule(&alx->qnapi[0]->napi);
 		/* mask rx/tx interrupt, enable them when napi complete */
 		alx->int_mask &= ~ALX_ISR_ALL_QUEUES;
 		alx_write_mem32(hw, ALX_IMR, alx->int_mask);
@@ -365,15 +368,15 @@ static irqreturn_t alx_intr_handle(struct alx_priv *alx, u32 intr)
 
 static irqreturn_t alx_intr_msix_ring(int irq, void *data)
 {
-	struct alx_priv *alx = data;
-	struct alx_hw *hw = &alx->hw;
+	struct alx_napi *np = data;
+	struct alx_hw *hw = &np->alx->hw;
 
 	/* mask interrupt to ACK chip */
 	alx_mask_msix(hw, 1, true);
 	/* clear interrupt status */
 	alx_write_mem32(hw, ALX_ISR, (ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0));
 
-	napi_schedule(&alx->napi);
+	napi_schedule(&np->napi);
 
 	return IRQ_HANDLED;
 }
@@ -428,59 +431,58 @@ static void alx_init_ring_ptrs(struct alx_priv *alx)
 {
 	struct alx_hw *hw = &alx->hw;
 	u32 addr_hi = ((u64)alx->descmem.dma) >> 32;
+	struct alx_napi *np = alx->qnapi[0];
 
-	alx->rxq.read_idx = 0;
-	alx->rxq.write_idx = 0;
-	alx->rxq.rrd_read_idx = 0;
+	np->rxq->read_idx = 0;
+	np->rxq->write_idx = 0;
+	np->rxq->rrd_read_idx = 0;
 	alx_write_mem32(hw, ALX_RX_BASE_ADDR_HI, addr_hi);
-	alx_write_mem32(hw, ALX_RRD_ADDR_LO, alx->rxq.rrd_dma);
+	alx_write_mem32(hw, ALX_RRD_ADDR_LO, np->rxq->rrd_dma);
 	alx_write_mem32(hw, ALX_RRD_RING_SZ, alx->rx_ringsz);
-	alx_write_mem32(hw, ALX_RFD_ADDR_LO, alx->rxq.rfd_dma);
+	alx_write_mem32(hw, ALX_RFD_ADDR_LO, np->rxq->rfd_dma);
 	alx_write_mem32(hw, ALX_RFD_RING_SZ, alx->rx_ringsz);
 	alx_write_mem32(hw, ALX_RFD_BUF_SZ, alx->rxbuf_size);
 
-	alx->txq.read_idx = 0;
-	alx->txq.write_idx = 0;
+	np->txq->read_idx = 0;
+	np->txq->write_idx = 0;
 	alx_write_mem32(hw, ALX_TX_BASE_ADDR_HI, addr_hi);
-	alx_write_mem32(hw, ALX_TPD_PRI0_ADDR_LO, alx->txq.tpd_dma);
+	alx_write_mem32(hw, ALX_TPD_PRI0_ADDR_LO, np->txq->tpd_dma);
 	alx_write_mem32(hw, ALX_TPD_RING_SZ, alx->tx_ringsz);
 
 	/* load these pointers into the chip */
 	alx_write_mem32(hw, ALX_SRAM9, ALX_SRAM_LOAD_PTR);
 }
 
-static void alx_free_txring_buf(struct alx_priv *alx)
+static void alx_free_txring_buf(struct alx_tx_queue *txq)
 {
-	struct alx_tx_queue *txq = &alx->txq;
 	int i;
 
 	if (!txq->bufs)
 		return;
 
-	for (i = 0; i < alx->tx_ringsz; i++)
-		alx_free_txbuf(alx, i);
+	for (i = 0; i < txq->count; i++)
+		alx_free_txbuf(txq, i);
 
-	memset(txq->bufs, 0, alx->tx_ringsz * sizeof(struct alx_buffer));
-	memset(txq->tpd, 0, alx->tx_ringsz * sizeof(struct alx_txd));
+	memset(txq->bufs, 0, txq->count * sizeof(struct alx_buffer));
+	memset(txq->tpd, 0, txq->count * sizeof(struct alx_txd));
 	txq->write_idx = 0;
 	txq->read_idx = 0;
 
-	netdev_reset_queue(alx->dev);
+	netdev_reset_queue(txq->netdev);
 }
 
-static void alx_free_rxring_buf(struct alx_priv *alx)
+static void alx_free_rxring_buf(struct alx_rx_queue *rxq)
 {
-	struct alx_rx_queue *rxq = &alx->rxq;
 	struct alx_buffer *cur_buf;
 	u16 i;
 
 	if (rxq == NULL)
 		return;
 
-	for (i = 0; i < alx->rx_ringsz; i++) {
+	for (i = 0; i < rxq->count; i++) {
 		cur_buf = rxq->bufs + i;
 		if (cur_buf->skb) {
-			dma_unmap_single(&alx->hw.pdev->dev,
+			dma_unmap_single(rxq->dev,
 					 dma_unmap_addr(cur_buf, dma),
 					 dma_unmap_len(cur_buf, size),
 					 DMA_FROM_DEVICE);
@@ -498,8 +500,8 @@ static void alx_free_rxring_buf(struct alx_priv *alx)
 
 static void alx_free_buffers(struct alx_priv *alx)
 {
-	alx_free_txring_buf(alx);
-	alx_free_rxring_buf(alx);
+	alx_free_txring_buf(alx->qnapi[0]->txq);
+	alx_free_rxring_buf(alx->qnapi[0]->rxq);
 }
 
 static int alx_reinit_rings(struct alx_priv *alx)
@@ -576,13 +578,13 @@ static int alx_set_mac_address(struct net_device *netdev, void *data)
 static int alx_alloc_tx_ring(struct alx_priv *alx, struct alx_tx_queue *txq,
 			     int offset)
 {
-	txq->bufs = kcalloc(alx->tx_ringsz, sizeof(struct alx_buffer), GFP_KERNEL);
+	txq->bufs = kcalloc(txq->count, sizeof(struct alx_buffer), GFP_KERNEL);
 	if (!txq->bufs)
 		return -ENOMEM;
 
 	txq->tpd = alx->descmem.virt + offset;
 	txq->tpd_dma = alx->descmem.dma + offset;
-	offset += sizeof(struct alx_txd) * alx->tx_ringsz;
+	offset += sizeof(struct alx_txd) * txq->count;
 
 	return offset;
 }
@@ -590,17 +592,17 @@ static int alx_alloc_tx_ring(struct alx_priv *alx, struct alx_tx_queue *txq,
 static int alx_alloc_rx_ring(struct alx_priv *alx, struct alx_rx_queue *rxq,
 			     int offset)
 {
-	rxq->bufs = kcalloc(alx->rx_ringsz, sizeof(struct alx_buffer), GFP_KERNEL);
+	rxq->bufs = kcalloc(rxq->count, sizeof(struct alx_buffer), GFP_KERNEL);
 	if (!rxq->bufs)
 		return -ENOMEM;
 
 	rxq->rrd = alx->descmem.virt + offset;
 	rxq->rrd_dma = alx->descmem.dma + offset;
-	offset += sizeof(struct alx_rrd) * alx->rx_ringsz;
+	offset += sizeof(struct alx_rrd) * rxq->count;
 
 	rxq->rfd = alx->descmem.virt + offset;
 	rxq->rfd_dma = alx->descmem.dma + offset;
-	offset += sizeof(struct alx_rfd) * alx->rx_ringsz;
+	offset += sizeof(struct alx_rfd) * rxq->count;
 
 	return offset;
 }
@@ -629,13 +631,13 @@ static int alx_alloc_rings(struct alx_priv *alx)
 	BUILD_BUG_ON(sizeof(struct alx_txd) % 8);
 	BUILD_BUG_ON(sizeof(struct alx_rrd) % 8);
 
-	offset = alx_alloc_tx_ring(alx, &alx->txq, offset);
+	offset = alx_alloc_tx_ring(alx, alx->qnapi[0]->txq, offset);
 	if (offset < 0) {
 		netdev_err(alx->dev, "Allocation of tx buffer failed!\n");
 		return -ENOMEM;
 	}
 
-	offset = alx_alloc_rx_ring(alx, &alx->rxq, offset);
+	offset = alx_alloc_rx_ring(alx, alx->qnapi[0]->rxq, offset);
 	if (offset < 0) {
 		netdev_err(alx->dev, "Allocation of rx buffer failed!\n");
 		return -ENOMEM;
@@ -648,10 +650,11 @@ static int alx_alloc_rings(struct alx_priv *alx)
 
 static void alx_free_rings(struct alx_priv *alx)
 {
+
 	alx_free_buffers(alx);
 
-	kfree(alx->txq.bufs);
-	kfree(alx->rxq.bufs);
+	kfree(alx->qnapi[0]->txq->bufs);
+	kfree(alx->qnapi[0]->rxq->bufs);
 
 	if (!alx->descmem.virt)
 		dma_free_coherent(&alx->hw.pdev->dev,
@@ -668,7 +671,7 @@ static void alx_free_napis(struct alx_priv *alx)
 	if (!np)
 		return;
 
-	netif_napi_del(&alx->napi);
+	netif_napi_del(&np->napi);
 	kfree(np->txq);
 	kfree(np->rxq);
 	kfree(np);
@@ -690,7 +693,7 @@ static int alx_alloc_napis(struct alx_priv *alx)
 		goto err_out;
 
 	np->alx = alx;
-	netif_napi_add(alx->dev, &alx->napi, alx_poll, 64);
+	netif_napi_add(alx->dev, &np->napi, alx_poll, 64);
 	alx->qnapi[0] = np;
 
 	/* allocate tx queues */
@@ -768,6 +771,7 @@ static int alx_request_msix(struct alx_priv *alx)
 {
 	struct net_device *netdev = alx->dev;
 	int i, err, vector = 0, free_vector = 0;
+	struct alx_napi *np = alx->qnapi[0];
 
 	err = request_irq(alx->msix_entries[0].vector, alx_intr_msix_misc,
 			  0, netdev->name, alx);
@@ -775,10 +779,10 @@ static int alx_request_msix(struct alx_priv *alx)
 		goto out_err;
 
 	vector++;
-	sprintf(alx->irq_lbl, "%s-TxRx-0", netdev->name);
+	sprintf(np->irq_lbl, "%s-TxRx-0", netdev->name);
 
 	err = request_irq(alx->msix_entries[vector].vector,
-			  alx_intr_msix_ring, 0, alx->irq_lbl, alx);
+			  alx_intr_msix_ring, 0, np->irq_lbl, np);
 		if (err)
 			goto out_free;
 
@@ -789,7 +793,7 @@ out_free:
 
 	vector--;
 	for (i = 0; i < vector; i++)
-		free_irq(alx->msix_entries[free_vector++].vector, alx);
+		free_irq(alx->msix_entries[free_vector++].vector, alx->qnapi[0]);
 
 out_err:
 	return err;
@@ -905,12 +909,12 @@ out:
 static void alx_free_irq(struct alx_priv *alx)
 {
 	struct pci_dev *pdev = alx->hw.pdev;
-	int i;
+	int i, vector = 0;
 
 	if (alx->flags & ALX_FLAG_USING_MSIX) {
 		/* we have only 2 vectors without multi queue support */
-		for (i = 0; i < 2; i++)
-			free_irq(alx->msix_entries[i].vector, alx);
+		free_irq(alx->msix_entries[vector++].vector, alx);
+		free_irq(alx->msix_entries[vector++].vector, alx->qnapi[0]);
 	} else {
 		free_irq(pdev->irq, alx);
 	}
@@ -999,7 +1003,7 @@ static void alx_netif_stop(struct alx_priv *alx)
 	if (netif_carrier_ok(alx->dev)) {
 		netif_carrier_off(alx->dev);
 		netif_tx_disable(alx->dev);
-		napi_disable(&alx->napi);
+		napi_disable(&alx->qnapi[0]->napi);
 	}
 }
 
@@ -1069,7 +1073,7 @@ static int alx_change_mtu(struct net_device *netdev, int mtu)
 static void alx_netif_start(struct alx_priv *alx)
 {
 	netif_tx_wake_all_queues(alx->dev);
-	napi_enable(&alx->napi);
+	napi_enable(&alx->qnapi[0]->napi);
 	netif_carrier_on(alx->dev);
 }
 
@@ -1303,9 +1307,8 @@ static int alx_tso(struct sk_buff *skb, struct alx_txd *first)
 	return 1;
 }
 
-static int alx_map_tx_skb(struct alx_priv *alx, struct sk_buff *skb)
+static int alx_map_tx_skb(struct alx_tx_queue *txq, struct sk_buff *skb)
 {
-	struct alx_tx_queue *txq = &alx->txq;
 	struct alx_txd *tpd, *first_tpd;
 	dma_addr_t dma;
 	int maplen, f, first_idx = txq->write_idx;
@@ -1314,7 +1317,7 @@ static int alx_map_tx_skb(struct alx_priv *alx, struct sk_buff *skb)
 	tpd = first_tpd;
 
 	if (tpd->word1 & (1 << TPD_LSO_V2_SHIFT)) {
-		if (++txq->write_idx == alx->tx_ringsz)
+		if (++txq->write_idx == txq->count)
 			txq->write_idx = 0;
 
 		tpd = &txq->tpd[txq->write_idx];
@@ -1324,9 +1327,9 @@ static int alx_map_tx_skb(struct alx_priv *alx, struct sk_buff *skb)
 	}
 
 	maplen = skb_headlen(skb);
-	dma = dma_map_single(&alx->hw.pdev->dev, skb->data, maplen,
+	dma = dma_map_single(txq->dev, skb->data, maplen,
 			     DMA_TO_DEVICE);
-	if (dma_mapping_error(&alx->hw.pdev->dev, dma))
+	if (dma_mapping_error(txq->dev, dma))
 		goto err_dma;
 
 	dma_unmap_len_set(&txq->bufs[txq->write_idx], size, maplen);
@@ -1340,16 +1343,16 @@ static int alx_map_tx_skb(struct alx_priv *alx, struct sk_buff *skb)
 
 		frag = &skb_shinfo(skb)->frags[f];
 
-		if (++txq->write_idx == alx->tx_ringsz)
+		if (++txq->write_idx == txq->count)
 			txq->write_idx = 0;
 		tpd = &txq->tpd[txq->write_idx];
 
 		tpd->word1 = first_tpd->word1;
 
 		maplen = skb_frag_size(frag);
-		dma = skb_frag_dma_map(&alx->hw.pdev->dev, frag, 0,
+		dma = skb_frag_dma_map(txq->dev, frag, 0,
 				       maplen, DMA_TO_DEVICE);
-		if (dma_mapping_error(&alx->hw.pdev->dev, dma))
+		if (dma_mapping_error(txq->dev, dma))
 			goto err_dma;
 		dma_unmap_len_set(&txq->bufs[txq->write_idx], size, maplen);
 		dma_unmap_addr_set(&txq->bufs[txq->write_idx], dma, dma);
@@ -1362,7 +1365,7 @@ static int alx_map_tx_skb(struct alx_priv *alx, struct sk_buff *skb)
 	tpd->word1 |= cpu_to_le32(1 << TPD_EOP_SHIFT);
 	txq->bufs[txq->write_idx].skb = skb;
 
-	if (++txq->write_idx == alx->tx_ringsz)
+	if (++txq->write_idx == txq->count)
 		txq->write_idx = 0;
 
 	return 0;
@@ -1370,8 +1373,8 @@ static int alx_map_tx_skb(struct alx_priv *alx, struct sk_buff *skb)
 err_dma:
 	f = first_idx;
 	while (f != txq->write_idx) {
-		alx_free_txbuf(alx, f);
-		if (++f == alx->tx_ringsz)
+		alx_free_txbuf(txq, f);
+		if (++f == txq->count)
 			f = 0;
 	}
 	return -ENOMEM;
@@ -1381,12 +1384,12 @@ static netdev_tx_t alx_start_xmit(struct sk_buff *skb,
 				  struct net_device *netdev)
 {
 	struct alx_priv *alx = netdev_priv(netdev);
-	struct alx_tx_queue *txq = &alx->txq;
+	struct alx_tx_queue *txq = alx->qnapi[0]->txq;
 	struct alx_txd *first;
 	int tso;
 
-	if (alx_tpd_avail(alx) < alx_tpd_req(skb)) {
-		netif_stop_queue(alx->dev);
+	if (alx_tpd_avail(txq) < alx_tpd_req(skb)) {
+		netif_stop_queue(txq->netdev);
 		goto drop;
 	}
 
@@ -1399,17 +1402,17 @@ static netdev_tx_t alx_start_xmit(struct sk_buff *skb,
 	else if (!tso && alx_tx_csum(skb, first))
 		goto drop;
 
-	if (alx_map_tx_skb(alx, skb) < 0)
+	if (alx_map_tx_skb(txq, skb) < 0)
 		goto drop;
 
-	netdev_sent_queue(alx->dev, skb->len);
+	netdev_sent_queue(txq->netdev, skb->len);
 
 	/* flush updates before updating hardware */
 	wmb();
 	alx_write_mem16(&alx->hw, ALX_TPD_PRI0_PIDX, txq->write_idx);
 
-	if (alx_tpd_avail(alx) < alx->tx_ringsz/8)
-		netif_stop_queue(alx->dev);
+	if (alx_tpd_avail(txq) < txq->count / 8)
+		netif_stop_queue(txq->netdev);
 
 	return NETDEV_TX_OK;
 
@@ -1478,7 +1481,7 @@ static void alx_poll_controller(struct net_device *netdev)
 
 	if (alx->flags & ALX_FLAG_USING_MSIX) {
 		alx_intr_msix_misc(0, alx);
-		alx_intr_msix_ring(0, alx);
+		alx_intr_msix_ring(0, alx->qnapi[0]);
 	} else if (alx->flags & ALX_FLAG_USING_MSI)
 		alx_intr_msi(0, alx);
 	else

From e0eac25460902a4eda07c0e9b3e749d198e074fa Mon Sep 17 00:00:00 2001
From: Tobias Regnery <tobias.regnery@gmail.com>
Date: Tue, 15 Nov 2016 12:43:12 +0100
Subject: [PATCH 5/9] alx: prepare interrupt functions for multiple queues

Extend the interrupt bringup code and the interrupt handler for msi-x
interrupts in order to handle multiple queues.

We must change the poll function because with multiple queues it is possible
that an alx_napi structure has only a tx or only a rx queue pointer.

Based on the downstream driver at github.com/qca/alx

Signed-off-by: Tobias Regnery <tobias.regnery@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/alx/main.c | 130 +++++++++++++++++++-----
 1 file changed, 104 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index a9fc44611b8a..aeb42120910d 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -286,11 +286,13 @@ static int alx_poll(struct napi_struct *napi, int budget)
 	struct alx_priv *alx = np->alx;
 	struct alx_hw *hw = &alx->hw;
 	unsigned long flags;
-	bool tx_complete;
-	int work;
+	bool tx_complete = true;
+	int work = 0;
 
-	tx_complete = alx_clean_tx_irq(np->txq);
-	work = alx_clean_rx_irq(np->rxq, budget);
+	if (np->txq)
+		tx_complete = alx_clean_tx_irq(np->txq);
+	if (np->rxq)
+		work = alx_clean_rx_irq(np->rxq, budget);
 
 	if (!tx_complete || work == budget)
 		return budget;
@@ -299,7 +301,7 @@ static int alx_poll(struct napi_struct *napi, int budget)
 
 	/* enable interrupt */
 	if (alx->flags & ALX_FLAG_USING_MSIX) {
-		alx_mask_msix(hw, 1, false);
+		alx_mask_msix(hw, np->vec_idx, false);
 	} else {
 		spin_lock_irqsave(&alx->irq_lock, flags);
 		alx->int_mask |= ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0;
@@ -372,9 +374,9 @@ static irqreturn_t alx_intr_msix_ring(int irq, void *data)
 	struct alx_hw *hw = &np->alx->hw;
 
 	/* mask interrupt to ACK chip */
-	alx_mask_msix(hw, 1, true);
+	alx_mask_msix(hw, np->vec_idx, true);
 	/* clear interrupt status */
-	alx_write_mem32(hw, ALX_ISR, (ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0));
+	alx_write_mem32(hw, ALX_ISR, np->vec_mask);
 
 	napi_schedule(&np->napi);
 
@@ -678,6 +680,13 @@ static void alx_free_napis(struct alx_priv *alx)
 	alx->qnapi[0] = NULL;
 }
 
+static const u32 tx_vect_mask[] = {ALX_ISR_TX_Q0, ALX_ISR_TX_Q1,
+				   ALX_ISR_TX_Q2, ALX_ISR_TX_Q3};
+static const u32 rx_vect_mask[] = {ALX_ISR_RX_Q0, ALX_ISR_RX_Q1,
+				   ALX_ISR_RX_Q2, ALX_ISR_RX_Q3,
+				   ALX_ISR_RX_Q4, ALX_ISR_RX_Q5,
+				   ALX_ISR_RX_Q6, ALX_ISR_RX_Q7};
+
 static int alx_alloc_napis(struct alx_priv *alx)
 {
 	struct alx_napi *np;
@@ -685,7 +694,6 @@ static int alx_alloc_napis(struct alx_priv *alx)
 	struct alx_tx_queue *txq;
 
 	alx->int_mask &= ~ALX_ISR_ALL_QUEUES;
-	alx->int_mask |= ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0;
 
 	/* allocate alx_napi structures */
 	np = kzalloc(sizeof(struct alx_napi), GFP_KERNEL);
@@ -703,9 +711,12 @@ static int alx_alloc_napis(struct alx_priv *alx)
 		goto err_out;
 
 	np->txq = txq;
+	txq->queue_idx = 0;
 	txq->count = alx->tx_ringsz;
 	txq->netdev = alx->dev;
 	txq->dev = &alx->hw.pdev->dev;
+	np->vec_mask |= tx_vect_mask[0];
+	alx->int_mask |= tx_vect_mask[0];
 
 	/* allocate rx queues */
 	np = alx->qnapi[0];
@@ -715,9 +726,12 @@ static int alx_alloc_napis(struct alx_priv *alx)
 
 	np->rxq = rxq;
 	rxq->np = alx->qnapi[0];
+	rxq->queue_idx = 0;
 	rxq->count = alx->rx_ringsz;
 	rxq->netdev = alx->dev;
 	rxq->dev = &alx->hw.pdev->dev;
+	np->vec_mask |= rx_vect_mask[0];
+	alx->int_mask |= rx_vect_mask[0];
 
 	return 0;
 
@@ -727,24 +741,43 @@ err_out:
 	return -ENOMEM;
 }
 
+static const int txq_vec_mapping_shift[] = {
+	0, ALX_MSI_MAP_TBL1_TXQ0_SHIFT,
+	0, ALX_MSI_MAP_TBL1_TXQ1_SHIFT,
+	1, ALX_MSI_MAP_TBL2_TXQ2_SHIFT,
+	1, ALX_MSI_MAP_TBL2_TXQ3_SHIFT,
+};
+
 static void alx_config_vector_mapping(struct alx_priv *alx)
 {
 	struct alx_hw *hw = &alx->hw;
-	u32 tbl = 0;
+	u32 tbl[2] = {0, 0};
+	int i, vector, idx, shift;
 
 	if (alx->flags & ALX_FLAG_USING_MSIX) {
-		tbl |= 1 << ALX_MSI_MAP_TBL1_TXQ0_SHIFT;
-		tbl |= 1 << ALX_MSI_MAP_TBL1_RXQ0_SHIFT;
+		/* tx mappings */
+		for (i = 0, vector = 1; i < alx->num_txq; i++, vector++) {
+			idx = txq_vec_mapping_shift[i * 2];
+			shift = txq_vec_mapping_shift[i * 2 + 1];
+			tbl[idx] |= vector << shift;
+		}
+
+		/* rx mapping */
+		tbl[0] |= 1 << ALX_MSI_MAP_TBL1_RXQ0_SHIFT;
 	}
 
-	alx_write_mem32(hw, ALX_MSI_MAP_TBL1, tbl);
-	alx_write_mem32(hw, ALX_MSI_MAP_TBL2, 0);
+	alx_write_mem32(hw, ALX_MSI_MAP_TBL1, tbl[0]);
+	alx_write_mem32(hw, ALX_MSI_MAP_TBL2, tbl[1]);
 	alx_write_mem32(hw, ALX_MSI_ID_MAP, 0);
 }
 
 static bool alx_enable_msix(struct alx_priv *alx)
 {
-	int i, err, num_vec = 2;
+	int i, err, num_vec, num_txq, num_rxq;
+
+	num_txq = 1;
+	num_rxq = 1;
+	num_vec = max_t(int, num_txq, num_rxq) + 1;
 
 	alx->msix_entries = kcalloc(num_vec, sizeof(struct msix_entry),
 				    GFP_KERNEL);
@@ -764,6 +797,10 @@ static bool alx_enable_msix(struct alx_priv *alx)
 	}
 
 	alx->num_vec = num_vec;
+	alx->num_napi = num_vec - 1;
+	alx->num_txq = num_txq;
+	alx->num_rxq = num_rxq;
+
 	return true;
 }
 
@@ -771,21 +808,35 @@ static int alx_request_msix(struct alx_priv *alx)
 {
 	struct net_device *netdev = alx->dev;
 	int i, err, vector = 0, free_vector = 0;
-	struct alx_napi *np = alx->qnapi[0];
 
 	err = request_irq(alx->msix_entries[0].vector, alx_intr_msix_misc,
 			  0, netdev->name, alx);
 	if (err)
 		goto out_err;
 
-	vector++;
-	sprintf(np->irq_lbl, "%s-TxRx-0", netdev->name);
+	for (i = 0; i < alx->num_napi; i++) {
+		struct alx_napi *np = alx->qnapi[i];
 
-	err = request_irq(alx->msix_entries[vector].vector,
-			  alx_intr_msix_ring, 0, np->irq_lbl, np);
+		vector++;
+
+		if (np->txq && np->rxq)
+			sprintf(np->irq_lbl, "%s-TxRx-%u", netdev->name,
+				np->txq->queue_idx);
+		else if (np->txq)
+			sprintf(np->irq_lbl, "%s-tx-%u", netdev->name,
+				np->txq->queue_idx);
+		else if (np->rxq)
+			sprintf(np->irq_lbl, "%s-rx-%u", netdev->name,
+				np->rxq->queue_idx);
+		else
+			sprintf(np->irq_lbl, "%s-unused", netdev->name);
+
+		np->vec_idx = vector;
+		err = request_irq(alx->msix_entries[vector].vector,
+				  alx_intr_msix_ring, 0, np->irq_lbl, np);
 		if (err)
 			goto out_free;
-
+	}
 	return 0;
 
 out_free:
@@ -793,7 +844,8 @@ out_free:
 
 	vector--;
 	for (i = 0; i < vector; i++)
-		free_irq(alx->msix_entries[free_vector++].vector, alx->qnapi[0]);
+		free_irq(alx->msix_entries[free_vector++].vector,
+			 alx->qnapi[i]);
 
 out_err:
 	return err;
@@ -808,6 +860,9 @@ static void alx_init_intr(struct alx_priv *alx, bool msix)
 
 	if (!(alx->flags & ALX_FLAG_USING_MSIX)) {
 		alx->num_vec = 1;
+		alx->num_napi = 1;
+		alx->num_txq = 1;
+		alx->num_rxq = 1;
 
 		if (!pci_enable_msi(alx->hw.pdev))
 			alx->flags |= ALX_FLAG_USING_MSI;
@@ -863,6 +918,25 @@ static void alx_irq_disable(struct alx_priv *alx)
 	}
 }
 
+static int alx_realloc_resources(struct alx_priv *alx)
+{
+	int err;
+
+	alx_free_rings(alx);
+	alx_free_napis(alx);
+	alx_disable_advanced_intr(alx);
+
+	err = alx_alloc_napis(alx);
+	if (err)
+		return err;
+
+	err = alx_alloc_rings(alx);
+	if (err)
+		return err;
+
+	return 0;
+}
+
 static int alx_request_irq(struct alx_priv *alx)
 {
 	struct pci_dev *pdev = alx->hw.pdev;
@@ -879,8 +953,9 @@ static int alx_request_irq(struct alx_priv *alx)
 			goto out;
 
 		/* msix request failed, realloc resources */
-		alx_disable_advanced_intr(alx);
-		alx_init_intr(alx, false);
+		err = alx_realloc_resources(alx);
+		if (err)
+			goto out;
 	}
 
 	if (alx->flags & ALX_FLAG_USING_MSI) {
@@ -912,9 +987,10 @@ static void alx_free_irq(struct alx_priv *alx)
 	int i, vector = 0;
 
 	if (alx->flags & ALX_FLAG_USING_MSIX) {
-		/* we have only 2 vectors without multi queue support */
 		free_irq(alx->msix_entries[vector++].vector, alx);
-		free_irq(alx->msix_entries[vector++].vector, alx->qnapi[0]);
+		for (i = 0; i < alx->num_napi; i++)
+			free_irq(alx->msix_entries[vector++].vector,
+				 alx->qnapi[i]);
 	} else {
 		free_irq(pdev->irq, alx);
 	}
@@ -1478,10 +1554,12 @@ static int alx_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
 static void alx_poll_controller(struct net_device *netdev)
 {
 	struct alx_priv *alx = netdev_priv(netdev);
+	int i;
 
 	if (alx->flags & ALX_FLAG_USING_MSIX) {
 		alx_intr_msix_misc(0, alx);
-		alx_intr_msix_ring(0, alx->qnapi[0]);
+		for (i = 0; i < alx->num_txq; i++)
+			alx_intr_msix_ring(0, alx->qnapi[i]);
 	} else if (alx->flags & ALX_FLAG_USING_MSI)
 		alx_intr_msi(0, alx);
 	else

From a4076d347f9a27cdd85186bef2f4207b6187c35e Mon Sep 17 00:00:00 2001
From: Tobias Regnery <tobias.regnery@gmail.com>
Date: Tue, 15 Nov 2016 12:43:13 +0100
Subject: [PATCH 6/9] alx: prepare resource allocation for multi queue support

Allocate, initialise and free alx_tx_queue structs based on the number of
alx_napi structures. Also increase the size of the descriptor memory based
on the number of tx queues in use.

Based on the downstream driver at github.com/qca/alx

Signed-off-by: Tobias Regnery <tobias.regnery@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/alx/main.c | 145 ++++++++++++++++--------
 1 file changed, 95 insertions(+), 50 deletions(-)

diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index aeb42120910d..c5f09d2616a4 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -429,28 +429,45 @@ static irqreturn_t alx_intr_legacy(int irq, void *data)
 	return alx_intr_handle(alx, intr);
 }
 
+static const u16 txring_header_reg[] = {ALX_TPD_PRI0_ADDR_LO,
+					ALX_TPD_PRI1_ADDR_LO,
+					ALX_TPD_PRI2_ADDR_LO,
+					ALX_TPD_PRI3_ADDR_LO};
+
 static void alx_init_ring_ptrs(struct alx_priv *alx)
 {
 	struct alx_hw *hw = &alx->hw;
 	u32 addr_hi = ((u64)alx->descmem.dma) >> 32;
-	struct alx_napi *np = alx->qnapi[0];
+	struct alx_napi *np;
+	int i;
+
+	for (i = 0; i < alx->num_napi; i++) {
+		np = alx->qnapi[i];
+		if (np->txq) {
+			np->txq->read_idx = 0;
+			np->txq->write_idx = 0;
+			alx_write_mem32(hw,
+					txring_header_reg[np->txq->queue_idx],
+					np->txq->tpd_dma);
+		}
+
+		if (np->rxq) {
+			np->rxq->read_idx = 0;
+			np->rxq->write_idx = 0;
+			np->rxq->rrd_read_idx = 0;
+			alx_write_mem32(hw, ALX_RRD_ADDR_LO, np->rxq->rrd_dma);
+			alx_write_mem32(hw, ALX_RFD_ADDR_LO, np->rxq->rfd_dma);
+		}
+	}
+
+	alx_write_mem32(hw, ALX_TX_BASE_ADDR_HI, addr_hi);
+	alx_write_mem32(hw, ALX_TPD_RING_SZ, alx->tx_ringsz);
 
-	np->rxq->read_idx = 0;
-	np->rxq->write_idx = 0;
-	np->rxq->rrd_read_idx = 0;
 	alx_write_mem32(hw, ALX_RX_BASE_ADDR_HI, addr_hi);
-	alx_write_mem32(hw, ALX_RRD_ADDR_LO, np->rxq->rrd_dma);
 	alx_write_mem32(hw, ALX_RRD_RING_SZ, alx->rx_ringsz);
-	alx_write_mem32(hw, ALX_RFD_ADDR_LO, np->rxq->rfd_dma);
 	alx_write_mem32(hw, ALX_RFD_RING_SZ, alx->rx_ringsz);
 	alx_write_mem32(hw, ALX_RFD_BUF_SZ, alx->rxbuf_size);
 
-	np->txq->read_idx = 0;
-	np->txq->write_idx = 0;
-	alx_write_mem32(hw, ALX_TX_BASE_ADDR_HI, addr_hi);
-	alx_write_mem32(hw, ALX_TPD_PRI0_ADDR_LO, np->txq->tpd_dma);
-	alx_write_mem32(hw, ALX_TPD_RING_SZ, alx->tx_ringsz);
-
 	/* load these pointers into the chip */
 	alx_write_mem32(hw, ALX_SRAM9, ALX_SRAM_LOAD_PTR);
 }
@@ -478,7 +495,7 @@ static void alx_free_rxring_buf(struct alx_rx_queue *rxq)
 	struct alx_buffer *cur_buf;
 	u16 i;
 
-	if (rxq == NULL)
+	if (!rxq->bufs)
 		return;
 
 	for (i = 0; i < rxq->count; i++) {
@@ -502,8 +519,14 @@ static void alx_free_rxring_buf(struct alx_rx_queue *rxq)
 
 static void alx_free_buffers(struct alx_priv *alx)
 {
-	alx_free_txring_buf(alx->qnapi[0]->txq);
-	alx_free_rxring_buf(alx->qnapi[0]->rxq);
+	int i;
+
+	for (i = 0; i < alx->num_txq; i++)
+		if (alx->qnapi[i] && alx->qnapi[i]->txq)
+			alx_free_txring_buf(alx->qnapi[i]->txq);
+
+	if (alx->qnapi[0] && alx->qnapi[0]->rxq)
+		alx_free_rxring_buf(alx->qnapi[0]->rxq);
 }
 
 static int alx_reinit_rings(struct alx_priv *alx)
@@ -611,7 +634,7 @@ static int alx_alloc_rx_ring(struct alx_priv *alx, struct alx_rx_queue *rxq,
 
 static int alx_alloc_rings(struct alx_priv *alx)
 {
-	int offset = 0;
+	int i, offset = 0;
 
 	/* physical tx/rx ring descriptors
 	 *
@@ -619,7 +642,8 @@ static int alx_alloc_rings(struct alx_priv *alx)
 	 * 4G boundary (hardware has a single register for high 32 bits
 	 * of addresses only)
 	 */
-	alx->descmem.size = sizeof(struct alx_txd) * alx->tx_ringsz +
+	alx->descmem.size = sizeof(struct alx_txd) * alx->tx_ringsz *
+			    alx->num_txq +
 			    sizeof(struct alx_rrd) * alx->rx_ringsz +
 			    sizeof(struct alx_rfd) * alx->rx_ringsz;
 	alx->descmem.virt = dma_zalloc_coherent(&alx->hw.pdev->dev,
@@ -633,10 +657,12 @@ static int alx_alloc_rings(struct alx_priv *alx)
 	BUILD_BUG_ON(sizeof(struct alx_txd) % 8);
 	BUILD_BUG_ON(sizeof(struct alx_rrd) % 8);
 
-	offset = alx_alloc_tx_ring(alx, alx->qnapi[0]->txq, offset);
-	if (offset < 0) {
-		netdev_err(alx->dev, "Allocation of tx buffer failed!\n");
-		return -ENOMEM;
+	for (i = 0; i < alx->num_txq; i++) {
+		offset = alx_alloc_tx_ring(alx, alx->qnapi[i]->txq, offset);
+		if (offset < 0) {
+			netdev_err(alx->dev, "Allocation of tx buffer failed!\n");
+			return -ENOMEM;
+		}
 	}
 
 	offset = alx_alloc_rx_ring(alx, alx->qnapi[0]->rxq, offset);
@@ -652,11 +678,16 @@ static int alx_alloc_rings(struct alx_priv *alx)
 
 static void alx_free_rings(struct alx_priv *alx)
 {
+	int i;
 
 	alx_free_buffers(alx);
 
-	kfree(alx->qnapi[0]->txq->bufs);
-	kfree(alx->qnapi[0]->rxq->bufs);
+	for (i = 0; i < alx->num_txq; i++)
+		if (alx->qnapi[i] && alx->qnapi[i]->txq)
+			kfree(alx->qnapi[i]->txq->bufs);
+
+	if (alx->qnapi[0] && alx->qnapi[0]->rxq)
+		kfree(alx->qnapi[0]->rxq->bufs);
 
 	if (!alx->descmem.virt)
 		dma_free_coherent(&alx->hw.pdev->dev,
@@ -668,16 +699,19 @@ static void alx_free_rings(struct alx_priv *alx)
 static void alx_free_napis(struct alx_priv *alx)
 {
 	struct alx_napi *np;
+	int i;
 
-	np = alx->qnapi[0];
-	if (!np)
-		return;
+	for (i = 0; i < alx->num_napi; i++) {
+		np = alx->qnapi[i];
+		if (!np)
+			continue;
 
-	netif_napi_del(&np->napi);
-	kfree(np->txq);
-	kfree(np->rxq);
-	kfree(np);
-	alx->qnapi[0] = NULL;
+		netif_napi_del(&np->napi);
+		kfree(np->txq);
+		kfree(np->rxq);
+		kfree(np);
+		alx->qnapi[i] = NULL;
+	}
 }
 
 static const u32 tx_vect_mask[] = {ALX_ISR_TX_Q0, ALX_ISR_TX_Q1,
@@ -692,31 +726,36 @@ static int alx_alloc_napis(struct alx_priv *alx)
 	struct alx_napi *np;
 	struct alx_rx_queue *rxq;
 	struct alx_tx_queue *txq;
+	int i;
 
 	alx->int_mask &= ~ALX_ISR_ALL_QUEUES;
 
 	/* allocate alx_napi structures */
-	np = kzalloc(sizeof(struct alx_napi), GFP_KERNEL);
-	if (!np)
-		goto err_out;
+	for (i = 0; i < alx->num_napi; i++) {
+		np = kzalloc(sizeof(struct alx_napi), GFP_KERNEL);
+		if (!np)
+			goto err_out;
 
-	np->alx = alx;
-	netif_napi_add(alx->dev, &np->napi, alx_poll, 64);
-	alx->qnapi[0] = np;
+		np->alx = alx;
+		netif_napi_add(alx->dev, &np->napi, alx_poll, 64);
+		alx->qnapi[i] = np;
+	}
 
 	/* allocate tx queues */
-	np = alx->qnapi[0];
-	txq = kzalloc(sizeof(*txq), GFP_KERNEL);
-	if (!txq)
-		goto err_out;
+	for (i = 0; i < alx->num_txq; i++) {
+		np = alx->qnapi[i];
+		txq = kzalloc(sizeof(*txq), GFP_KERNEL);
+		if (!txq)
+			goto err_out;
 
-	np->txq = txq;
-	txq->queue_idx = 0;
-	txq->count = alx->tx_ringsz;
-	txq->netdev = alx->dev;
-	txq->dev = &alx->hw.pdev->dev;
-	np->vec_mask |= tx_vect_mask[0];
-	alx->int_mask |= tx_vect_mask[0];
+		np->txq = txq;
+		txq->queue_idx = i;
+		txq->count = alx->tx_ringsz;
+		txq->netdev = alx->dev;
+		txq->dev = &alx->hw.pdev->dev;
+		np->vec_mask |= tx_vect_mask[i];
+		alx->int_mask |= tx_vect_mask[i];
+	}
 
 	/* allocate rx queues */
 	np = alx->qnapi[0];
@@ -1075,11 +1114,14 @@ static netdev_features_t alx_fix_features(struct net_device *netdev,
 
 static void alx_netif_stop(struct alx_priv *alx)
 {
+	int i;
+
 	netif_trans_update(alx->dev);
 	if (netif_carrier_ok(alx->dev)) {
 		netif_carrier_off(alx->dev);
 		netif_tx_disable(alx->dev);
-		napi_disable(&alx->qnapi[0]->napi);
+		for (i = 0; i < alx->num_napi; i++)
+			napi_disable(&alx->qnapi[i]->napi);
 	}
 }
 
@@ -1148,8 +1190,11 @@ static int alx_change_mtu(struct net_device *netdev, int mtu)
 
 static void alx_netif_start(struct alx_priv *alx)
 {
+	int i;
+
 	netif_tx_wake_all_queues(alx->dev);
-	napi_enable(&alx->qnapi[0]->napi);
+	for (i = 0; i < alx->num_napi; i++)
+		napi_enable(&alx->qnapi[i]->napi);
 	netif_carrier_on(alx->dev);
 }
 

From 2e06826bc659e0ff664b709719d746b7b088827b Mon Sep 17 00:00:00 2001
From: Tobias Regnery <tobias.regnery@gmail.com>
Date: Tue, 15 Nov 2016 12:43:14 +0100
Subject: [PATCH 7/9] alx: prepare tx path for multi queue support

This patch prepares the tx path to send data on multiple tx queues. It
introduces per queue register adresses and uses them in the alx_tx_queue
structs.

There are new helper functions for the queue mapping in the tx path.

Based on the downstream driver at github.com/qca/alx

Signed-off-by: Tobias Regnery <tobias.regnery@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/alx/main.c | 58 +++++++++++++++++++------
 1 file changed, 45 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index c5f09d2616a4..0e773a2ca6a0 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -143,6 +143,22 @@ static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp)
 	return count;
 }
 
+static struct alx_tx_queue *alx_tx_queue_mapping(struct alx_priv *alx,
+						 struct sk_buff *skb)
+{
+	unsigned int r_idx = skb->queue_mapping;
+
+	if (r_idx >= alx->num_txq)
+		r_idx = r_idx % alx->num_txq;
+
+	return alx->qnapi[r_idx]->txq;
+}
+
+static struct netdev_queue *alx_get_tx_queue(const struct alx_tx_queue *txq)
+{
+	return netdev_get_tx_queue(txq->netdev, txq->queue_idx);
+}
+
 static inline int alx_tpd_avail(struct alx_tx_queue *txq)
 {
 	if (txq->write_idx >= txq->read_idx)
@@ -153,14 +169,16 @@ static inline int alx_tpd_avail(struct alx_tx_queue *txq)
 static bool alx_clean_tx_irq(struct alx_tx_queue *txq)
 {
 	struct alx_priv *alx;
+	struct netdev_queue *tx_queue;
 	u16 hw_read_idx, sw_read_idx;
 	unsigned int total_bytes = 0, total_packets = 0;
 	int budget = ALX_DEFAULT_TX_WORK;
 
 	alx = netdev_priv(txq->netdev);
+	tx_queue = alx_get_tx_queue(txq);
 
 	sw_read_idx = txq->read_idx;
-	hw_read_idx = alx_read_mem16(&alx->hw, ALX_TPD_PRI0_CIDX);
+	hw_read_idx = alx_read_mem16(&alx->hw, txq->c_reg);
 
 	if (sw_read_idx != hw_read_idx) {
 		while (sw_read_idx != hw_read_idx && budget > 0) {
@@ -180,12 +198,12 @@ static bool alx_clean_tx_irq(struct alx_tx_queue *txq)
 		}
 		txq->read_idx = sw_read_idx;
 
-		netdev_completed_queue(txq->netdev, total_packets, total_bytes);
+		netdev_tx_completed_queue(tx_queue, total_packets, total_bytes);
 	}
 
-	if (netif_queue_stopped(txq->netdev) && netif_carrier_ok(txq->netdev) &&
+	if (netif_tx_queue_stopped(tx_queue) && netif_carrier_ok(alx->dev) &&
 	    alx_tpd_avail(txq) > txq->count / 4)
-		netif_wake_queue(txq->netdev);
+		netif_tx_wake_queue(tx_queue);
 
 	return sw_read_idx == hw_read_idx;
 }
@@ -487,7 +505,7 @@ static void alx_free_txring_buf(struct alx_tx_queue *txq)
 	txq->write_idx = 0;
 	txq->read_idx = 0;
 
-	netdev_reset_queue(txq->netdev);
+	netdev_tx_reset_queue(alx_get_tx_queue(txq));
 }
 
 static void alx_free_rxring_buf(struct alx_rx_queue *rxq)
@@ -714,6 +732,10 @@ static void alx_free_napis(struct alx_priv *alx)
 	}
 }
 
+static const u16 tx_pidx_reg[] = {ALX_TPD_PRI0_PIDX, ALX_TPD_PRI1_PIDX,
+				  ALX_TPD_PRI2_PIDX, ALX_TPD_PRI3_PIDX};
+static const u16 tx_cidx_reg[] = {ALX_TPD_PRI0_CIDX, ALX_TPD_PRI1_CIDX,
+				  ALX_TPD_PRI2_CIDX, ALX_TPD_PRI3_CIDX};
 static const u32 tx_vect_mask[] = {ALX_ISR_TX_Q0, ALX_ISR_TX_Q1,
 				   ALX_ISR_TX_Q2, ALX_ISR_TX_Q3};
 static const u32 rx_vect_mask[] = {ALX_ISR_RX_Q0, ALX_ISR_RX_Q1,
@@ -749,6 +771,8 @@ static int alx_alloc_napis(struct alx_priv *alx)
 			goto err_out;
 
 		np->txq = txq;
+		txq->p_reg = tx_pidx_reg[i];
+		txq->c_reg = tx_cidx_reg[i];
 		txq->queue_idx = i;
 		txq->count = alx->tx_ringsz;
 		txq->netdev = alx->dev;
@@ -1501,16 +1525,17 @@ err_dma:
 	return -ENOMEM;
 }
 
-static netdev_tx_t alx_start_xmit(struct sk_buff *skb,
-				  struct net_device *netdev)
+static netdev_tx_t alx_start_xmit_ring(struct sk_buff *skb,
+				       struct alx_tx_queue *txq)
 {
-	struct alx_priv *alx = netdev_priv(netdev);
-	struct alx_tx_queue *txq = alx->qnapi[0]->txq;
+	struct alx_priv *alx;
 	struct alx_txd *first;
 	int tso;
 
+	alx = netdev_priv(txq->netdev);
+
 	if (alx_tpd_avail(txq) < alx_tpd_req(skb)) {
-		netif_stop_queue(txq->netdev);
+		netif_tx_stop_queue(alx_get_tx_queue(txq));
 		goto drop;
 	}
 
@@ -1526,14 +1551,14 @@ static netdev_tx_t alx_start_xmit(struct sk_buff *skb,
 	if (alx_map_tx_skb(txq, skb) < 0)
 		goto drop;
 
-	netdev_sent_queue(txq->netdev, skb->len);
+	netdev_tx_sent_queue(alx_get_tx_queue(txq), skb->len);
 
 	/* flush updates before updating hardware */
 	wmb();
-	alx_write_mem16(&alx->hw, ALX_TPD_PRI0_PIDX, txq->write_idx);
+	alx_write_mem16(&alx->hw, txq->p_reg, txq->write_idx);
 
 	if (alx_tpd_avail(txq) < txq->count / 8)
-		netif_stop_queue(txq->netdev);
+		netif_tx_stop_queue(alx_get_tx_queue(txq));
 
 	return NETDEV_TX_OK;
 
@@ -1542,6 +1567,13 @@ drop:
 	return NETDEV_TX_OK;
 }
 
+static netdev_tx_t alx_start_xmit(struct sk_buff *skb,
+				  struct net_device *netdev)
+{
+	struct alx_priv *alx = netdev_priv(netdev);
+	return alx_start_xmit_ring(skb, alx_tx_queue_mapping(alx, skb));
+}
+
 static void alx_tx_timeout(struct net_device *dev)
 {
 	struct alx_priv *alx = netdev_priv(dev);

From f58e0f77471b913c18fc253c90fd7bb8ee6fca3f Mon Sep 17 00:00:00 2001
From: Tobias Regnery <tobias.regnery@gmail.com>
Date: Tue, 15 Nov 2016 12:43:15 +0100
Subject: [PATCH 8/9] alx: enable msi-x interrupts by default

Remove the module parameter to enable msi-x support and enable msi-x
interrupts unconditionally by default. This is a preparatory step to enable
multi queue support by default, because this is only working with msi-x
interrupts.

Signed-off-by: Tobias Regnery <tobias.regnery@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/alx/main.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index 0e773a2ca6a0..91c9149ab3f2 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -51,10 +51,6 @@
 
 const char alx_drv_name[] = "alx";
 
-static bool msix = false;
-module_param(msix, bool, 0);
-MODULE_PARM_DESC(msix, "Enable msi-x interrupt support");
-
 static void alx_free_txbuf(struct alx_tx_queue *txq, int entry)
 {
 	struct alx_buffer *txb = &txq->bufs[entry];
@@ -1226,7 +1222,7 @@ static int __alx_open(struct alx_priv *alx, bool resume)
 {
 	int err;
 
-	alx_init_intr(alx, msix);
+	alx_init_intr(alx, true);
 
 	if (!resume)
 		netif_carrier_off(alx->dev);

From d768319cd4277ef1cfa1e04792adb0aeea40cca6 Mon Sep 17 00:00:00 2001
From: Tobias Regnery <tobias.regnery@gmail.com>
Date: Tue, 15 Nov 2016 12:43:16 +0100
Subject: [PATCH 9/9] alx: enable multiple tx queues

Enable multiple tx queues by default based on the number of online cpus. The
hardware supports up to four tx queues.

Based on the downstream driver at github.com/qca/alx

Signed-off-by: Tobias Regnery <tobias.regnery@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/alx/main.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index 91c9149ab3f2..c8f525574d68 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -834,7 +834,7 @@ static bool alx_enable_msix(struct alx_priv *alx)
 {
 	int i, err, num_vec, num_txq, num_rxq;
 
-	num_txq = 1;
+	num_txq = min_t(int, num_online_cpus(), ALX_MAX_TX_QUEUES);
 	num_rxq = 1;
 	num_vec = max_t(int, num_txq, num_rxq) + 1;
 
@@ -1241,6 +1241,9 @@ static int __alx_open(struct alx_priv *alx, bool resume)
 	if (err)
 		goto out_free_rings;
 
+	netif_set_real_num_tx_queues(alx->dev, alx->num_txq);
+	netif_set_real_num_rx_queues(alx->dev, alx->num_rxq);
+
 	/* clear old interrupts */
 	alx_write_mem32(&alx->hw, ALX_ISR, ~(u32)ALX_ISR_DIS);
 
@@ -1749,7 +1752,8 @@ static int alx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		goto out_pci_release;
 	}
 
-	netdev = alloc_etherdev(sizeof(*alx));
+	netdev = alloc_etherdev_mqs(sizeof(*alx),
+				    ALX_MAX_TX_QUEUES, 1);
 	if (!netdev) {
 		err = -ENOMEM;
 		goto out_pci_release;