Merge branch 'renesas-rswitch-perf'

Yoshihiro Shimoda says:

====================
net: renesas: rswitch: Improve perfromance of TX/RX

This patch series is based on net-next.git / main branch [1]. This patch
series can improve perfromance of TX in a specific condition. The previous code
used "global rate limiter" feature so that this is possible to cause
performance down if we use multiple ports at the same time. To resolve this
issue, use "hardware pause" features of GWCA and COMA. Note that this is not
related to the ethernet PAUSE frames.

< UDP TX by iperf3 >
 before: about 450Mbps on both tsn0 and tsn1
 after:  about 950Mbps on both tsn0 and tsn1

Also, this patch series can improve performance of RX by using
napi_gro_receive().

< TCP RX by iperf >
 before: about 670Mbps on tsn0
 after:  about 840Mbps on tsn0

[1]
The commit e06bd5e3ad ("Merge branch 'followup-fixes-for-the-dwmac-and-altera-lynx-conversion'")

Changes from v3:
https://lore.kernel.org/all/20230607015641.1724057-1-yoshihiro.shimoda.uh@renesas.com/
 - Rebased on the latest net-next.git / main branch.
 - Added Reviewed-by in the patch 2/2. (Maciej, thanks!)
 - Fix typos in the commit description in the patch 2/2.

Changes from v2:
https://lore.kernel.org/all/20230606085558.1708766-1-yoshihiro.shimoda.uh@renesas.com/
 - Rebased on the latest net-next.git / main branch.
 - Added Reviewed-by in the patch 1/2. (Maciej, thanks!)
 - Revise the commit description in the patch 2/2.
 - Add definition to remove magic hardcoded numbers in the patch 2/2.

Changes from v1:
https://lore.kernel.org/all/20230529080840.1156458-1-yoshihiro.shimoda.uh@renesas.com/
 - Rebased on the latest net-next.git / main branch.
 - Use "hardware pause" feature instead of "per-queue limiter" feature.
 - Drop refactaring for "per-queue limiter".
 - Drop dt-bindings update because "hardware pause" doesn't need additional
   clock information.
 - Use napi_gro_receive() to improve RX performance.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2023-06-10 19:49:34 +01:00
commit 3334129245
2 changed files with 22 additions and 23 deletions

View File

@ -90,6 +90,11 @@ static int rswitch_bpool_config(struct rswitch_private *priv)
return rswitch_reg_wait(priv->addr, CABPIRM, CABPIRM_BPR, CABPIRM_BPR);
}
static void rswitch_coma_init(struct rswitch_private *priv)
{
iowrite32(CABPPFLC_INIT_VALUE, priv->addr + CABPPFLC0);
}
/* R-Switch-2 block (TOP) */
static void rswitch_top_init(struct rswitch_private *priv)
{
@ -156,24 +161,6 @@ static int rswitch_gwca_axi_ram_reset(struct rswitch_private *priv)
return rswitch_reg_wait(priv->addr, GWARIRM, GWARIRM_ARR, GWARIRM_ARR);
}
static void rswitch_gwca_set_rate_limit(struct rswitch_private *priv, int rate)
{
u32 gwgrlulc, gwgrlc;
switch (rate) {
case 1000:
gwgrlulc = 0x0000005f;
gwgrlc = 0x00010260;
break;
default:
dev_err(&priv->pdev->dev, "%s: This rate is not supported (%d)\n", __func__, rate);
return;
}
iowrite32(gwgrlulc, priv->addr + GWGRLULC);
iowrite32(gwgrlc, priv->addr + GWGRLC);
}
static bool rswitch_is_any_data_irq(struct rswitch_private *priv, u32 *dis, bool tx)
{
u32 *mask = tx ? priv->gwca.tx_irq_bits : priv->gwca.rx_irq_bits;
@ -402,7 +389,7 @@ static int rswitch_gwca_queue_format(struct net_device *ndev,
linkfix->die_dt = DT_LINKFIX;
rswitch_desc_set_dptr(linkfix, gq->ring_dma);
iowrite32(GWDCC_BALR | (gq->dir_tx ? GWDCC_DQT : 0) | GWDCC_EDE,
iowrite32(GWDCC_BALR | (gq->dir_tx ? GWDCC_DCP(GWCA_IPV_NUM) | GWDCC_DQT : 0) | GWDCC_EDE,
priv->addr + GWDCC_OFFS(gq->index));
return 0;
@ -500,7 +487,8 @@ static int rswitch_gwca_queue_ext_ts_format(struct net_device *ndev,
linkfix->die_dt = DT_LINKFIX;
rswitch_desc_set_dptr(linkfix, gq->ring_dma);
iowrite32(GWDCC_BALR | (gq->dir_tx ? GWDCC_DQT : 0) | GWDCC_ETS | GWDCC_EDE,
iowrite32(GWDCC_BALR | (gq->dir_tx ? GWDCC_DCP(GWCA_IPV_NUM) | GWDCC_DQT : 0) |
GWDCC_ETS | GWDCC_EDE,
priv->addr + GWDCC_OFFS(gq->index));
return 0;
@ -649,7 +637,8 @@ static int rswitch_gwca_hw_init(struct rswitch_private *priv)
iowrite32(lower_32_bits(priv->gwca.ts_queue.ring_dma), priv->addr + GWTDCAC10);
iowrite32(upper_32_bits(priv->gwca.ts_queue.ring_dma), priv->addr + GWTDCAC00);
iowrite32(GWCA_TS_IRQ_BIT, priv->addr + GWTSDCC0);
rswitch_gwca_set_rate_limit(priv, priv->gwca.speed);
iowrite32(GWTPC_PPPL(GWCA_IPV_NUM), priv->addr + GWTPC0);
for (i = 0; i < RSWITCH_NUM_PORTS; i++) {
err = rswitch_rxdmac_init(priv, i);
@ -729,7 +718,7 @@ static bool rswitch_rx(struct net_device *ndev, int *quota)
}
skb_put(skb, pkt_len);
skb->protocol = eth_type_trans(skb, ndev);
netif_receive_skb(skb);
napi_gro_receive(&rdev->napi, skb);
rdev->ndev->stats.rx_packets++;
rdev->ndev->stats.rx_bytes += pkt_len;
@ -1502,7 +1491,8 @@ static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd
rswitch_desc_set_dptr(&desc->desc, dma_addr);
desc->desc.info_ds = cpu_to_le16(skb->len);
desc->info1 = cpu_to_le64(INFO1_DV(BIT(rdev->etha->index)) | INFO1_FMT);
desc->info1 = cpu_to_le64(INFO1_DV(BIT(rdev->etha->index)) |
INFO1_IPV(GWCA_IPV_NUM) | INFO1_FMT);
if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) {
struct rswitch_gwca_ts_info *ts_info;
@ -1772,6 +1762,8 @@ static int rswitch_init(struct rswitch_private *priv)
if (err < 0)
return err;
rswitch_coma_init(priv);
err = rswitch_gwca_linkfix_alloc(priv);
if (err < 0)
return -ENOMEM;

View File

@ -48,6 +48,7 @@
#define GWCA_NUM_IRQS 8
#define GWCA_INDEX 0
#define AGENT_INDEX_GWCA 3
#define GWCA_IPV_NUM 0
#define GWRO RSWITCH_GWCA0_OFFSET
#define GWCA_TS_IRQ_RESOURCE_NAME "gwca0_rxts0"
@ -768,11 +769,14 @@ enum rswitch_gwca_mode {
#define GWARIRM_ARR BIT(1)
#define GWDCC_BALR BIT(24)
#define GWDCC_DCP_MASK GENMASK(18, 16)
#define GWDCC_DCP(prio) FIELD_PREP(GWDCC_DCP_MASK, (prio))
#define GWDCC_DQT BIT(11)
#define GWDCC_ETS BIT(9)
#define GWDCC_EDE BIT(8)
#define GWTRC(queue) (GWTRC0 + (queue) / 32 * 4)
#define GWTPC_PPPL(ipv) BIT(ipv)
#define GWDCC_OFFS(queue) (GWDCC0 + (queue) * 4)
#define GWDIS(i) (GWDIS0 + (i) * 0x10)
@ -789,6 +793,8 @@ enum rswitch_gwca_mode {
#define CABPIRM_BPIOG BIT(0)
#define CABPIRM_BPR BIT(1)
#define CABPPFLC_INIT_VALUE 0x00800080
/* MFWD */
#define FWPC0_LTHTA BIT(0)
#define FWPC0_IP4UE BIT(3)
@ -863,6 +869,7 @@ enum DIE_DT {
/* For transmission */
#define INFO1_TSUN(val) ((u64)(val) << 8ULL)
#define INFO1_IPV(prio) ((u64)(prio) << 28ULL)
#define INFO1_CSD0(index) ((u64)(index) << 32ULL)
#define INFO1_CSD1(index) ((u64)(index) << 40ULL)
#define INFO1_DV(port_vector) ((u64)(port_vector) << 48ULL)