Merge branch 'dpaa2-eth-sw-TSO'
Ioana Ciornei says:

====================
dpaa2-eth: add support for software TSO

This series adds support for driver-level TSO in the dpaa2-eth driver.

The first five patches lay the groundwork for the actual feature:
rearranging some variable declarations, cleaning up the interaction
with the S/G Table buffer cache, etc. The sixth patch adds the actual
driver-level software TSO support by using the usual
tso_build_hdr()/tso_build_data() APIs and creating the S/G FDs.

With this patch set we see the following improvement in a TCP flow
running on a single A72 @ 2.2 GHz of the LX2160A SoC:

	before: 6.38 Gbit/s
	after:  8.48 Gbit/s
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
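Background note: the series leans on the kernel's generic software-TSO helpers in net/tso.h (tso_start(), tso_build_hdr(), tso_build_data()). As a reading aid for the diff below, here is a minimal, illustrative sketch of that segmentation loop. It is not the driver's code; the dpaa2-specific steps (SGT buffer allocation, S/G entry setup, FD enqueue) are only marked as comments, and the real implementation is dpaa2_eth_build_gso_fd() in the diff.

/* Minimal sketch (assumption: illustrative only) of the net/tso.h
 * segmentation loop this series builds on.
 */
#include <linux/skbuff.h>
#include <net/tso.h>

static int sw_tso_segment_sketch(struct sk_buff *skb)
{
	int hdr_len, total_len, data_left;
	char hdr[TSO_HEADER_SIZE];	/* the driver kmalloc()s this so it can be DMA-mapped */
	struct tso_t tso;

	/* Parse the MAC/IP/TCP headers and set up the payload cursor */
	hdr_len = tso_start(skb, &tso);
	total_len = skb->len - hdr_len;

	while (total_len > 0) {
		/* Each iteration emits one frame of at most gso_size payload */
		data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);
		total_len -= data_left;

		/* Replicate the headers; fixes up TCP seq, IP len/id, flags */
		tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0);

		/* driver-specific: point the first S/G entry at 'hdr' */

		while (data_left > 0) {
			int size = min_t(int, tso.size, data_left);

			/* driver-specific: add an S/G entry for
			 * (tso.data, size), final when size == data_left
			 */

			data_left -= size;
			/* Advance the cursor to the next payload chunk */
			tso_build_data(skb, &tso, size);
		}

		/* driver-specific: finish the FD for this frame and enqueue */
	}

	return 0;
}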
commit 62b5b162e4
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -18,6 +18,7 @@
 #include <linux/ptp_classify.h>
 #include <net/pkt_cls.h>
 #include <net/sock.h>
+#include <net/tso.h>
 
 #include "dpaa2-eth.h"
 
@@ -760,6 +761,39 @@ static void dpaa2_eth_enable_tx_tstamp(struct dpaa2_eth_priv *priv,
 	}
 }
 
+static void *dpaa2_eth_sgt_get(struct dpaa2_eth_priv *priv)
+{
+	struct dpaa2_eth_sgt_cache *sgt_cache;
+	void *sgt_buf = NULL;
+	int sgt_buf_size;
+
+	sgt_cache = this_cpu_ptr(priv->sgt_cache);
+	sgt_buf_size = priv->tx_data_offset +
+		DPAA2_ETH_SG_ENTRIES_MAX * sizeof(struct dpaa2_sg_entry);
+
+	if (sgt_cache->count == 0)
+		sgt_buf = napi_alloc_frag_align(sgt_buf_size, DPAA2_ETH_TX_BUF_ALIGN);
+	else
+		sgt_buf = sgt_cache->buf[--sgt_cache->count];
+	if (!sgt_buf)
+		return NULL;
+
+	memset(sgt_buf, 0, sgt_buf_size);
+
+	return sgt_buf;
+}
+
+static void dpaa2_eth_sgt_recycle(struct dpaa2_eth_priv *priv, void *sgt_buf)
+{
+	struct dpaa2_eth_sgt_cache *sgt_cache;
+
+	sgt_cache = this_cpu_ptr(priv->sgt_cache);
+	if (sgt_cache->count >= DPAA2_ETH_SGT_CACHE_SIZE)
+		skb_free_frag(sgt_buf);
+	else
+		sgt_cache->buf[sgt_cache->count++] = sgt_buf;
+}
+
 /* Create a frame descriptor based on a fragmented skb */
 static int dpaa2_eth_build_sg_fd(struct dpaa2_eth_priv *priv,
 				 struct sk_buff *skb,
@@ -805,12 +839,11 @@ static int dpaa2_eth_build_sg_fd(struct dpaa2_eth_priv *priv,
 	/* Prepare the HW SGT structure */
 	sgt_buf_size = priv->tx_data_offset +
 		       sizeof(struct dpaa2_sg_entry) * num_dma_bufs;
-	sgt_buf = napi_alloc_frag_align(sgt_buf_size, DPAA2_ETH_TX_BUF_ALIGN);
+	sgt_buf = dpaa2_eth_sgt_get(priv);
 	if (unlikely(!sgt_buf)) {
 		err = -ENOMEM;
 		goto sgt_buf_alloc_failed;
 	}
-	memset(sgt_buf, 0, sgt_buf_size);
 
 	sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset);
 
@@ -846,6 +879,7 @@ static int dpaa2_eth_build_sg_fd(struct dpaa2_eth_priv *priv,
 		err = -ENOMEM;
 		goto dma_map_single_failed;
 	}
+	memset(fd, 0, sizeof(struct dpaa2_fd));
 	dpaa2_fd_set_offset(fd, priv->tx_data_offset);
 	dpaa2_fd_set_format(fd, dpaa2_fd_sg);
 	dpaa2_fd_set_addr(fd, addr);
@@ -855,7 +889,7 @@ static int dpaa2_eth_build_sg_fd(struct dpaa2_eth_priv *priv,
 	return 0;
 
 dma_map_single_failed:
-	skb_free_frag(sgt_buf);
+	dpaa2_eth_sgt_recycle(priv, sgt_buf);
 sgt_buf_alloc_failed:
 	dma_unmap_sg(dev, scl, num_sg, DMA_BIDIRECTIONAL);
 dma_map_sg_failed:
@@ -875,7 +909,6 @@ static int dpaa2_eth_build_sg_fd_single_buf(struct dpaa2_eth_priv *priv,
 					    void **swa_addr)
 {
 	struct device *dev = priv->net_dev->dev.parent;
-	struct dpaa2_eth_sgt_cache *sgt_cache;
 	struct dpaa2_sg_entry *sgt;
 	struct dpaa2_eth_swa *swa;
 	dma_addr_t addr, sgt_addr;
@@ -884,18 +917,10 @@ static int dpaa2_eth_build_sg_fd_single_buf(struct dpaa2_eth_priv *priv,
 	int err;
 
 	/* Prepare the HW SGT structure */
-	sgt_cache = this_cpu_ptr(priv->sgt_cache);
 	sgt_buf_size = priv->tx_data_offset + sizeof(struct dpaa2_sg_entry);
-
-	if (sgt_cache->count == 0)
-		sgt_buf = kzalloc(sgt_buf_size + DPAA2_ETH_TX_BUF_ALIGN,
-				  GFP_ATOMIC);
-	else
-		sgt_buf = sgt_cache->buf[--sgt_cache->count];
+	sgt_buf = dpaa2_eth_sgt_get(priv);
 	if (unlikely(!sgt_buf))
 		return -ENOMEM;
-
-	sgt_buf = PTR_ALIGN(sgt_buf, DPAA2_ETH_TX_BUF_ALIGN);
 	sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset);
 
 	addr = dma_map_single(dev, skb->data, skb->len, DMA_BIDIRECTIONAL);
@@ -923,6 +948,7 @@ static int dpaa2_eth_build_sg_fd_single_buf(struct dpaa2_eth_priv *priv,
 		goto sgt_map_failed;
 	}
 
+	memset(fd, 0, sizeof(struct dpaa2_fd));
 	dpaa2_fd_set_offset(fd, priv->tx_data_offset);
 	dpaa2_fd_set_format(fd, dpaa2_fd_sg);
 	dpaa2_fd_set_addr(fd, sgt_addr);
|
||||
sgt_map_failed:
|
||||
dma_unmap_single(dev, addr, skb->len, DMA_BIDIRECTIONAL);
|
||||
data_map_failed:
|
||||
if (sgt_cache->count >= DPAA2_ETH_SGT_CACHE_SIZE)
|
||||
kfree(sgt_buf);
|
||||
else
|
||||
sgt_cache->buf[sgt_cache->count++] = sgt_buf;
|
||||
dpaa2_eth_sgt_recycle(priv, sgt_buf);
|
||||
|
||||
return err;
|
||||
}
|
||||
@@ -978,6 +1001,7 @@ static int dpaa2_eth_build_single_fd(struct dpaa2_eth_priv *priv,
 	if (unlikely(dma_mapping_error(dev, addr)))
 		return -ENOMEM;
 
+	memset(fd, 0, sizeof(struct dpaa2_fd));
 	dpaa2_fd_set_addr(fd, addr);
 	dpaa2_fd_set_offset(fd, (u16)(skb->data - buffer_start));
 	dpaa2_fd_set_len(fd, skb->len);
@@ -1005,9 +1029,9 @@ static void dpaa2_eth_free_tx_fd(struct dpaa2_eth_priv *priv,
 	struct dpaa2_eth_swa *swa;
 	u8 fd_format = dpaa2_fd_get_format(fd);
 	u32 fd_len = dpaa2_fd_get_len(fd);
-
-	struct dpaa2_eth_sgt_cache *sgt_cache;
 	struct dpaa2_sg_entry *sgt;
+	int should_free_skb = 1;
+	int i;
 
 	fd_addr = dpaa2_fd_get_addr(fd);
 	buffer_start = dpaa2_iova_to_virt(priv->iommu_domain, fd_addr);
@@ -1039,6 +1063,28 @@ static void dpaa2_eth_free_tx_fd(struct dpaa2_eth_priv *priv,
 			/* Unmap the SGT buffer */
 			dma_unmap_single(dev, fd_addr, swa->sg.sgt_size,
 					 DMA_BIDIRECTIONAL);
+		} else if (swa->type == DPAA2_ETH_SWA_SW_TSO) {
+			skb = swa->tso.skb;
+
+			sgt = (struct dpaa2_sg_entry *)(buffer_start +
+							priv->tx_data_offset);
+
+			/* Unmap and free the header */
+			dma_unmap_single(dev, dpaa2_sg_get_addr(sgt), TSO_HEADER_SIZE,
+					 DMA_TO_DEVICE);
+			kfree(dpaa2_iova_to_virt(priv->iommu_domain, dpaa2_sg_get_addr(sgt)));
+
+			/* Unmap the other SG entries for the data */
+			for (i = 1; i < swa->tso.num_sg; i++)
+				dma_unmap_single(dev, dpaa2_sg_get_addr(&sgt[i]),
+						 dpaa2_sg_get_len(&sgt[i]), DMA_TO_DEVICE);
+
+			/* Unmap the SGT buffer */
+			dma_unmap_single(dev, fd_addr, swa->sg.sgt_size,
+					 DMA_BIDIRECTIONAL);
+
+			if (!swa->tso.is_last_fd)
+				should_free_skb = 0;
 		} else {
 			skb = swa->single.skb;
 
@@ -1067,55 +1113,195 @@ static void dpaa2_eth_free_tx_fd(struct dpaa2_eth_priv *priv,
 	}
 
-	/* Get the timestamp value */
-	if (skb->cb[0] == TX_TSTAMP) {
-		struct skb_shared_hwtstamps shhwtstamps;
-		__le64 *ts = dpaa2_get_ts(buffer_start, true);
-		u64 ns;
+	if (swa->type != DPAA2_ETH_SWA_SW_TSO) {
+		/* Get the timestamp value */
+		if (skb->cb[0] == TX_TSTAMP) {
+			struct skb_shared_hwtstamps shhwtstamps;
+			__le64 *ts = dpaa2_get_ts(buffer_start, true);
+			u64 ns;
 
-		memset(&shhwtstamps, 0, sizeof(shhwtstamps));
+			memset(&shhwtstamps, 0, sizeof(shhwtstamps));
 
-		ns = DPAA2_PTP_CLK_PERIOD_NS * le64_to_cpup(ts);
-		shhwtstamps.hwtstamp = ns_to_ktime(ns);
-		skb_tstamp_tx(skb, &shhwtstamps);
-	} else if (skb->cb[0] == TX_TSTAMP_ONESTEP_SYNC) {
-		mutex_unlock(&priv->onestep_tstamp_lock);
-	}
-
-	/* Free SGT buffer allocated on tx */
-	if (fd_format != dpaa2_fd_single) {
-		sgt_cache = this_cpu_ptr(priv->sgt_cache);
-		if (swa->type == DPAA2_ETH_SWA_SG) {
-			skb_free_frag(buffer_start);
-		} else {
-			if (sgt_cache->count >= DPAA2_ETH_SGT_CACHE_SIZE)
-				kfree(buffer_start);
-			else
-				sgt_cache->buf[sgt_cache->count++] = buffer_start;
-		}
-	}
-
-	/* Move on with skb release */
-	napi_consume_skb(skb, in_napi);
+			ns = DPAA2_PTP_CLK_PERIOD_NS * le64_to_cpup(ts);
+			shhwtstamps.hwtstamp = ns_to_ktime(ns);
+			skb_tstamp_tx(skb, &shhwtstamps);
+		} else if (skb->cb[0] == TX_TSTAMP_ONESTEP_SYNC) {
+			mutex_unlock(&priv->onestep_tstamp_lock);
+		}
+	}
+
+	/* Free SGT buffer allocated on tx */
+	if (fd_format != dpaa2_fd_single)
+		dpaa2_eth_sgt_recycle(priv, buffer_start);
+
+	/* Move on with skb release. If we are just confirming multiple FDs
+	 * from the same TSO skb then only the last one will need to free the
+	 * skb.
+	 */
+	if (should_free_skb)
+		napi_consume_skb(skb, in_napi);
+}
+
+static int dpaa2_eth_build_gso_fd(struct dpaa2_eth_priv *priv,
+				  struct sk_buff *skb, struct dpaa2_fd *fd,
+				  int *num_fds, u32 *total_fds_len)
+{
+	struct device *dev = priv->net_dev->dev.parent;
+	int hdr_len, total_len, data_left, fd_len;
+	int num_sge, err, i, sgt_buf_size;
+	struct dpaa2_fd *fd_start = fd;
+	struct dpaa2_sg_entry *sgt;
+	struct dpaa2_eth_swa *swa;
+	dma_addr_t sgt_addr, addr;
+	dma_addr_t tso_hdr_dma;
+	unsigned int index = 0;
+	struct tso_t tso;
+	char *tso_hdr;
+	void *sgt_buf;
+
+	/* Initialize the TSO handler, and prepare the first payload */
+	hdr_len = tso_start(skb, &tso);
+	*total_fds_len = 0;
+
+	total_len = skb->len - hdr_len;
+	while (total_len > 0) {
+		/* Prepare the HW SGT structure for this frame */
+		sgt_buf = dpaa2_eth_sgt_get(priv);
+		if (unlikely(!sgt_buf)) {
+			netdev_err(priv->net_dev, "dpaa2_eth_sgt_get() failed\n");
+			err = -ENOMEM;
+			goto err_sgt_get;
+		}
+		sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset);
+
+		/* Determine the data length of this frame */
+		data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);
+		total_len -= data_left;
+		fd_len = data_left + hdr_len;
+
+		/* Prepare packet headers: MAC + IP + TCP */
+		tso_hdr = kmalloc(TSO_HEADER_SIZE, GFP_ATOMIC);
+		if (!tso_hdr) {
+			err = -ENOMEM;
+			goto err_alloc_tso_hdr;
+		}
+
+		tso_build_hdr(skb, tso_hdr, &tso, data_left, total_len == 0);
+		tso_hdr_dma = dma_map_single(dev, tso_hdr, TSO_HEADER_SIZE, DMA_TO_DEVICE);
+		if (dma_mapping_error(dev, tso_hdr_dma)) {
+			netdev_err(priv->net_dev, "dma_map_single(tso_hdr) failed\n");
+			err = -ENOMEM;
+			goto err_map_tso_hdr;
+		}
+
+		/* Setup the SG entry for the header */
+		dpaa2_sg_set_addr(sgt, tso_hdr_dma);
+		dpaa2_sg_set_len(sgt, hdr_len);
+		dpaa2_sg_set_final(sgt, data_left > 0 ? false : true);
+
+		/* Compose the SG entries for each fragment of data */
+		num_sge = 1;
+		while (data_left > 0) {
+			int size;
+
+			/* Move to the next SG entry */
+			sgt++;
+			size = min_t(int, tso.size, data_left);
+
+			addr = dma_map_single(dev, tso.data, size, DMA_TO_DEVICE);
+			if (dma_mapping_error(dev, addr)) {
+				netdev_err(priv->net_dev, "dma_map_single(tso.data) failed\n");
+				err = -ENOMEM;
+				goto err_map_data;
+			}
+			dpaa2_sg_set_addr(sgt, addr);
+			dpaa2_sg_set_len(sgt, size);
+			dpaa2_sg_set_final(sgt, size == data_left ? true : false);
+
+			num_sge++;
+
+			/* Build the data for the __next__ fragment */
+			data_left -= size;
+			tso_build_data(skb, &tso, size);
+		}
+
+		/* Store the skb backpointer in the SGT buffer */
+		sgt_buf_size = priv->tx_data_offset + num_sge * sizeof(struct dpaa2_sg_entry);
+		swa = (struct dpaa2_eth_swa *)sgt_buf;
+		swa->type = DPAA2_ETH_SWA_SW_TSO;
+		swa->tso.skb = skb;
+		swa->tso.num_sg = num_sge;
+		swa->tso.sgt_size = sgt_buf_size;
+		swa->tso.is_last_fd = total_len == 0 ? 1 : 0;
+
+		/* Separately map the SGT buffer */
+		sgt_addr = dma_map_single(dev, sgt_buf, sgt_buf_size, DMA_BIDIRECTIONAL);
+		if (unlikely(dma_mapping_error(dev, sgt_addr))) {
+			netdev_err(priv->net_dev, "dma_map_single(sgt_buf) failed\n");
+			err = -ENOMEM;
+			goto err_map_sgt;
+		}
+
+		/* Setup the frame descriptor */
+		memset(fd, 0, sizeof(struct dpaa2_fd));
+		dpaa2_fd_set_offset(fd, priv->tx_data_offset);
+		dpaa2_fd_set_format(fd, dpaa2_fd_sg);
+		dpaa2_fd_set_addr(fd, sgt_addr);
+		dpaa2_fd_set_len(fd, fd_len);
+		dpaa2_fd_set_ctrl(fd, FD_CTRL_PTA);
+
+		*total_fds_len += fd_len;
+		/* Advance to the next frame descriptor */
+		fd++;
+		index++;
+	}
+
+	*num_fds = index;
+
+	return 0;
+
+err_map_sgt:
+err_map_data:
+	/* Unmap all the data S/G entries for the current FD */
+	sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset);
+	for (i = 1; i < num_sge; i++)
+		dma_unmap_single(dev, dpaa2_sg_get_addr(&sgt[i]),
+				 dpaa2_sg_get_len(&sgt[i]), DMA_TO_DEVICE);
+
+	/* Unmap the header entry */
+	dma_unmap_single(dev, tso_hdr_dma, TSO_HEADER_SIZE, DMA_TO_DEVICE);
err_map_tso_hdr:
+	kfree(tso_hdr);
+err_alloc_tso_hdr:
+	dpaa2_eth_sgt_recycle(priv, sgt_buf);
+err_sgt_get:
+	/* Free all the other FDs that were already fully created */
+	for (i = 0; i < index; i++)
+		dpaa2_eth_free_tx_fd(priv, NULL, &fd_start[i], false);
+
+	return err;
+}
+
 static netdev_tx_t __dpaa2_eth_tx(struct sk_buff *skb,
 				  struct net_device *net_dev)
 {
 	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
-	struct dpaa2_fd fd;
-	struct rtnl_link_stats64 *percpu_stats;
+	int total_enqueued = 0, retries = 0, enqueued;
 	struct dpaa2_eth_drv_stats *percpu_extras;
+	struct rtnl_link_stats64 *percpu_stats;
+	unsigned int needed_headroom;
+	int num_fds = 1, max_retries;
 	struct dpaa2_eth_fq *fq;
 	struct netdev_queue *nq;
+	struct dpaa2_fd *fd;
 	u16 queue_mapping;
-	unsigned int needed_headroom;
-	u32 fd_len;
+	void *swa = NULL;
 	u8 prio = 0;
-	int err, i;
-	void *swa;
+	int err, i;
+	u32 fd_len;
 
 	percpu_stats = this_cpu_ptr(priv->percpu_stats);
 	percpu_extras = this_cpu_ptr(priv->percpu_extras);
+	fd = (this_cpu_ptr(priv->fd))->array;
 
 	needed_headroom = dpaa2_eth_needed_headroom(skb);
 
@@ -1130,20 +1316,28 @@ static netdev_tx_t __dpaa2_eth_tx(struct sk_buff *skb,
 	}
 
-	/* Setup the FD fields */
-	memset(&fd, 0, sizeof(fd));
-
-	if (skb_is_nonlinear(skb)) {
-		err = dpaa2_eth_build_sg_fd(priv, skb, &fd, &swa);
+	if (skb_is_gso(skb)) {
+		err = dpaa2_eth_build_gso_fd(priv, skb, fd, &num_fds, &fd_len);
+		percpu_extras->tx_sg_frames += num_fds;
+		percpu_extras->tx_sg_bytes += fd_len;
+		percpu_extras->tx_tso_frames += num_fds;
+		percpu_extras->tx_tso_bytes += fd_len;
+	} else if (skb_is_nonlinear(skb)) {
+		err = dpaa2_eth_build_sg_fd(priv, skb, fd, &swa);
 		percpu_extras->tx_sg_frames++;
 		percpu_extras->tx_sg_bytes += skb->len;
+		fd_len = dpaa2_fd_get_len(fd);
 	} else if (skb_headroom(skb) < needed_headroom) {
-		err = dpaa2_eth_build_sg_fd_single_buf(priv, skb, &fd, &swa);
+		err = dpaa2_eth_build_sg_fd_single_buf(priv, skb, fd, &swa);
 		percpu_extras->tx_sg_frames++;
 		percpu_extras->tx_sg_bytes += skb->len;
 		percpu_extras->tx_converted_sg_frames++;
 		percpu_extras->tx_converted_sg_bytes += skb->len;
+		fd_len = dpaa2_fd_get_len(fd);
 	} else {
-		err = dpaa2_eth_build_single_fd(priv, skb, &fd, &swa);
+		err = dpaa2_eth_build_single_fd(priv, skb, fd, &swa);
+		fd_len = dpaa2_fd_get_len(fd);
 	}
 
 	if (unlikely(err)) {
@@ -1151,11 +1345,12 @@ static netdev_tx_t __dpaa2_eth_tx(struct sk_buff *skb,
 		goto err_build_fd;
 	}
 
-	if (skb->cb[0])
-		dpaa2_eth_enable_tx_tstamp(priv, &fd, swa, skb);
+	if (swa && skb->cb[0])
+		dpaa2_eth_enable_tx_tstamp(priv, fd, swa, skb);
 
 	/* Tracing point */
-	trace_dpaa2_tx_fd(net_dev, &fd);
+	for (i = 0; i < num_fds; i++)
+		trace_dpaa2_tx_fd(net_dev, &fd[i]);
 
 	/* TxConf FQ selection relies on queue id from the stack.
 	 * In case of a forwarded frame from another DPNI interface, we choose
@@ -1175,27 +1370,32 @@ static netdev_tx_t __dpaa2_eth_tx(struct sk_buff *skb,
 		queue_mapping %= dpaa2_eth_queue_count(priv);
 	}
 	fq = &priv->fq[queue_mapping];
-
-	fd_len = dpaa2_fd_get_len(&fd);
 	nq = netdev_get_tx_queue(net_dev, queue_mapping);
 	netdev_tx_sent_queue(nq, fd_len);
 
 	/* Everything that happens after this enqueues might race with
 	 * the Tx confirmation callback for this frame
 	 */
-	for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
-		err = priv->enqueue(priv, fq, &fd, prio, 1, NULL);
-		if (err != -EBUSY)
-			break;
+	max_retries = num_fds * DPAA2_ETH_ENQUEUE_RETRIES;
+	while (total_enqueued < num_fds && retries < max_retries) {
+		err = priv->enqueue(priv, fq, &fd[total_enqueued],
+				    prio, num_fds - total_enqueued, &enqueued);
+		if (err == -EBUSY) {
+			retries++;
+			continue;
+		}
+
+		total_enqueued += enqueued;
 	}
-	percpu_extras->tx_portal_busy += i;
+	percpu_extras->tx_portal_busy += retries;
 
 	if (unlikely(err < 0)) {
 		percpu_stats->tx_errors++;
 		/* Clean up everything, including freeing the skb */
-		dpaa2_eth_free_tx_fd(priv, fq, &fd, false);
+		dpaa2_eth_free_tx_fd(priv, fq, fd, false);
 		netdev_tx_completed_queue(nq, 1, fd_len);
 	} else {
-		percpu_stats->tx_packets++;
+		percpu_stats->tx_packets += total_enqueued;
 		percpu_stats->tx_bytes += fd_len;
 	}
 
@@ -1523,7 +1723,7 @@ static void dpaa2_eth_sgt_cache_drain(struct dpaa2_eth_priv *priv)
 		count = sgt_cache->count;
 
 		for (i = 0; i < count; i++)
-			kfree(sgt_cache->buf[i]);
+			skb_free_frag(sgt_cache->buf[i]);
 		sgt_cache->count = 0;
 	}
 }
@@ -4115,7 +4315,8 @@ static int dpaa2_eth_netdev_init(struct net_device *net_dev)
 	net_dev->features = NETIF_F_RXCSUM |
 			    NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
 			    NETIF_F_SG | NETIF_F_HIGHDMA |
-			    NETIF_F_LLTX | NETIF_F_HW_TC;
+			    NETIF_F_LLTX | NETIF_F_HW_TC | NETIF_F_TSO;
+	net_dev->gso_max_segs = DPAA2_ETH_ENQUEUE_MAX_FDS;
 	net_dev->hw_features = net_dev->features;
 
 	if (priv->dpni_attrs.vlan_filter_entries)
@@ -4397,6 +4598,13 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev)
 		goto err_alloc_sgt_cache;
 	}
 
+	priv->fd = alloc_percpu(*priv->fd);
+	if (!priv->fd) {
+		dev_err(dev, "alloc_percpu(fds) failed\n");
+		err = -ENOMEM;
+		goto err_alloc_fds;
+	}
+
 	err = dpaa2_eth_netdev_init(net_dev);
 	if (err)
 		goto err_netdev_init;
@@ -4484,6 +4692,8 @@ err_poll_thread:
 err_alloc_rings:
 err_csum:
 err_netdev_init:
+	free_percpu(priv->fd);
+err_alloc_fds:
 	free_percpu(priv->sgt_cache);
 err_alloc_sgt_cache:
 	free_percpu(priv->percpu_extras);
@@ -4539,6 +4749,7 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev)
 	fsl_mc_free_irqs(ls_dev);
 
 	dpaa2_eth_free_rings(priv);
+	free_percpu(priv->fd);
 	free_percpu(priv->sgt_cache);
 	free_percpu(priv->percpu_stats);
 	free_percpu(priv->percpu_extras);
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
@@ -122,6 +122,7 @@ enum dpaa2_eth_swa_type {
 	DPAA2_ETH_SWA_SINGLE,
 	DPAA2_ETH_SWA_SG,
 	DPAA2_ETH_SWA_XDP,
+	DPAA2_ETH_SWA_SW_TSO,
 };
 
 /* Must keep this struct smaller than DPAA2_ETH_SWA_SIZE */
@@ -142,6 +143,12 @@ struct dpaa2_eth_swa {
 			int dma_size;
 			struct xdp_frame *xdpf;
 		} xdp;
+		struct {
+			struct sk_buff *skb;
+			int num_sg;
+			int sgt_size;
+			int is_last_fd;
+		} tso;
 	};
 };
 
@@ -354,6 +361,8 @@ struct dpaa2_eth_drv_stats {
 	__u64 tx_conf_bytes;
 	__u64 tx_sg_frames;
 	__u64 tx_sg_bytes;
+	__u64 tx_tso_frames;
+	__u64 tx_tso_bytes;
 	__u64 rx_sg_frames;
 	__u64 rx_sg_bytes;
 	/* Linear skbs sent as a S/G FD due to insufficient headroom */
@@ -493,8 +502,15 @@ struct dpaa2_eth_trap_data {
 	struct dpaa2_eth_priv *priv;
 };
 
+#define DPAA2_ETH_SG_ENTRIES_MAX	(PAGE_SIZE / sizeof(struct scatterlist))
+
 #define DPAA2_ETH_DEFAULT_COPYBREAK	512
 
+#define DPAA2_ETH_ENQUEUE_MAX_FDS	200
+struct dpaa2_eth_fds {
+	struct dpaa2_fd array[DPAA2_ETH_ENQUEUE_MAX_FDS];
+};
+
 /* Driver private data */
 struct dpaa2_eth_priv {
 	struct net_device *net_dev;
@@ -577,6 +593,8 @@ struct dpaa2_eth_priv {
 	struct devlink_port devlink_port;
 
 	u32 rx_copybreak;
+
+	struct dpaa2_eth_fds __percpu *fd;
 };
 
 struct dpaa2_eth_devlink_priv {
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
@@ -44,6 +44,8 @@ static char dpaa2_ethtool_extras[][ETH_GSTRING_LEN] = {
 	"[drv] tx conf bytes",
 	"[drv] tx sg frames",
 	"[drv] tx sg bytes",
+	"[drv] tx tso frames",
+	"[drv] tx tso bytes",
 	"[drv] rx sg frames",
 	"[drv] rx sg bytes",
 	"[drv] tx converted sg frames",
diff --git a/drivers/soc/fsl/dpio/qbman-portal.c b/drivers/soc/fsl/dpio/qbman-portal.c
@@ -743,8 +743,8 @@ int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s,
 	full_mask = s->eqcr.pi_ci_mask;
 	if (!s->eqcr.available) {
 		eqcr_ci = s->eqcr.ci;
-		p = s->addr_cena + QBMAN_CENA_SWP_EQCR_CI_MEMBACK;
-		s->eqcr.ci = *p & full_mask;
+		s->eqcr.ci = qbman_read_register(s, QBMAN_CINH_SWP_EQCR_CI);
+		s->eqcr.ci &= full_mask;
 		s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
 					eqcr_ci, s->eqcr.ci);
 		if (!s->eqcr.available) {
@@ -887,8 +887,8 @@ int qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s,
 	full_mask = s->eqcr.pi_ci_mask;
 	if (!s->eqcr.available) {
 		eqcr_ci = s->eqcr.ci;
-		p = s->addr_cena + QBMAN_CENA_SWP_EQCR_CI_MEMBACK;
-		s->eqcr.ci = *p & full_mask;
+		s->eqcr.ci = qbman_read_register(s, QBMAN_CINH_SWP_EQCR_CI);
+		s->eqcr.ci &= full_mask;
 		s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
 					eqcr_ci, s->eqcr.ci);
 		if (!s->eqcr.available)