IB/hfi1: Add functions to receive accelerated ipoib packets

The IPoIB netdev will share receive contexts with the existing VNIC
netdev. To achieve that, a dummy netdev is allocated with the
hfi1_devdata to own the receive contexts, and the IPoIB and VNIC
netdevs are layered on top of it. Each receive context is associated
with a single NAPI object.

This patch adds the functions to receive incoming packets for
accelerated IPoIB.

Link: https://lore.kernel.org/r/20200511160631.173205.54184.stgit@awfm-01.aw.intel.com
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Sadanand Warrier <sadanand.warrier@intel.com>
Signed-off-by: Grzegorz Andrejczuk <grzegorz.andrejczuk@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
commit 6991abcb99 (parent 89dcaa366b)
Kaike Wan, 2020-05-11 12:06:31 -04:00, committed by Jason Gunthorpe
7 changed files with 355 additions and 2 deletions

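Not part of this patch: a minimal sketch of the ownership model described in the commit message above, in which the dummy netdev owns one NAPI per shared receive context and each receive context gets the napi back-pointer this patch adds. The hfi1_demo_* names, the rcds parameter and the use of NAPI_POLL_WEIGHT are assumptions for illustration only.

#include <linux/netdevice.h>
#include <linux/slab.h>

#include "hfi.h"
#include "netdev.h"

/* Hypothetical poll callback; a real one would drain the paired rcd. */
static int hfi1_demo_napi_poll(struct napi_struct *napi, int budget)
{
	napi_complete_done(napi, 0);
	return 0;
}

/* Hypothetical setup: pair each shared receive context with one NAPI
 * owned by the dummy netdev.
 */
static int hfi1_demo_init_rxqs(struct hfi1_netdev_priv *priv,
			       struct hfi1_ctxtdata **rcds)
{
	int i;

	priv->rxq = kcalloc(priv->num_rx_q, sizeof(*priv->rxq), GFP_KERNEL);
	if (!priv->rxq)
		return -ENOMEM;

	for (i = 0; i < priv->num_rx_q; i++) {
		struct hfi1_netdev_rxq *rxq = &priv->rxq[i];

		rxq->priv = priv;
		rxq->rcd = rcds[i];
		/* one NAPI object per receive context */
		netif_napi_add(priv->dd->dummy_netdev, &rxq->napi,
			       hfi1_demo_napi_poll, NAPI_POLL_WEIGHT);
		/* back-pointer used by the rcv handlers added in this patch */
		rxq->rcd->napi = &rxq->napi;
	}

	return 0;
}

napi_enable()/napi_disable() on these NAPIs would then be driven by the 'enabled' refcount documented in struct hfi1_netdev_priv as IPoIB and VNIC consumers come and go.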
drivers/infiniband/hw/hfi1/Makefile

@@ -23,10 +23,12 @@ hfi1-y := \
intr.o \
iowait.o \
ipoib_main.o \
ipoib_rx.o \
ipoib_tx.o \
mad.o \
mmu_rb.o \
msix.o \
netdev_rx.o \
opfn.o \
pcie.o \
pio.o \

drivers/infiniband/hw/hfi1/driver.c

@@ -1,5 +1,5 @@
/*
* Copyright(c) 2015-2018 Intel Corporation.
* Copyright(c) 2015-2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -54,6 +54,7 @@
#include <linux/module.h>
#include <linux/prefetch.h>
#include <rdma/ib_verbs.h>
#include <linux/etherdevice.h>
#include "hfi.h"
#include "trace.h"
@@ -63,6 +64,9 @@
#include "vnic.h"
#include "fault.h"
#include "ipoib.h"
#include "netdev.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -1550,6 +1554,81 @@ void handle_eflags(struct hfi1_packet *packet)
show_eflags_errs(packet);
}
static void hfi1_ipoib_ib_rcv(struct hfi1_packet *packet)
{
struct hfi1_ibport *ibp;
struct net_device *netdev;
struct hfi1_ctxtdata *rcd = packet->rcd;
struct napi_struct *napi = rcd->napi;
struct sk_buff *skb;
struct hfi1_netdev_rxq *rxq = container_of(napi,
struct hfi1_netdev_rxq, napi);
u32 extra_bytes;
u32 tlen, qpnum;
bool do_work, do_cnp;
struct hfi1_ipoib_dev_priv *priv;
trace_hfi1_rcvhdr(packet);
hfi1_setup_ib_header(packet);
packet->ohdr = &((struct ib_header *)packet->hdr)->u.oth;
packet->grh = NULL;
if (unlikely(rhf_err_flags(packet->rhf))) {
handle_eflags(packet);
return;
}
qpnum = ib_bth_get_qpn(packet->ohdr);
netdev = hfi1_netdev_get_data(rcd->dd, qpnum);
if (!netdev)
goto drop_no_nd;
trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));
/* handle congestion notifications */
do_work = hfi1_may_ecn(packet);
if (unlikely(do_work)) {
do_cnp = (packet->opcode != IB_OPCODE_CNP);
(void)hfi1_process_ecn_slowpath(hfi1_ipoib_priv(netdev)->qp,
packet, do_cnp);
}
/*
* The split point is after the last byte of the DETH, so strip
* the padding and the ICRC. tlen is the whole packet length, so
* subtract the header size as well.
*/
tlen = packet->tlen;
extra_bytes = ib_bth_get_pad(packet->ohdr) + (SIZE_OF_CRC << 2) +
packet->hlen;
if (unlikely(tlen < extra_bytes))
goto drop;
tlen -= extra_bytes;
skb = hfi1_ipoib_prepare_skb(rxq, tlen, packet->ebuf);
if (unlikely(!skb))
goto drop;
priv = hfi1_ipoib_priv(netdev);
hfi1_ipoib_update_rx_netstats(priv, 1, skb->len);
skb->dev = netdev;
skb->pkt_type = PACKET_HOST;
netif_receive_skb(skb);
return;
drop:
++netdev->stats.rx_dropped;
drop_no_nd:
ibp = rcd_to_iport(packet->rcd);
++ibp->rvp.n_pkt_drops;
}
/*
* The following functions are called by the interrupt handler. They are type
* specific handlers for each packet type.
@@ -1757,3 +1836,14 @@ const rhf_rcv_function_ptr normal_rhf_rcv_functions[] = {
[RHF_RCV_TYPE_INVALID6] = process_receive_invalid,
[RHF_RCV_TYPE_INVALID7] = process_receive_invalid,
};
const rhf_rcv_function_ptr netdev_rhf_rcv_functions[] = {
[RHF_RCV_TYPE_EXPECTED] = process_receive_invalid,
[RHF_RCV_TYPE_EAGER] = process_receive_invalid,
[RHF_RCV_TYPE_IB] = hfi1_ipoib_ib_rcv,
[RHF_RCV_TYPE_ERROR] = process_receive_error,
[RHF_RCV_TYPE_BYPASS] = hfi1_vnic_bypass_rcv,
[RHF_RCV_TYPE_INVALID5] = process_receive_invalid,
[RHF_RCV_TYPE_INVALID6] = process_receive_invalid,
[RHF_RCV_TYPE_INVALID7] = process_receive_invalid,
};

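To make the stripping arithmetic in hfi1_ipoib_ib_rcv() above concrete, here is a worked example; the header sizes assume a 9B packet with no GRH, so hlen covers LRH (8) + BTH (12) + DETH (8) = 28 bytes, and the numbers are illustrative only:

    tlen        = 28 (headers) + 4 (IPoIB encap) + 1024 (network payload)
                  + 0 (pad) + 4 (ICRC) = 1060
    extra_bytes = pad + (SIZE_OF_CRC << 2) + hlen = 0 + 4 + 28 = 32
    tlen       -= extra_bytes   /* 1028 bytes left */

The 1028 bytes handed to hfi1_ipoib_prepare_skb() still begin with the 4-byte IPoIB encapsulation header; the ethertype in its first two bytes is saved in skb->protocol before skb_pull() strips HFI1_IPOIB_ENCAP_LEN, leaving the 1024 payload bytes for netif_receive_skb().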
drivers/infiniband/hw/hfi1/hfi.h

@@ -233,6 +233,8 @@ struct hfi1_ctxtdata {
intr_handler fast_handler;
/** slow handler */
intr_handler slow_handler;
/* napi pointer associated with netdev */
struct napi_struct *napi;
/* verbs rx_stats per rcd */
struct hfi1_opcode_stats_perctx *opstats;
/* clear interrupt mask */
@@ -985,7 +987,7 @@ typedef void (*hfi1_make_req)(struct rvt_qp *qp,
struct hfi1_pkt_state *ps,
struct rvt_swqe *wqe);
extern const rhf_rcv_function_ptr normal_rhf_rcv_functions[];
extern const rhf_rcv_function_ptr netdev_rhf_rcv_functions[];
/* return values for the RHF receive functions */
#define RHF_RCV_CONTINUE 0 /* keep going */
@@ -1417,6 +1419,7 @@ struct hfi1_devdata {
struct hfi1_vnic_data vnic;
/* Lock to protect IRQ SRC register access */
spinlock_t irq_src_lock;
struct net_device *dummy_netdev;
/* Keeps track of IPoIB RSM rule users */
atomic_t ipoib_rsm_usr_num;

drivers/infiniband/hw/hfi1/ipoib.h

@@ -22,6 +22,7 @@
#include "hfi.h"
#include "iowait.h"
#include "netdev.h"
#include <rdma/ib_verbs.h>
@@ -29,6 +30,7 @@
#define HFI1_IPOIB_TXREQ_NAME_LEN 32
#define HFI1_IPOIB_PSEUDO_LEN 20
#define HFI1_IPOIB_ENCAP_LEN 4
struct hfi1_ipoib_dev_priv;
@@ -118,6 +120,19 @@ hfi1_ipoib_priv(const struct net_device *dev)
return &((struct hfi1_ipoib_rdma_netdev *)netdev_priv(dev))->dev_priv;
}
static inline void
hfi1_ipoib_update_rx_netstats(struct hfi1_ipoib_dev_priv *priv,
u64 packets,
u64 bytes)
{
struct pcpu_sw_netstats *netstats = this_cpu_ptr(priv->netstats);
u64_stats_update_begin(&netstats->syncp);
netstats->rx_packets += packets;
netstats->rx_bytes += bytes;
u64_stats_update_end(&netstats->syncp);
}
static inline void
hfi1_ipoib_update_tx_netstats(struct hfi1_ipoib_dev_priv *priv,
u64 packets,
@@ -142,6 +157,9 @@ void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv);
void hfi1_ipoib_napi_tx_enable(struct net_device *dev);
void hfi1_ipoib_napi_tx_disable(struct net_device *dev);
struct sk_buff *hfi1_ipoib_prepare_skb(struct hfi1_netdev_rxq *rxq,
int size, void *data);
int hfi1_ipoib_rn_get_params(struct ib_device *device,
u8 port_num,
enum rdma_netdev_t type,

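A brief, hypothetical sketch (not part of this patch) of how the per-CPU counters bumped by hfi1_ipoib_update_rx_netstats() could be folded back into device-wide statistics, e.g. from an ndo_get_stats64 handler; hfi1_demo_fold_rx_netstats() is an assumed name, and only the pcpu_sw_netstats/u64_stats pattern is taken from the code above.

#include <linux/netdevice.h>
#include <linux/percpu.h>
#include <linux/u64_stats_sync.h>

#include "ipoib.h"

/* Hypothetical helper: fold the per-CPU pcpu_sw_netstats updated by
 * hfi1_ipoib_update_rx_netstats() into device-wide counters.
 */
static void hfi1_demo_fold_rx_netstats(struct hfi1_ipoib_dev_priv *priv,
				       struct rtnl_link_stats64 *stats)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		const struct pcpu_sw_netstats *ns =
			per_cpu_ptr(priv->netstats, cpu);
		unsigned int start;
		u64 packets, bytes;

		/* standard u64_stats reader loop, paired with the
		 * u64_stats_update_begin()/end() in the updater above
		 */
		do {
			start = u64_stats_fetch_begin_irq(&ns->syncp);
			packets = ns->rx_packets;
			bytes = ns->rx_bytes;
		} while (u64_stats_fetch_retry_irq(&ns->syncp, start));

		stats->rx_packets += packets;
		stats->rx_bytes += bytes;
	}
}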
drivers/infiniband/hw/hfi1/ipoib_rx.c

@@ -0,0 +1,71 @@
// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
* Copyright(c) 2020 Intel Corporation.
*
*/
#include "netdev.h"
#include "ipoib.h"
#define HFI1_IPOIB_SKB_PAD ((NET_SKB_PAD) + (NET_IP_ALIGN))
static void copy_ipoib_buf(struct sk_buff *skb, void *data, int size)
{
void *dst_data;
skb_checksum_none_assert(skb);
skb->protocol = *((__be16 *)data);
dst_data = skb_put(skb, size);
memcpy(dst_data, data, size);
skb->mac_header = HFI1_IPOIB_PSEUDO_LEN;
skb_pull(skb, HFI1_IPOIB_ENCAP_LEN);
}
static struct sk_buff *prepare_frag_skb(struct napi_struct *napi, int size)
{
struct sk_buff *skb;
int skb_size = SKB_DATA_ALIGN(size + HFI1_IPOIB_SKB_PAD);
void *frag;
skb_size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
skb_size = SKB_DATA_ALIGN(skb_size);
frag = napi_alloc_frag(skb_size);
if (unlikely(!frag))
return napi_alloc_skb(napi, size);
skb = build_skb(frag, skb_size);
if (unlikely(!skb)) {
skb_free_frag(frag);
return NULL;
}
skb_reserve(skb, HFI1_IPOIB_SKB_PAD);
return skb;
}
struct sk_buff *hfi1_ipoib_prepare_skb(struct hfi1_netdev_rxq *rxq,
int size, void *data)
{
struct napi_struct *napi = &rxq->napi;
int skb_size = size + HFI1_IPOIB_ENCAP_LEN;
struct sk_buff *skb;
/*
* For allocations that fit in 4k including the skb overhead, use the
* napi skb cache. Otherwise fall back to the napi page-frag cache.
*/
if (size <= SKB_WITH_OVERHEAD(PAGE_SIZE))
skb = napi_alloc_skb(napi, skb_size);
else
skb = prepare_frag_skb(napi, skb_size);
if (unlikely(!skb))
return NULL;
copy_ipoib_buf(skb, data, size);
return skb;
}

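To put a rough number on the threshold used above (illustrative; the exact value depends on the architecture and kernel config):

    SKB_WITH_OVERHEAD(PAGE_SIZE)
        = PAGE_SIZE - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))
        ~ 4096 - 320 = 3776 bytes on a typical 4 KiB-page x86_64 build

so payloads within that budget come from the NAPI skb cache via napi_alloc_skb(), while larger ones go through prepare_frag_skb(), which rounds the request up by HFI1_IPOIB_SKB_PAD plus the skb_shared_info footprint before calling napi_alloc_frag()/build_skb().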
drivers/infiniband/hw/hfi1/netdev.h

@@ -0,0 +1,90 @@
/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
* Copyright(c) 2020 Intel Corporation.
*
*/
#ifndef HFI1_NETDEV_H
#define HFI1_NETDEV_H
#include "hfi.h"
#include <linux/netdevice.h>
#include <linux/xarray.h>
/**
* struct hfi1_netdev_rxq - Receive Queue for HFI
* dummy netdev. Both IPoIB and VNIC netdevices will be working on
* top of this device.
* @napi: napi object
* @priv: ptr to netdev_priv
* @rcd: ptr to receive context data
*/
struct hfi1_netdev_rxq {
struct napi_struct napi;
struct hfi1_netdev_priv *priv;
struct hfi1_ctxtdata *rcd;
};
/*
* Number of netdev contexts used. Ensure it is less than or equal to
* max queues supported by VNIC (HFI1_VNIC_MAX_QUEUE).
*/
#define HFI1_MAX_NETDEV_CTXTS 8
/* Number of NETDEV RSM entries */
#define NUM_NETDEV_MAP_ENTRIES HFI1_MAX_NETDEV_CTXTS
/**
* struct hfi1_netdev_priv - data required to set up and run HFI netdev.
* @dd: hfi1_devdata
* @rxq: pointer to dummy netdev receive queues.
* @num_rx_q: number of receive queues
* @rmt_start: first free index in the RMT array
* @dev_tbl: netdev table for unique identifiers of VNIC and IPoIB VLANs.
* @enabled: atomic counter of netdevs enabling receive queues.
*           When 0 NAPI will be disabled.
* @netdevs: atomic counter of netdevs using dummy netdev.
*           When 0 receive queues will be freed.
*/
struct hfi1_netdev_priv {
struct hfi1_devdata *dd;
struct hfi1_netdev_rxq *rxq;
int num_rx_q;
int rmt_start;
struct xarray dev_tbl;
/* count of enabled napi polls */
atomic_t enabled;
/* count of netdevs on top */
atomic_t netdevs;
};
static inline
struct hfi1_netdev_priv *hfi1_netdev_priv(struct net_device *dev)
{
return (struct hfi1_netdev_priv *)&dev[1];
}
static inline
int hfi1_netdev_ctxt_count(struct hfi1_devdata *dd)
{
struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
return priv->num_rx_q;
}
static inline
struct hfi1_ctxtdata *hfi1_netdev_get_ctxt(struct hfi1_devdata *dd, int ctxt)
{
struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
return priv->rxq[ctxt].rcd;
}
int hfi1_netdev_add_data(struct hfi1_devdata *dd, int id, void *data);
void *hfi1_netdev_remove_data(struct hfi1_devdata *dd, int id);
void *hfi1_netdev_get_data(struct hfi1_devdata *dd, int id);
void *hfi1_netdev_get_first_data(struct hfi1_devdata *dd, int *start_id);
#endif /* HFI1_NETDEV_H */

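The dev_tbl accessors declared above pair naturally with the QPN lookup in hfi1_ipoib_ib_rcv(): the rx path calls hfi1_netdev_get_data(rcd->dd, qpnum) with the QP number pulled from the BTH, so a consumer must have registered its netdev under that same id. The registration call sites live in other patches of this series; the pairing below is an illustrative sketch with hypothetical hfi1_demo_* wrappers.

#include "hfi.h"
#include "netdev.h"

/* Hypothetical wrappers: register/unregister a netdev under its QPN so
 * that hfi1_netdev_get_data(dd, qpnum) in the rx path can find it.
 */
static int hfi1_demo_register_rx_netdev(struct hfi1_devdata *dd,
					struct net_device *netdev, u32 qpn)
{
	return hfi1_netdev_add_data(dd, qpn, netdev);
}

static void hfi1_demo_unregister_rx_netdev(struct hfi1_devdata *dd, u32 qpn)
{
	hfi1_netdev_remove_data(dd, qpn);
}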
drivers/infiniband/hw/hfi1/netdev_rx.c

@@ -0,0 +1,79 @@
// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
* Copyright(c) 2020 Intel Corporation.
*
*/
/*
* This file contains HFI1 support for netdev RX functionality
*/
#include "sdma.h"
#include "verbs.h"
#include "netdev.h"
#include "hfi.h"
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <rdma/ib_verbs.h>
/**
* hfi1_netdev_add_data - Registers data with unique identifier
* to be requested later; this is needed for the VNIC and IPoIB VLAN
* implementations.
* The table is protected by the xarray's internal lock.
*
* @dd: hfi1 dev data
* @id: requested integer id up to INT_MAX
* @data: data to be associated with index
*/
int hfi1_netdev_add_data(struct hfi1_devdata *dd, int id, void *data)
{
struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
return xa_insert(&priv->dev_tbl, id, data, GFP_NOWAIT);
}
/**
* hfi1_netdev_remove_data - Removes data with previously given id.
* Returns a pointer to the removed entry, or NULL if none was present.
*
* @dd: hfi1 dev data
* @id: requested integer id up to INT_MAX
*/
void *hfi1_netdev_remove_data(struct hfi1_devdata *dd, int id)
{
struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
return xa_erase(&priv->dev_tbl, id);
}
/**
* hfi1_netdev_get_data - Gets data with given id
*
* @dd: hfi1 dev data
* @id: requested integer id up to INT_MAX
*/
void *hfi1_netdev_get_data(struct hfi1_devdata *dd, int id)
{
struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
return xa_load(&priv->dev_tbl, id);
}
/**
* hfi1_netdev_get_first_data - Gets the first entry with an id greater than or equal to *start_id.
*
* @dd: hfi1 dev data
* @start_id: requested integer id up to INT_MAX; updated to the id of the entry found
*/
void *hfi1_netdev_get_first_data(struct hfi1_devdata *dd, int *start_id)
{
struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
unsigned long index = *start_id;
void *ret;
ret = xa_find(&priv->dev_tbl, &index, UINT_MAX, XA_PRESENT);
*start_id = (int)index;
return ret;
}
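hfi1_netdev_get_first_data() follows the usual xa_find() convention: it returns the first entry at or above *start_id and writes that entry's id back through the pointer. A hypothetical iteration over every registered netdev (not part of this patch; hfi1_demo_for_each_rx_netdev() is an assumed name) would therefore look like:

#include <linux/netdevice.h>

#include "hfi.h"
#include "netdev.h"

/* Hypothetical walk over all netdevs registered in dev_tbl. */
static void hfi1_demo_for_each_rx_netdev(struct hfi1_devdata *dd)
{
	int id = 0;
	void *entry;

	while ((entry = hfi1_netdev_get_first_data(dd, &id)) != NULL) {
		struct net_device *ndev = entry;

		netdev_info(ndev, "registered in dev_tbl at id %d\n", id);
		id++;	/* resume the search after this entry */
	}
}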