Merge tag 'net-next-5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next

Pull networking updates from Jakub Kicinski:
 "Core:

   - BPF:
      - add syscall program type and libbpf support for generating
        instructions and bindings for in-kernel BPF loaders (BPF loaders
        for BPF), this is a stepping stone for signed BPF programs
      - infrastructure to migrate TCP child sockets from one listener to
        another in the same reuseport group/map to improve flexibility
        of service hand-off/restart
      - add broadcast support to XDP redirect

   - allow bypass of the lockless qdisc to improving performance (for
     pktgen: +23% with one thread, +44% with 2 threads)

   - add a simpler version of "DO_ONCE()" which does not require jump
     labels, intended for slow-path usage

   - virtio/vsock: introduce SOCK_SEQPACKET support

   - add getsocketopt to retrieve netns cookie

   - ip: treat lowest address of a IPv4 subnet as ordinary unicast
     address allowing reclaiming of precious IPv4 addresses

   - ipv6: use prandom_u32() for ID generation

   - ip: add support for more flexible field selection for hashing
     across multi-path routes (w/ offload to mlxsw)

   - icmp: add support for extended RFC 8335 PROBE (ping)

   - seg6: add support for SRv6 End.DT46 behavior

   - mptcp:
      - DSS checksum support (RFC 8684) to detect middlebox meddling
      - support Connection-time 'C' flag
      - time stamping support

   - sctp: packetization Layer Path MTU Discovery (RFC 8899)

   - xfrm: speed up state addition with seq set

   - WiFi:
      - hidden AP discovery on 6 GHz and other HE 6 GHz improvements
      - aggregation handling improvements for some drivers
      - minstrel improvements for no-ack frames
      - deferred rate control for TXQs to improve reaction times
      - switch from round robin to virtual time-based airtime scheduler

   - add trace points:
      - tcp checksum errors
      - openvswitch - action execution, upcalls
      - socket errors via sk_error_report

  Device APIs:

   - devlink: add rate API for hierarchical control of max egress rate
     of virtual devices (VFs, SFs etc.)

   - don't require RCU read lock to be held around BPF hooks in NAPI
     context

   - page_pool: generic buffer recycling

  New hardware/drivers:

   - mobile:
      - iosm: PCIe Driver for Intel M.2 Modem
      - support for Qualcomm MSM8998 (ipa)

   - WiFi: Qualcomm QCN9074 and WCN6855 PCI devices

   - sparx5: Microchip SparX-5 family of Enterprise Ethernet switches

   - Mellanox BlueField Gigabit Ethernet (control NIC of the DPU)

   - NXP SJA1110 Automotive Ethernet 10-port switch

   - Qualcomm QCA8327 switch support (qca8k)

   - Mikrotik 10/25G NIC (atl1c)

  Driver changes:

   - ACPI support for some MDIO, MAC and PHY devices from Marvell and
     NXP (our first foray into MAC/PHY description via ACPI)

   - HW timestamping (PTP) support: bnxt_en, ice, sja1105, hns3, tja11xx

   - Mellanox/Nvidia NIC (mlx5)
      - NIC VF offload of L2 bridging
      - support IRQ distribution to Sub-functions

   - Marvell (prestera):
      - add flower and match all
      - devlink trap
      - link aggregation

   - Netronome (nfp): connection tracking offload

   - Intel 1GE (igc): add AF_XDP support

   - Marvell DPU (octeontx2): ingress ratelimit offload

   - Google vNIC (gve): new ring/descriptor format support

   - Qualcomm mobile (rmnet & ipa): inline checksum offload support

   - MediaTek WiFi (mt76)
      - mt7915 MSI support
      - mt7915 Tx status reporting
      - mt7915 thermal sensors support
      - mt7921 decapsulation offload
      - mt7921 enable runtime pm and deep sleep

   - Realtek WiFi (rtw88)
      - beacon filter support
      - Tx antenna path diversity support
      - firmware crash information via devcoredump

   - Qualcomm WiFi (wcn36xx)
      - Wake-on-WLAN support with magic packets and GTK rekeying

   - Micrel PHY (ksz886x/ksz8081): add cable test support"

* tag 'net-next-5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2168 commits)
  tcp: change ICSK_CA_PRIV_SIZE definition
  tcp_yeah: check struct yeah size at compile time
  gve: DQO: Fix off by one in gve_rx_dqo()
  stmmac: intel: set PCI_D3hot in suspend
  stmmac: intel: Enable PHY WOL option in EHL
  net: stmmac: option to enable PHY WOL with PMT enabled
  net: say "local" instead of "static" addresses in ndo_dflt_fdb_{add,del}
  net: use netdev_info in ndo_dflt_fdb_{add,del}
  ptp: Set lookup cookie when creating a PTP PPS source.
  net: sock: add trace for socket errors
  net: sock: introduce sk_error_report
  net: dsa: replay the local bridge FDB entries pointing to the bridge dev too
  net: dsa: ensure during dsa_fdb_offload_notify that dev_hold and dev_put are on the same dev
  net: dsa: include fdb entries pointing to bridge in the host fdb list
  net: dsa: include bridge addresses which are local in the host fdb list
  net: dsa: sync static FDB entries on foreign interfaces to hardware
  net: dsa: install the host MDB and FDB entries in the master's RX filter
  net: dsa: reference count the FDB addresses at the cross-chip notifier level
  net: dsa: introduce a separate cross-chip notifier type for host FDBs
  net: dsa: reference count the MDB entries at the cross-chip notifier level
  ...
This commit is contained in:
Linus Torvalds
2021-06-30 15:51:09 -07:00
1908 changed files with 108602 additions and 27721 deletions

View File

@@ -73,6 +73,7 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext,
__field(u64, data_seq)
__field(u32, subflow_seq)
__field(u16, data_len)
__field(u16, csum)
__field(u8, use_map)
__field(u8, dsn64)
__field(u8, data_fin)
@@ -82,6 +83,7 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext,
__field(u8, frozen)
__field(u8, reset_transient)
__field(u8, reset_reason)
__field(u8, csum_reqd)
),
TP_fast_assign(
@@ -89,6 +91,7 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext,
__entry->data_seq = mpext->data_seq;
__entry->subflow_seq = mpext->subflow_seq;
__entry->data_len = mpext->data_len;
__entry->csum = (__force u16)mpext->csum;
__entry->use_map = mpext->use_map;
__entry->dsn64 = mpext->dsn64;
__entry->data_fin = mpext->data_fin;
@@ -98,16 +101,18 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext,
__entry->frozen = mpext->frozen;
__entry->reset_transient = mpext->reset_transient;
__entry->reset_reason = mpext->reset_reason;
__entry->csum_reqd = mpext->csum_reqd;
),
TP_printk("data_ack=%llu data_seq=%llu subflow_seq=%u data_len=%u use_map=%u dsn64=%u data_fin=%u use_ack=%u ack64=%u mpc_map=%u frozen=%u reset_transient=%u reset_reason=%u",
TP_printk("data_ack=%llu data_seq=%llu subflow_seq=%u data_len=%u csum=%x use_map=%u dsn64=%u data_fin=%u use_ack=%u ack64=%u mpc_map=%u frozen=%u reset_transient=%u reset_reason=%u csum_reqd=%u",
__entry->data_ack, __entry->data_seq,
__entry->subflow_seq, __entry->data_len,
__entry->use_map, __entry->dsn64,
__entry->data_fin, __entry->use_ack,
__entry->ack64, __entry->mpc_map,
__entry->frozen, __entry->reset_transient,
__entry->reset_reason)
__entry->csum, __entry->use_map,
__entry->dsn64, __entry->data_fin,
__entry->use_ack, __entry->ack64,
__entry->mpc_map, __entry->frozen,
__entry->reset_transient, __entry->reset_reason,
__entry->csum_reqd)
);
DEFINE_EVENT(mptcp_dump_mpext, get_mapping_status,

View File

@@ -201,6 +201,66 @@ TRACE_EVENT(inet_sock_set_state,
show_tcp_state_name(__entry->newstate))
);
TRACE_EVENT(inet_sk_error_report,
TP_PROTO(const struct sock *sk),
TP_ARGS(sk),
TP_STRUCT__entry(
__field(int, error)
__field(__u16, sport)
__field(__u16, dport)
__field(__u16, family)
__field(__u16, protocol)
__array(__u8, saddr, 4)
__array(__u8, daddr, 4)
__array(__u8, saddr_v6, 16)
__array(__u8, daddr_v6, 16)
),
TP_fast_assign(
struct inet_sock *inet = inet_sk(sk);
struct in6_addr *pin6;
__be32 *p32;
__entry->error = sk->sk_err;
__entry->family = sk->sk_family;
__entry->protocol = sk->sk_protocol;
__entry->sport = ntohs(inet->inet_sport);
__entry->dport = ntohs(inet->inet_dport);
p32 = (__be32 *) __entry->saddr;
*p32 = inet->inet_saddr;
p32 = (__be32 *) __entry->daddr;
*p32 = inet->inet_daddr;
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == AF_INET6) {
pin6 = (struct in6_addr *)__entry->saddr_v6;
*pin6 = sk->sk_v6_rcv_saddr;
pin6 = (struct in6_addr *)__entry->daddr_v6;
*pin6 = sk->sk_v6_daddr;
} else
#endif
{
pin6 = (struct in6_addr *)__entry->saddr_v6;
ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
pin6 = (struct in6_addr *)__entry->daddr_v6;
ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
}
),
TP_printk("family=%s protocol=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c error=%d",
show_family_name(__entry->family),
show_inet_protocol_name(__entry->protocol),
__entry->sport, __entry->dport,
__entry->saddr, __entry->daddr,
__entry->saddr_v6, __entry->daddr_v6,
__entry->error)
);
#endif /* _TRACE_SOCK_H */
/* This part must be outside protection */

View File

@@ -295,6 +295,82 @@ TRACE_EVENT(tcp_probe,
__entry->srtt, __entry->rcv_wnd, __entry->sock_cookie)
);
#define TP_STORE_ADDR_PORTS_SKB_V4(__entry, skb) \
do { \
const struct tcphdr *th = (const struct tcphdr *)skb->data; \
struct sockaddr_in *v4 = (void *)__entry->saddr; \
\
v4->sin_family = AF_INET; \
v4->sin_port = th->source; \
v4->sin_addr.s_addr = ip_hdr(skb)->saddr; \
v4 = (void *)__entry->daddr; \
v4->sin_family = AF_INET; \
v4->sin_port = th->dest; \
v4->sin_addr.s_addr = ip_hdr(skb)->daddr; \
} while (0)
#if IS_ENABLED(CONFIG_IPV6)
#define TP_STORE_ADDR_PORTS_SKB(__entry, skb) \
do { \
const struct iphdr *iph = ip_hdr(skb); \
\
if (iph->version == 6) { \
const struct tcphdr *th = (const struct tcphdr *)skb->data; \
struct sockaddr_in6 *v6 = (void *)__entry->saddr; \
\
v6->sin6_family = AF_INET6; \
v6->sin6_port = th->source; \
v6->sin6_addr = ipv6_hdr(skb)->saddr; \
v6 = (void *)__entry->daddr; \
v6->sin6_family = AF_INET6; \
v6->sin6_port = th->dest; \
v6->sin6_addr = ipv6_hdr(skb)->daddr; \
} else \
TP_STORE_ADDR_PORTS_SKB_V4(__entry, skb); \
} while (0)
#else
#define TP_STORE_ADDR_PORTS_SKB(__entry, skb) \
TP_STORE_ADDR_PORTS_SKB_V4(__entry, skb)
#endif
/*
* tcp event with only skb
*/
DECLARE_EVENT_CLASS(tcp_event_skb,
TP_PROTO(const struct sk_buff *skb),
TP_ARGS(skb),
TP_STRUCT__entry(
__field(const void *, skbaddr)
__array(__u8, saddr, sizeof(struct sockaddr_in6))
__array(__u8, daddr, sizeof(struct sockaddr_in6))
),
TP_fast_assign(
__entry->skbaddr = skb;
memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
TP_STORE_ADDR_PORTS_SKB(__entry, skb);
),
TP_printk("src=%pISpc dest=%pISpc", __entry->saddr, __entry->daddr)
);
DEFINE_EVENT(tcp_event_skb, tcp_bad_csum,
TP_PROTO(const struct sk_buff *skb),
TP_ARGS(skb)
);
#endif /* _TRACE_TCP_H */
/* This part must be outside protection */

View File

@@ -9,9 +9,12 @@
#include <linux/tracepoint.h>
TRACE_DEFINE_ENUM(VIRTIO_VSOCK_TYPE_STREAM);
TRACE_DEFINE_ENUM(VIRTIO_VSOCK_TYPE_SEQPACKET);
#define show_type(val) \
__print_symbolic(val, { VIRTIO_VSOCK_TYPE_STREAM, "STREAM" })
__print_symbolic(val, \
{ VIRTIO_VSOCK_TYPE_STREAM, "STREAM" }, \
{ VIRTIO_VSOCK_TYPE_SEQPACKET, "SEQPACKET" })
TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_INVALID);
TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_REQUEST);

View File

@@ -110,7 +110,11 @@ DECLARE_EVENT_CLASS(xdp_redirect_template,
u32 ifindex = 0, map_index = index;
if (map_type == BPF_MAP_TYPE_DEVMAP || map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
ifindex = ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex;
/* Just leave to_ifindex to 0 if do broadcast redirect,
* as tgt will be NULL.
*/
if (tgt)
ifindex = ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex;
} else if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) {
ifindex = index;
map_index = 0;