linux/drivers/net/virtio_net.c

// SPDX-License-Identifier: GPL-2.0-or-later
/* A network driver using virtio.
 *
 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
 */
//#define DEBUG
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_net.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/scatterlist.h>
#include <linux/if_vlan.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/average.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/dim.h>
#include <net/route.h>
#include <net/xdp.h>
#include <net/net_failover.h>
#include <net/netdev_rx_queue.h>
#include <net/netdev_queues.h>
#include <net/xdp_sock_drv.h>
static int napi_weight = NAPI_POLL_WEIGHT;
module_param(napi_weight, int, 0444);
static bool csum = true, gso = true, napi_tx = true;
module_param(csum, bool, 0444);
module_param(gso, bool, 0444);
module_param(napi_tx, bool, 0644);
/* FIXME: MTU in config. */
#define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
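/* When a received packet is turned into an skb, up to this many bytes of the
 * payload are copied into the linear area; any remainder stays in the
 * original pages and is attached as fragments.
 */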
#define GOOD_COPY_LEN 128
#define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
/* Separating two types of XDP xmit */
#define VIRTIO_XDP_TX BIT(0)
#define VIRTIO_XDP_REDIR BIT(1)
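/* Completion tags: buffer tokens queued on a send virtqueue carry these low
 * bits so the free path can tell XDP frames and orphaned skbs apart from
 * ordinary skbs.
 */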
#define VIRTIO_XDP_FLAG BIT(0)
#define VIRTIO_ORPHAN_FLAG BIT(1)
/* RX packet size EWMA. The average packet size is used to determine the packet
 * buffer size when refilling RX rings. As the entire RX ring may be refilled
 * at once, the weight is chosen so that the EWMA will be insensitive to short-
 * term, transient changes in packet size.
 */
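/* DECLARE_EWMA() from <linux/average.h> generates struct ewma_pkt_len and the
 * ewma_pkt_len_init/add/read() helpers (no fractional precision bits, weight
 * reciprocal of 64) used for mrg_avg_pkt_len in struct receive_queue.
 */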
DECLARE_EWMA(pkt_len, 0, 64)
#define VIRTNET_DRIVER_VERSION "1.0.0"
static const unsigned long guest_offloads[] = {
        VIRTIO_NET_F_GUEST_TSO4,
        VIRTIO_NET_F_GUEST_TSO6,
        VIRTIO_NET_F_GUEST_ECN,
        VIRTIO_NET_F_GUEST_UFO,
        VIRTIO_NET_F_GUEST_CSUM,
        VIRTIO_NET_F_GUEST_USO4,
        VIRTIO_NET_F_GUEST_USO6,
        VIRTIO_NET_F_GUEST_HDRLEN
};
#define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
                                (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
                                (1ULL << VIRTIO_NET_F_GUEST_ECN) | \
                                (1ULL << VIRTIO_NET_F_GUEST_UFO) | \
                                (1ULL << VIRTIO_NET_F_GUEST_USO4) | \
                                (1ULL << VIRTIO_NET_F_GUEST_USO6))
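/* One entry per exported counter: @desc is the string shown to userspace,
 * @offset locates the counter inside the corresponding stats structure, and
 * @qstat_offset locates the matching field in struct netdev_queue_stats_rx/tx
 * when the counter is reported through the netdev-genl queue-stats interface
 * (-1 when it is reported via "ethtool -S" instead).
 */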
struct virtnet_stat_desc {
        char desc[ETH_GSTRING_LEN];
        size_t offset;
        size_t qstat_offset;
};
struct virtnet_sq_free_stats {
        u64 packets;
        u64 bytes;
        u64 napi_packets;
        u64 napi_bytes;
};
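/* Per-queue software counters. Updates are bracketed by the u64_stats_sync
 * "syncp" and the values are stored as u64_stats_t, so 64-bit counters can be
 * read without tearing on 32-bit hosts.
 */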
struct virtnet_sq_stats {
        struct u64_stats_sync syncp;
        u64_stats_t packets;
        u64_stats_t bytes;
        u64_stats_t xdp_tx;
        u64_stats_t xdp_tx_drops;
        u64_stats_t kicks;
        u64_stats_t tx_timeouts;
        u64_stats_t stop;
        u64_stats_t wake;
};
struct virtnet_rq_stats {
        struct u64_stats_sync syncp;
        u64_stats_t packets;
        u64_stats_t bytes;
        u64_stats_t drops;
        u64_stats_t xdp_packets;
        u64_stats_t xdp_tx;
        u64_stats_t xdp_redirects;
        u64_stats_t xdp_drops;
        u64_stats_t kicks;
};
#define VIRTNET_SQ_STAT(name, m) {name, offsetof(struct virtnet_sq_stats, m), -1}
#define VIRTNET_RQ_STAT(name, m) {name, offsetof(struct virtnet_rq_stats, m), -1}
#define VIRTNET_SQ_STAT_QSTAT(name, m) \
        { \
                name, \
                offsetof(struct virtnet_sq_stats, m), \
                offsetof(struct netdev_queue_stats_tx, m), \
        }
#define VIRTNET_RQ_STAT_QSTAT(name, m) \
        { \
                name, \
                offsetof(struct virtnet_rq_stats, m), \
                offsetof(struct netdev_queue_stats_rx, m), \
        }
static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = {
        VIRTNET_SQ_STAT("xdp_tx", xdp_tx),
        VIRTNET_SQ_STAT("xdp_tx_drops", xdp_tx_drops),
        VIRTNET_SQ_STAT("kicks", kicks),
        VIRTNET_SQ_STAT("tx_timeouts", tx_timeouts),
};
static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = {
        VIRTNET_RQ_STAT("drops", drops),
        VIRTNET_RQ_STAT("xdp_packets", xdp_packets),
        VIRTNET_RQ_STAT("xdp_tx", xdp_tx),
        VIRTNET_RQ_STAT("xdp_redirects", xdp_redirects),
        VIRTNET_RQ_STAT("xdp_drops", xdp_drops),
        VIRTNET_RQ_STAT("kicks", kicks),
};
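/* The per-queue software counters below are reported through the netdev-genl
 * queue-stats interface rather than "ethtool -S", so they are not duplicated
 * in the ethtool output.
 */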
static const struct virtnet_stat_desc virtnet_sq_stats_desc_qstat[] = {
        VIRTNET_SQ_STAT_QSTAT("packets", packets),
        VIRTNET_SQ_STAT_QSTAT("bytes", bytes),
        VIRTNET_SQ_STAT_QSTAT("stop", stop),
        VIRTNET_SQ_STAT_QSTAT("wake", wake),
};
static const struct virtnet_stat_desc virtnet_rq_stats_desc_qstat[] = {
        VIRTNET_RQ_STAT_QSTAT("packets", packets),
        VIRTNET_RQ_STAT_QSTAT("bytes", bytes),
};
#define VIRTNET_STATS_DESC_CQ(name) \
        {#name, offsetof(struct virtio_net_stats_cvq, name), -1}
#define VIRTNET_STATS_DESC_RX(class, name) \
        {#name, offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), -1}
#define VIRTNET_STATS_DESC_TX(class, name) \
        {#name, offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), -1}
static const struct virtnet_stat_desc virtnet_stats_cvq_desc[] = {
        VIRTNET_STATS_DESC_CQ(command_num),
        VIRTNET_STATS_DESC_CQ(ok_num),
};
static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc[] = {
        VIRTNET_STATS_DESC_RX(basic, packets),
        VIRTNET_STATS_DESC_RX(basic, bytes),
        VIRTNET_STATS_DESC_RX(basic, notifications),
        VIRTNET_STATS_DESC_RX(basic, interrupts),
};
static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc[] = {
        VIRTNET_STATS_DESC_TX(basic, packets),
        VIRTNET_STATS_DESC_TX(basic, bytes),
        VIRTNET_STATS_DESC_TX(basic, notifications),
        VIRTNET_STATS_DESC_TX(basic, interrupts),
};
static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc[] = {
        VIRTNET_STATS_DESC_RX(csum, needs_csum),
};
static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc[] = {
        VIRTNET_STATS_DESC_TX(gso, gso_packets_noseg),
        VIRTNET_STATS_DESC_TX(gso, gso_bytes_noseg),
};
static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc[] = {
        VIRTNET_STATS_DESC_RX(speed, ratelimit_bytes),
};
static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc[] = {
        VIRTNET_STATS_DESC_TX(speed, ratelimit_bytes),
};
#define VIRTNET_STATS_DESC_RX_QSTAT(class, name, qstat_field) \
        { \
                #name, \
                offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), \
                offsetof(struct netdev_queue_stats_rx, qstat_field), \
        }
#define VIRTNET_STATS_DESC_TX_QSTAT(class, name, qstat_field) \
        { \
                #name, \
                offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), \
                offsetof(struct netdev_queue_stats_tx, qstat_field), \
        }
static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc_qstat[] = {
        VIRTNET_STATS_DESC_RX_QSTAT(basic, drops, hw_drops),
        VIRTNET_STATS_DESC_RX_QSTAT(basic, drop_overruns, hw_drop_overruns),
};
static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc_qstat[] = {
        VIRTNET_STATS_DESC_TX_QSTAT(basic, drops, hw_drops),
        VIRTNET_STATS_DESC_TX_QSTAT(basic, drop_malformed, hw_drop_errors),
};
static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc_qstat[] = {
        VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_valid, csum_unnecessary),
        VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_none, csum_none),
        VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_bad, csum_bad),
};
static const struct virtnet_stat_desc virtnet_stats_tx_csum_desc_qstat[] = {
        VIRTNET_STATS_DESC_TX_QSTAT(csum, csum_none, csum_none),
        VIRTNET_STATS_DESC_TX_QSTAT(csum, needs_csum, needs_csum),
};
static const struct virtnet_stat_desc virtnet_stats_rx_gso_desc_qstat[] = {
        VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets, hw_gro_packets),
        VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes, hw_gro_bytes),
        VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets_coalesced, hw_gro_wire_packets),
        VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes_coalesced, hw_gro_wire_bytes),
};
static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc_qstat[] = {
        VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_packets, hw_gso_packets),
        VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_bytes, hw_gso_bytes),
        VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments, hw_gso_wire_packets),
        VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments_bytes, hw_gso_wire_bytes),
};
static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc_qstat[] = {
        VIRTNET_STATS_DESC_RX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits),
};
static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc_qstat[] = {
        VIRTNET_STATS_DESC_TX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits),
};
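/* Virtqueue categories used when grouping per-queue statistics: receive
 * queues, transmit queues, and the control virtqueue.
 */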
#define VIRTNET_Q_TYPE_RX 0
#define VIRTNET_Q_TYPE_TX 1
#define VIRTNET_Q_TYPE_CQ 2
struct virtnet_interrupt_coalesce {
        u32 max_packets;
        u32 max_usecs;
};
/* The dma information of pages allocated at a time. */
struct virtnet_rq_dma {
        dma_addr_t addr;
        u32 ref;
        u16 len;
        u16 need_sync;
};
/* Internal representation of a send virtqueue */
struct send_queue {
        /* Virtqueue associated with this send_queue */
        struct virtqueue *vq;
        /* TX: fragments + linear part + virtio header */
        struct scatterlist sg[MAX_SKB_FRAGS + 2];
        /* Name of the send queue: output.$index */
        char name[16];
        struct virtnet_sq_stats stats;
        struct virtnet_interrupt_coalesce intr_coal;
        struct napi_struct napi;
        /* Record whether sq is in reset state. */
        bool reset;
};
/* Internal representation of a receive virtqueue */
struct receive_queue {
/* Virtqueue associated with this receive_queue */
struct virtqueue *vq;
struct napi_struct napi;
struct bpf_prog __rcu *xdp_prog;
struct virtnet_rq_stats stats;
/* The number of rx notifications */
u16 calls;
/* Is dynamic interrupt moderation enabled? */
bool dim_enabled;
/* Used to protect dim_enabled and inter_coal */
struct mutex dim_lock;
/* Dynamic Interrupt Moderation */
struct dim dim;
u32 packets_in_napi;
struct virtnet_interrupt_coalesce intr_coal;
/* Chain pages by the private ptr. */
struct page *pages;
/* Average packet length for mergeable receive buffers. */
struct ewma_pkt_len mrg_avg_pkt_len;
/* Page frag for packet buffer allocation. */
struct page_frag alloc_frag;
/* RX: fragments + linear part + virtio header */
struct scatterlist sg[MAX_SKB_FRAGS + 2];
/* Min single buffer size for mergeable buffers case. */
unsigned int min_buf_len;
/* Name of this receive queue: input.$index */
char name[16];
struct xdp_rxq_info xdp_rxq;
	/* Record the last dma info to free after new pages are allocated. */
struct virtnet_rq_dma *last_dma;
struct xsk_buff_pool *xsk_pool;
/* xdp rxq used by xsk */
struct xdp_rxq_info xsk_rxq_info;
struct xdp_buff **xsk_buffs;
	/* Do the DMA mapping ourselves */
bool do_dma;
};
/* This structure can contain the RSS message with the maximum settings for the
 * indirection table and key size.
 * Note that the default structure describing an RSS configuration,
 * virtio_net_rss_config, carries the same information but cannot hold the
 * table values.
 * In any case, the structure is passed to the virtio hw through sg_buf split
 * into parts, because table sizes may differ according to the device
 * configuration.
 */
#define VIRTIO_NET_RSS_MAX_KEY_SIZE 40
#define VIRTIO_NET_RSS_MAX_TABLE_LEN 128
struct virtio_net_ctrl_rss {
u32 hash_types;
u16 indirection_table_mask;
u16 unclassified_queue;
u16 indirection_table[VIRTIO_NET_RSS_MAX_TABLE_LEN];
u16 max_tx_vq;
u8 hash_key_length;
u8 key[VIRTIO_NET_RSS_MAX_KEY_SIZE];
};
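/*
 * Illustrative note (an assumption for the example, not taken from the text
 * above): a device may advertise an indirection table much shorter than
 * VIRTIO_NET_RSS_MAX_TABLE_LEN, e.g. 64 entries with a 40-byte key.  In that
 * case only the leading control fields, the first 64 indirection_table[]
 * entries and the key need to reach the device, which is why the command is
 * built from several sg entries instead of one flat copy of this structure.
 */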
/* Control VQ buffers: protected by the rtnl lock */
struct control_buf {
struct virtio_net_ctrl_hdr hdr;
virtio_net_ctrl_ack status;
};
struct virtnet_info {
struct virtio_device *vdev;
struct virtqueue *cvq;
struct net_device *dev;
struct send_queue *sq;
struct receive_queue *rq;
unsigned int status;
/* Max # of queue pairs supported by the device */
u16 max_queue_pairs;
/* # of queue pairs currently used by the driver */
u16 curr_queue_pairs;
/* # of XDP queue pairs currently used by the driver */
u16 xdp_queue_pairs;
	/* xdp_queue_pairs may be 0 while an XDP program is loaded, so track XDP state separately. */
bool xdp_enabled;
/* I like... big packets and I cannot lie! */
bool big_packets;
/* number of sg entries allocated for big packets */
unsigned int big_packets_num_skbfrags;
/* Host will merge rx buffers for big packets (shake it! shake it!) */
bool mergeable_rx_bufs;
/* Host supports rss and/or hash report */
bool has_rss;
bool has_rss_hash_report;
u8 rss_key_size;
u16 rss_indir_table_size;
u32 rss_hash_types_supported;
u32 rss_hash_types_saved;
struct virtio_net_ctrl_rss rss;
/* Has control virtqueue */
bool has_cvq;
/* Lock to protect the control VQ */
struct mutex cvq_lock;
/* Host can handle any s/g split between our header and packet data */
bool any_header_sg;
/* Packet virtio header size */
u8 hdr_len;
/* Work struct for delayed refilling if we run low on memory. */
struct delayed_work refill;
/* Is delayed refill enabled? */
bool refill_enabled;
/* The lock to synchronize the access to refill_enabled */
spinlock_t refill_lock;
/* Work struct for config space updates */
struct work_struct config_work;
/* Work struct for setting rx mode */
struct work_struct rx_mode_work;
/* OK to queue work setting RX mode? */
bool rx_mode_work_enabled;
	/* Is the affinity hint set for the virtqueues? */
bool affinity_hint_set;
/* CPU hotplug instances for online & dead */
struct hlist_node node;
struct hlist_node node_dead;
struct control_buf *ctrl;
/* Ethtool settings */
u8 duplex;
u32 speed;
/* Is rx dynamic interrupt moderation enabled? */
bool rx_dim_enabled;
/* Interrupt coalescing settings */
struct virtnet_interrupt_coalesce intr_coal_tx;
struct virtnet_interrupt_coalesce intr_coal_rx;
unsigned long guest_offloads;
unsigned long guest_offloads_capable;
/* failover when STANDBY feature enabled */
struct failover *failover;
u64 device_stats_cap;
};
struct padded_vnet_hdr {
struct virtio_net_hdr_v1_hash hdr;
	/*
	 * hdr is in a separate sg buffer, and the data sg buffer shares the
	 * same page with this header sg. This padding makes the next sg
	 * 16-byte aligned after the header.
	 */
char padding[12];
};
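/*
 * Illustrative sizing, assuming the usual layout of struct
 * virtio_net_hdr_v1_hash (12 bytes of virtio_net_hdr_v1 plus 8 bytes of hash
 * report fields):
 *
 *	sizeof(struct virtio_net_hdr_v1_hash)	== 20
 *	sizeof(struct padded_vnet_hdr)		== 20 + 12 == 32
 *
 * so the data sg placed right after this header in the same page starts on a
 * 16-byte boundary.
 */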
struct virtio_net_common_hdr {
union {
struct virtio_net_hdr hdr;
struct virtio_net_hdr_mrg_rxbuf mrg_hdr;
struct virtio_net_hdr_v1_hash hash_v1_hdr;
};
};
static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
struct net_device *dev,
unsigned int *xdp_xmit,
struct virtnet_rq_stats *stats);
static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq,
struct sk_buff *skb, u8 flags);
static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb,
struct sk_buff *curr_skb,
struct page *page, void *buf,
int len, int truesize);
static bool is_xdp_frame(void *ptr)
{
return (unsigned long)ptr & VIRTIO_XDP_FLAG;
}
static void *xdp_to_ptr(struct xdp_frame *ptr)
{
return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG);
}
static struct xdp_frame *ptr_to_xdp(void *ptr)
{
return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG);
}
static bool is_orphan_skb(void *ptr)
{
return (unsigned long)ptr & VIRTIO_ORPHAN_FLAG;
}
static void *skb_to_ptr(struct sk_buff *skb, bool orphan)
{
return (void *)((unsigned long)skb | (orphan ? VIRTIO_ORPHAN_FLAG : 0));
}
static struct sk_buff *ptr_to_skb(void *ptr)
{
return (struct sk_buff *)((unsigned long)ptr & ~VIRTIO_ORPHAN_FLAG);
}
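/*
 * Sketch of the tagging round trip implemented by the helpers above
 * (assuming VIRTIO_XDP_FLAG and VIRTIO_ORPHAN_FLAG are distinct low bits that
 * are always clear in the aligned xdp_frame/sk_buff addresses):
 *
 *	void *ptr = xdp_to_ptr(frame);
 *	is_xdp_frame(ptr);		// true
 *	ptr_to_xdp(ptr);		// == frame
 *
 *	void *ptr2 = skb_to_ptr(skb, true);
 *	is_xdp_frame(ptr2);		// false
 *	is_orphan_skb(ptr2);		// true
 *	ptr_to_skb(ptr2);		// == skb
 *
 * The tagged pointer travels through the virtqueue as the token and is
 * decoded again in __free_old_xmit() below.
 */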
static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
bool in_napi, struct virtnet_sq_free_stats *stats)
{
unsigned int len;
void *ptr;
while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
if (!is_xdp_frame(ptr)) {
struct sk_buff *skb = ptr_to_skb(ptr);
pr_debug("Sent skb %p\n", skb);
if (is_orphan_skb(ptr)) {
stats->packets++;
stats->bytes += skb->len;
} else {
stats->napi_packets++;
stats->napi_bytes += skb->len;
}
napi_consume_skb(skb, in_napi);
} else {
struct xdp_frame *frame = ptr_to_xdp(ptr);
stats->packets++;
stats->bytes += xdp_get_frame_len(frame);
xdp_return_frame(frame);
}
}
netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes);
}
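/*
 * Note on the accounting above: only the skbs completed from NAPI context
 * (napi_packets/napi_bytes) are reported to BQL via
 * netdev_tx_completed_queue(); orphaned skbs and XDP frames are tallied
 * separately and only feed the caller's drop/packet counters, presumably
 * because they were never charged to the queue on the transmit side.
 */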
/* Converting between virtqueue no. and kernel tx/rx queue no.
* 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
*/
static int vq2txq(struct virtqueue *vq)
{
return (vq->index - 1) / 2;
}
static int txq2vq(int txq)
{
return txq * 2 + 1;
}
static int vq2rxq(struct virtqueue *vq)
{
return vq->index / 2;
}
static int rxq2vq(int rxq)
{
return rxq * 2;
}
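/*
 * Worked example of the mapping above for a device with max_queue_pairs == 3:
 *
 *	vq index:   0    1    2    3    4    5    6
 *	queue:      rx0  tx0  rx1  tx1  rx2  tx2  cvq
 *
 *	txq2vq(1) == 3 and vq2txq() of the vq with index 3 == 1
 *	rxq2vq(2) == 4 and vq2rxq() of the vq with index 4 == 2
 */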
static int vq_type(struct virtnet_info *vi, int qid)
{
if (qid == vi->max_queue_pairs * 2)
return VIRTNET_Q_TYPE_CQ;
if (qid % 2)
return VIRTNET_Q_TYPE_TX;
return VIRTNET_Q_TYPE_RX;
}
static inline struct virtio_net_common_hdr *
skb_vnet_common_hdr(struct sk_buff *skb)
{
return (struct virtio_net_common_hdr *)skb->cb;
}
/*
 * page->private is used to chain pages for big packets; put the whole
 * most recently used list at the beginning for reuse.
 */
static void give_pages(struct receive_queue *rq, struct page *page)
{
struct page *end;
/* Find end of list, sew whole thing into vi->rq.pages. */
for (end = page; end->private; end = (struct page *)end->private);
end->private = (unsigned long)rq->pages;
rq->pages = page;
}
static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
{
struct page *p = rq->pages;
if (p) {
rq->pages = (struct page *)p->private;
/* clear private here, it is used to chain pages */
p->private = 0;
} else
p = alloc_page(gfp_mask);
return p;
}
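/*
 * Illustrative reuse pattern: give_pages() pushes a (possibly chained) page
 * list onto the front of rq->pages and get_a_page() pops from the front, so
 * the most recently returned pages are handed out first:
 *
 *	give_pages(rq, a);	// rq->pages: a
 *	give_pages(rq, b);	// rq->pages: b -> a
 *	get_a_page(rq, gfp);	// returns b, rq->pages: a
 */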
static void virtnet_rq_free_buf(struct virtnet_info *vi,
struct receive_queue *rq, void *buf)
{
if (vi->mergeable_rx_bufs)
put_page(virt_to_head_page(buf));
else if (vi->big_packets)
give_pages(rq, buf);
else
put_page(virt_to_head_page(buf));
}
static void enable_delayed_refill(struct virtnet_info *vi)
{
spin_lock_bh(&vi->refill_lock);
vi->refill_enabled = true;
spin_unlock_bh(&vi->refill_lock);
}
static void disable_delayed_refill(struct virtnet_info *vi)
{
spin_lock_bh(&vi->refill_lock);
vi->refill_enabled = false;
spin_unlock_bh(&vi->refill_lock);
}
static void enable_rx_mode_work(struct virtnet_info *vi)
{
rtnl_lock();
vi->rx_mode_work_enabled = true;
rtnl_unlock();
}
static void disable_rx_mode_work(struct virtnet_info *vi)
{
rtnl_lock();
vi->rx_mode_work_enabled = false;
rtnl_unlock();
}
static void virtqueue_napi_schedule(struct napi_struct *napi,
struct virtqueue *vq)
{
if (napi_schedule_prep(napi)) {
virtqueue_disable_cb(vq);
__napi_schedule(napi);
}
}
static bool virtqueue_napi_complete(struct napi_struct *napi,
struct virtqueue *vq, int processed)
{
int opaque;
opaque = virtqueue_enable_cb_prepare(vq);
if (napi_complete_done(napi, processed)) {
if (unlikely(virtqueue_poll(vq, opaque)))
virtqueue_napi_schedule(napi, vq);
else
return true;
} else {
virtqueue_disable_cb(vq);
}
return false;
}
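/*
 * Why the virtqueue_poll() recheck above is needed: callbacks are re-armed
 * with virtqueue_enable_cb_prepare() before NAPI is completed, so the device
 * may add buffers in that window without raising an interrupt.
 * virtqueue_poll() detects such late work and reschedules NAPI (disabling
 * callbacks again); if napi_complete_done() itself refused to complete
 * because NAPI was rescheduled elsewhere, callbacks are simply disabled and
 * the pending work is picked up by the next poll.
 */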
static void skb_xmit_done(struct virtqueue *vq)
{
struct virtnet_info *vi = vq->vdev->priv;
struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi;
/* Suppress further interrupts. */
virtqueue_disable_cb(vq);
if (napi->weight)
virtqueue_napi_schedule(napi, vq);
else
/* We were probably waiting for more output buffers. */
netif_wake_subqueue(vi->dev, vq2txq(vq));
}
#define MRG_CTX_HEADER_SHIFT 22
static void *mergeable_len_to_ctx(unsigned int truesize,
unsigned int headroom)
{
return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize);
}
static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx)
{
return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT;
}
static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
{
return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1);
}
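/*
 * Worked example of the packing above: with truesize == 1536 and
 * headroom == 256,
 *
 *	ctx = mergeable_len_to_ctx(1536, 256);	// (256 << 22) | 1536
 *	mergeable_ctx_to_headroom(ctx);		// == 256
 *	mergeable_ctx_to_truesize(ctx);		// == 1536
 *
 * truesize must stay below 1 << MRG_CTX_HEADER_SHIFT (4 MiB), which easily
 * covers any buffer carved out of a page frag.
 */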
static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen,
unsigned int headroom,
unsigned int len)
{
struct sk_buff *skb;
skb = build_skb(buf, buflen);
if (unlikely(!skb))
return NULL;
skb_reserve(skb, headroom);
skb_put(skb, len);
return skb;
}
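/*
 * Resulting layout of the skb built above (illustrative):
 *
 *	buf                                               buf + buflen
 *	|<- headroom ->|<----- len ----->|<- tailroom / skb_shared_info ->|
 *	               ^
 *	               skb->data
 *
 * The caller must ensure that buflen leaves room for struct skb_shared_info
 * at the tail, which is exactly what page_to_skb() checks below before
 * taking this path.
 */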
/* Called from bottom half context */
static struct sk_buff *page_to_skb(struct virtnet_info *vi,
struct receive_queue *rq,
struct page *page, unsigned int offset,
unsigned int len, unsigned int truesize,
unsigned int headroom)
{
struct sk_buff *skb;
struct virtio_net_common_hdr *hdr;
unsigned int copy, hdr_len, hdr_padded_len;
struct page *page_to_free = NULL;
int tailroom, shinfo_size;
char *p, *hdr_p, *buf;
p = page_address(page) + offset;
hdr_p = p;
hdr_len = vi->hdr_len;
if (vi->mergeable_rx_bufs)
hdr_padded_len = hdr_len;
else
hdr_padded_len = sizeof(struct padded_vnet_hdr);
buf = p - headroom;
len -= hdr_len;
offset += hdr_padded_len;
p += hdr_padded_len;
tailroom = truesize - headroom - hdr_padded_len - len;
shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) {
skb = virtnet_build_skb(buf, truesize, p - buf, len);
if (unlikely(!skb))
return NULL;
page = (struct page *)page->private;
if (page)
give_pages(rq, page);
goto ok;
}
/* copy small packet so we can reuse these pages for small data */
skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN);
if (unlikely(!skb))
return NULL;
/* Copy all frame if it fits skb->head, otherwise
* we let virtio_net_hdr_to_skb() and GRO pull headers as needed.
*/
if (len <= skb_tailroom(skb))
copy = len;
else
copy = ETH_HLEN;
skb_put_data(skb, p, copy);
len -= copy;
offset += copy;
if (vi->mergeable_rx_bufs) {
if (len)
skb_add_rx_frag(skb, 0, page, offset, len, truesize);
else
page_to_free = page;
goto ok;
}
/*
* Verify that we can indeed put this data into a skb.
* This is here to handle cases when the device erroneously
* tries to receive more than is possible. This is usually
* the case of a broken device.
*/
if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) {
net_dbg_ratelimited("%s: too much data\n", skb->dev->name);
dev_kfree_skb(skb);
return NULL;
}
BUG_ON(offset >= PAGE_SIZE);
while (len) {
unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset,
frag_size, truesize);
len -= frag_size;
page = (struct page *)page->private;
offset = 0;
}
if (page)
give_pages(rq, page);
ok:
hdr = skb_vnet_common_hdr(skb);
memcpy(hdr, hdr_p, hdr_len);
if (page_to_free)
put_page(page_to_free);
return skb;
}
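/* Release the bookkeeping for a pre-mapped receive buffer.  The struct
 * virtnet_rq_dma describing the mapping sits at the head of the page the
 * buffer was carved from: drop one reference, sync the used region for the
 * CPU if needed, and unmap and free the page once the last buffer carved
 * from it has been returned.
 */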
static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len)
{
struct page *page = virt_to_head_page(buf);
struct virtnet_rq_dma *dma;
void *head;
int offset;
head = page_address(page);
dma = head;
--dma->ref;
if (dma->need_sync && len) {
offset = buf - (head + sizeof(*dma));
virtqueue_dma_sync_single_range_for_cpu(rq->vq, dma->addr,
offset, len,
DMA_FROM_DEVICE);
}
if (dma->ref)
return;
virtqueue_dma_unmap_single_attrs(rq->vq, dma->addr, dma->len,
DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
put_page(page);
}
static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx)
{
void *buf;
buf = virtqueue_get_buf_ctx(rq->vq, len, ctx);
if (buf && rq->do_dma)
virtnet_rq_unmap(rq, buf, *len);
return buf;
}
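/* Prepare rq->sg[0] for the buffer that is about to be posted.  Without
 * pre-mapping this is a plain sg_init_one(); with pre-mapping the DMA
 * address is derived from the metadata stored at the head of the page, so
 * the buffer does not have to be mapped again.
 */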
static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len)
{
struct virtnet_rq_dma *dma;
dma_addr_t addr;
u32 offset;
void *head;
if (!rq->do_dma) {
sg_init_one(rq->sg, buf, len);
return;
}
head = page_address(rq->alloc_frag.page);
offset = buf - head;
dma = head;
addr = dma->addr - sizeof(*dma) + offset;
sg_init_table(rq->sg, 1);
rq->sg[0].dma_address = addr;
rq->sg[0].length = len;
}
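/* Carve a size-byte receive buffer out of rq->alloc_frag.  With pre-mapping
 * enabled the page is laid out as
 *
 *	[ struct virtnet_rq_dma | buf | buf | ... ]
 *
 * the area after the metadata is DMA-mapped once when the page is first
 * used, and dma->ref counts the outstanding buffers so virtnet_rq_unmap()
 * knows when the mapping can be torn down.
 */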
static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp)
{
struct page_frag *alloc_frag = &rq->alloc_frag;
struct virtnet_rq_dma *dma;
void *buf, *head;
dma_addr_t addr;
if (unlikely(!skb_page_frag_refill(size, alloc_frag, gfp)))
return NULL;
head = page_address(alloc_frag->page);
if (rq->do_dma) {
dma = head;
/* new pages */
if (!alloc_frag->offset) {
if (rq->last_dma) {
/* Now that the new page is allocated, the previous dma
 * will get no new users: drop our reference so it can be
 * unmapped once the ref reaches 0.
*/
virtnet_rq_unmap(rq, rq->last_dma, 0);
rq->last_dma = NULL;
}
dma->len = alloc_frag->size - sizeof(*dma);
addr = virtqueue_dma_map_single_attrs(rq->vq, dma + 1,
dma->len, DMA_FROM_DEVICE, 0);
if (virtqueue_dma_mapping_error(rq->vq, addr))
return NULL;
dma->addr = addr;
dma->need_sync = virtqueue_dma_need_sync(rq->vq, addr);
/* Add a reference to dma to prevent the entire dma from
* being released during error handling. This reference
* will be freed after the pages are no longer used.
*/
get_page(alloc_frag->page);
dma->ref = 1;
alloc_frag->offset = sizeof(*dma);
rq->last_dma = dma;
}
++dma->ref;
}
buf = head + alloc_frag->offset;
get_page(alloc_frag->page);
alloc_frag->offset += size;
return buf;
}
static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf)
{
struct virtnet_info *vi = vq->vdev->priv;
struct receive_queue *rq;
int i = vq2rxq(vq);
rq = &vi->rq[i];
if (rq->xsk_pool) {
xsk_buff_free((struct xdp_buff *)buf);
return;
}
if (rq->do_dma)
virtnet_rq_unmap(rq, buf, 0);
virtnet_rq_free_buf(vi, rq, buf);
}
static void free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
bool in_napi)
{
struct virtnet_sq_free_stats stats = {0};
__free_old_xmit(sq, txq, in_napi, &stats);
/* Avoid overhead when no packets have been processed; this
 * happens when called speculatively from start_xmit.
*/
if (!stats.packets && !stats.napi_packets)
return;
u64_stats_update_begin(&sq->stats.syncp);
u64_stats_add(&sq->stats.bytes, stats.bytes + stats.napi_bytes);
u64_stats_add(&sq->stats.packets, stats.packets + stats.napi_packets);
u64_stats_update_end(&sq->stats.syncp);
}
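/* The last xdp_queue_pairs of the active queue pairs are the ones used for
 * XDP_TX/ndo_xdp_xmit, so buffers completed on them are raw XDP buffers
 * rather than skbs and have to be reclaimed accordingly.
 */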
static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
{
if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
return false;
else if (q < vi->curr_queue_pairs)
return true;
else
return false;
}
static void check_sq_full_and_disable(struct virtnet_info *vi,
struct net_device *dev,
struct send_queue *sq)
{
bool use_napi = sq->napi.weight;
int qnum;
qnum = sq - vi->sq;
/* If running out of space, stop queue to avoid getting packets that we
* are then unable to transmit.
* An alternative would be to force queuing layer to requeue the skb by
* returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be
* returned in a normal path of operation: it means that driver is not
* maintaining the TX queue stop/start state properly, and causes
* the stack to do a non-trivial amount of useless work.
* Since most packets only take 1 or 2 ring slots, stopping the queue
* early means 16 slots are typically wasted.
*/
if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
netif_tx_stop_queue(txq);
u64_stats_update_begin(&sq->stats.syncp);
u64_stats_inc(&sq->stats.stop);
u64_stats_update_end(&sq->stats.syncp);
if (use_napi) {
if (unlikely(!virtqueue_enable_cb_delayed(sq->vq)))
virtqueue_napi_schedule(&sq->napi, sq->vq);
} else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
/* More just got used, free them then recheck. */
free_old_xmit(sq, txq, false);
if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
netif_start_subqueue(dev, qnum);
u64_stats_update_begin(&sq->stats.syncp);
u64_stats_inc(&sq->stats.wake);
u64_stats_update_end(&sq->stats.syncp);
virtqueue_disable_cb(sq->vq);
}
}
}
}
static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
{
sg->dma_address = addr;
sg->length = len;
}
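/* Turn a completed XSK receive buffer back into an xdp_buff: verify that the
 * used length fits the frame (plus the virtio-net header pulled in front of
 * it), then set the buffer size and sync it for the CPU.  On a length error
 * the buffer is freed and NULL is returned.
 */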
static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi,
struct receive_queue *rq, void *buf, u32 len)
{
struct xdp_buff *xdp;
u32 bufsize;
xdp = (struct xdp_buff *)buf;
bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool) + vi->hdr_len;
if (unlikely(len > bufsize)) {
pr_debug("%s: rx error: len %u exceeds truesize %u\n",
vi->dev->name, len, bufsize);
DEV_STATS_INC(vi->dev, rx_length_errors);
xsk_buff_free(xdp);
return NULL;
}
xsk_buff_set_size(xdp, len);
xsk_buff_dma_sync_for_cpu(xdp);
return xdp;
}
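/* Build an skb from an XSK buffer by copying: the payload (plus any XDP
 * metadata in front of it) is copied into a freshly allocated skb and the
 * XSK buffer is freed back to the pool right away.
 */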
static struct sk_buff *xsk_construct_skb(struct receive_queue *rq,
struct xdp_buff *xdp)
{
unsigned int metasize = xdp->data - xdp->data_meta;
struct sk_buff *skb;
unsigned int size;
size = xdp->data_end - xdp->data_hard_start;
skb = napi_alloc_skb(&rq->napi, size);
if (unlikely(!skb)) {
xsk_buff_free(xdp);
return NULL;
}
skb_reserve(skb, xdp->data_meta - xdp->data_hard_start);
size = xdp->data_end - xdp->data_meta;
memcpy(__skb_put(skb, size), xdp->data_meta, size);
if (metasize) {
__skb_pull(skb, metasize);
skb_metadata_set(skb, metasize);
}
xsk_buff_free(xdp);
return skb;
}
static struct sk_buff *virtnet_receive_xsk_small(struct net_device *dev, struct virtnet_info *vi,
struct receive_queue *rq, struct xdp_buff *xdp,
unsigned int *xdp_xmit,
struct virtnet_rq_stats *stats)
{
struct bpf_prog *prog;
u32 ret;
ret = XDP_PASS;
rcu_read_lock();
prog = rcu_dereference(rq->xdp_prog);
if (prog)
ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats);
rcu_read_unlock();
switch (ret) {
case XDP_PASS:
return xsk_construct_skb(rq, xdp);
case XDP_TX:
case XDP_REDIRECT:
return NULL;
default:
/* drop packet */
xsk_buff_free(xdp);
u64_stats_inc(&stats->drops);
return NULL;
}
}
static void xsk_drop_follow_bufs(struct net_device *dev,
struct receive_queue *rq,
u32 num_buf,
struct virtnet_rq_stats *stats)
{
struct xdp_buff *xdp;
u32 len;
while (num_buf-- > 1) {
xdp = virtqueue_get_buf(rq->vq, &len);
if (unlikely(!xdp)) {
pr_debug("%s: rx error: %d buffers missing\n",
dev->name, num_buf);
DEV_STATS_INC(dev, rx_length_errors);
break;
}
u64_stats_add(&stats->bytes, len);
xsk_buff_free(xdp);
}
}
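/* Mergeable-buffer XSK receive: the head skb already carries the first
 * buffer; pull the remaining num_buf - 1 buffers off the ring, copy each one
 * out of its XSK frame into a page fragment and append it to the skb.  On
 * failure the rest of the chain is drained and dropped so the ring stays
 * consistent.
 */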
static int xsk_append_merge_buffer(struct virtnet_info *vi,
struct receive_queue *rq,
struct sk_buff *head_skb,
u32 num_buf,
struct virtio_net_hdr_mrg_rxbuf *hdr,
struct virtnet_rq_stats *stats)
{
struct sk_buff *curr_skb;
struct xdp_buff *xdp;
u32 len, truesize;
struct page *page;
void *buf;
curr_skb = head_skb;
while (--num_buf) {
buf = virtqueue_get_buf(rq->vq, &len);
if (unlikely(!buf)) {
pr_debug("%s: rx error: %d buffers out of %d missing\n",
vi->dev->name, num_buf,
virtio16_to_cpu(vi->vdev,
hdr->num_buffers));
DEV_STATS_INC(vi->dev, rx_length_errors);
return -EINVAL;
}
u64_stats_add(&stats->bytes, len);
xdp = buf_to_xdp(vi, rq, buf, len);
if (!xdp)
goto err;
buf = napi_alloc_frag(len);
if (!buf) {
xsk_buff_free(xdp);
goto err;
}
memcpy(buf, xdp->data - vi->hdr_len, len);
xsk_buff_free(xdp);
page = virt_to_page(buf);
truesize = len;
curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page,
buf, len, truesize);
if (!curr_skb) {
put_page(page);
goto err;
}
}
return 0;
err:
xsk_drop_follow_bufs(vi->dev, rq, num_buf, stats);
return -EINVAL;
}
static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct virtnet_info *vi,
struct receive_queue *rq, struct xdp_buff *xdp,
unsigned int *xdp_xmit,
struct virtnet_rq_stats *stats)
{
struct virtio_net_hdr_mrg_rxbuf *hdr;
struct bpf_prog *prog;
struct sk_buff *skb;
u32 ret, num_buf;
hdr = xdp->data - vi->hdr_len;
num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
ret = XDP_PASS;
rcu_read_lock();
prog = rcu_dereference(rq->xdp_prog);
/* TODO: support multi buffer. */
if (prog && num_buf == 1)
ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats);
rcu_read_unlock();
switch (ret) {
case XDP_PASS:
skb = xsk_construct_skb(rq, xdp);
if (!skb)
goto drop_bufs;
if (xsk_append_merge_buffer(vi, rq, skb, num_buf, hdr, stats)) {
dev_kfree_skb(skb);
goto drop;
}
return skb;
case XDP_TX:
case XDP_REDIRECT:
return NULL;
default:
/* drop packet */
xsk_buff_free(xdp);
}
drop_bufs:
xsk_drop_follow_bufs(dev, rq, num_buf, stats);
drop:
u64_stats_inc(&stats->drops);
return NULL;
}
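/* Entry point for a buffer completed on an XSK-backed receive queue: strip
 * the virtio-net header from the reported length, convert the buffer to an
 * xdp_buff and hand it to the small-buffer or mergeable-buffer path; if that
 * produces an skb, finish the usual receive processing on it.
 */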
static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queue *rq,
void *buf, u32 len,
unsigned int *xdp_xmit,
struct virtnet_rq_stats *stats)
{
struct net_device *dev = vi->dev;
struct sk_buff *skb = NULL;
struct xdp_buff *xdp;
u8 flags;
len -= vi->hdr_len;
u64_stats_add(&stats->bytes, len);
xdp = buf_to_xdp(vi, rq, buf, len);
if (!xdp)
return;
if (unlikely(len < ETH_HLEN)) {
pr_debug("%s: short packet %i\n", dev->name, len);
DEV_STATS_INC(dev, rx_length_errors);
xsk_buff_free(xdp);
return;
}
flags = ((struct virtio_net_common_hdr *)(xdp->data - vi->hdr_len))->hdr.flags;
if (!vi->mergeable_rx_bufs)
skb = virtnet_receive_xsk_small(dev, vi, rq, xdp, xdp_xmit, stats);
else
skb = virtnet_receive_xsk_merge(dev, vi, rq, xdp, xdp_xmit, stats);
if (skb)
virtnet_receive_done(vi, rq, skb, flags);
}
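/* Refill an XSK receive queue: allocate a batch of buffers from the pool and
 * post each one as a single pre-mapped sg entry.  The DMA address is moved
 * back by vi->hdr_len so the device writes the virtio-net header into part
 * of the headroom reserved in front of the frame.
 */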
static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue *rq,
struct xsk_buff_pool *pool, gfp_t gfp)
{
struct xdp_buff **xsk_buffs;
dma_addr_t addr;
int err = 0;
u32 len, i;
int num;
xsk_buffs = rq->xsk_buffs;
num = xsk_buff_alloc_batch(pool, xsk_buffs, rq->vq->num_free);
if (!num)
return -ENOMEM;
len = xsk_pool_get_rx_frame_size(pool) + vi->hdr_len;
for (i = 0; i < num; ++i) {
/* Use part of XDP_PACKET_HEADROOM as the virtnet hdr space.
* We assume XDP_PACKET_HEADROOM is larger than hdr->len.
* (see function virtnet_xsk_pool_enable)
*/
addr = xsk_buff_xdp_get_dma(xsk_buffs[i]) - vi->hdr_len;
sg_init_table(rq->sg, 1);
sg_fill_dma(rq->sg, addr, len);
err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, xsk_buffs[i], gfp);
if (err)
goto err;
}
return num;
err:
for (; i < num; ++i)
xsk_buff_free(xsk_buffs[i]);
return err;
}
static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag)
{
struct virtnet_info *vi = netdev_priv(dev);
struct send_queue *sq;
if (!netif_running(dev))
return -ENETDOWN;
if (qid >= vi->curr_queue_pairs)
return -EINVAL;
sq = &vi->sq[qid];
if (napi_if_scheduled_mark_missed(&sq->napi))
return 0;
local_bh_disable();
virtqueue_napi_schedule(&sq->napi, sq->vq);
local_bh_enable();
return 0;
}
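/* Queue one xdp_frame for transmission: a zeroed virtio-net header is placed
 * in the frame's headroom (so the frame must have at least vi->hdr_len of
 * headroom) and the header plus data plus any fragments are added to the
 * virtqueue as one outbuf.  Returns -EOVERFLOW if the headroom is too small
 * and -ENOSPC if the ring has no room; the caller keeps ownership of the
 * frame on failure.
 */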
static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
struct send_queue *sq,
struct xdp_frame *xdpf)
{
struct virtio_net_hdr_mrg_rxbuf *hdr;
struct skb_shared_info *shinfo;
u8 nr_frags = 0;
int err, i;
if (unlikely(xdpf->headroom < vi->hdr_len))
return -EOVERFLOW;
if (unlikely(xdp_frame_has_frags(xdpf))) {
shinfo = xdp_get_shared_info_from_frame(xdpf);
nr_frags = shinfo->nr_frags;
}
/* The wrapping function virtnet_xdp_xmit() needs to free up the pending old
 * buffers; to do so it computes the position of skb_shared_info via
 * xdp_get_frame_len() and xdp_return_frame(), both of which rely on
 * xdpf->data and xdpf->headroom.  Therefore, update headroom here in sync
 * with the data pointer adjustment below.
*/
xdpf->headroom -= vi->hdr_len;
xdpf->data -= vi->hdr_len;
/* Zero header and leave csum up to XDP layers */
hdr = xdpf->data;
memset(hdr, 0, vi->hdr_len);
xdpf->len += vi->hdr_len;
sg_init_table(sq->sg, nr_frags + 1);
sg_set_buf(sq->sg, xdpf->data, xdpf->len);
for (i = 0; i < nr_frags; i++) {
skb_frag_t *frag = &shinfo->frags[i];
sg_set_page(&sq->sg[i + 1], skb_frag_page(frag),
skb_frag_size(frag), skb_frag_off(frag));
}
err = virtqueue_add_outbuf(sq->vq, sq->sg, nr_frags + 1,
xdp_to_ptr(xdpf), GFP_ATOMIC);
if (unlikely(err))
return -ENOSPC; /* Caller handle free/refcnt */
return 0;
}
/* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on
* the current cpu, so it does not need to be locked.
*
* Here we use a macro instead of inline functions because we have to deal
 * with three issues at the same time: 1. the choice of sq, 2. deciding
 * whether the txq needs to be locked/unlocked and doing so, and 3. keeping
 * sparse happy.  It is difficult for a pair of inline functions to solve all
 * three cleanly.
*/
#define virtnet_xdp_get_sq(vi) ({ \
int cpu = smp_processor_id(); \
struct netdev_queue *txq; \
typeof(vi) v = (vi); \
unsigned int qp; \
\
if (v->curr_queue_pairs > nr_cpu_ids) { \
qp = v->curr_queue_pairs - v->xdp_queue_pairs; \
qp += cpu; \
txq = netdev_get_tx_queue(v->dev, qp); \
__netif_tx_acquire(txq); \
} else { \
qp = cpu % v->curr_queue_pairs; \
txq = netdev_get_tx_queue(v->dev, qp); \
__netif_tx_lock(txq, cpu); \
} \
v->sq + qp; \
})
#define virtnet_xdp_put_sq(vi, q) { \
struct netdev_queue *txq; \
typeof(vi) v = (vi); \
\
txq = netdev_get_tx_queue(v->dev, (q) - v->sq); \
if (v->curr_queue_pairs > nr_cpu_ids) \
__netif_tx_release(txq); \
else \
__netif_tx_unlock(txq); \
}
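/* Typical use, as in virtnet_xdp_xmit() below:
 *
 *	sq = virtnet_xdp_get_sq(vi);
 *	...queue frames on sq, update stats...
 *	virtnet_xdp_put_sq(vi, sq);
 */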
static int virtnet_xdp_xmit(struct net_device *dev,
int n, struct xdp_frame **frames, u32 flags)
{
struct virtnet_info *vi = netdev_priv(dev);
struct virtnet_sq_free_stats stats = {0};
struct receive_queue *rq = vi->rq;
struct bpf_prog *xdp_prog;
struct send_queue *sq;
int nxmit = 0;
int kicks = 0;
int ret;
int i;
/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
* indicate XDP resources have been successfully allocated.
*/
xdp_prog = rcu_access_pointer(rq->xdp_prog);
if (!xdp_prog)
return -ENXIO;
sq = virtnet_xdp_get_sq(vi);
if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
ret = -EINVAL;
goto out;
}
/* Free up any pending old buffers before queueing new ones. */
__free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq),
false, &stats);
for (i = 0; i < n; i++) {
struct xdp_frame *xdpf = frames[i];
if (__virtnet_xdp_xmit_one(vi, sq, xdpf))
break;
nxmit++;
}
ret = nxmit;
if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq))
check_sq_full_and_disable(vi, dev, sq);
if (flags & XDP_XMIT_FLUSH) {
if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
kicks = 1;
}
out:
u64_stats_update_begin(&sq->stats.syncp);
u64_stats_add(&sq->stats.bytes, stats.bytes);
u64_stats_add(&sq->stats.packets, stats.packets);
u64_stats_add(&sq->stats.xdp_tx, n);
u64_stats_add(&sq->stats.xdp_tx_drops, n - nxmit);
u64_stats_add(&sq->stats.kicks, kicks);
u64_stats_update_end(&sq->stats.syncp);
virtnet_xdp_put_sq(vi, sq);
return ret;
}
static void put_xdp_frags(struct xdp_buff *xdp)
{
struct skb_shared_info *shinfo;
struct page *xdp_page;
int i;
if (xdp_buff_has_frags(xdp)) {
shinfo = xdp_get_shared_info_from_buff(xdp);
for (i = 0; i < shinfo->nr_frags; i++) {
xdp_page = skb_frag_page(&shinfo->frags[i]);
put_page(xdp_page);
}
}
}
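/* Run the XDP program on one buffer and act on the verdict: XDP_PASS is
 * returned unchanged, XDP_TX sends the frame back out through our own
 * virtnet_xdp_xmit(), XDP_REDIRECT hands it to xdp_do_redirect(), and every
 * failure or unknown action collapses to XDP_DROP so the caller can free the
 * buffer.
 */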
static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
struct net_device *dev,
unsigned int *xdp_xmit,
struct virtnet_rq_stats *stats)
{
struct xdp_frame *xdpf;
int err;
u32 act;
act = bpf_prog_run_xdp(xdp_prog, xdp);
u64_stats_inc(&stats->xdp_packets);
switch (act) {
case XDP_PASS:
return act;
case XDP_TX:
u64_stats_inc(&stats->xdp_tx);
xdpf = xdp_convert_buff_to_frame(xdp);
if (unlikely(!xdpf)) {
netdev_dbg(dev, "convert buff to frame failed for xdp\n");
return XDP_DROP;
}
err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
if (unlikely(!err)) {
xdp_return_frame_rx_napi(xdpf);
} else if (unlikely(err < 0)) {
trace_xdp_exception(dev, xdp_prog, act);
return XDP_DROP;
}
*xdp_xmit |= VIRTIO_XDP_TX;
return act;
case XDP_REDIRECT:
u64_stats_inc(&stats->xdp_redirects);
err = xdp_do_redirect(dev, xdp, xdp_prog);
if (err)
return XDP_DROP;
*xdp_xmit |= VIRTIO_XDP_REDIR;
return act;
default:
bpf_warn_invalid_xdp_action(dev, xdp_prog, act);
fallthrough;
case XDP_ABORTED:
trace_xdp_exception(dev, xdp_prog, act);
fallthrough;
case XDP_DROP:
return XDP_DROP;
}
}
static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
{
return vi->xdp_enabled ? XDP_PACKET_HEADROOM : 0;
}
/* We copy the packet for XDP in the following cases:
*
* 1) Packet is scattered across multiple rx buffers.
* 2) Headroom space is insufficient.
*
* This is inefficient but it's a temporary condition that
* we hit right after XDP is enabled and until the queue is refilled
* with large buffers with sufficient headroom - so it should affect
* at most queue size packets.
* Afterwards, the conditions to enable
* XDP should preclude the underlying device from sending packets
* across multiple buffers (num_buf > 1), and we make sure buffers
* have enough headroom.
*/
static struct page *xdp_linearize_page(struct receive_queue *rq,
int *num_buf,
struct page *p,
int offset,
int page_off,
unsigned int *len)
{
int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
struct page *page;
if (page_off + *len + tailroom > PAGE_SIZE)
return NULL;
page = alloc_page(GFP_ATOMIC);
if (!page)
return NULL;
memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
page_off += *len;
while (--*num_buf) {
unsigned int buflen;
void *buf;
int off;
buf = virtnet_rq_get_buf(rq, &buflen, NULL);
if (unlikely(!buf))
goto err_buf;
p = virt_to_head_page(buf);
off = buf - page_address(p);
/* guard against a misconfigured or uncooperative backend that
* is sending packets larger than the MTU.
*/
if ((page_off + buflen + tailroom) > PAGE_SIZE) {
put_page(p);
goto err_buf;
}
memcpy(page_address(page) + page_off,
page_address(p) + off, buflen);
page_off += buflen;
put_page(p);
}
/* Headroom does not contribute to packet length */
*len = page_off - XDP_PACKET_HEADROOM;
return page;
err_buf:
__free_pages(page, 0);
return NULL;
}
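/* Small-buffer receive fast path: build the skb directly on top of the
 * receive buffer (no payload copy) and keep a copy of the virtio-net header
 * that sits in front of the data.
 */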
static struct sk_buff *receive_small_build_skb(struct virtnet_info *vi,
unsigned int xdp_headroom,
void *buf,
unsigned int len)
{
unsigned int header_offset;
unsigned int headroom;
unsigned int buflen;
struct sk_buff *skb;
header_offset = VIRTNET_RX_PAD + xdp_headroom;
headroom = vi->hdr_len + header_offset;
buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
skb = virtnet_build_skb(buf, buflen, headroom, len);
if (unlikely(!skb))
return NULL;
buf += header_offset;
memcpy(skb_vnet_common_hdr(skb), buf, vi->hdr_len);
return skb;
}
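/* Small-buffer receive with an XDP program attached.  GSO packets and
 * partially checksummed packets are dropped up front.  If the buffer was
 * posted before XDP was enabled and lacks the required headroom, it is first
 * copied into a fresh page via xdp_linearize_page(); the program verdict
 * then decides whether an skb is built (XDP_PASS) or the buffer is
 * transmitted, redirected or dropped.
 */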
static struct sk_buff *receive_small_xdp(struct net_device *dev,
struct virtnet_info *vi,
struct receive_queue *rq,
struct bpf_prog *xdp_prog,
void *buf,
unsigned int xdp_headroom,
unsigned int len,
unsigned int *xdp_xmit,
struct virtnet_rq_stats *stats)
{
unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom;
unsigned int headroom = vi->hdr_len + header_offset;
struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset;
struct page *page = virt_to_head_page(buf);
struct page *xdp_page;
unsigned int buflen;
struct xdp_buff xdp;
struct sk_buff *skb;
unsigned int metasize = 0;
u32 act;
if (unlikely(hdr->hdr.gso_type))
goto err_xdp;
/* Partially checksummed packets must be dropped. */
if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM))
goto err_xdp;
buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
int offset = buf - page_address(page) + header_offset;
unsigned int tlen = len + vi->hdr_len;
int num_buf = 1;
xdp_headroom = virtnet_get_headroom(vi);
header_offset = VIRTNET_RX_PAD + xdp_headroom;
headroom = vi->hdr_len + header_offset;
buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
xdp_page = xdp_linearize_page(rq, &num_buf, page,
offset, header_offset,
&tlen);
if (!xdp_page)
goto err_xdp;
buf = page_address(xdp_page);
put_page(page);
page = xdp_page;
}
xdp_init_buff(&xdp, buflen, &rq->xdp_rxq);
xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len,
xdp_headroom, len, true);
act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats);
switch (act) {
case XDP_PASS:
/* Recalculate length in case bpf program changed it */
len = xdp.data_end - xdp.data;
metasize = xdp.data - xdp.data_meta;
break;
case XDP_TX:
case XDP_REDIRECT:
goto xdp_xmit;
default:
goto err_xdp;
}
skb = virtnet_build_skb(buf, buflen, xdp.data - buf, len);
if (unlikely(!skb))
goto err;
if (metasize)
skb_metadata_set(skb, metasize);
return skb;
err_xdp:
u64_stats_inc(&stats->xdp_drops);
err:
u64_stats_inc(&stats->drops);
put_page(page);
xdp_xmit:
return NULL;
}
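
/* Receive one packet that was placed in a single small buffer. The XDP
 * headroom is recovered from @ctx; the buffer is either handed to the
 * XDP path or turned into an skb directly.
 */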
static struct sk_buff *receive_small(struct net_device *dev,
struct virtnet_info *vi,
struct receive_queue *rq,
void *buf, void *ctx,
unsigned int len,
unsigned int *xdp_xmit,
struct virtnet_rq_stats *stats)
{
unsigned int xdp_headroom = (unsigned long)ctx;
struct page *page = virt_to_head_page(buf);
struct sk_buff *skb;
/* We passed the address of the virtio-net header to the virtio core,
 * so strip the padding off again here.
 */
buf -= VIRTNET_RX_PAD + xdp_headroom;
len -= vi->hdr_len;
u64_stats_add(&stats->bytes, len);
if (unlikely(len > GOOD_PACKET_LEN)) {
pr_debug("%s: rx error: len %u exceeds max size %d\n",
dev->name, len, GOOD_PACKET_LEN);
DEV_STATS_INC(dev, rx_length_errors);
goto err;
}
if (unlikely(vi->xdp_enabled)) {
struct bpf_prog *xdp_prog;
rcu_read_lock();
xdp_prog = rcu_dereference(rq->xdp_prog);
if (xdp_prog) {
skb = receive_small_xdp(dev, vi, rq, xdp_prog, buf,
xdp_headroom, len, xdp_xmit,
stats);
rcu_read_unlock();
return skb;
}
rcu_read_unlock();
}
skb = receive_small_build_skb(vi, xdp_headroom, buf, len);
if (likely(skb))
return skb;
err:
u64_stats_inc(&stats->drops);
put_page(page);
return NULL;
}
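
/* Receive one packet from the big-packets path: the data was placed in
 * a chain of pages, which page_to_skb() converts into an skb. On
 * failure the pages are returned to the queue via give_pages().
 */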
static struct sk_buff *receive_big(struct net_device *dev,
struct virtnet_info *vi,
struct receive_queue *rq,
void *buf,
unsigned int len,
struct virtnet_rq_stats *stats)
{
struct page *page = buf;
struct sk_buff *skb =
page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0);
u64_stats_add(&stats->bytes, len - vi->hdr_len);
if (unlikely(!skb))
goto err;
return skb;
err:
u64_stats_inc(&stats->drops);
give_pages(rq, page);
return NULL;
}
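
/* Discard the remaining buffers of a mergeable packet that cannot be
 * delivered, accounting their length and dropping the page references.
 */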
static void mergeable_buf_free(struct receive_queue *rq, int num_buf,
struct net_device *dev,
struct virtnet_rq_stats *stats)
{
struct page *page;
void *buf;
int len;
while (num_buf-- > 1) {
buf = virtnet_rq_get_buf(rq, &len, NULL);
if (unlikely(!buf)) {
pr_debug("%s: rx error: %d buffers missing\n",
dev->name, num_buf);
DEV_STATS_INC(dev, rx_length_errors);
break;
}
u64_stats_add(&stats->bytes, len);
page = virt_to_head_page(buf);
put_page(page);
}
}

/* Why not use xdp_build_skb_from_frame()?
 * The XDP core assumes that xdp frags are PAGE_SIZE in length, while
 * virtio-net differs from its requirements on two points:
 * 1. The size of the prefilled buffer is not fixed before XDP is set.
 * 2. xdp_build_skb_from_frame() does more work than we need, such as
 *    eth_type_trans() (which virtio-net already does in receive_buf()).
 */
static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev,
struct virtnet_info *vi,
struct xdp_buff *xdp,
unsigned int xdp_frags_truesz)
{
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
unsigned int headroom, data_len;
struct sk_buff *skb;
int metasize;
u8 nr_frags;
if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) {
pr_debug("Error building skb as missing reserved tailroom for xdp");
return NULL;
}
if (unlikely(xdp_buff_has_frags(xdp)))
nr_frags = sinfo->nr_frags;
skb = build_skb(xdp->data_hard_start, xdp->frame_sz);
if (unlikely(!skb))
return NULL;
headroom = xdp->data - xdp->data_hard_start;
data_len = xdp->data_end - xdp->data;
skb_reserve(skb, headroom);
__skb_put(skb, data_len);
metasize = xdp->data - xdp->data_meta;
metasize = metasize > 0 ? metasize : 0;
if (metasize)
skb_metadata_set(skb, metasize);
if (unlikely(xdp_buff_has_frags(xdp)))
xdp_update_skb_shared_info(skb, nr_frags,
sinfo->xdp_frags_size,
xdp_frags_truesz,
xdp_buff_is_frag_pfmemalloc(xdp));
return skb;
}
/* TODO: build xdp in big mode */
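/* Assemble a (possibly multi-buffer) xdp_buff from a mergeable receive:
 * the first buffer becomes the linear area, any remaining buffers are
 * attached as frags in the shared info. Returns 0 on success, -EINVAL
 * if buffers are missing, oversized, or too numerous.
 */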
static int virtnet_build_xdp_buff_mrg(struct net_device *dev,
struct virtnet_info *vi,
struct receive_queue *rq,
struct xdp_buff *xdp,
void *buf,
unsigned int len,
unsigned int frame_sz,
int *num_buf,
unsigned int *xdp_frags_truesize,
struct virtnet_rq_stats *stats)
{
struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
unsigned int headroom, tailroom, room;
unsigned int truesize, cur_frag_size;
struct skb_shared_info *shinfo;
unsigned int xdp_frags_truesz = 0;
struct page *page;
skb_frag_t *frag;
int offset;
void *ctx;
xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq);
xdp_prepare_buff(xdp, buf - XDP_PACKET_HEADROOM,
XDP_PACKET_HEADROOM + vi->hdr_len, len - vi->hdr_len, true);
if (!*num_buf)
return 0;
if (*num_buf > 1) {
/* To build a multi-buffer xdp_buff, the XDP_FLAGS_HAS_FRAGS bit
 * must be set in the xdp_buff flags.
 */
if (!xdp_buff_has_frags(xdp))
xdp_buff_set_frags_flag(xdp);
shinfo = xdp_get_shared_info_from_buff(xdp);
shinfo->nr_frags = 0;
shinfo->xdp_frags_size = 0;
}
if (*num_buf > MAX_SKB_FRAGS + 1)
return -EINVAL;
while (--*num_buf > 0) {
buf = virtnet_rq_get_buf(rq, &len, &ctx);
if (unlikely(!buf)) {
pr_debug("%s: rx error: %d buffers out of %d missing\n",
dev->name, *num_buf,
virtio16_to_cpu(vi->vdev, hdr->num_buffers));
DEV_STATS_INC(dev, rx_length_errors);
goto err;
}
u64_stats_add(&stats->bytes, len);
page = virt_to_head_page(buf);
offset = buf - page_address(page);
truesize = mergeable_ctx_to_truesize(ctx);
headroom = mergeable_ctx_to_headroom(ctx);
tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
room = SKB_DATA_ALIGN(headroom + tailroom);
cur_frag_size = truesize;
xdp_frags_truesz += cur_frag_size;
if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) {
put_page(page);
pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
dev->name, len, (unsigned long)(truesize - room));
DEV_STATS_INC(dev, rx_length_errors);
goto err;
}
frag = &shinfo->frags[shinfo->nr_frags++];
skb_frag_fill_page_desc(frag, page, offset, len);
if (page_is_pfmemalloc(page))
xdp_buff_set_frag_pfmemalloc(xdp);
shinfo->xdp_frags_size += len;
}
*xdp_frags_truesize = xdp_frags_truesz;
return 0;
err:
put_xdp_frags(xdp);
return -EINVAL;
}
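
/* Prepare a buffer that XDP can run on. If the prefilled buffer already
 * has enough headroom it is used in place; otherwise the data is
 * linearized or copied into a freshly allocated page. *page and
 * *frame_sz are updated to describe the buffer actually used.
 */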
static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
struct receive_queue *rq,
struct bpf_prog *xdp_prog,
void *ctx,
unsigned int *frame_sz,
int *num_buf,
struct page **page,
int offset,
unsigned int *len,
struct virtio_net_hdr_mrg_rxbuf *hdr)
{
unsigned int truesize = mergeable_ctx_to_truesize(ctx);
unsigned int headroom = mergeable_ctx_to_headroom(ctx);
struct page *xdp_page;
unsigned int xdp_room;
/* Transient failure which in theory could occur if
* in-flight packets from before XDP was enabled reach
* the receive path after XDP is loaded.
*/
if (unlikely(hdr->hdr.gso_type))
return NULL;
/* Partially checksummed packets must be dropped. */
if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM))
return NULL;
/* The XDP core assumes the frag size is PAGE_SIZE, but buffers
 * with headroom may add a hole to the truesize, which makes their
 * length exceed PAGE_SIZE. So the hole mechanism is disabled for
 * XDP; see add_recvbuf_mergeable().
 */
*frame_sz = truesize;
if (likely(headroom >= virtnet_get_headroom(vi) &&
(*num_buf == 1 || xdp_prog->aux->xdp_has_frags))) {
return page_address(*page) + offset;
}
/* This happens when there is not enough headroom because the
 * buffer was prefilled before XDP was set. It should only happen
 * for the first several packets. A vq reset could be used here to
 * clean up the prefilled buffers, but many existing devices do not
 * support it, and we don't want to bother users who are using XDP
 * normally.
 */
if (!xdp_prog->aux->xdp_has_frags) {
/* linearize data for XDP */
xdp_page = xdp_linearize_page(rq, num_buf,
*page, offset,
XDP_PACKET_HEADROOM,
len);
if (!xdp_page)
return NULL;
} else {
xdp_room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM +
sizeof(struct skb_shared_info));
if (*len + xdp_room > PAGE_SIZE)
return NULL;
xdp_page = alloc_page(GFP_ATOMIC);
if (!xdp_page)
return NULL;
memcpy(page_address(xdp_page) + XDP_PACKET_HEADROOM,
page_address(*page) + offset, *len);
}
*frame_sz = PAGE_SIZE;
put_page(*page);
*page = xdp_page;
return page_address(*page) + XDP_PACKET_HEADROOM;
}
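
/* XDP receive path for mergeable buffers: build an xdp_buff from the
 * received buffers, run the attached program, and either construct an
 * skb (XDP_PASS), let the frame be transmitted/redirected, or drop it.
 */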
static struct sk_buff *receive_mergeable_xdp(struct net_device *dev,
struct virtnet_info *vi,
struct receive_queue *rq,
struct bpf_prog *xdp_prog,
void *buf,
void *ctx,
unsigned int len,
unsigned int *xdp_xmit,
struct virtnet_rq_stats *stats)
{
struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
struct page *page = virt_to_head_page(buf);
int offset = buf - page_address(page);
unsigned int xdp_frags_truesz = 0;
struct sk_buff *head_skb;
unsigned int frame_sz;
struct xdp_buff xdp;
void *data;
u32 act;
int err;
data = mergeable_xdp_get_buf(vi, rq, xdp_prog, ctx, &frame_sz, &num_buf, &page,
offset, &len, hdr);
if (unlikely(!data))
goto err_xdp;
err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz,
&num_buf, &xdp_frags_truesz, stats);
if (unlikely(err))
goto err_xdp;
act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats);
switch (act) {
case XDP_PASS:
head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz);
if (unlikely(!head_skb))
break;
return head_skb;
case XDP_TX:
case XDP_REDIRECT:
return NULL;
default:
break;
}
put_xdp_frags(&xdp);
err_xdp:
put_page(page);
mergeable_buf_free(rq, num_buf, dev, stats);
u64_stats_inc(&stats->xdp_drops);
u64_stats_inc(&stats->drops);
return NULL;
}
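
/* Append one receive buffer to the skb under construction, coalescing
 * with the previous fragment when possible and chaining a new skb onto
 * the frag_list once MAX_SKB_FRAGS is reached.
 */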
static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb,
struct sk_buff *curr_skb,
struct page *page, void *buf,
int len, int truesize)
{
int num_skb_frags;
int offset;
num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC);
if (unlikely(!nskb))
return NULL;
if (curr_skb == head_skb)
skb_shinfo(curr_skb)->frag_list = nskb;
else
curr_skb->next = nskb;
curr_skb = nskb;
head_skb->truesize += nskb->truesize;
num_skb_frags = 0;
}
if (curr_skb != head_skb) {
head_skb->data_len += len;
head_skb->len += len;
head_skb->truesize += truesize;
}
offset = buf - page_address(page);
if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
put_page(page);
skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
len, truesize);
} else {
skb_add_rx_frag(curr_skb, num_skb_frags, page,
offset, len, truesize);
}
return curr_skb;
}
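
/* Receive path for mergeable buffers. Dispatches to the XDP handler
 * when a program is attached; otherwise the first buffer becomes the
 * head skb and the remaining num_buffers - 1 buffers are appended as
 * fragments.
 */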
static struct sk_buff *receive_mergeable(struct net_device *dev,
struct virtnet_info *vi,
struct receive_queue *rq,
void *buf,
void *ctx,
unsigned int len,
unsigned int *xdp_xmit,
struct virtnet_rq_stats *stats)
{
struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
struct page *page = virt_to_head_page(buf);
int offset = buf - page_address(page);
struct sk_buff *head_skb, *curr_skb;
unsigned int truesize = mergeable_ctx_to_truesize(ctx);
unsigned int headroom = mergeable_ctx_to_headroom(ctx);
unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
head_skb = NULL;
u64_stats_add(&stats->bytes, len - vi->hdr_len);
if (unlikely(len > truesize - room)) {
pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
dev->name, len, (unsigned long)(truesize - room));
DEV_STATS_INC(dev, rx_length_errors);
goto err_skb;
}
if (unlikely(vi->xdp_enabled)) {
struct bpf_prog *xdp_prog;
rcu_read_lock();
xdp_prog = rcu_dereference(rq->xdp_prog);
if (xdp_prog) {
head_skb = receive_mergeable_xdp(dev, vi, rq, xdp_prog, buf, ctx,
len, xdp_xmit, stats);
rcu_read_unlock();
return head_skb;
}
rcu_read_unlock();
}
head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom);
curr_skb = head_skb;
if (unlikely(!curr_skb))
goto err_skb;
while (--num_buf) {
buf = virtnet_rq_get_buf(rq, &len, &ctx);
if (unlikely(!buf)) {
pr_debug("%s: rx error: %d buffers out of %d missing\n",
dev->name, num_buf,
virtio16_to_cpu(vi->vdev,
hdr->num_buffers));
DEV_STATS_INC(dev, rx_length_errors);
goto err_buf;
}
u64_stats_add(&stats->bytes, len);
page = virt_to_head_page(buf);
truesize = mergeable_ctx_to_truesize(ctx);
headroom = mergeable_ctx_to_headroom(ctx);
tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
room = SKB_DATA_ALIGN(headroom + tailroom);
if (unlikely(len > truesize - room)) {
pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
dev->name, len, (unsigned long)(truesize - room));
DEV_STATS_INC(dev, rx_length_errors);
goto err_skb;
}
curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page,
buf, len, truesize);
if (!curr_skb)
goto err_skb;
}
ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);
return head_skb;
err_skb:
put_page(page);
mergeable_buf_free(rq, num_buf, dev, stats);
err_buf:
u64_stats_inc(&stats->drops);
dev_kfree_skb(head_skb);
return NULL;
}
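/* Translate the device-reported hash_report type into the kernel's
 * PKT_HASH_TYPE_* and record the 32-bit hash value in the skb, so the
 * stack can reuse the hash already computed by the device.
 */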
static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash,
struct sk_buff *skb)
{
enum pkt_hash_types rss_hash_type;
if (!hdr_hash || !skb)
return;
switch (__le16_to_cpu(hdr_hash->hash_report)) {
case VIRTIO_NET_HASH_REPORT_TCPv4:
case VIRTIO_NET_HASH_REPORT_UDPv4:
case VIRTIO_NET_HASH_REPORT_TCPv6:
case VIRTIO_NET_HASH_REPORT_UDPv6:
case VIRTIO_NET_HASH_REPORT_TCPv6_EX:
case VIRTIO_NET_HASH_REPORT_UDPv6_EX:
rss_hash_type = PKT_HASH_TYPE_L4;
break;
case VIRTIO_NET_HASH_REPORT_IPv4:
case VIRTIO_NET_HASH_REPORT_IPv6:
case VIRTIO_NET_HASH_REPORT_IPv6_EX:
rss_hash_type = PKT_HASH_TYPE_L3;
break;
case VIRTIO_NET_HASH_REPORT_NONE:
default:
rss_hash_type = PKT_HASH_TYPE_NONE;
}
skb_set_hash(skb, __le32_to_cpu(hdr_hash->hash_value), rss_hash_type);
}
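/* Common tail of the receive path: propagate the device-reported hash and
 * checksum status, convert the virtio_net header into skb GSO metadata,
 * then hand the skb to GRO on this queue's NAPI instance.
 */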
static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq,
struct sk_buff *skb, u8 flags)
{
struct virtio_net_common_hdr *hdr;
struct net_device *dev = vi->dev;
hdr = skb_vnet_common_hdr(skb);
if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report)
virtio_skb_set_hash(&hdr->hash_v1_hdr, skb);
if (flags & VIRTIO_NET_HDR_F_DATA_VALID)
skb->ip_summed = CHECKSUM_UNNECESSARY;
if (virtio_net_hdr_to_skb(skb, &hdr->hdr,
virtio_is_little_endian(vi->vdev))) {
net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n",
dev->name, hdr->hdr.gso_type,
hdr->hdr.gso_size);
goto frame_err;
}
skb_record_rx_queue(skb, vq2rxq(rq->vq));
skb->protocol = eth_type_trans(skb, dev);
pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
ntohs(skb->protocol), skb->len, skb->pkt_type);
napi_gro_receive(&rq->napi, skb);
return;
frame_err:
DEV_STATS_INC(dev, rx_frame_errors);
dev_kfree_skb(skb);
}
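/* Handle one completed receive buffer: validate its length, save the header
 * flags before any XDP program can rewrite them, build an skb via the
 * mergeable/big/small path, and finish it in virtnet_receive_done().
 */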
static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
void *buf, unsigned int len, void **ctx,
unsigned int *xdp_xmit,
struct virtnet_rq_stats *stats)
{
struct net_device *dev = vi->dev;
struct sk_buff *skb;
u8 flags;
if (unlikely(len < vi->hdr_len + ETH_HLEN)) {
pr_debug("%s: short packet %i\n", dev->name, len);
DEV_STATS_INC(dev, rx_length_errors);
virtnet_rq_free_buf(vi, rq, buf);
return;
}
/* 1. Save the flags early, as the XDP program might overwrite them.
* These flags ensure packets marked as VIRTIO_NET_HDR_F_DATA_VALID
* stay valid after XDP processing.
* 2. XDP doesn't work with partially checksummed packets (refer to
* virtnet_xdp_set()), so packets marked as
* VIRTIO_NET_HDR_F_NEEDS_CSUM get dropped during XDP processing.
*/
flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags;
if (vi->mergeable_rx_bufs)
skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit,
stats);
else if (vi->big_packets)
skb = receive_big(dev, vi, rq, buf, len, stats);
else
skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats);
if (unlikely(!skb))
return;
virtnet_receive_done(vi, rq, skb, flags);
}
/* Unlike mergeable buffers, all buffers are allocated to the
* same size, except for the headroom. For this reason we do
* not need to use mergeable_len_to_ctx here - it is enough
* to store the headroom as the context ignoring the truesize.
*/
static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
gfp_t gfp)
{
char *buf;
unsigned int xdp_headroom = virtnet_get_headroom(vi);
void *ctx = (void *)(unsigned long)xdp_headroom;
int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom;
int err;
len = SKB_DATA_ALIGN(len) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
buf = virtnet_rq_alloc(rq, len, gfp);
if (unlikely(!buf))
return -ENOMEM;
buf += VIRTNET_RX_PAD + xdp_headroom;
virtnet_rq_init_one_sg(rq, buf, vi->hdr_len + GOOD_PACKET_LEN);
err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
if (err < 0) {
if (rq->do_dma)
virtnet_rq_unmap(rq, buf, 0);
put_page(virt_to_head_page(buf));
}
return err;
}
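/* Post one "big packet" buffer: a chain of vi->big_packets_num_skbfrags + 1
 * pages linked through page->private. rq->sg[0] maps the virtio_net header
 * and the remaining sg entries map the page payloads.
 */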
static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
gfp_t gfp)
{
struct page *first, *list = NULL;
char *p;
int i, err, offset;
sg_init_table(rq->sg, vi->big_packets_num_skbfrags + 2);
/* page in rq->sg[vi->big_packets_num_skbfrags + 1] is list tail */
for (i = vi->big_packets_num_skbfrags + 1; i > 1; --i) {
first = get_a_page(rq, gfp);
if (!first) {
if (list)
give_pages(rq, list);
return -ENOMEM;
}
sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE);
/* chain new page in list head to match sg */
first->private = (unsigned long)list;
list = first;
}
first = get_a_page(rq, gfp);
if (!first) {
give_pages(rq, list);
return -ENOMEM;
}
p = page_address(first);
/* rq->sg[0], rq->sg[1] share the same page */
/* a separate rq->sg[0] for the header - required in case !any_header_sg */
sg_set_buf(&rq->sg[0], p, vi->hdr_len);
/* rq->sg[1] for data packet, from offset */
offset = sizeof(struct padded_vnet_hdr);
sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset);
/* chain first in list head */
first->private = (unsigned long)list;
err = virtqueue_add_inbuf(rq->vq, rq->sg, vi->big_packets_num_skbfrags + 2,
first, gfp);
if (err < 0)
give_pages(rq, first);
return err;
}
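/* Pick a buffer length for the mergeable path: when extra head/tailroom is
 * reserved (XDP), use the rest of the page; otherwise size the buffer to an
 * EWMA of recent packet lengths, clamped between the ring's minimum buffer
 * length and PAGE_SIZE - hdr_len, and aligned to a cache line.
 */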
static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
struct ewma_pkt_len *avg_pkt_len,
unsigned int room)
{
struct virtnet_info *vi = rq->vq->vdev->priv;
const size_t hdr_len = vi->hdr_len;
unsigned int len;
if (room)
return PAGE_SIZE - room;
len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
rq->min_buf_len, PAGE_SIZE - hdr_len);
return ALIGN(len, L1_CACHE_BYTES);
}
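/* Post one mergeable receive buffer carved out of the queue's page-frag
 * allocator. The chosen length and headroom are packed into the opaque ctx
 * token so the receive path can later recover the buffer's truesize and
 * headroom.
 */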
static int add_recvbuf_mergeable(struct virtnet_info *vi,
struct receive_queue *rq, gfp_t gfp)
{
struct page_frag *alloc_frag = &rq->alloc_frag;
unsigned int headroom = virtnet_get_headroom(vi);
unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
unsigned int len, hole;
void *ctx;
char *buf;
int err;
/* Extra tailroom is needed to satisfy XDP's assumption. This
 * means rx frag coalescing won't work, but since we have
 * disabled GSO for XDP it won't be a big issue.
 */
len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
buf = virtnet_rq_alloc(rq, len + room, gfp);
if (unlikely(!buf))
return -ENOMEM;
buf += headroom; /* advance address leaving hole at front of pkt */
hole = alloc_frag->size - alloc_frag->offset;
if (hole < len + room) {
/* To avoid internal fragmentation, if there is very likely not
* enough space for another buffer, add the remaining space to
* the current buffer.
* XDP core assumes that frame_size of xdp_buff and the length
* of the frag are PAGE_SIZE, so we disable the hole mechanism.
*/
if (!headroom)
len += hole;
alloc_frag->offset += hole;
}
virtnet_rq_init_one_sg(rq, buf, len);
ctx = mergeable_len_to_ctx(len + room, headroom);
err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
if (err < 0) {
if (rq->do_dma)
virtnet_rq_unmap(rq, buf, 0);
put_page(virt_to_head_page(buf));
}
return err;
}
/*
* Returns false if we couldn't fill entirely (OOM).
*
* Normally run in the receive path, but can also be run from ndo_open
* before we're receiving packets, or from refill_work which is
* careful to disable receiving (using napi_disable).
*/
static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
gfp_t gfp)
{
int err;
if (rq->xsk_pool) {
err = virtnet_add_recvbuf_xsk(vi, rq, rq->xsk_pool, gfp);
goto kick;
}
do {
if (vi->mergeable_rx_bufs)
err = add_recvbuf_mergeable(vi, rq, gfp);
else if (vi->big_packets)
err = add_recvbuf_big(vi, rq, gfp);
else
err = add_recvbuf_small(vi, rq, gfp);
if (err)
break;
} while (rq->vq->num_free);
kick:
if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) {
unsigned long flags;
flags = u64_stats_update_begin_irqsave(&rq->stats.syncp);
u64_stats_inc(&rq->stats.kicks);
u64_stats_update_end_irqrestore(&rq->stats.syncp, flags);
}
return err != -ENOMEM;
}
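/* Receive virtqueue completion callback: account the callback and schedule
 * NAPI for the matching receive queue.
 */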
static void skb_recv_done(struct virtqueue *rvq)
{
struct virtnet_info *vi = rvq->vdev->priv;
struct receive_queue *rq = &vi->rq[vq2rxq(rvq)];
rq->calls++;
virtqueue_napi_schedule(&rq->napi, rvq);
}
static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi)
{
napi_enable(napi);
/* If all buffers were filled by other side before we napi_enabled, we
* won't get another interrupt, so process any outstanding packets now.
* Call local_bh_enable after to trigger softIRQ processing.
*/
local_bh_disable();
virtqueue_napi_schedule(napi, vq);
local_bh_enable();
}
static void virtnet_napi_tx_enable(struct virtnet_info *vi,
struct virtqueue *vq,
struct napi_struct *napi)
{
if (!napi->weight)
return;
/* Tx napi touches cachelines on the cpu handling tx interrupts. Only
* enable the feature if this is likely affine with the transmit path.
*/
if (!vi->affinity_hint_set) {
napi->weight = 0;
return;
}
return virtnet_napi_enable(vq, napi);
}
static void virtnet_napi_tx_disable(struct napi_struct *napi)
{
if (napi->weight)
napi_disable(napi);
}
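/* Deferred refill used when try_fill_recv() ran out of memory: briefly
 * disable NAPI on each receive queue, try to refill it, and reschedule
 * ourselves if any queue is still empty.
 */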
static void refill_work(struct work_struct *work)
{
struct virtnet_info *vi =
container_of(work, struct virtnet_info, refill.work);
bool still_empty;
int i;
for (i = 0; i < vi->curr_queue_pairs; i++) {
struct receive_queue *rq = &vi->rq[i];
napi_disable(&rq->napi);
still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
virtnet_napi_enable(rq->vq, &rq->napi);
/* In theory, this can happen: if we don't get any buffers in
* we will *never* try to fill again.
*/
if (still_empty)
schedule_delayed_work(&vi->refill, HZ/2);
}
}
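/* Drain up to @budget completed buffers from an AF_XDP (xsk) receive queue
 * and hand each one to virtnet_receive_xsk_buf().
 */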
static int virtnet_receive_xsk_bufs(struct virtnet_info *vi,
struct receive_queue *rq,
int budget,
unsigned int *xdp_xmit,
struct virtnet_rq_stats *stats)
{
unsigned int len;
int packets = 0;
void *buf;
while (packets < budget) {
buf = virtqueue_get_buf(rq->vq, &len);
if (!buf)
break;
virtnet_receive_xsk_buf(vi, rq, buf, len, xdp_xmit, stats);
packets++;
}
return packets;
}
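/* Drain up to @budget completed buffers from a regular receive queue.
 * Small and mergeable buffers carry a per-buffer context token and are
 * fetched with virtnet_rq_get_buf(); the big-packet layout is handled
 * separately below.
 */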
static int virtnet_receive_packets(struct virtnet_info *vi,
struct receive_queue *rq,
int budget,
unsigned int *xdp_xmit,
struct virtnet_rq_stats *stats)
{
unsigned int len;
int packets = 0;
void *buf;
if (!vi->big_packets || vi->mergeable_rx_bufs) {
void *ctx;
while (packets < budget &&
(buf = virtnet_rq_get_buf(rq, &len, &ctx))) {
receive_buf(vi, rq, buf, len, ctx, xdp_xmit, stats);
packets++;
}
} else {
while (packets < budget &&
(buf = virtnet_rq_get_buf(rq, &len, NULL)) != NULL) {
receive_buf(vi, rq, buf, len, NULL, xdp_xmit, stats);
packets++;
}
}
return packets;
}
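/* Top-level RX dequeue helper, called from the RX NAPI handler below.
 * It dispatches to the AF_XDP path when an xsk_pool is bound to the
 * queue, or to the regular buffer path otherwise, then tops up the
 * receive ring once more than half of min(budget, vring size)
 * descriptors are free, falling back to the delayed refill work if the
 * GFP_ATOMIC refill fails. Finally it folds the on-stack per-poll stats
 * into rq->stats under the u64_stats syncp.
 */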
static int virtnet_receive(struct receive_queue *rq, int budget,
unsigned int *xdp_xmit)
{
struct virtnet_info *vi = rq->vq->vdev->priv;
struct virtnet_rq_stats stats = {};
int i, packets;
if (rq->xsk_pool)
packets = virtnet_receive_xsk_bufs(vi, rq, budget, xdp_xmit, &stats);
else
packets = virtnet_receive_packets(vi, rq, budget, xdp_xmit, &stats);
if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) {
if (!try_fill_recv(vi, rq, GFP_ATOMIC)) {
spin_lock(&vi->refill_lock);
if (vi->refill_enabled)
schedule_delayed_work(&vi->refill, 0);
spin_unlock(&vi->refill_lock);
}
}
u64_stats_set(&stats.packets, packets);
u64_stats_update_begin(&rq->stats.syncp);
for (i = 0; i < ARRAY_SIZE(virtnet_rq_stats_desc); i++) {
size_t offset = virtnet_rq_stats_desc[i].offset;
u64_stats_t *item, *src;
item = (u64_stats_t *)((u8 *)&rq->stats + offset);
src = (u64_stats_t *)((u8 *)&stats + offset);
u64_stats_add(item, u64_stats_read(src));
}
u64_stats_add(&rq->stats.packets, u64_stats_read(&stats.packets));
u64_stats_add(&rq->stats.bytes, u64_stats_read(&stats.bytes));
u64_stats_update_end(&rq->stats.syncp);
return packets;
}
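/* Opportunistically reclaim completed TX buffers of the send queue that
 * is paired with this receive queue, from RX NAPI context. It returns
 * early when the send queue has no NAPI weight, is used for raw XDP
 * buffers, or is being reset, and wakes the netdev TX queue once at
 * least 2 + MAX_SKB_FRAGS descriptors are available again.
 */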
static void virtnet_poll_cleantx(struct receive_queue *rq, int budget)
{
struct virtnet_info *vi = rq->vq->vdev->priv;
unsigned int index = vq2rxq(rq->vq);
struct send_queue *sq = &vi->sq[index];
struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index);
if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index))
return;
if (__netif_tx_trylock(txq)) {
if (sq->reset) {
__netif_tx_unlock(txq);
return;
}
do {
virtqueue_disable_cb(sq->vq);
free_old_xmit(sq, txq, !!budget);
} while (unlikely(!virtqueue_enable_cb_delayed(sq->vq)));
if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) {
if (netif_tx_queue_stopped(txq)) {
u64_stats_update_begin(&sq->stats.syncp);
u64_stats_inc(&sq->stats.wake);
u64_stats_update_end(&sq->stats.syncp);
}
netif_tx_wake_queue(txq);
}
__netif_tx_unlock(txq);
}
}
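/* Feed the accumulated call/packet/byte counters of this receive queue
 * into the net_dim library so it can adapt the RX interrupt coalescing
 * parameters. Only called when packets were processed during the last
 * NAPI run.
 */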
static void virtnet_rx_dim_update(struct virtnet_info *vi, struct receive_queue *rq)
{
struct dim_sample cur_sample = {};
if (!rq->packets_in_napi)
return;
/* No protection is needed when fetching these stats, since the
* fetcher and the updater of the stats run in the same (NAPI) context.
*/
dim_update_sample(rq->calls,
u64_stats_read(&rq->stats.packets),
u64_stats_read(&rq->stats.bytes),
&cur_sample);
net_dim(&rq->dim, cur_sample);
rq->packets_in_napi = 0;
}
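/* RX NAPI poll handler: clean the paired TX queue, receive up to
 * @budget packets, flush any pending XDP_REDIRECT work and, when fewer
 * than @budget packets were received, complete NAPI and, if DIM is
 * enabled, update the RX interrupt moderation sample. A pending XDP_TX
 * flush kicks the XDP send queue and accounts the kick.
 */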
static int virtnet_poll(struct napi_struct *napi, int budget)
{
struct receive_queue *rq =
container_of(napi, struct receive_queue, napi);
struct virtnet_info *vi = rq->vq->vdev->priv;
struct send_queue *sq;
unsigned int received;
unsigned int xdp_xmit = 0;
bool napi_complete;
virtnet_poll_cleantx(rq, budget);
received = virtnet_receive(rq, budget, &xdp_xmit);
rq->packets_in_napi += received;
if (xdp_xmit & VIRTIO_XDP_REDIR)
xdp_do_flush();
/* Out of packets? */
if (received < budget) {
napi_complete = virtqueue_napi_complete(napi, rq->vq, received);
/* Intentionally not taking dim_lock here. This may result in a
* spurious net_dim call. But if that happens virtnet_rx_dim_work
* will not act on the scheduled work.
*/
if (napi_complete && rq->dim_enabled)
virtnet_rx_dim_update(vi, rq);
}
if (xdp_xmit & VIRTIO_XDP_TX) {
sq = virtnet_xdp_get_sq(vi);
if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
u64_stats_update_begin(&sq->stats.syncp);
u64_stats_inc(&sq->stats.kicks);
u64_stats_update_end(&sq->stats.syncp);
}
virtnet_xdp_put_sq(vi, sq);
}
return received;
}
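/* Helpers to disable/enable one RX/TX queue pair. Enabling registers
 * the queue's xdp_rxq_info with the page-shared memory model, resets
 * the TX queue's BQL accounting and then enables the RX and TX NAPI
 * instances; disabling does the reverse.
 */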
static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index)
{
virtnet_napi_tx_disable(&vi->sq[qp_index].napi);
napi_disable(&vi->rq[qp_index].napi);
xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
}
static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
{
struct net_device *dev = vi->dev;
int err;
err = xdp_rxq_info_reg(&vi->rq[qp_index].xdp_rxq, dev, qp_index,
vi->rq[qp_index].napi.napi_id);
if (err < 0)
return err;
err = xdp_rxq_info_reg_mem_model(&vi->rq[qp_index].xdp_rxq,
MEM_TYPE_PAGE_SHARED, NULL);
if (err < 0)
goto err_xdp_reg_mem_model;
netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, qp_index));
virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi);
virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi);
return 0;
err_xdp_reg_mem_model:
xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
return err;
}
static void virtnet_cancel_dim(struct virtnet_info *vi, struct dim *dim)
{
if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
return;
net_dim_work_cancel(dim);
}
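/* Refresh the cached link speed and duplex from the device config
 * space, if the VIRTIO_NET_F_SPEED_DUPLEX feature was negotiated and
 * the advertised values pass ethtool validation.
 */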
static void virtnet_update_settings(struct virtnet_info *vi)
{
u32 speed;
u8 duplex;
if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX))
return;
virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed);
if (ethtool_validate_speed(speed))
vi->speed = speed;
virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex);
if (ethtool_validate_duplex(duplex))
vi->duplex = duplex;
}
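/* Device open path: re-enable the delayed refill work, fill the receive
 * rings (deferring to the workqueue on OOM), enable every queue pair and
 * then either sync the carrier state from VIRTIO_NET_F_STATUS or assume
 * the link is up. On failure, all previously enabled pairs are torn down
 * again.
 */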
static int virtnet_open(struct net_device *dev)
{
struct virtnet_info *vi = netdev_priv(dev);
int i, err;
enable_delayed_refill(vi);
for (i = 0; i < vi->max_queue_pairs; i++) {
if (i < vi->curr_queue_pairs)
/* Make sure we have some buffers: if we are out of memory, use the refill workqueue. */
if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
schedule_delayed_work(&vi->refill, 0);
err = virtnet_enable_queue_pair(vi, i);
if (err < 0)
goto err_enable_qp;
}
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
if (vi->status & VIRTIO_NET_S_LINK_UP)
netif_carrier_on(vi->dev);
virtio_config_driver_enable(vi->vdev);
} else {
vi->status = VIRTIO_NET_S_LINK_UP;
netif_carrier_on(dev);
}
return 0;
err_enable_qp:
disable_delayed_refill(vi);
cancel_delayed_work_sync(&vi->refill);
for (i--; i >= 0; i--) {
virtnet_disable_queue_pair(vi, i);
virtnet_cancel_dim(vi, &vi->rq[i].dim);
}
return err;
}
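/* TX NAPI poll handler: reclaim completed transmissions under the netdev
 * TX queue lock, wake the queue once enough descriptors are free and
 * re-arm the virtqueue callback. If new completions raced in while
 * re-arming, the NAPI instance is rescheduled. XDP raw-buffer queues are
 * completed immediately without enabling callbacks.
 */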
static int virtnet_poll_tx(struct napi_struct *napi, int budget)
{
struct send_queue *sq = container_of(napi, struct send_queue, napi);
struct virtnet_info *vi = sq->vq->vdev->priv;
unsigned int index = vq2txq(sq->vq);
struct netdev_queue *txq;
int opaque;
bool done;
if (unlikely(is_xdp_raw_buffer_queue(vi, index))) {
/* We don't need to enable cb for XDP */
napi_complete_done(napi, 0);
return 0;
}
txq = netdev_get_tx_queue(vi->dev, index);
__netif_tx_lock(txq, raw_smp_processor_id());
virtqueue_disable_cb(sq->vq);
free_old_xmit(sq, txq, !!budget);
if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) {
if (netif_tx_queue_stopped(txq)) {
u64_stats_update_begin(&sq->stats.syncp);
u64_stats_inc(&sq->stats.wake);
u64_stats_update_end(&sq->stats.syncp);
}
netif_tx_wake_queue(txq);
}
opaque = virtqueue_enable_cb_prepare(sq->vq);
done = napi_complete_done(napi, 0);
if (!done)
virtqueue_disable_cb(sq->vq);
__netif_tx_unlock(txq);
if (done) {
if (unlikely(virtqueue_poll(sq->vq, opaque))) {
if (napi_schedule_prep(napi)) {
__netif_tx_lock(txq, raw_smp_processor_id());
virtqueue_disable_cb(sq->vq);
__netif_tx_unlock(txq);
__napi_schedule(napi);
}
}
}
return 0;
}
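/* Build the virtio-net header and scatterlist for one skb and post it
 * to the send virtqueue. When the device accepts an arbitrary header
 * layout (any_header_sg) and the skb's headroom and alignment allow it,
 * the header is pushed into the headroom so header and data share one
 * sg entry; otherwise a separate sg entry is used for the header.
 */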
static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan)
{
struct virtio_net_hdr_mrg_rxbuf *hdr;
const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
struct virtnet_info *vi = sq->vq->vdev->priv;
int num_sg;
unsigned hdr_len = vi->hdr_len;
bool can_push;
pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
can_push = vi->any_header_sg &&
!((unsigned long)skb->data & (__alignof__(*hdr) - 1)) &&
!skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len;
/* Even if we can, don't push here yet as this would skew
* csum_start offset below. */
if (can_push)
hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len);
else
hdr = &skb_vnet_common_hdr(skb)->mrg_hdr;
if (virtio_net_hdr_from_skb(skb, &hdr->hdr,
virtio_is_little_endian(vi->vdev), false,
0))
return -EPROTO;
if (vi->mergeable_rx_bufs)
hdr->num_buffers = 0;
sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2));
if (can_push) {
__skb_push(skb, hdr_len);
num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len);
if (unlikely(num_sg < 0))
return num_sg;
/* Pull header back to avoid skew in tx bytes calculations. */
__skb_pull(skb, hdr_len);
} else {
sg_set_buf(sq->sg, hdr, hdr_len);
num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len);
if (unlikely(num_sg < 0))
return num_sg;
num_sg++;
}
return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg,
skb_to_ptr(skb, orphan), GFP_ATOMIC);
}
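/* Transmit path: free old buffers, timestamp and queue the skb, drop it
 * with a rate-limited warning if the virtqueue unexpectedly rejects it,
 * orphan the skb when TX NAPI is not used, stop the queue if it is
 * getting full (check_sq_full_and_disable()), and kick the device
 * unless more packets are pending (xmit_more).
 */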
static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct virtnet_info *vi = netdev_priv(dev);
int qnum = skb_get_queue_mapping(skb);
struct send_queue *sq = &vi->sq[qnum];
int err;
struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
bool xmit_more = netdev_xmit_more();
bool use_napi = sq->napi.weight;
bool kick;
/* Free up any pending old buffers before queueing new ones. */
do {
if (use_napi)
virtqueue_disable_cb(sq->vq);
free_old_xmit(sq, txq, false);
} while (use_napi && !xmit_more &&
unlikely(!virtqueue_enable_cb_delayed(sq->vq)));
/* timestamp packet in software */
skb_tx_timestamp(skb);
/* Try to transmit */
err = xmit_skb(sq, skb, !use_napi);
/* This should not happen! */
if (unlikely(err)) {
DEV_STATS_INC(dev, tx_fifo_errors);
if (net_ratelimit())
dev_warn(&dev->dev,
"Unexpected TXQ (%d) queue failure: %d\n",
qnum, err);
DEV_STATS_INC(dev, tx_dropped);
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}
/* Don't wait for transmitted skbs to be freed. */
if (!use_napi) {
skb_orphan(skb);
nf_reset_ct(skb);
}
check_sq_full_and_disable(vi, dev, sq);
kick = use_napi ? __netdev_tx_sent_queue(txq, skb->len, xmit_more) :
!xmit_more || netif_xmit_stopped(txq);
if (kick) {
if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
u64_stats_update_begin(&sq->stats.syncp);
u64_stats_inc(&sq->stats.kicks);
u64_stats_update_end(&sq->stats.syncp);
}
}
return NETDEV_TX_OK;
}
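/* RX resize support: pause NAPI (and cancel any pending DIM work)
 * around virtqueue_resize() and resume afterwards, refilling the ring
 * or scheduling the refill work as needed.
 */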
static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
{
bool running = netif_running(vi->dev);
if (running) {
napi_disable(&rq->napi);
virtnet_cancel_dim(vi, &rq->dim);
}
}
static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq)
{
bool running = netif_running(vi->dev);
if (!try_fill_recv(vi, rq, GFP_KERNEL))
schedule_delayed_work(&vi->refill, 0);
if (running)
virtnet_napi_enable(rq->vq, &rq->napi);
}
static int virtnet_rx_resize(struct virtnet_info *vi,
struct receive_queue *rq, u32 ring_num)
{
int err, qindex;
qindex = rq - vi->rq;
virtnet_rx_pause(vi, rq);
err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_unmap_free_buf);
if (err)
netdev_err(vi->dev, "resize rx fail: rx queue index: %d err: %d\n", qindex, err);
virtnet_rx_resume(vi, rq);
return err;
}
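/* TX resize support: quiesce the send queue before virtqueue_resize().
 * Pausing disables TX NAPI, marks the queue as resetting so the RX-side
 * cleaning skips it, and stops the subqueue under the TX lock; resuming
 * reverses those steps.
 */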
static void virtnet_tx_pause(struct virtnet_info *vi, struct send_queue *sq)
{
bool running = netif_running(vi->dev);
struct netdev_queue *txq;
int qindex;
qindex = sq - vi->sq;
if (running)
virtnet_napi_tx_disable(&sq->napi);
txq = netdev_get_tx_queue(vi->dev, qindex);
/* 1. wait for all in-flight xmit to complete
* 2. fix the race of netif_stop_subqueue() vs netif_start_subqueue()
*/
__netif_tx_lock_bh(txq);
/* Prevent rx poll from accessing sq. */
sq->reset = true;
/* Prevent the upper layer from trying to send packets. */
netif_stop_subqueue(vi->dev, qindex);
__netif_tx_unlock_bh(txq);
}
static void virtnet_tx_resume(struct virtnet_info *vi, struct send_queue *sq)
{
bool running = netif_running(vi->dev);
struct netdev_queue *txq;
int qindex;
qindex = sq - vi->sq;
txq = netdev_get_tx_queue(vi->dev, qindex);
__netif_tx_lock_bh(txq);
sq->reset = false;
netif_tx_wake_queue(txq);
__netif_tx_unlock_bh(txq);
if (running)
virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);
}
static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq,
u32 ring_num)
{
int qindex, err;
qindex = sq - vi->sq;
virtnet_tx_pause(vi, sq);
err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf);
if (err)
netdev_err(vi->dev, "resize tx fail: tx queue index: %d err: %d\n", qindex, err);
virtnet_tx_resume(vi, sq);
return err;
}
/*
* Send command via the control virtqueue and check status. Commands
* supported by the hypervisor, as indicated by feature bits, should
* never fail unless improperly formatted.
*/
static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd,
struct scatterlist *out,
struct scatterlist *in)
{
struct scatterlist *sgs[5], hdr, stat;
u32 out_num = 0, tmp, in_num = 0;
bool ok;
int ret;
/* Caller should know better */
BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
mutex_lock(&vi->cvq_lock);
vi->ctrl->status = ~0;
vi->ctrl->hdr.class = class;
vi->ctrl->hdr.cmd = cmd;
/* Add header */
sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr));
sgs[out_num++] = &hdr;
if (out)
sgs[out_num++] = out;
/* Add return status. */
sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status));
sgs[out_num + in_num++] = &stat;
if (in)
sgs[out_num + in_num++] = in;
BUG_ON(out_num + in_num > ARRAY_SIZE(sgs));
ret = virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC);
if (ret < 0) {
dev_warn(&vi->vdev->dev,
"Failed to add sgs for command vq: %d\n", ret);
mutex_unlock(&vi->cvq_lock);
return false;
}
if (unlikely(!virtqueue_kick(vi->cvq)))
goto unlock;
/* Spin for a response, the kick causes an ioport write, trapping
* into the hypervisor, so the request should be handled immediately.
*/
while (!virtqueue_get_buf(vi->cvq, &tmp) &&
!virtqueue_is_broken(vi->cvq)) {
cond_resched();
cpu_relax();
}
unlock:
ok = vi->ctrl->status == VIRTIO_NET_OK;
mutex_unlock(&vi->cvq_lock);
return ok;
}
static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
struct scatterlist *out)
{
return virtnet_send_command_reply(vi, class, cmd, out, NULL);
}
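/* Change the MAC address, preferring the VIRTIO_NET_CTRL_MAC_ADDR_SET
 * control command when available and falling back to byte-wise config
 * space writes on legacy (pre-VERSION_1) devices that expose
 * VIRTIO_NET_F_MAC. Rejected when the standby feature is negotiated.
 */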
static int virtnet_set_mac_address(struct net_device *dev, void *p)
{
struct virtnet_info *vi = netdev_priv(dev);
struct virtio_device *vdev = vi->vdev;
int ret;
struct sockaddr *addr;
struct scatterlist sg;
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY))
return -EOPNOTSUPP;
addr = kmemdup(p, sizeof(*addr), GFP_KERNEL);
if (!addr)
return -ENOMEM;
ret = eth_prepare_mac_addr_change(dev, addr);
if (ret)
goto out;
if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
sg_init_one(&sg, addr->sa_data, dev->addr_len);
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) {
dev_warn(&vdev->dev,
"Failed to set mac address by vq command.\n");
ret = -EINVAL;
goto out;
}
} else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) &&
!virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) {
unsigned int i;
/* Naturally, this has an atomicity problem. */
for (i = 0; i < dev->addr_len; i++)
virtio_cwrite8(vdev,
offsetof(struct virtio_net_config, mac) +
i, addr->sa_data[i]);
}
eth_commit_mac_addr_change(dev, p);
ret = 0;
out:
kfree(addr);
return ret;
}
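/* Aggregate the per-queue RX/TX packet, byte, drop and timeout counters
 * into the rtnl_link_stats64 totals; the u64_stats_fetch_begin()/retry()
 * loop guarantees a consistent snapshot of each queue's counters.
 */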
static void virtnet_stats(struct net_device *dev,
struct rtnl_link_stats64 *tot)
{
struct virtnet_info *vi = netdev_priv(dev);
unsigned int start;
int i;
for (i = 0; i < vi->max_queue_pairs; i++) {
u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops;
struct receive_queue *rq = &vi->rq[i];
struct send_queue *sq = &vi->sq[i];
do {
start = u64_stats_fetch_begin(&sq->stats.syncp);
tpackets = u64_stats_read(&sq->stats.packets);
tbytes = u64_stats_read(&sq->stats.bytes);
terrors = u64_stats_read(&sq->stats.tx_timeouts);
} while (u64_stats_fetch_retry(&sq->stats.syncp, start));
do {
start = u64_stats_fetch_begin(&rq->stats.syncp);
rpackets = u64_stats_read(&rq->stats.packets);
rbytes = u64_stats_read(&rq->stats.bytes);
rdrops = u64_stats_read(&rq->stats.drops);
} while (u64_stats_fetch_retry(&rq->stats.syncp, start));
tot->rx_packets += rpackets;
tot->tx_packets += tpackets;
tot->rx_bytes += rbytes;
tot->tx_bytes += tbytes;
tot->rx_dropped += rdrops;
tot->tx_errors += terrors;
}
tot->tx_dropped = DEV_STATS_READ(dev, tx_dropped);
tot->tx_fifo_errors = DEV_STATS_READ(dev, tx_fifo_errors);
tot->rx_length_errors = DEV_STATS_READ(dev, rx_length_errors);
tot->rx_frame_errors = DEV_STATS_READ(dev, rx_frame_errors);
}
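/* Acknowledge a link announcement: send VIRTIO_NET_CTRL_ANNOUNCE_ACK on the
* control virtqueue so the device can clear VIRTIO_NET_S_ANNOUNCE.
*/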
static void virtnet_ack_link_announce(struct virtnet_info *vi)
{
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE,
VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL))
dev_warn(&vi->dev->dev, "Failed to ack link announce.\n");
}
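/* Tell the device how many RX/TX queue pairs to use via
* VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET.  A no-op when the device has no control
* virtqueue or lacks VIRTIO_NET_F_MQ.
*/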
static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
{
struct virtio_net_ctrl_mq *mq __free(kfree) = NULL;
struct scatterlist sg;
struct net_device *dev = vi->dev;
if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ))
return 0;
mq = kzalloc(sizeof(*mq), GFP_KERNEL);
if (!mq)
return -ENOMEM;
mq->virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs);
sg_init_one(&sg, mq, sizeof(*mq));
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) {
dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n",
queue_pairs);
return -EINVAL;
} else {
vi->curr_queue_pairs = queue_pairs;
/* virtnet_open() will refill when the device is brought up. */
if (dev->flags & IFF_UP)
schedule_delayed_work(&vi->refill, 0);
}
return 0;
}
static int virtnet_close(struct net_device *dev)
{
struct virtnet_info *vi = netdev_priv(dev);
int i;
/* Make sure NAPI doesn't schedule refill work */
disable_delayed_refill(vi);
/* Make sure refill_work doesn't re-enable napi! */
cancel_delayed_work_sync(&vi->refill);
/* Prevent the config change callback from changing carrier
* after close
*/
virtio_config_driver_disable(vi->vdev);
/* Stop getting status/speed updates: we don't care until next
* open
*/
cancel_work_sync(&vi->config_work);
for (i = 0; i < vi->max_queue_pairs; i++) {
virtnet_disable_queue_pair(vi, i);
virtnet_cancel_dim(vi, &vi->rq[i].dim);
}
netif_carrier_off(dev);
return 0;
}
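/* Deferred ndo_set_rx_mode handler: programming the promiscuous/all-multicast
* flags and the unicast+multicast MAC filter table allocates with GFP_KERNEL,
* takes rtnl_lock and issues control-virtqueue commands, none of which can be
* done from ndo_set_rx_mode's atomic context, so virtnet_set_rx_mode() only
* schedules this work.
*/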
static void virtnet_rx_mode_work(struct work_struct *work)
{
struct virtnet_info *vi =
container_of(work, struct virtnet_info, rx_mode_work);
u8 *promisc_allmulti __free(kfree) = NULL;
struct net_device *dev = vi->dev;
struct scatterlist sg[2];
struct virtio_net_ctrl_mac *mac_data;
struct netdev_hw_addr *ha;
int uc_count;
int mc_count;
void *buf;
int i;
/* We can't dynamically set ndo_set_rx_mode, so return gracefully */
if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
return;
promisc_allmulti = kzalloc(sizeof(*promisc_allmulti), GFP_KERNEL);
if (!promisc_allmulti) {
dev_warn(&dev->dev, "Failed to set RX mode, no memory.\n");
return;
}
rtnl_lock();
*promisc_allmulti = !!(dev->flags & IFF_PROMISC);
sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti));
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
VIRTIO_NET_CTRL_RX_PROMISC, sg))
dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
*promisc_allmulti ? "en" : "dis");
*promisc_allmulti = !!(dev->flags & IFF_ALLMULTI);
sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti));
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
VIRTIO_NET_CTRL_RX_ALLMULTI, sg))
dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
*promisc_allmulti ? "en" : "dis");
netif_addr_lock_bh(dev);
uc_count = netdev_uc_count(dev);
mc_count = netdev_mc_count(dev);
/* MAC filter - use one buffer for both lists */
buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) +
(2 * sizeof(mac_data->entries)), GFP_ATOMIC);
mac_data = buf;
if (!buf) {
netif_addr_unlock_bh(dev);
rtnl_unlock();
return;
}
sg_init_table(sg, 2);
/* Store the unicast list and count in the front of the buffer */
mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count);
i = 0;
netdev_for_each_uc_addr(ha, dev)
memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
sg_set_buf(&sg[0], mac_data,
sizeof(mac_data->entries) + (uc_count * ETH_ALEN));
/* multicast list and count fill the end */
mac_data = (void *)&mac_data->macs[uc_count][0];
mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count);
i = 0;
netdev_for_each_mc_addr(ha, dev)
memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
netif_addr_unlock_bh(dev);
sg_set_buf(&sg[1], mac_data,
sizeof(mac_data->entries) + (mc_count * ETH_ALEN));
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
VIRTIO_NET_CTRL_MAC_TABLE_SET, sg))
dev_warn(&dev->dev, "Failed to set MAC filter table.\n");
rtnl_unlock();
kfree(buf);
}
static void virtnet_set_rx_mode(struct net_device *dev)
{
struct virtnet_info *vi = netdev_priv(dev);
if (vi->rx_mode_work_enabled)
schedule_work(&vi->rx_mode_work);
}
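/* VLAN filtering: VIRTIO_NET_CTRL_VLAN_ADD/DEL ask the device to add or drop
* a VLAN ID in its RX filter.
*/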
static int virtnet_vlan_rx_add_vid(struct net_device *dev,
__be16 proto, u16 vid)
{
struct virtnet_info *vi = netdev_priv(dev);
__virtio16 *_vid __free(kfree) = NULL;
struct scatterlist sg;
_vid = kzalloc(sizeof(*_vid), GFP_KERNEL);
if (!_vid)
return -ENOMEM;
*_vid = cpu_to_virtio16(vi->vdev, vid);
sg_init_one(&sg, _vid, sizeof(*_vid));
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
VIRTIO_NET_CTRL_VLAN_ADD, &sg))
dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid);
return 0;
}
static int virtnet_vlan_rx_kill_vid(struct net_device *dev,
__be16 proto, u16 vid)
{
struct virtnet_info *vi = netdev_priv(dev);
__virtio16 *_vid __free(kfree) = NULL;
struct scatterlist sg;
_vid = kzalloc(sizeof(*_vid), GFP_KERNEL);
if (!_vid)
return -ENOMEM;
*_vid = cpu_to_virtio16(vi->vdev, vid);
sg_init_one(&sg, _vid, sizeof(*_vid));
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
VIRTIO_NET_CTRL_VLAN_DEL, &sg))
dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid);
return 0;
}
static void virtnet_clean_affinity(struct virtnet_info *vi)
{
int i;
if (vi->affinity_hint_set) {
for (i = 0; i < vi->max_queue_pairs; i++) {
virtqueue_set_affinity(vi->rq[i].vq, NULL);
virtqueue_set_affinity(vi->sq[i].vq, NULL);
}
vi->affinity_hint_set = false;
}
}
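/* Spread the online CPUs over the active queue pairs: each queue gets at
* least num_online_cpus() / curr_queue_pairs CPUs ("stride"), and the first
* "stragglers" queues get one extra CPU when the division is not exact.  The
* same mask is used as the virtqueue affinity hint and as the XPS map of the
* matching TX queue.
*/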
static void virtnet_set_affinity(struct virtnet_info *vi)
{
cpumask_var_t mask;
int stragglers;
int group_size;
int i, j, cpu;
int num_cpu;
int stride;
if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
virtnet_clean_affinity(vi);
return;
}
num_cpu = num_online_cpus();
stride = max_t(int, num_cpu / vi->curr_queue_pairs, 1);
stragglers = num_cpu >= vi->curr_queue_pairs ?
num_cpu % vi->curr_queue_pairs :
0;
cpu = cpumask_first(cpu_online_mask);
for (i = 0; i < vi->curr_queue_pairs; i++) {
group_size = stride + (i < stragglers ? 1 : 0);
for (j = 0; j < group_size; j++) {
cpumask_set_cpu(cpu, mask);
cpu = cpumask_next_wrap(cpu, cpu_online_mask,
nr_cpu_ids, false);
}
virtqueue_set_affinity(vi->rq[i].vq, mask);
virtqueue_set_affinity(vi->sq[i].vq, mask);
__netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS);
cpumask_clear(mask);
}
vi->affinity_hint_set = true;
free_cpumask_var(mask);
}
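/* CPU hotplug (cpuhp) callbacks: recompute the affinity spread when a CPU
* comes online or after one has died, and drop the hints while a CPU is
* going down.
*/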
static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node)
{
struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
node);
virtnet_set_affinity(vi);
return 0;
}
static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node)
{
struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
node_dead);
virtnet_set_affinity(vi);
return 0;
}
static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node)
{
struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
node);
virtnet_clean_affinity(vi);
return 0;
}
static enum cpuhp_state virtionet_online;
static int virtnet_cpu_notif_add(struct virtnet_info *vi)
{
int ret;
ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node);
if (ret)
return ret;
ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD,
&vi->node_dead);
if (!ret)
return ret;
cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node);
return ret;
}
static void virtnet_cpu_notif_remove(struct virtnet_info *vi)
{
cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node);
cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD,
&vi->node_dead);
}
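/* Program notification coalescing for a single virtqueue
* (VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET): the device may delay interrupts until
* max_usecs have passed or max_packets buffers have been used.
*/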
static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info *vi,
u16 vqn, u32 max_usecs, u32 max_packets)
{
struct virtio_net_ctrl_coal_vq *coal_vq __free(kfree) = NULL;
struct scatterlist sgs;
coal_vq = kzalloc(sizeof(*coal_vq), GFP_KERNEL);
if (!coal_vq)
return -ENOMEM;
coal_vq->vqn = cpu_to_le16(vqn);
coal_vq->coal.max_usecs = cpu_to_le32(max_usecs);
coal_vq->coal.max_packets = cpu_to_le32(max_packets);
sg_init_one(&sgs, coal_vq, sizeof(*coal_vq));
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET,
&sgs))
return -EINVAL;
return 0;
}
static int virtnet_send_rx_ctrl_coal_vq_cmd(struct virtnet_info *vi,
u16 queue, u32 max_usecs,
u32 max_packets)
{
int err;
if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
return -EOPNOTSUPP;
err = virtnet_send_ctrl_coal_vq_cmd(vi, rxq2vq(queue),
max_usecs, max_packets);
if (err)
return err;
vi->rq[queue].intr_coal.max_usecs = max_usecs;
vi->rq[queue].intr_coal.max_packets = max_packets;
return 0;
}
static int virtnet_send_tx_ctrl_coal_vq_cmd(struct virtnet_info *vi,
u16 queue, u32 max_usecs,
u32 max_packets)
{
int err;
if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
return -EOPNOTSUPP;
err = virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(queue),
max_usecs, max_packets);
if (err)
return err;
vi->sq[queue].intr_coal.max_usecs = max_usecs;
vi->sq[queue].intr_coal.max_packets = max_packets;
return 0;
}
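/* ethtool -g/-G handlers.  Ring sizes are read from queue 0's virtqueues;
* setting new values resizes every RX/TX virtqueue and re-programs its
* coalescing parameters, which the resize would otherwise reset.
*/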
static void virtnet_get_ringparam(struct net_device *dev,
struct ethtool_ringparam *ring,
struct kernel_ethtool_ringparam *kernel_ring,
struct netlink_ext_ack *extack)
{
struct virtnet_info *vi = netdev_priv(dev);
ring->rx_max_pending = vi->rq[0].vq->num_max;
ring->tx_max_pending = vi->sq[0].vq->num_max;
ring->rx_pending = virtqueue_get_vring_size(vi->rq[0].vq);
ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq);
}
static int virtnet_set_ringparam(struct net_device *dev,
struct ethtool_ringparam *ring,
struct kernel_ethtool_ringparam *kernel_ring,
struct netlink_ext_ack *extack)
{
struct virtnet_info *vi = netdev_priv(dev);
u32 rx_pending, tx_pending;
struct receive_queue *rq;
struct send_queue *sq;
int i, err;
if (ring->rx_mini_pending || ring->rx_jumbo_pending)
return -EINVAL;
rx_pending = virtqueue_get_vring_size(vi->rq[0].vq);
tx_pending = virtqueue_get_vring_size(vi->sq[0].vq);
if (ring->rx_pending == rx_pending &&
ring->tx_pending == tx_pending)
return 0;
if (ring->rx_pending > vi->rq[0].vq->num_max)
return -EINVAL;
if (ring->tx_pending > vi->sq[0].vq->num_max)
return -EINVAL;
for (i = 0; i < vi->max_queue_pairs; i++) {
rq = vi->rq + i;
sq = vi->sq + i;
if (ring->tx_pending != tx_pending) {
err = virtnet_tx_resize(vi, sq, ring->tx_pending);
if (err)
return err;
/* Upon disabling and re-enabling a transmit virtqueue, the device must
* set the coalescing parameters of the virtqueue to those configured
* through the VIRTIO_NET_CTRL_NOTF_COAL_TX_SET command, or, if the driver
* did not set any TX coalescing parameters, to 0.
*/
err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, i,
vi->intr_coal_tx.max_usecs,
vi->intr_coal_tx.max_packets);
/* Don't break the tx resize action if the vq coalescing is not
* supported. The same is true for rx resize below.
*/
if (err && err != -EOPNOTSUPP)
return err;
}
if (ring->rx_pending != rx_pending) {
err = virtnet_rx_resize(vi, rq, ring->rx_pending);
if (err)
return err;
/* The reason is the same as for the transmit virtqueue reset above. */
mutex_lock(&vi->rq[i].dim_lock);
err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, i,
vi->intr_coal_rx.max_usecs,
vi->intr_coal_rx.max_packets);
mutex_unlock(&vi->rq[i].dim_lock);
if (err && err != -EOPNOTSUPP)
return err;
}
}
return 0;
}
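/* Push the current RSS/hash configuration to the device.  The payload is
* split over four scatterlist entries that together cover struct
* virtio_net_ctrl_rss: the header up to the indirection table, the table
* itself, the fields from max_tx_vq up to the key, and finally the hash key.
*/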
static bool virtnet_commit_rss_command(struct virtnet_info *vi)
{
struct net_device *dev = vi->dev;
struct scatterlist sgs[4];
unsigned int sg_buf_size;
/* prepare sgs */
sg_init_table(sgs, 4);
sg_buf_size = offsetof(struct virtio_net_ctrl_rss, indirection_table);
sg_set_buf(&sgs[0], &vi->rss, sg_buf_size);
sg_buf_size = sizeof(uint16_t) * (vi->rss.indirection_table_mask + 1);
sg_set_buf(&sgs[1], vi->rss.indirection_table, sg_buf_size);
sg_buf_size = offsetof(struct virtio_net_ctrl_rss, key)
- offsetof(struct virtio_net_ctrl_rss, max_tx_vq);
sg_set_buf(&sgs[2], &vi->rss.max_tx_vq, sg_buf_size);
sg_buf_size = vi->rss_key_size;
sg_set_buf(&sgs[3], vi->rss.key, sg_buf_size);
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
vi->has_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG
: VIRTIO_NET_CTRL_MQ_HASH_CONFIG, sgs))
goto err;
return true;
err:
dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n");
return false;
}
static void virtnet_init_default_rss(struct virtnet_info *vi)
{
u32 indir_val = 0;
int i = 0;
vi->rss.hash_types = vi->rss_hash_types_supported;
vi->rss_hash_types_saved = vi->rss_hash_types_supported;
vi->rss.indirection_table_mask = vi->rss_indir_table_size
? vi->rss_indir_table_size - 1 : 0;
vi->rss.unclassified_queue = 0;
for (; i < vi->rss_indir_table_size; ++i) {
indir_val = ethtool_rxfh_indir_default(i, vi->curr_queue_pairs);
vi->rss.indirection_table[i] = indir_val;
}
vi->rss.max_tx_vq = vi->has_rss ? vi->curr_queue_pairs : 0;
vi->rss.hash_key_length = vi->rss_key_size;
netdev_rss_key_fill(vi->rss.key, vi->rss_key_size);
}
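/* ethtool -n <dev> rx-flow-hash handler: report which header fields feed the
* RSS hash for a flow type by translating the saved
* VIRTIO_NET_RSS_HASH_TYPE_* bits into ethtool RXH_* flags.
*/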
static void virtnet_get_hashflow(const struct virtnet_info *vi, struct ethtool_rxnfc *info)
{
info->data = 0;
switch (info->flow_type) {
case TCP_V4_FLOW:
if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
info->data = RXH_IP_SRC | RXH_IP_DST |
RXH_L4_B_0_1 | RXH_L4_B_2_3;
} else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
info->data = RXH_IP_SRC | RXH_IP_DST;
}
break;
case TCP_V6_FLOW:
if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
info->data = RXH_IP_SRC | RXH_IP_DST |
RXH_L4_B_0_1 | RXH_L4_B_2_3;
} else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
info->data = RXH_IP_SRC | RXH_IP_DST;
}
break;
case UDP_V4_FLOW:
if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
info->data = RXH_IP_SRC | RXH_IP_DST |
RXH_L4_B_0_1 | RXH_L4_B_2_3;
} else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
info->data = RXH_IP_SRC | RXH_IP_DST;
}
break;
case UDP_V6_FLOW:
if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
info->data = RXH_IP_SRC | RXH_IP_DST |
RXH_L4_B_0_1 | RXH_L4_B_2_3;
} else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
info->data = RXH_IP_SRC | RXH_IP_DST;
}
break;
case IPV4_FLOW:
if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4)
info->data = RXH_IP_SRC | RXH_IP_DST;
break;
case IPV6_FLOW:
if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6)
info->data = RXH_IP_SRC | RXH_IP_DST;
break;
default:
info->data = 0;
break;
}
}
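/* ethtool -N <dev> rx-flow-hash handler: map the requested RXH_* flags back
* to VIRTIO_NET_RSS_HASH_TYPE_* bits, reject combinations the device cannot
* hash on, and commit the new configuration when RXHASH is enabled.
*/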
static bool virtnet_set_hashflow(struct virtnet_info *vi, struct ethtool_rxnfc *info)
{
u32 new_hashtypes = vi->rss_hash_types_saved;
bool is_disable = info->data & RXH_DISCARD;
bool is_l4 = info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3);
/* supports only 'sd' (IP src/dst), 'sdfn' (IP src/dst plus L4 ports) and 'r' (discard) */
if (!((info->data == (RXH_IP_SRC | RXH_IP_DST)) | is_l4 | is_disable))
return false;
switch (info->flow_type) {
case TCP_V4_FLOW:
new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4);
if (!is_disable)
new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4
| (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv4 : 0);
break;
case UDP_V4_FLOW:
new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_UDPv4);
if (!is_disable)
new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4
| (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv4 : 0);
break;
case IPV4_FLOW:
new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4;
if (!is_disable)
new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv4;
break;
case TCP_V6_FLOW:
new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_TCPv6);
if (!is_disable)
new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6
| (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv6 : 0);
break;
case UDP_V6_FLOW:
new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_UDPv6);
if (!is_disable)
new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6
| (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv6 : 0);
break;
case IPV6_FLOW:
new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6;
if (!is_disable)
new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv6;
break;
default:
/* unsupported flow */
return false;
}
/* if unsupported hashtype was set */
if (new_hashtypes != (new_hashtypes & vi->rss_hash_types_supported))
return false;
if (new_hashtypes != vi->rss_hash_types_saved) {
vi->rss_hash_types_saved = new_hashtypes;
vi->rss.hash_types = vi->rss_hash_types_saved;
if (vi->dev->features & NETIF_F_RXHASH)
return virtnet_commit_rss_command(vi);
}
return true;
}
static void virtnet_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *info)
{
struct virtnet_info *vi = netdev_priv(dev);
struct virtio_device *vdev = vi->vdev;
strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
strscpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version));
strscpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info));
}
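/* ethtool -L handler: change the number of combined channels by requesting a
* new queue-pair count from the device and re-spreading CPU affinity.
*/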
/* TODO: Eliminate OOO packets during switching */
static int virtnet_set_channels(struct net_device *dev,
struct ethtool_channels *channels)
{
struct virtnet_info *vi = netdev_priv(dev);
u16 queue_pairs = channels->combined_count;
int err;
/* We don't support separate rx/tx channels.
* We don't allow setting 'other' channels.
*/
if (channels->rx_count || channels->tx_count || channels->other_count)
return -EINVAL;
if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0)
return -EINVAL;
/* For now we don't support modifying channels while XDP is loaded.
* Also, when XDP is loaded all RX queues have XDP programs, so we only
* need to check a single RX queue.
*/
if (vi->rq[0].xdp_prog)
return -EINVAL;
cpus_read_lock();
err = virtnet_set_queues(vi, queue_pairs);
if (err) {
cpus_read_unlock();
goto err;
}
virtnet_set_affinity(vi);
cpus_read_unlock();
netif_set_real_num_tx_queues(dev, queue_pairs);
netif_set_real_num_rx_queues(dev, queue_pairs);
err:
return err;
}
static void virtnet_stats_sprintf(u8 **p, const char *fmt, const char *noq_fmt,
int num, int qid, const struct virtnet_stat_desc *desc)
{
int i;
if (qid < 0) {
for (i = 0; i < num; ++i)
ethtool_sprintf(p, noq_fmt, desc[i].desc);
} else {
for (i = 0; i < num; ++i)
ethtool_sprintf(p, fmt, qid, desc[i].desc);
}
}
/* qid == -1: emit the rx/tx totals ("rx_*"/"tx_*") rather than per-queue fields */
static void virtnet_get_stats_string(struct virtnet_info *vi, int type, int qid, u8 **data)
{
const struct virtnet_stat_desc *desc;
const char *fmt, *noq_fmt;
u8 *p = *data;
u32 num;
if (type == VIRTNET_Q_TYPE_CQ && qid >= 0) {
noq_fmt = "cq_hw_%s";
if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) {
desc = &virtnet_stats_cvq_desc[0];
num = ARRAY_SIZE(virtnet_stats_cvq_desc);
virtnet_stats_sprintf(&p, NULL, noq_fmt, num, -1, desc);
}
}
if (type == VIRTNET_Q_TYPE_RX) {
fmt = "rx%u_%s";
noq_fmt = "rx_%s";
desc = &virtnet_rq_stats_desc[0];
num = ARRAY_SIZE(virtnet_rq_stats_desc);
virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc);
fmt = "rx%u_hw_%s";
noq_fmt = "rx_hw_%s";
if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
desc = &virtnet_stats_rx_basic_desc[0];
num = ARRAY_SIZE(virtnet_stats_rx_basic_desc);
virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc);
}
if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
desc = &virtnet_stats_rx_csum_desc[0];
num = ARRAY_SIZE(virtnet_stats_rx_csum_desc);
virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc);
}
if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) {
desc = &virtnet_stats_rx_speed_desc[0];
num = ARRAY_SIZE(virtnet_stats_rx_speed_desc);
virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc);
}
}
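
	/* TX queues: emit the driver per-queue counters first, then the
	 * device-provided ("hw") counters for each stat type the device
	 * advertises in device_stats_cap.
	 */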
if (type == VIRTNET_Q_TYPE_TX) {
fmt = "tx%u_%s";
noq_fmt = "tx_%s";
desc = &virtnet_sq_stats_desc[0];
num = ARRAY_SIZE(virtnet_sq_stats_desc);
virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc);
fmt = "tx%u_hw_%s";
noq_fmt = "tx_hw_%s";
if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
desc = &virtnet_stats_tx_basic_desc[0];
num = ARRAY_SIZE(virtnet_stats_tx_basic_desc);
virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc);
}
if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
desc = &virtnet_stats_tx_gso_desc[0];
num = ARRAY_SIZE(virtnet_stats_tx_gso_desc);
virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc);
}
if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) {
desc = &virtnet_stats_tx_speed_desc[0];
num = ARRAY_SIZE(virtnet_stats_tx_speed_desc);
virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc);
}
}
*data = p;
}
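
/* Context used to assemble the device and driver stats, either for the
 * ethtool -S output or for the netdev-genl per-queue stats (qstats).
 */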
struct virtnet_stats_ctx {
	/* Whether the stats are written to qstats or to ethtool -S */
bool to_qstat;
/* Used to calculate the offset inside the output buffer. */
u32 desc_num[3];
/* The actual supported stat types. */
u32 bitmap[3];
/* Used to calculate the reply buffer size. */
u32 size[3];
/* Record the output buffer. */
u64 *data;
};
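
/* virtnet_stats_ctx_init - initialize the stats context
 * @vi: virtio net info
 * @ctx: stats ctx to initialize
 * @data: output buffer the stats will be written to
 * @to_qstat: true when filling netdev-genl qstats, false for ethtool -S
 *
 * Records which stat types the device supports and, per queue type, how many
 * descriptors are reported and how large the device reply needs to be.
 */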
static void virtnet_stats_ctx_init(struct virtnet_info *vi,
struct virtnet_stats_ctx *ctx,
u64 *data, bool to_qstat)
{
u32 queue_type;
ctx->data = data;
ctx->to_qstat = to_qstat;
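
	/* The qstat path uses the *_qstat descriptor tables and does not
	 * report the control virtqueue (CVQ) stats.
	 */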
if (to_qstat) {
ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc_qstat);
ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc_qstat);
queue_type = VIRTNET_Q_TYPE_RX;
if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC;
ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat);
ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic);
}
if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM;
ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat);
ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum);
}
if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) {
ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_GSO;
ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat);
ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_gso);
}
if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) {
ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED;
ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat);
ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed);
}
queue_type = VIRTNET_Q_TYPE_TX;
if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC;
ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat);
ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic);
}
if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) {
ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_CSUM;
ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat);
ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_csum);
}
if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO;
ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat);
ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso);
}
if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) {
ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED;
ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat);
ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed);
}
return;
}
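
	/* ethtool -S path: driver per-queue counters plus, when supported,
	 * the CVQ stats and the device-provided stat types.
	 */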
ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc);
ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc);
if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) {
queue_type = VIRTNET_Q_TYPE_CQ;
ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_CVQ;
ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_cvq_desc);
ctx->size[queue_type] += sizeof(struct virtio_net_stats_cvq);
}
queue_type = VIRTNET_Q_TYPE_RX;
if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC;
ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc);
ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic);
}
if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM;
ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc);
ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum);
}
if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) {
ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED;
ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc);
ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed);
}
queue_type = VIRTNET_Q_TYPE_TX;
if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC;
ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc);
ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic);
}
if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO;
ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc);
ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso);
}
if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) {
ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED;
ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc);
ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed);
}
}
/* stats_sum_queue - Calculate the sum of each field across all sq or rq queues.
 * @sum: buffer that receives the per-field sums
 * @num: number of fields per queue
 * @q_value: pointer to the fields of the first queue
 * @q_num: number of queues
 */
static void stats_sum_queue(u64 *sum, u32 num, u64 *q_value, u32 q_num)
{
u32 step = num;
int i, j;
u64 *p;
for (i = 0; i < num; ++i) {
p = sum + i;
*p = 0;
for (j = 0; j < q_num; ++j)
*p += *(q_value + i + j * step);
}
}
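
/* virtnet_fill_total_fields - fill the rx_/tx_ totals at the head of the
 * ethtool -S output by summing the per-queue values.
 */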
static void virtnet_fill_total_fields(struct virtnet_info *vi,
struct virtnet_stats_ctx *ctx)
{
u64 *data, *first_rx_q, *first_tx_q;
u32 num_cq, num_rx, num_tx;
num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ];
num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX];
num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX];
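
	/* The output buffer layout is: rx totals, tx totals, cq stats,
	 * then the per-queue rx blocks followed by the per-queue tx blocks.
	 */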
first_rx_q = ctx->data + num_rx + num_tx + num_cq;
first_tx_q = first_rx_q + vi->curr_queue_pairs * num_rx;
data = ctx->data;
stats_sum_queue(data, num_rx, first_rx_q, vi->curr_queue_pairs);
data = ctx->data + num_rx;
stats_sum_queue(data, num_tx, first_tx_q, vi->curr_queue_pairs);
}
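
/* virtnet_fill_stats_qstat - copy driver or device stats into the qstats
 * buffer at the offsets given by each descriptor's qstat_offset.
 */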
static void virtnet_fill_stats_qstat(struct virtnet_info *vi, u32 qid,
struct virtnet_stats_ctx *ctx,
const u8 *base, bool drv_stats, u8 reply_type)
{
const struct virtnet_stat_desc *desc;
const u64_stats_t *v_stat;
u64 offset, bitmap;
const __le64 *v;
u32 queue_type;
int i, num;
queue_type = vq_type(vi, qid);
bitmap = ctx->bitmap[queue_type];
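
	/* Driver (software) per-queue counters: copy with u64_stats_read(). */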
if (drv_stats) {
if (queue_type == VIRTNET_Q_TYPE_RX) {
desc = &virtnet_rq_stats_desc_qstat[0];
num = ARRAY_SIZE(virtnet_rq_stats_desc_qstat);
} else {
desc = &virtnet_sq_stats_desc_qstat[0];
num = ARRAY_SIZE(virtnet_sq_stats_desc_qstat);
}
for (i = 0; i < num; ++i) {
offset = desc[i].qstat_offset / sizeof(*ctx->data);
v_stat = (const u64_stats_t *)(base + desc[i].offset);
ctx->data[offset] = u64_stats_read(v_stat);
}
return;
}
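
	/* Device stats: pick the descriptor table that matches this reply
	 * type, provided the device advertised the corresponding stat type.
	 */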
if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
desc = &virtnet_stats_rx_basic_desc_qstat[0];
num = ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat);
if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC)
goto found;
}
if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
desc = &virtnet_stats_rx_csum_desc_qstat[0];
num = ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat);
if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM)
goto found;
}
if (bitmap & VIRTIO_NET_STATS_TYPE_RX_GSO) {
desc = &virtnet_stats_rx_gso_desc_qstat[0];
num = ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat);
if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_GSO)
goto found;
}
if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) {
desc = &virtnet_stats_rx_speed_desc_qstat[0];
num = ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat);
if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED)
goto found;
}
if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
desc = &virtnet_stats_tx_basic_desc_qstat[0];
num = ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat);
if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC)
goto found;
}
if (bitmap & VIRTIO_NET_STATS_TYPE_TX_CSUM) {
desc = &virtnet_stats_tx_csum_desc_qstat[0];
num = ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat);
if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_CSUM)
goto found;
}
if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
desc = &virtnet_stats_tx_gso_desc_qstat[0];
num = ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat);
if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO)
goto found;
}
if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) {
desc = &virtnet_stats_tx_speed_desc_qstat[0];
num = ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat);
if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED)
goto found;
}
return;
found:
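	/* Device stats are little-endian u64 fields in the reply. */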
for (i = 0; i < num; ++i) {
offset = desc[i].qstat_offset / sizeof(*ctx->data);
v = (const __le64 *)(base + desc[i].offset);
ctx->data[offset] = le64_to_cpu(*v);
}
}
/* virtnet_fill_stats - copy the stats to qstats or ethtool -S
* The stats source is the device or the driver.
*
* @vi: virtio net info
* @qid: the vq id
 * @ctx: stats ctx (initialized by virtnet_stats_ctx_init())
* @base: pointer to the device reply or the driver stats structure.
 * @drv_stats: whether @base points to the driver stats (true) or to a device reply (false)
 * @reply_type: the type of the device reply (must be zero when @drv_stats is true)
*/
static void virtnet_fill_stats(struct virtnet_info *vi, u32 qid,
struct virtnet_stats_ctx *ctx,
const u8 *base, bool drv_stats, u8 reply_type)
{
u32 queue_type, num_rx, num_tx, num_cq;
const struct virtnet_stat_desc *desc;
const u64_stats_t *v_stat;
u64 offset, bitmap;
const __le64 *v;
int i, num;
if (ctx->to_qstat)
return virtnet_fill_stats_qstat(vi, qid, ctx, base, drv_stats, reply_type);
num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ];
num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX];
num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX];
queue_type = vq_type(vi, qid);
bitmap = ctx->bitmap[queue_type];
/* Skip the rx/tx total fields placed at the head of the data array. */
offset = num_rx + num_tx;
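/* ethtool data layout: rx totals, tx totals, cvq stats, then one block
 * per rx queue followed by one block per tx queue. Each per-queue block
 * starts with the driver counters and continues with the device stat
 * groups enabled in ctx->bitmap, in the order used for the strings.
 */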
if (queue_type == VIRTNET_Q_TYPE_TX) {
offset += num_cq + num_rx * vi->curr_queue_pairs + num_tx * (qid / 2);
num = ARRAY_SIZE(virtnet_sq_stats_desc);
if (drv_stats) {
desc = &virtnet_sq_stats_desc[0];
goto drv_stats;
}
offset += num;
} else if (queue_type == VIRTNET_Q_TYPE_RX) {
offset += num_cq + num_rx * (qid / 2);
num = ARRAY_SIZE(virtnet_rq_stats_desc);
if (drv_stats) {
desc = &virtnet_rq_stats_desc[0];
goto drv_stats;
}
offset += num;
}
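/* Walk the device stat groups in the same order as the strings,
 * advancing the offset until the group matching reply_type is found.
 */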
if (bitmap & VIRTIO_NET_STATS_TYPE_CVQ) {
desc = &virtnet_stats_cvq_desc[0];
num = ARRAY_SIZE(virtnet_stats_cvq_desc);
if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_CVQ)
goto found;
offset += num;
}
if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
desc = &virtnet_stats_rx_basic_desc[0];
num = ARRAY_SIZE(virtnet_stats_rx_basic_desc);
if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC)
goto found;
offset += num;
}
if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
desc = &virtnet_stats_rx_csum_desc[0];
num = ARRAY_SIZE(virtnet_stats_rx_csum_desc);
if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM)
goto found;
offset += num;
}
if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) {
desc = &virtnet_stats_rx_speed_desc[0];
num = ARRAY_SIZE(virtnet_stats_rx_speed_desc);
if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED)
goto found;
offset += num;
}
if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
desc = &virtnet_stats_tx_basic_desc[0];
num = ARRAY_SIZE(virtnet_stats_tx_basic_desc);
if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC)
goto found;
offset += num;
}
if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
desc = &virtnet_stats_tx_gso_desc[0];
num = ARRAY_SIZE(virtnet_stats_tx_gso_desc);
if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO)
goto found;
offset += num;
}
if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) {
desc = &virtnet_stats_tx_speed_desc[0];
num = ARRAY_SIZE(virtnet_stats_tx_speed_desc);
if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED)
goto found;
offset += num;
}
return;
found:
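/* Device counters in the reply are little-endian 64-bit values. */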
for (i = 0; i < num; ++i) {
v = (const __le64 *)(base + desc[i].offset);
ctx->data[offset + i] = le64_to_cpu(*v);
}
return;
drv_stats:
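/* Driver counters are u64_stats_t values read from the per-queue stats. */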
for (i = 0; i < num; ++i) {
v_stat = (const u64_stats_t *)(base + desc[i].offset);
ctx->data[offset + i] = u64_stats_read(v_stat);
}
}
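
/* Issue a single VIRTIO_NET_CTRL_STATS_GET request and scatter each
 * header-prefixed chunk of the reply into ctx->data.
 */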
static int __virtnet_get_hw_stats(struct virtnet_info *vi,
struct virtnet_stats_ctx *ctx,
struct virtio_net_ctrl_queue_stats *req,
int req_size, void *reply, int res_size)
{
struct virtio_net_stats_reply_hdr *hdr;
struct scatterlist sgs_in, sgs_out;
void *p;
u32 qid;
int ok;
sg_init_one(&sgs_out, req, req_size);
sg_init_one(&sgs_in, reply, res_size);
ok = virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS,
VIRTIO_NET_CTRL_STATS_GET,
&sgs_out, &sgs_in);
if (!ok)
return ok;
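/* The reply is a sequence of variable-sized chunks, each beginning with
 * a virtio_net_stats_reply_hdr that carries the vq index, type and size.
 */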
for (p = reply; p - reply < res_size; p += le16_to_cpu(hdr->size)) {
hdr = p;
qid = le16_to_cpu(hdr->vq_index);
virtnet_fill_stats(vi, qid, ctx, p, false, hdr->type);
}
return 0;
}
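
/* Append a request entry for @qid if any stat group is enabled for its
 * queue type.
 */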
static void virtnet_make_stat_req(struct virtnet_info *vi,
struct virtnet_stats_ctx *ctx,
struct virtio_net_ctrl_queue_stats *req,
int qid, int *idx)
{
int qtype = vq_type(vi, qid);
u64 bitmap = ctx->bitmap[qtype];
if (!bitmap)
return;
req->stats[*idx].vq_index = cpu_to_le16(qid);
req->stats[*idx].types_bitmap[0] = cpu_to_le64(bitmap);
*idx += 1;
}
/* qid == -1: get the stats for all vqs.
 * qid >= 0: get the stats for the specified vq only. This must not be the cvq.
 */
static int virtnet_get_hw_stats(struct virtnet_info *vi,
struct virtnet_stats_ctx *ctx, int qid)
{
int qnum, i, j, res_size, qtype, last_vq, first_vq;
struct virtio_net_ctrl_queue_stats *req;
bool enable_cvq;
void *reply;
int ok;
if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS))
return 0;
if (qid == -1) {
last_vq = vi->curr_queue_pairs * 2 - 1;
first_vq = 0;
enable_cvq = true;
} else {
last_vq = qid;
first_vq = qid;
enable_cvq = false;
}
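/* First pass: count the virtqueues with stats enabled and size the
 * reply buffer.
 */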
qnum = 0;
res_size = 0;
for (i = first_vq; i <= last_vq ; ++i) {
qtype = vq_type(vi, i);
if (ctx->bitmap[qtype]) {
++qnum;
res_size += ctx->size[qtype];
}
}
if (enable_cvq && ctx->bitmap[VIRTNET_Q_TYPE_CQ]) {
res_size += ctx->size[VIRTNET_Q_TYPE_CQ];
qnum += 1;
}
req = kcalloc(qnum, sizeof(*req), GFP_KERNEL);
if (!req)
return -ENOMEM;
reply = kmalloc(res_size, GFP_KERNEL);
if (!reply) {
kfree(req);
return -ENOMEM;
}
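/* Second pass: fill the request array; the cvq uses vq index
 * vi->max_queue_pairs * 2.
 */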
j = 0;
for (i = first_vq; i <= last_vq ; ++i)
virtnet_make_stat_req(vi, ctx, req, i, &j);
if (enable_cvq)
virtnet_make_stat_req(vi, ctx, req, vi->max_queue_pairs * 2, &j);
ok = __virtnet_get_hw_stats(vi, ctx, req, sizeof(*req) * j, reply, res_size);
kfree(req);
kfree(reply);
return ok;
}
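
/* ethtool -S strings: totals first, then cvq, then per-queue rx and tx
 * names, matching the data layout filled above.
 */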
static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
{
struct virtnet_info *vi = netdev_priv(dev);
unsigned int i;
u8 *p = data;
switch (stringset) {
case ETH_SS_STATS:
/* Generate the total field names. */
virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, -1, &p);
virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, -1, &p);
virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_CQ, 0, &p);
for (i = 0; i < vi->curr_queue_pairs; ++i)
virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, i, &p);
for (i = 0; i < vi->curr_queue_pairs; ++i)
virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, i, &p);
break;
}
}
static int virtnet_get_sset_count(struct net_device *dev, int sset)
{
struct virtnet_info *vi = netdev_priv(dev);
struct virtnet_stats_ctx ctx = {0};
u32 pair_count;
switch (sset) {
case ETH_SS_STATS:
virtnet_stats_ctx_init(vi, &ctx, NULL, false);
pair_count = ctx.desc_num[VIRTNET_Q_TYPE_RX] + ctx.desc_num[VIRTNET_Q_TYPE_TX];
return pair_count + ctx.desc_num[VIRTNET_Q_TYPE_CQ] +
vi->curr_queue_pairs * pair_count;
default:
return -EOPNOTSUPP;
}
}
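/* ethtool -S handler: fill @data with the device (hardware) counters
 * queried via virtnet_get_hw_stats(), the per-queue software counters
 * sampled under u64_stats seqcount protection, and the aggregated totals.
 */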
static void virtnet_get_ethtool_stats(struct net_device *dev,
struct ethtool_stats *stats, u64 *data)
{
struct virtnet_info *vi = netdev_priv(dev);
struct virtnet_stats_ctx ctx = {0};
unsigned int start, i;
const u8 *stats_base;
virtnet_stats_ctx_init(vi, &ctx, data, false);
if (virtnet_get_hw_stats(vi, &ctx, -1))
dev_warn(&vi->dev->dev, "Failed to get hw stats.\n");
for (i = 0; i < vi->curr_queue_pairs; i++) {
struct receive_queue *rq = &vi->rq[i];
struct send_queue *sq = &vi->sq[i];
stats_base = (const u8 *)&rq->stats;
do {
start = u64_stats_fetch_begin(&rq->stats.syncp);
virtnet_fill_stats(vi, i * 2, &ctx, stats_base, true, 0);
} while (u64_stats_fetch_retry(&rq->stats.syncp, start));
stats_base = (const u8 *)&sq->stats;
do {
start = u64_stats_fetch_begin(&sq->stats.syncp);
virtnet_fill_stats(vi, i * 2 + 1, &ctx, stats_base, true, 0);
} while (u64_stats_fetch_retry(&sq->stats.syncp, start));
}
virtnet_fill_total_fields(vi, &ctx);
}
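/* Report channel counts: virtio-net only exposes combined channels,
 * one per queue pair.
 */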
static void virtnet_get_channels(struct net_device *dev,
struct ethtool_channels *channels)
{
struct virtnet_info *vi = netdev_priv(dev);
channels->combined_count = vi->curr_queue_pairs;
channels->max_combined = vi->max_queue_pairs;
channels->max_other = 0;
channels->rx_count = 0;
channels->tx_count = 0;
channels->other_count = 0;
}
static int virtnet_set_link_ksettings(struct net_device *dev,
const struct ethtool_link_ksettings *cmd)
{
struct virtnet_info *vi = netdev_priv(dev);
return ethtool_virtdev_set_link_ksettings(dev, cmd,
&vi->speed, &vi->duplex);
}
static int virtnet_get_link_ksettings(struct net_device *dev,
struct ethtool_link_ksettings *cmd)
{
struct virtnet_info *vi = netdev_priv(dev);
cmd->base.speed = vi->speed;
cmd->base.duplex = vi->duplex;
cmd->base.port = PORT_OTHER;
return 0;
}
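/* Send the global TX interrupt-coalescing parameters to the device and
 * mirror them into every send queue's cached values.
 */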
static int virtnet_send_tx_notf_coal_cmds(struct virtnet_info *vi,
struct ethtool_coalesce *ec)
{
struct virtio_net_ctrl_coal_tx *coal_tx __free(kfree) = NULL;
struct scatterlist sgs_tx;
int i;
coal_tx = kzalloc(sizeof(*coal_tx), GFP_KERNEL);
if (!coal_tx)
return -ENOMEM;
coal_tx->tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs);
coal_tx->tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames);
sg_init_one(&sgs_tx, coal_tx, sizeof(*coal_tx));
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
VIRTIO_NET_CTRL_NOTF_COAL_TX_SET,
&sgs_tx))
return -EINVAL;
vi->intr_coal_tx.max_usecs = ec->tx_coalesce_usecs;
vi->intr_coal_tx.max_packets = ec->tx_max_coalesced_frames;
for (i = 0; i < vi->max_queue_pairs; i++) {
vi->sq[i].intr_coal.max_usecs = ec->tx_coalesce_usecs;
vi->sq[i].intr_coal.max_packets = ec->tx_max_coalesced_frames;
}
return 0;
}
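/* Send the global RX interrupt-coalescing parameters to the device.
 * Turning adaptive RX coalescing (DIM) on only flips the per-queue
 * dim_enabled flags; in the other cases the parameters are sent to the
 * device and mirrored into each receive queue under its dim_lock.
 */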
static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi,
struct ethtool_coalesce *ec)
{
struct virtio_net_ctrl_coal_rx *coal_rx __free(kfree) = NULL;
bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce;
struct scatterlist sgs_rx;
int i;
if (rx_ctrl_dim_on && !virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
return -EOPNOTSUPP;
if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != vi->intr_coal_rx.max_usecs ||
ec->rx_max_coalesced_frames != vi->intr_coal_rx.max_packets))
return -EINVAL;
if (rx_ctrl_dim_on && !vi->rx_dim_enabled) {
vi->rx_dim_enabled = true;
for (i = 0; i < vi->max_queue_pairs; i++) {
mutex_lock(&vi->rq[i].dim_lock);
vi->rq[i].dim_enabled = true;
mutex_unlock(&vi->rq[i].dim_lock);
}
return 0;
}
coal_rx = kzalloc(sizeof(*coal_rx), GFP_KERNEL);
if (!coal_rx)
return -ENOMEM;
if (!rx_ctrl_dim_on && vi->rx_dim_enabled) {
vi->rx_dim_enabled = false;
for (i = 0; i < vi->max_queue_pairs; i++) {
mutex_lock(&vi->rq[i].dim_lock);
vi->rq[i].dim_enabled = false;
mutex_unlock(&vi->rq[i].dim_lock);
}
}
	/* Since the per-queue coalescing params can be set,
	 * we need to apply the new global params even if they
	 * are not updated.
	 */
coal_rx->rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs);
coal_rx->rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames);
sg_init_one(&sgs_rx, coal_rx, sizeof(*coal_rx));
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
VIRTIO_NET_CTRL_NOTF_COAL_RX_SET,
&sgs_rx))
return -EINVAL;
vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs;
vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames;
for (i = 0; i < vi->max_queue_pairs; i++) {
mutex_lock(&vi->rq[i].dim_lock);
vi->rq[i].intr_coal.max_usecs = ec->rx_coalesce_usecs;
vi->rq[i].intr_coal.max_packets = ec->rx_max_coalesced_frames;
mutex_unlock(&vi->rq[i].dim_lock);
}
return 0;
}
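/* Apply device-global coalescing settings: TX first, then RX. */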
static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi,
struct ethtool_coalesce *ec)
{
int err;
err = virtnet_send_tx_notf_coal_cmds(vi, ec);
if (err)
return err;
err = virtnet_send_rx_notf_coal_cmds(vi, ec);
if (err)
return err;
return 0;
}
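/* Per-queue RX coalescing: update the queue's DIM state and, unless DIM
 * has just been enabled, send the requested usecs/frames limits for this
 * queue to the device.
 */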
static int virtnet_send_rx_notf_coal_vq_cmds(struct virtnet_info *vi,
struct ethtool_coalesce *ec,
u16 queue)
{
bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce;
u32 max_usecs, max_packets;
bool cur_rx_dim;
int err;
mutex_lock(&vi->rq[queue].dim_lock);
cur_rx_dim = vi->rq[queue].dim_enabled;
max_usecs = vi->rq[queue].intr_coal.max_usecs;
max_packets = vi->rq[queue].intr_coal.max_packets;
if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != max_usecs ||
ec->rx_max_coalesced_frames != max_packets)) {
mutex_unlock(&vi->rq[queue].dim_lock);
return -EINVAL;
}
if (rx_ctrl_dim_on && !cur_rx_dim) {
vi->rq[queue].dim_enabled = true;
mutex_unlock(&vi->rq[queue].dim_lock);
return 0;
}
if (!rx_ctrl_dim_on && cur_rx_dim)
vi->rq[queue].dim_enabled = false;
/* If no params are updated, userspace ethtool will
* reject the modification.
*/
err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, queue,
ec->rx_coalesce_usecs,
ec->rx_max_coalesced_frames);
mutex_unlock(&vi->rq[queue].dim_lock);
return err;
}
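/* Apply per-queue coalescing settings: RX (with DIM handling) first,
 * then TX.
 */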
static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi,
struct ethtool_coalesce *ec,
u16 queue)
{
int err;
err = virtnet_send_rx_notf_coal_vq_cmds(vi, ec, queue);
if (err)
return err;
err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, queue,
ec->tx_coalesce_usecs,
ec->tx_max_coalesced_frames);
if (err)
return err;
return 0;
}
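/* Deferred DIM worker: if adaptive RX coalescing is still enabled,
 * program the moderation values suggested by net_dim for this receive
 * queue, then restart the measurement cycle.
 */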
static void virtnet_rx_dim_work(struct work_struct *work)
{
struct dim *dim = container_of(work, struct dim, work);
struct receive_queue *rq = container_of(dim,
struct receive_queue, dim);
struct virtnet_info *vi = rq->vq->vdev->priv;
struct net_device *dev = vi->dev;
struct dim_cq_moder update_moder;
int qnum, err;
qnum = rq - vi->rq;
mutex_lock(&rq->dim_lock);
if (!rq->dim_enabled)
goto out;
update_moder = net_dim_get_rx_irq_moder(dev, dim);
if (update_moder.usec != rq->intr_coal.max_usecs ||
update_moder.pkts != rq->intr_coal.max_packets) {
err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, qnum,
update_moder.usec,
update_moder.pkts);
if (err)
pr_debug("%s: Failed to send dim parameters on rxq%d\n",
dev->name, qnum);
}
out:
dim->state = DIM_START_MEASURE;
mutex_unlock(&rq->dim_lock);
}
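/* Fallback validation when no coalescing feature is negotiated:
 * usecs-based coalescing is unsupported, RX stays fixed at one frame and
 * TX frames may only toggle between 0 and 1 (NAPI weight emulation).
 */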
static int virtnet_coal_params_supported(struct ethtool_coalesce *ec)
{
/* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL
* or VIRTIO_NET_F_VQ_NOTF_COAL feature is negotiated.
*/
if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs)
return -EOPNOTSUPP;
if (ec->tx_max_coalesced_frames > 1 ||
ec->rx_max_coalesced_frames != 1)
return -EINVAL;
return 0;
}
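/* A queue's NAPI weight (TX NAPI on/off) may only change while the
 * interface is down; report whether an update is required.
 */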
static int virtnet_should_update_vq_weight(int dev_flags, int weight,
int vq_weight, bool *should_update)
{
if (weight ^ vq_weight) {
if (dev_flags & IFF_UP)
return -EBUSY;
*should_update = true;
}
return 0;
}
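/* ethtool .set_coalesce handler: check whether the TX NAPI weight may
 * change, apply the global coalescing parameters (or validate them
 * against the fallback rules), and finally update the NAPI weights.
 */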
static int virtnet_set_coalesce(struct net_device *dev,
struct ethtool_coalesce *ec,
struct kernel_ethtool_coalesce *kernel_coal,
struct netlink_ext_ack *extack)
{
struct virtnet_info *vi = netdev_priv(dev);
int ret, queue_number, napi_weight;
bool update_napi = false;
/* Can't change NAPI weight if the link is up */
napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0;
for (queue_number = 0; queue_number < vi->max_queue_pairs; queue_number++) {
ret = virtnet_should_update_vq_weight(dev->flags, napi_weight,
vi->sq[queue_number].napi.weight,
&update_napi);
if (ret)
return ret;
if (update_napi) {
			/* All queues from queue_number to vi->max_queue_pairs - 1
			 * will be updated for the sake of simplicity, even though
			 * only some of them may need it.
			 */
break;
}
}
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL))
ret = virtnet_send_notf_coal_cmds(vi, ec);
else
ret = virtnet_coal_params_supported(ec);
if (ret)
return ret;
if (update_napi) {
for (; queue_number < vi->max_queue_pairs; queue_number++)
vi->sq[queue_number].napi.weight = napi_weight;
}
return ret;
}
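/* ethtool .get_coalesce handler: report the cached global coalescing
 * parameters, or the fixed one-frame defaults when the feature is not
 * negotiated.
 */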
static int virtnet_get_coalesce(struct net_device *dev,
struct ethtool_coalesce *ec,
struct kernel_ethtool_coalesce *kernel_coal,
struct netlink_ext_ack *extack)
{
struct virtnet_info *vi = netdev_priv(dev);
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
ec->rx_coalesce_usecs = vi->intr_coal_rx.max_usecs;
ec->tx_coalesce_usecs = vi->intr_coal_tx.max_usecs;
ec->tx_max_coalesced_frames = vi->intr_coal_tx.max_packets;
ec->rx_max_coalesced_frames = vi->intr_coal_rx.max_packets;
ec->use_adaptive_rx_coalesce = vi->rx_dim_enabled;
} else {
ec->rx_max_coalesced_frames = 1;
if (vi->sq[0].napi.weight)
ec->tx_max_coalesced_frames = 1;
}
return 0;
}
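/* ethtool .set_per_queue_coalesce handler: same logic as
 * virtnet_set_coalesce(), but restricted to a single queue pair.
 */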
static int virtnet_set_per_queue_coalesce(struct net_device *dev,
u32 queue,
struct ethtool_coalesce *ec)
{
struct virtnet_info *vi = netdev_priv(dev);
int ret, napi_weight;
bool update_napi = false;
if (queue >= vi->max_queue_pairs)
return -EINVAL;
/* Can't change NAPI weight if the link is up */
napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0;
ret = virtnet_should_update_vq_weight(dev->flags, napi_weight,
vi->sq[queue].napi.weight,
&update_napi);
if (ret)
return ret;
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
ret = virtnet_send_notf_coal_vq_cmds(vi, ec, queue);
else
ret = virtnet_coal_params_supported(ec);
if (ret)
return ret;
if (update_napi)
vi->sq[queue].napi.weight = napi_weight;
return 0;
}
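/* ethtool .get_per_queue_coalesce handler: report the chosen queue
 * pair's cached coalescing parameters.
 */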
static int virtnet_get_per_queue_coalesce(struct net_device *dev,
u32 queue,
struct ethtool_coalesce *ec)
{
struct virtnet_info *vi = netdev_priv(dev);
if (queue >= vi->max_queue_pairs)
return -EINVAL;
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) {
mutex_lock(&vi->rq[queue].dim_lock);
ec->rx_coalesce_usecs = vi->rq[queue].intr_coal.max_usecs;
ec->tx_coalesce_usecs = vi->sq[queue].intr_coal.max_usecs;
ec->tx_max_coalesced_frames = vi->sq[queue].intr_coal.max_packets;
ec->rx_max_coalesced_frames = vi->rq[queue].intr_coal.max_packets;
ec->use_adaptive_rx_coalesce = vi->rq[queue].dim_enabled;
mutex_unlock(&vi->rq[queue].dim_lock);
} else {
ec->rx_max_coalesced_frames = 1;
if (vi->sq[queue].napi.weight)
ec->tx_max_coalesced_frames = 1;
}
return 0;
}
static void virtnet_init_settings(struct net_device *dev)
{
struct virtnet_info *vi = netdev_priv(dev);
vi->speed = SPEED_UNKNOWN;
vi->duplex = DUPLEX_UNKNOWN;
}
static u32 virtnet_get_rxfh_key_size(struct net_device *dev)
{
return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size;
}
static u32 virtnet_get_rxfh_indir_size(struct net_device *dev)
{
return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size;
}
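/* Copy the RSS indirection table, hash key and hash function (always
 * Toeplitz) to ethtool.
 */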
static int virtnet_get_rxfh(struct net_device *dev,
struct ethtool_rxfh_param *rxfh)
{
struct virtnet_info *vi = netdev_priv(dev);
int i;
if (rxfh->indir) {
for (i = 0; i < vi->rss_indir_table_size; ++i)
rxfh->indir[i] = vi->rss.indirection_table[i];
}
if (rxfh->key)
memcpy(rxfh->key, vi->rss.key, vi->rss_key_size);
rxfh->hfunc = ETH_RSS_HASH_TOP;
return 0;
}
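/* Validate and apply a new RSS configuration from ethtool: only the
 * Toeplitz hash is accepted, and the indirection table or key may only
 * be changed when the corresponding device feature is negotiated.
 */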
static int virtnet_set_rxfh(struct net_device *dev,
struct ethtool_rxfh_param *rxfh,
struct netlink_ext_ack *extack)
{
struct virtnet_info *vi = netdev_priv(dev);
bool update = false;
int i;
if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE &&
rxfh->hfunc != ETH_RSS_HASH_TOP)
return -EOPNOTSUPP;
if (rxfh->indir) {
if (!vi->has_rss)
return -EOPNOTSUPP;
for (i = 0; i < vi->rss_indir_table_size; ++i)
vi->rss.indirection_table[i] = rxfh->indir[i];
update = true;
}
if (rxfh->key) {
	/* If either _F_HASH_REPORT or _F_RSS is negotiated, the
* device provides hash calculation capabilities, that is,
* hash_key is configured.
*/
if (!vi->has_rss && !vi->has_rss_hash_report)
return -EOPNOTSUPP;
memcpy(vi->rss.key, rxfh->key, vi->rss_key_size);
update = true;
}
if (update)
virtnet_commit_rss_command(vi);
return 0;
}
static int virtnet_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs)
{
struct virtnet_info *vi = netdev_priv(dev);
int rc = 0;
switch (info->cmd) {
case ETHTOOL_GRXRINGS:
info->data = vi->curr_queue_pairs;
break;
case ETHTOOL_GRXFH:
virtnet_get_hashflow(vi, info);
break;
default:
rc = -EOPNOTSUPP;
}
return rc;
}
static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info)
{
struct virtnet_info *vi = netdev_priv(dev);
int rc = 0;
switch (info->cmd) {
case ETHTOOL_SRXFH:
if (!virtnet_set_hashflow(vi, info))
rc = -EINVAL;
break;
default:
rc = -EOPNOTSUPP;
}
return rc;
}
static const struct ethtool_ops virtnet_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES |
ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE_RX,
.get_drvinfo = virtnet_get_drvinfo,
.get_link = ethtool_op_get_link,
.get_ringparam = virtnet_get_ringparam,
.set_ringparam = virtnet_set_ringparam,
.get_strings = virtnet_get_strings,
.get_sset_count = virtnet_get_sset_count,
.get_ethtool_stats = virtnet_get_ethtool_stats,
.set_channels = virtnet_set_channels,
.get_channels = virtnet_get_channels,
.get_ts_info = ethtool_op_get_ts_info,
.get_link_ksettings = virtnet_get_link_ksettings,
.set_link_ksettings = virtnet_set_link_ksettings,
.set_coalesce = virtnet_set_coalesce,
.get_coalesce = virtnet_get_coalesce,
.set_per_queue_coalesce = virtnet_set_per_queue_coalesce,
.get_per_queue_coalesce = virtnet_get_per_queue_coalesce,
.get_rxfh_key_size = virtnet_get_rxfh_key_size,
.get_rxfh_indir_size = virtnet_get_rxfh_indir_size,
.get_rxfh = virtnet_get_rxfh,
.set_rxfh = virtnet_set_rxfh,
.get_rxnfc = virtnet_get_rxnfc,
.set_rxnfc = virtnet_set_rxnfc,
};
static void virtnet_get_queue_stats_rx(struct net_device *dev, int i,
struct netdev_queue_stats_rx *stats)
{
struct virtnet_info *vi = netdev_priv(dev);
struct receive_queue *rq = &vi->rq[i];
struct virtnet_stats_ctx ctx = {0};
virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true);
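	/* virtio-net lays its virtqueues out as rx0, tx0, rx1, tx1, ...,
	 * so rx queue i maps to vq index i * 2 (and tx queue i to i * 2 + 1).
	 */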
virtnet_get_hw_stats(vi, &ctx, i * 2);
virtnet_fill_stats(vi, i * 2, &ctx, (void *)&rq->stats, true, 0);
}
static void virtnet_get_queue_stats_tx(struct net_device *dev, int i,
struct netdev_queue_stats_tx *stats)
{
struct virtnet_info *vi = netdev_priv(dev);
struct send_queue *sq = &vi->sq[i];
struct virtnet_stats_ctx ctx = {0};
virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true);
virtnet_get_hw_stats(vi, &ctx, i * 2 + 1);
virtnet_fill_stats(vi, i * 2 + 1, &ctx, (void *)&sq->stats, true, 0);
}
static void virtnet_get_base_stats(struct net_device *dev,
struct netdev_queue_stats_rx *rx,
struct netdev_queue_stats_tx *tx)
{
struct virtnet_info *vi = netdev_priv(dev);
	/* The queue stats of virtio-net are never reset, so report 0 for
	 * the base values here.
	 */
rx->bytes = 0;
rx->packets = 0;
if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
rx->hw_drops = 0;
rx->hw_drop_overruns = 0;
}
if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
rx->csum_unnecessary = 0;
rx->csum_none = 0;
rx->csum_bad = 0;
}
if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) {
rx->hw_gro_packets = 0;
rx->hw_gro_bytes = 0;
rx->hw_gro_wire_packets = 0;
rx->hw_gro_wire_bytes = 0;
}
if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED)
rx->hw_drop_ratelimits = 0;
tx->bytes = 0;
tx->packets = 0;
tx->stop = 0;
tx->wake = 0;
if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
tx->hw_drops = 0;
tx->hw_drop_errors = 0;
}
if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) {
tx->csum_none = 0;
tx->needs_csum = 0;
}
if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
tx->hw_gso_packets = 0;
tx->hw_gso_bytes = 0;
tx->hw_gso_wire_packets = 0;
tx->hw_gso_wire_bytes = 0;
}
if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED)
tx->hw_drop_ratelimits = 0;
}
static const struct netdev_stat_ops virtnet_stat_ops = {
.get_queue_stats_rx = virtnet_get_queue_stats_rx,
.get_queue_stats_tx = virtnet_get_queue_stats_tx,
.get_base_stats = virtnet_get_base_stats,
};
static void virtnet_freeze_down(struct virtio_device *vdev)
{
struct virtnet_info *vi = vdev->priv;
/* Make sure no work handler is accessing the device */
flush_work(&vi->config_work);
disable_rx_mode_work(vi);
flush_work(&vi->rx_mode_work);
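	/* Detach the device under the tx lock so the transmit path cannot
	 * race with the device being frozen.
	 */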
netif_tx_lock_bh(vi->dev);
netif_device_detach(vi->dev);
netif_tx_unlock_bh(vi->dev);
if (netif_running(vi->dev))
virtnet_close(vi->dev);
}
static int init_vqs(struct virtnet_info *vi);
static int virtnet_restore_up(struct virtio_device *vdev)
{
struct virtnet_info *vi = vdev->priv;
int err;
err = init_vqs(vi);
if (err)
return err;
virtio_device_ready(vdev);
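	/* The device is usable again: re-enable the deferred refill and
	 * rx-mode work before (re)opening the interface.
	 */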
enable_delayed_refill(vi);
enable_rx_mode_work(vi);
if (netif_running(vi->dev)) {
err = virtnet_open(vi->dev);
if (err)
return err;
}
netif_tx_lock_bh(vi->dev);
netif_device_attach(vi->dev);
netif_tx_unlock_bh(vi->dev);
return err;
}
static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads)
{
__virtio64 *_offloads __free(kfree) = NULL;
struct scatterlist sg;
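	/* The command payload is handed to the control virtqueue via a
	 * scatterlist, so it must not live on a (possibly vmalloc'ed) stack;
	 * allocate it here and let __free(kfree) release it on return.
	 */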
_offloads = kzalloc(sizeof(*_offloads), GFP_KERNEL);
if (!_offloads)
return -ENOMEM;
*_offloads = cpu_to_virtio64(vi->vdev, offloads);
sg_init_one(&sg, _offloads, sizeof(*_offloads));
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS,
VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) {
		dev_warn(&vi->dev->dev, "Failed to set guest offloads.\n");
return -EINVAL;
}
return 0;
}
static int virtnet_clear_guest_offloads(struct virtnet_info *vi)
{
u64 offloads = 0;
if (!vi->guest_offloads)
return 0;
return virtnet_set_guest_offloads(vi, offloads);
}
static int virtnet_restore_guest_offloads(struct virtnet_info *vi)
{
u64 offloads = vi->guest_offloads;
if (!vi->guest_offloads)
return 0;
return virtnet_set_guest_offloads(vi, offloads);
}
static int virtnet_rq_bind_xsk_pool(struct virtnet_info *vi, struct receive_queue *rq,
struct xsk_buff_pool *pool)
{
int err, qindex;
qindex = rq - vi->rq;
if (pool) {
err = xdp_rxq_info_reg(&rq->xsk_rxq_info, vi->dev, qindex, rq->napi.napi_id);
if (err < 0)
return err;
err = xdp_rxq_info_reg_mem_model(&rq->xsk_rxq_info,
MEM_TYPE_XSK_BUFF_POOL, NULL);
if (err < 0)
goto unreg;
xsk_pool_set_rxq_info(pool, &rq->xsk_rxq_info);
}
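	/* Pause rx processing and reset the vq so all previously posted
	 * buffers are returned, switch the queue to (or away from) the xsk
	 * pool, then resume.
	 */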
virtnet_rx_pause(vi, rq);
err = virtqueue_reset(rq->vq, virtnet_rq_unmap_free_buf);
if (err) {
netdev_err(vi->dev, "reset rx fail: rx queue index: %d err: %d\n", qindex, err);
pool = NULL;
}
rq->xsk_pool = pool;
virtnet_rx_resume(vi, rq);
if (pool)
return 0;
unreg:
xdp_rxq_info_unreg(&rq->xsk_rxq_info);
return err;
}
static int virtnet_xsk_pool_enable(struct net_device *dev,
struct xsk_buff_pool *pool,
u16 qid)
{
struct virtnet_info *vi = netdev_priv(dev);
struct receive_queue *rq;
struct device *dma_dev;
struct send_queue *sq;
int err, size;
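	/* The virtio-net header is placed in the xsk headroom, so the pool
	 * must reserve at least hdr_len bytes of headroom.
	 */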
if (vi->hdr_len > xsk_pool_get_headroom(pool))
return -EINVAL;
	/* In big_packets mode, XDP cannot work, so there is no need to
	 * set up the rq's xsk state.
	 */
if (vi->big_packets && !vi->mergeable_rx_bufs)
return -ENOENT;
if (qid >= vi->curr_queue_pairs)
return -EINVAL;
sq = &vi->sq[qid];
rq = &vi->rq[qid];
	/* xsk assumes that tx and rx share the same DMA device: AF_XDP may
	 * receive into a buffer on the rx side and reuse that same buffer to
	 * transmit on the tx side, so the DMA dev of sq and rq must be the
	 * same one.
	 *
	 * But vq->dma_dev allows each vq to have its own DMA dev, so check
	 * that the DMA devs of the rq and sq are the same device.
	 */
if (virtqueue_dma_dev(rq->vq) != virtqueue_dma_dev(sq->vq))
return -EINVAL;
dma_dev = virtqueue_dma_dev(rq->vq);
if (!dma_dev)
return -EINVAL;
size = virtqueue_get_vring_size(rq->vq);
rq->xsk_buffs = kvcalloc(size, sizeof(*rq->xsk_buffs), GFP_KERNEL);
if (!rq->xsk_buffs)
return -ENOMEM;
err = xsk_pool_dma_map(pool, dma_dev, 0);
if (err)
goto err_xsk_map;
err = virtnet_rq_bind_xsk_pool(vi, rq, pool);
if (err)
goto err_rq;
return 0;
err_rq:
xsk_pool_dma_unmap(pool, 0);
err_xsk_map:
return err;
}
static int virtnet_xsk_pool_disable(struct net_device *dev, u16 qid)
{
struct virtnet_info *vi = netdev_priv(dev);
struct xsk_buff_pool *pool;
struct receive_queue *rq;
int err;
if (qid >= vi->curr_queue_pairs)
return -EINVAL;
rq = &vi->rq[qid];
pool = rq->xsk_pool;
err = virtnet_rq_bind_xsk_pool(vi, rq, NULL);
xsk_pool_dma_unmap(pool, 0);
kvfree(rq->xsk_buffs);
return err;
}
static int virtnet_xsk_pool_setup(struct net_device *dev, struct netdev_bpf *xdp)
{
if (xdp->xsk.pool)
return virtnet_xsk_pool_enable(dev, xdp->xsk.pool,
xdp->xsk.queue_id);
else
return virtnet_xsk_pool_disable(dev, xdp->xsk.queue_id);
}
static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
struct netlink_ext_ack *extack)
{
unsigned int room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM +
sizeof(struct skb_shared_info));
unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN;
struct virtnet_info *vi = netdev_priv(dev);
struct bpf_prog *old_prog;
u16 xdp_qp = 0, curr_qp;
int i, err;
if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)
&& (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6))) {
NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first");
return -EOPNOTSUPP;
}
if (vi->mergeable_rx_bufs && !vi->any_header_sg) {
NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required");
return -EINVAL;
}
if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) {
NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags");
netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz);
return -EINVAL;
}
curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs;
if (prog)
xdp_qp = nr_cpu_ids;
/* XDP requires extra queues for XDP_TX */
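	/* One extra tx queue per possible CPU lets XDP_TX/XDP_REDIRECT
	 * transmit without locking; if the device cannot provide that many
	 * queues, fall back to a shared, locked tx mode below.
	 */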
if (curr_qp + xdp_qp > vi->max_queue_pairs) {
netdev_warn_once(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n",
curr_qp + xdp_qp, vi->max_queue_pairs);
xdp_qp = 0;
}
old_prog = rtnl_dereference(vi->rq[0].xdp_prog);
if (!prog && !old_prog)
return 0;
if (prog)
bpf_prog_add(prog, vi->max_queue_pairs - 1);
/* Make sure NAPI is not using any XDP TX queues for RX. */
if (netif_running(dev)) {
for (i = 0; i < vi->max_queue_pairs; i++) {
napi_disable(&vi->rq[i].napi);
virtnet_napi_tx_disable(&vi->sq[i].napi);
}
}
if (!prog) {
for (i = 0; i < vi->max_queue_pairs; i++) {
rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
if (i == 0)
virtnet_restore_guest_offloads(vi);
}
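		/* Make sure no NAPI/XDP path still holds an RCU reference to
		 * the old program before it is released below.
		 */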
synchronize_net();
}
err = virtnet_set_queues(vi, curr_qp + xdp_qp);
if (err)
goto err;
netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
vi->xdp_queue_pairs = xdp_qp;
if (prog) {
vi->xdp_enabled = true;
for (i = 0; i < vi->max_queue_pairs; i++) {
rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
if (i == 0 && !old_prog)
virtnet_clear_guest_offloads(vi);
}
if (!old_prog)
xdp_features_set_redirect_target(dev, true);
} else {
xdp_features_clear_redirect_target(dev);
vi->xdp_enabled = false;
}
for (i = 0; i < vi->max_queue_pairs; i++) {
if (old_prog)
bpf_prog_put(old_prog);
if (netif_running(dev)) {
virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
virtnet_napi_tx_enable(vi, vi->sq[i].vq,
&vi->sq[i].napi);
}
}
return 0;
err:
if (!prog) {
virtnet_clear_guest_offloads(vi);
for (i = 0; i < vi->max_queue_pairs; i++)
rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog);
}
if (netif_running(dev)) {
for (i = 0; i < vi->max_queue_pairs; i++) {
virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
virtnet_napi_tx_enable(vi, vi->sq[i].vq,
&vi->sq[i].napi);
}
}
if (prog)
bpf_prog_sub(prog, vi->max_queue_pairs - 1);
return err;
}
static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
switch (xdp->command) {
case XDP_SETUP_PROG:
return virtnet_xdp_set(dev, xdp->prog, xdp->extack);
case XDP_SETUP_XSK_POOL:
return virtnet_xsk_pool_setup(dev, xdp);
default:
return -EINVAL;
}
}
static int virtnet_get_phys_port_name(struct net_device *dev, char *buf,
size_t len)
{
struct virtnet_info *vi = netdev_priv(dev);
int ret;
if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY))
return -EOPNOTSUPP;
ret = snprintf(buf, len, "sby");
if (ret >= len)
return -EOPNOTSUPP;
return 0;
}
static int virtnet_set_features(struct net_device *dev,
netdev_features_t features)
{
struct virtnet_info *vi = netdev_priv(dev);
u64 offloads;
int err;
if ((dev->features ^ features) & NETIF_F_GRO_HW) {
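		/* Hardware GRO is implemented via the guest offloads and
		 * cannot be toggled while an XDP program is attached, since
		 * XDP requires those offloads to stay disabled.
		 */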
if (vi->xdp_enabled)
return -EBUSY;
if (features & NETIF_F_GRO_HW)
offloads = vi->guest_offloads_capable;
else
offloads = vi->guest_offloads_capable &
~GUEST_OFFLOAD_GRO_HW_MASK;
err = virtnet_set_guest_offloads(vi, offloads);
if (err)
return err;
vi->guest_offloads = offloads;
}
if ((dev->features ^ features) & NETIF_F_RXHASH) {
if (features & NETIF_F_RXHASH)
vi->rss.hash_types = vi->rss_hash_types_saved;
else
vi->rss.hash_types = VIRTIO_NET_HASH_REPORT_NONE;
if (!virtnet_commit_rss_command(vi))
return -EINVAL;
}
return 0;
}
static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct virtnet_info *priv = netdev_priv(dev);
struct send_queue *sq = &priv->sq[txqueue];
struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue);
u64_stats_update_begin(&sq->stats.syncp);
u64_stats_inc(&sq->stats.tx_timeouts);
u64_stats_update_end(&sq->stats.syncp);
netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n",
txqueue, sq->name, sq->vq->index, sq->vq->name,
jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start)));
}
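/* Register net_dim interrupt moderation for the receive side: usec- and
 * packet-based coalescing, starting in the EQE period mode, with
 * virtnet_rx_dim_work applying new profiles; each RX queue's dim state is
 * then seeded via net_dim_setting().
 */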
static int virtnet_init_irq_moder(struct virtnet_info *vi)
{
u8 profile_flags = 0, coal_flags = 0;
int ret, i;
profile_flags |= DIM_PROFILE_RX;
coal_flags |= DIM_COALESCE_USEC | DIM_COALESCE_PKTS;
ret = net_dim_init_irq_moder(vi->dev, profile_flags, coal_flags,
DIM_CQ_PERIOD_MODE_START_FROM_EQE,
0, virtnet_rx_dim_work, NULL);
if (ret)
return ret;
for (i = 0; i < vi->max_queue_pairs; i++)
net_dim_setting(vi->dev, &vi->rq[i].dim, false);
return 0;
}
static void virtnet_free_irq_moder(struct virtnet_info *vi)
{
if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
return;
rtnl_lock();
net_dim_free_irq_moder(vi->dev);
rtnl_unlock();
}
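/* net_device_ops for virtio-net, including the XDP, AF_XDP wakeup and VLAN
 * filter entry points.
 */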
static const struct net_device_ops virtnet_netdev = {
.ndo_open = virtnet_open,
.ndo_stop = virtnet_close,
.ndo_start_xmit = start_xmit,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = virtnet_set_mac_address,
.ndo_set_rx_mode = virtnet_set_rx_mode,
.ndo_get_stats64 = virtnet_stats,
.ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
.ndo_bpf = virtnet_xdp,
.ndo_xdp_xmit = virtnet_xdp_xmit,
.ndo_xsk_wakeup = virtnet_xsk_wakeup,
.ndo_features_check = passthru_features_check,
.ndo_get_phys_port_name = virtnet_get_phys_port_name,
.ndo_set_features = virtnet_set_features,
.ndo_tx_timeout = virtnet_tx_timeout,
};
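/* Deferred handler for config-space change interrupts: ack link announce
 * requests by notifying peers, then track VIRTIO_NET_S_LINK_UP and toggle
 * the carrier and TX queue state to match.
 */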
static void virtnet_config_changed_work(struct work_struct *work)
{
struct virtnet_info *vi =
container_of(work, struct virtnet_info, config_work);
u16 v;
if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS,
struct virtio_net_config, status, &v) < 0)
return;
if (v & VIRTIO_NET_S_ANNOUNCE) {
netdev_notify_peers(vi->dev);
virtnet_ack_link_announce(vi);
}
/* Ignore unknown (future) status bits */
v &= VIRTIO_NET_S_LINK_UP;
if (vi->status == v)
return;
vi->status = v;
if (vi->status & VIRTIO_NET_S_LINK_UP) {
virtnet_update_settings(vi);
netif_carrier_on(vi->dev);
netif_tx_wake_all_queues(vi->dev);
} else {
netif_carrier_off(vi->dev);
netif_tx_stop_all_queues(vi->dev);
}
}
static void virtnet_config_changed(struct virtio_device *vdev)
{
struct virtnet_info *vi = vdev->priv;
schedule_work(&vi->config_work);
}
static void virtnet_free_queues(struct virtnet_info *vi)
{
int i;
for (i = 0; i < vi->max_queue_pairs; i++) {
__netif_napi_del(&vi->rq[i].napi);
__netif_napi_del(&vi->sq[i].napi);
}
/* We called __netif_napi_del(),
* we need to respect an RCU grace period before freeing vi->rq
*/
synchronize_net();
kfree(vi->rq);
kfree(vi->sq);
kfree(vi->ctrl);
}
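/* Release pages still queued for big-packet receive and drop the XDP program
 * reference on every RX queue.  Callers must hold the RTNL lock, hence the
 * rtnl_dereference(); free_receive_bufs() below takes it for them.
 */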
static void _free_receive_bufs(struct virtnet_info *vi)
{
struct bpf_prog *old_prog;
int i;
for (i = 0; i < vi->max_queue_pairs; i++) {
while (vi->rq[i].pages)
__free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL);
if (old_prog)
bpf_prog_put(old_prog);
}
}
static void free_receive_bufs(struct virtnet_info *vi)
{
rtnl_lock();
_free_receive_bufs(vi);
rtnl_unlock();
}
static void free_receive_page_frags(struct virtnet_info *vi)
{
int i;
for (i = 0; i < vi->max_queue_pairs; i++)
if (vi->rq[i].alloc_frag.page) {
if (vi->rq[i].do_dma && vi->rq[i].last_dma)
virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0);
put_page(vi->rq[i].alloc_frag.page);
}
}
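/* A buffer detached from a send virtqueue is either a regular skb or an XDP
 * frame (distinguished by pointer tagging); free it with the matching helper.
 */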
static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf)
{
if (!is_xdp_frame(buf))
dev_kfree_skb(buf);
else
xdp_return_frame(ptr_to_xdp(buf));
}
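/* Detach and free every buffer still owned by the send and receive
 * virtqueues, e.g. on device removal or before the rings are re-created.
 * cond_resched() keeps large rings from monopolising the CPU.
 */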
static void free_unused_bufs(struct virtnet_info *vi)
{
void *buf;
int i;
for (i = 0; i < vi->max_queue_pairs; i++) {
struct virtqueue *vq = vi->sq[i].vq;
while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
virtnet_sq_free_unused_buf(vq, buf);
cond_resched();
}
for (i = 0; i < vi->max_queue_pairs; i++) {
struct virtqueue *vq = vi->rq[i].vq;
while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
virtnet_rq_unmap_free_buf(vq, buf);
cond_resched();
}
}
static void virtnet_del_vqs(struct virtnet_info *vi)
{
struct virtio_device *vdev = vi->vdev;
virtnet_clean_affinity(vi);
vdev->config->del_vqs(vdev);
virtnet_free_queues(vi);
}
/* How large should a single buffer be so a queue full of these can fit at
* least one full packet?
* Logic below assumes the mergeable buffer header is used.
*/
static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq)
{
const unsigned int hdr_len = vi->hdr_len;
unsigned int rq_size = virtqueue_get_vring_size(vq);
unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu;
unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len;
unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size);
return max(max(min_buf_len, hdr_len) - hdr_len,
(unsigned int)GOOD_PACKET_LEN);
}
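/* Worked example with illustrative numbers (assuming the 12-byte mergeable
 * header, a 256-entry ring and a 65535-byte maximum packet): buf_len is
 * 12 + 14 + 4 + 65535 = 65565, DIV_ROUND_UP(65565, 256) = 257, and
 * subtracting the header leaves 245 -- well below GOOD_PACKET_LEN, which
 * therefore becomes the minimum buffer length.  Only very small rings push
 * the result above that floor.
 */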
static int virtnet_find_vqs(struct virtnet_info *vi)
{
struct virtqueue_info *vqs_info;
struct virtqueue **vqs;
int ret = -ENOMEM;
int total_vqs;
bool *ctx;
u16 i;
/* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
* possible N-1 RX/TX queue pairs used in multiqueue mode, followed by
* possible control vq.
*/
total_vqs = vi->max_queue_pairs * 2 +
virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ);
/* Allocate space for find_vqs parameters */
vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL);
if (!vqs)
goto err_vq;
vqs_info = kcalloc(total_vqs, sizeof(*vqs_info), GFP_KERNEL);
if (!vqs_info)
goto err_vqs_info;
if (!vi->big_packets || vi->mergeable_rx_bufs) {
ctx = kcalloc(total_vqs, sizeof(*ctx), GFP_KERNEL);
if (!ctx)
goto err_ctx;
} else {
ctx = NULL;
}
/* Parameters for control virtqueue, if any */
if (vi->has_cvq) {
vqs_info[total_vqs - 1].name = "control";
}
/* Allocate/initialize parameters for send/receive virtqueues */
for (i = 0; i < vi->max_queue_pairs; i++) {
vqs_info[rxq2vq(i)].callback = skb_recv_done;
vqs_info[txq2vq(i)].callback = skb_xmit_done;
sprintf(vi->rq[i].name, "input.%u", i);
sprintf(vi->sq[i].name, "output.%u", i);
vqs_info[rxq2vq(i)].name = vi->rq[i].name;
vqs_info[txq2vq(i)].name = vi->sq[i].name;
if (ctx)
vqs_info[rxq2vq(i)].ctx = true;
}
ret = virtio_find_vqs(vi->vdev, total_vqs, vqs, vqs_info, NULL);
if (ret)
goto err_find;
if (vi->has_cvq) {
vi->cvq = vqs[total_vqs - 1];
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
}
for (i = 0; i < vi->max_queue_pairs; i++) {
vi->rq[i].vq = vqs[rxq2vq(i)];
vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq);
vi->sq[i].vq = vqs[txq2vq(i)];
}
/* Success: fall through with ret == 0; the temporary arrays below are freed in either case. */
err_find:
kfree(ctx);
err_ctx:
kfree(vqs_info);
err_vqs_info:
kfree(vqs);
err_vq:
return ret;
}
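/* Allocate the per-queue software state: the send_queue/receive_queue arrays,
 * the optional control buffer, the refill work and one NAPI instance per
 * direction and queue pair.
 */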
static int virtnet_alloc_queues(struct virtnet_info *vi)
{
int i;
if (vi->has_cvq) {
vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL);
if (!vi->ctrl)
goto err_ctrl;
} else {
vi->ctrl = NULL;
}
vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL);
if (!vi->sq)
goto err_sq;
vi->rq = kcalloc(vi->max_queue_pairs, sizeof(*vi->rq), GFP_KERNEL);
if (!vi->rq)
goto err_rq;
INIT_DELAYED_WORK(&vi->refill, refill_work);
for (i = 0; i < vi->max_queue_pairs; i++) {
vi->rq[i].pages = NULL;
netif_napi_add_weight(vi->dev, &vi->rq[i].napi, virtnet_poll,
napi_weight);
netif_napi_add_tx_weight(vi->dev, &vi->sq[i].napi,
virtnet_poll_tx,
napi_tx ? napi_weight : 0);
sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len);
sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
u64_stats_init(&vi->rq[i].stats.syncp);
u64_stats_init(&vi->sq[i].stats.syncp);
mutex_init(&vi->rq[i].dim_lock);
}
return 0;
err_rq:
kfree(vi->sq);
err_sq:
kfree(vi->ctrl);
err_ctrl:
return -ENOMEM;
}
static int init_vqs(struct virtnet_info *vi)
{
int ret;
/* Allocate send & receive queues */
ret = virtnet_alloc_queues(vi);
if (ret)
goto err;
ret = virtnet_find_vqs(vi);
if (ret)
goto err_free;
cpus_read_lock();
virtnet_set_affinity(vi);
cpus_read_unlock();
return 0;
err_free:
virtnet_free_queues(vi);
err:
return ret;
}
#ifdef CONFIG_SYSFS
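/* Per-RX-queue sysfs attribute (group "virtio_net"): report the current
 * EWMA-based estimate of the mergeable receive buffer size for that queue.
 */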
static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
char *buf)
{
struct virtnet_info *vi = netdev_priv(queue->dev);
unsigned int queue_index = get_netdev_rx_queue_index(queue);
unsigned int headroom = virtnet_get_headroom(vi);
unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
struct ewma_pkt_len *avg;
BUG_ON(queue_index >= vi->max_queue_pairs);
avg = &vi->rq[queue_index].mrg_avg_pkt_len;
return sprintf(buf, "%u\n",
get_mergeable_buf_len(&vi->rq[queue_index], avg,
SKB_DATA_ALIGN(headroom + tailroom)));
}
static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
__ATTR_RO(mergeable_rx_buffer_size);
static struct attribute *virtio_net_mrg_rx_attrs[] = {
&mergeable_rx_buffer_size_attribute.attr,
NULL
};
static const struct attribute_group virtio_net_mrg_rx_group = {
.name = "virtio_net",
.attrs = virtio_net_mrg_rx_attrs
};
#endif
static bool virtnet_fail_on_feature(struct virtio_device *vdev,
unsigned int fbit,
const char *fname, const char *dname)
{
if (!virtio_has_feature(vdev, fbit))
return false;
dev_err(&vdev->dev, "device advertises feature %s but not %s",
fname, dname);
return true;
}
#define VIRTNET_FAIL_ON(vdev, fbit, dbit) \
virtnet_fail_on_feature(vdev, fbit, #fbit, dbit)
static bool virtnet_validate_features(struct virtio_device *vdev)
{
if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) &&
(VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX,
"VIRTIO_NET_F_CTRL_VQ") ||
VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN,
"VIRTIO_NET_F_CTRL_VQ") ||
VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE,
"VIRTIO_NET_F_CTRL_VQ") ||
VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") ||
VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR,
"VIRTIO_NET_F_CTRL_VQ") ||
VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS,
"VIRTIO_NET_F_CTRL_VQ") ||
VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT,
"VIRTIO_NET_F_CTRL_VQ") ||
VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL,
"VIRTIO_NET_F_CTRL_VQ") ||
VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_VQ_NOTF_COAL,
"VIRTIO_NET_F_CTRL_VQ"))) {
return false;
}
return true;
}
#define MIN_MTU ETH_MIN_MTU
#define MAX_MTU ETH_MAX_MTU
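/* Pre-negotiation validation: require config space access, reject feature
 * sets that depend on a missing control virtqueue, and clear features (MTU,
 * STANDBY) whose prerequisites are not satisfied.
 */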
static int virtnet_validate(struct virtio_device *vdev)
{
if (!vdev->config->get) {
dev_err(&vdev->dev, "%s failure: config access disabled\n",
__func__);
return -EINVAL;
}
if (!virtnet_validate_features(vdev))
return -EINVAL;
if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
int mtu = virtio_cread16(vdev,
offsetof(struct virtio_net_config,
mtu));
if (mtu < MIN_MTU)
__virtio_clear_bit(vdev, VIRTIO_NET_F_MTU);
}
if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY) &&
!virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
dev_warn(&vdev->dev, "device advertises feature VIRTIO_NET_F_STANDBY but not VIRTIO_NET_F_MAC, disabling standby");
__virtio_clear_bit(vdev, VIRTIO_NET_F_STANDBY);
}
return 0;
}
static bool virtnet_check_guest_gso(const struct virtnet_info *vi)
{
return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
(virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) &&
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6));
}
static void virtnet_set_big_packets(struct virtnet_info *vi, const int mtu)
{
bool guest_gso = virtnet_check_guest_gso(vi);
/* If the device can receive ANY guest GSO packets, regardless of mtu,
 * allocate buffers of the maximum size; otherwise limit them to just
 * enough fragments to hold an mtu-sized packet.
 */
if (mtu > ETH_DATA_LEN || guest_gso) {
vi->big_packets = true;
vi->big_packets_num_skbfrags = guest_gso ? MAX_SKB_FRAGS : DIV_ROUND_UP(mtu, PAGE_SIZE);
}
}
#define VIRTIO_NET_HASH_REPORT_MAX_TABLE 10
static enum xdp_rss_hash_type
virtnet_xdp_rss_type[VIRTIO_NET_HASH_REPORT_MAX_TABLE] = {
[VIRTIO_NET_HASH_REPORT_NONE] = XDP_RSS_TYPE_NONE,
[VIRTIO_NET_HASH_REPORT_IPv4] = XDP_RSS_TYPE_L3_IPV4,
[VIRTIO_NET_HASH_REPORT_TCPv4] = XDP_RSS_TYPE_L4_IPV4_TCP,
[VIRTIO_NET_HASH_REPORT_UDPv4] = XDP_RSS_TYPE_L4_IPV4_UDP,
[VIRTIO_NET_HASH_REPORT_IPv6] = XDP_RSS_TYPE_L3_IPV6,
[VIRTIO_NET_HASH_REPORT_TCPv6] = XDP_RSS_TYPE_L4_IPV6_TCP,
[VIRTIO_NET_HASH_REPORT_UDPv6] = XDP_RSS_TYPE_L4_IPV6_UDP,
[VIRTIO_NET_HASH_REPORT_IPv6_EX] = XDP_RSS_TYPE_L3_IPV6_EX,
[VIRTIO_NET_HASH_REPORT_TCPv6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX,
[VIRTIO_NET_HASH_REPORT_UDPv6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX
};
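/* XDP metadata hook: map the hash type the device reported in
 * virtio_net_hdr_v1_hash onto the generic XDP RSS types and hand the 32-bit
 * hash value back to the BPF program (only when NETIF_F_RXHASH is enabled).
 */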
static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash,
enum xdp_rss_hash_type *rss_type)
{
const struct xdp_buff *xdp = (void *)_ctx;
struct virtio_net_hdr_v1_hash *hdr_hash;
struct virtnet_info *vi;
u16 hash_report;
if (!(xdp->rxq->dev->features & NETIF_F_RXHASH))
return -ENODATA;
vi = netdev_priv(xdp->rxq->dev);
hdr_hash = (struct virtio_net_hdr_v1_hash *)(xdp->data - vi->hdr_len);
hash_report = __le16_to_cpu(hdr_hash->hash_report);
if (hash_report >= VIRTIO_NET_HASH_REPORT_MAX_TABLE)
hash_report = VIRTIO_NET_HASH_REPORT_NONE;
*rss_type = virtnet_xdp_rss_type[hash_report];
*hash = __le32_to_cpu(hdr_hash->hash_value);
return 0;
}
static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = {
.xmo_rx_hash = virtnet_xdp_rx_hash,
};
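/* Device probe: size the netdev from the advertised multiqueue configuration
 * and translate negotiated virtio features into netdev offload flags before
 * registration.
 */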
static int virtnet_probe(struct virtio_device *vdev)
{
int i, err = -ENOMEM;
struct net_device *dev;
struct virtnet_info *vi;
u16 max_queue_pairs;
int mtu = 0;
/* Find if host supports multiqueue/rss virtio_net device */
max_queue_pairs = 1;
if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || virtio_has_feature(vdev, VIRTIO_NET_F_RSS))
max_queue_pairs =
virtio_cread16(vdev, offsetof(struct virtio_net_config, max_virtqueue_pairs));
/* We need at least 2 queues */
if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
max_queue_pairs = 1;
/* Allocate ourselves a network device with room for our info */
dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs);
if (!dev)
return -ENOMEM;
/* Set up network device as normal. */
dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE |
IFF_TX_SKB_NO_LINEAR;
dev->netdev_ops = &virtnet_netdev;
dev->stat_ops = &virtnet_stat_ops;
dev->features = NETIF_F_HIGHDMA;
dev->ethtool_ops = &virtnet_ethtool_ops;
SET_NETDEV_DEV(dev, &vdev->dev);
/* Do we support "hardware" checksums? */
if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
/* This opens up the world of extra features. */
dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG;
if (csum)
dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
dev->hw_features |= NETIF_F_TSO
| NETIF_F_TSO_ECN | NETIF_F_TSO6;
}
/* Individual feature bits: what can host handle? */
if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4))
dev->hw_features |= NETIF_F_TSO;
if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6))
dev->hw_features |= NETIF_F_TSO6;
if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
dev->hw_features |= NETIF_F_TSO_ECN;
if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO))
dev->hw_features |= NETIF_F_GSO_UDP_L4;
dev->features |= NETIF_F_GSO_ROBUST;
if (gso)
dev->features |= dev->hw_features & NETIF_F_ALL_TSO;
/* (!csum && gso) case will be fixed by register_netdev() */
}
/* 1. With VIRTIO_NET_F_GUEST_CSUM negotiation, the driver doesn't
* need to calculate checksums for partially checksummed packets,
* as they're considered valid by the upper layer.
* 2. Without VIRTIO_NET_F_GUEST_CSUM negotiation, the driver only
* receives fully checksummed packets. The device may assist in
* validating these packets' checksums, so the driver won't have to.
*/
dev->features |= NETIF_F_RXCSUM;
if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6))
dev->features |= NETIF_F_GRO_HW;
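/* GRO_HW is user-toggleable (hw_features) only when the device allows
* guest offloads to be reconfigured through the control virtqueue.
*/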
Revert "virtio-net: ethtool configurable RXCSUM" This reverts commit 3618ad2a7c0e78e4258386394d5d5f92a3dbccf8. When control vq is not negotiated, that commit causes a crash: [ 72.229171] kernel BUG at drivers/net/virtio_net.c:1667! [ 72.230266] invalid opcode: 0000 [#1] PREEMPT SMP [ 72.231172] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.9.0-rc8-02934-g3618ad2a7c0e7 #1 [ 72.231172] EIP: virtnet_send_command+0x120/0x140 [ 72.231172] Code: 00 0f 94 c0 8b 7d f0 65 33 3d 14 00 00 00 75 1c 8d 65 f4 5b 5e 5f 5d c3 66 90 be 01 00 00 00 e9 6e ff ff ff 8d b6 00 +00 00 00 <0f> 0b e8 d9 bb 82 00 eb 17 8d b4 26 00 00 00 00 8d b4 26 00 00 00 [ 72.231172] EAX: 0000000d EBX: f72895c0 ECX: 00000017 EDX: 00000011 [ 72.231172] ESI: f7197800 EDI: ed69bd00 EBP: ed69bcf4 ESP: ed69bc98 [ 72.231172] DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00010246 [ 72.231172] CR0: 80050033 CR2: 00000000 CR3: 02c84000 CR4: 000406f0 [ 72.231172] Call Trace: [ 72.231172] ? __virt_addr_valid+0x45/0x60 [ 72.231172] ? ___cache_free+0x51f/0x760 [ 72.231172] ? kobject_uevent_env+0xf4/0x560 [ 72.231172] virtnet_set_guest_offloads+0x4d/0x80 [ 72.231172] virtnet_set_features+0x85/0x120 [ 72.231172] ? virtnet_set_guest_offloads+0x80/0x80 [ 72.231172] __netdev_update_features+0x27a/0x8e0 [ 72.231172] ? kobject_uevent+0xa/0x20 [ 72.231172] ? netdev_register_kobject+0x12c/0x160 [ 72.231172] register_netdevice+0x4fe/0x740 [ 72.231172] register_netdev+0x1c/0x40 [ 72.231172] virtnet_probe+0x728/0xb60 [ 72.231172] ? _raw_spin_unlock+0x1d/0x40 [ 72.231172] ? virtio_vdpa_get_status+0x1c/0x20 [ 72.231172] virtio_dev_probe+0x1c6/0x271 [ 72.231172] really_probe+0x195/0x2e0 [ 72.231172] driver_probe_device+0x26/0x60 [ 72.231172] device_driver_attach+0x49/0x60 [ 72.231172] __driver_attach+0x46/0xc0 [ 72.231172] ? device_driver_attach+0x60/0x60 [ 72.231172] bus_add_driver+0x197/0x1c0 [ 72.231172] driver_register+0x66/0xc0 [ 72.231172] register_virtio_driver+0x1b/0x40 [ 72.231172] virtio_net_driver_init+0x61/0x86 [ 72.231172] ? veth_init+0x14/0x14 [ 72.231172] do_one_initcall+0x76/0x2e4 [ 72.231172] ? rdinit_setup+0x2a/0x2a [ 72.231172] do_initcalls+0xb2/0xd5 [ 72.231172] kernel_init_freeable+0x14f/0x179 [ 72.231172] ? rest_init+0x100/0x100 [ 72.231172] kernel_init+0xd/0xe0 [ 72.231172] ret_from_fork+0x1c/0x30 [ 72.231172] Modules linked in: [ 72.269563] ---[ end trace a6ebc4afea0e6cb1 ]--- The reason is that virtnet_set_features now calls virtnet_set_guest_offloads unconditionally, it used to only call it when there is something to configure. If device does not have a control vq, everything breaks. Revert the original commit for now. Cc: Tonghao Zhang <xiangxia.m.yue@gmail.com> Fixes: 3618ad2a7c0e7 ("virtio-net: ethtool configurable RXCSUM") Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com> Acked-by: Willem de Bruijn <willemb@google.com> Acked-by: Jason Wang <jasowang@redhat.com> Link: https://lore.kernel.org/r/20201021142944.13615-1-mst@redhat.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-21 14:30:43 +00:00
if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS))
dev->hw_features |= NETIF_F_GRO_HW;
dev->vlan_features = dev->features;
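/* Basic XDP and redirect are always supported; further XDP feature
* flags are added below based on the negotiated device features.
*/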
dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT;
/* MTU range: 68 - 65535 */
dev->min_mtu = MIN_MTU;
dev->max_mtu = MAX_MTU;
/* Configuration may specify what MAC to use. Otherwise random. */
if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
u8 addr[ETH_ALEN];
virtio_cread_bytes(vdev,
offsetof(struct virtio_net_config, mac),
addr, ETH_ALEN);
eth_hw_addr_set(dev, addr);
} else {
eth_hw_addr_random(dev);
dev_info(&vdev->dev, "Assigned random MAC address %pM\n",
dev->dev_addr);
}
/* Set up our device-specific information */
vi = netdev_priv(dev);
vi->dev = dev;
vi->vdev = vdev;
vdev->priv = vi;
INIT_WORK(&vi->config_work, virtnet_config_changed_work);
INIT_WORK(&vi->rx_mode_work, virtnet_rx_mode_work);
spin_lock_init(&vi->refill_lock);
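/* With mergeable receive buffers the device may spread one packet over
* several buffers, which also allows multi-buffer XDP (RX_SG).
*/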
if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) {
vi->mergeable_rx_bufs = true;
dev->xdp_features |= NETDEV_XDP_ACT_RX_SG;
}
if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT))
vi->has_rss_hash_report = true;
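/* RSS: read how large an indirection table the device supports. */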
if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) {
vi->has_rss = true;
vi->rss_indir_table_size =
virtio_cread16(vdev, offsetof(struct virtio_net_config,
rss_max_indirection_table_length));
}
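/* RSS and hash reporting share the key size and the supported hash
* types; the extended (_EX) hash types are not supported by the driver.
*/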
if (vi->has_rss || vi->has_rss_hash_report) {
vi->rss_key_size =
virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size));
vi->rss_hash_types_supported =
virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types));
vi->rss_hash_types_supported &=
~(VIRTIO_NET_RSS_HASH_TYPE_IP_EX |
VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
VIRTIO_NET_RSS_HASH_TYPE_UDP_EX);
dev->hw_features |= NETIF_F_RXHASH;
dev->xdp_metadata_ops = &virtnet_xdp_metadata_ops;
}
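/* Select the receive header layout: hash reporting needs the v1_hash
* header, mergeable buffers or VIRTIO 1.0 use the mrg_rxbuf header,
* and legacy devices use the short header.
*/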
if (vi->has_rss_hash_report)
vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash);
else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) ||
virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
else
vi->hdr_len = sizeof(struct virtio_net_hdr);
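/* ANY_LAYOUT or VIRTIO 1.0 devices accept the virtio-net header in the
* same scatterlist entry as the data.
*/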
if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) ||
virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
vi->any_header_sg = true;
if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
vi->has_cvq = true;
mutex_init(&vi->cvq_lock);
if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
mtu = virtio_cread16(vdev,
offsetof(struct virtio_net_config,
mtu));
if (mtu < dev->min_mtu) {
/* Should never trigger: MTU was previously validated
* in virtnet_validate.
*/
dev_err(&vdev->dev,
"device MTU appears to have changed it is now %d < %d",
mtu, dev->min_mtu);
err = -EINVAL;
goto free;
}
dev->mtu = mtu;
dev->max_mtu = mtu;
}
virtnet_set_big_packets(vi, mtu);
if (vi->any_header_sg)
dev->needed_headroom = vi->hdr_len;
/* Enable multiqueue by default */
if (num_online_cpus() >= max_queue_pairs)
vi->curr_queue_pairs = max_queue_pairs;
else
vi->curr_queue_pairs = num_online_cpus();
vi->max_queue_pairs = max_queue_pairs;
/* Allocate/initialize the rx/tx queues, and invoke find_vqs */
err = init_vqs(vi);
if (err)
goto free;
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
vi->intr_coal_rx.max_usecs = 0;
vi->intr_coal_tx.max_usecs = 0;
vi->intr_coal_rx.max_packets = 0;
/* Keep the default values of the coalescing parameters
* aligned with the default napi_tx state.
*/
if (vi->sq[0].napi.weight)
vi->intr_coal_tx.max_packets = 1;
else
vi->intr_coal_tx.max_packets = 0;
}
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) {
/* The reason is the same as VIRTIO_NET_F_NOTF_COAL. */
for (i = 0; i < vi->max_queue_pairs; i++)
if (vi->sq[i].napi.weight)
vi->sq[i].intr_coal.max_packets = 1;
err = virtnet_init_irq_moder(vi);
if (err)
goto free;
}
#ifdef CONFIG_SYSFS
if (vi->mergeable_rx_bufs)
dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group;
#endif
netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
virtio_net: Revert "virtio_net: set the default max ring size by find_vqs()" This reverts commit 762faee5a2678559d3dc09d95f8f2c54cd0466a7. This has been reported to trip up guests on GCP (Google Cloud). The reason is that virtio_find_vqs_ctx_size is broken on legacy devices. We can in theory fix virtio_find_vqs_ctx_size but in fact the patch itself has several other issues: - It treats unknown speed as < 10G - It leaves userspace no way to find out the ring size set by hypervisor - It tests speed when link is down - It ignores the virtio spec advice: Both \field{speed} and \field{duplex} can change, thus the driver is expected to re-read these values after receiving a configuration change notification. - It is not clear the performance impact has been tested properly Revert the patch for now. Reported-by: Andres Freund <andres@anarazel.de> Link: https://lore.kernel.org/r/20220814212610.GA3690074%40roeck-us.net Link: https://lore.kernel.org/r/20220815070203.plwjx7b3cyugpdt7%40awork3.anarazel.de Link: https://lore.kernel.org/r/3df6bb82-1951-455d-a768-e9e1513eb667%40www.fastmail.com Link: https://lore.kernel.org/r/FCDC5DDE-3CDD-4B8A-916F-CA7D87B547CE%40anarazel.de Fixes: 762faee5a267 ("virtio_net: set the default max ring size by find_vqs()") Cc: Xuan Zhuo <xuanzhuo@linux.alibaba.com> Cc: Jason Wang <jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com> Tested-by: Andres Freund <andres@anarazel.de> Tested-by: Guenter Roeck <linux@roeck-us.net> Message-Id: <20220816053602.173815-2-mst@redhat.com>
2022-08-16 05:36:27 +00:00
virtnet_init_settings(dev);
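/* VIRTIO_NET_F_STANDBY: create a failover master so that a VF with the
* same MAC address can be enslaved as the primary datapath.
*/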
if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
vi->failover = net_failover_create(vi->dev);
if (IS_ERR(vi->failover)) {
err = PTR_ERR(vi->failover);
goto free_vqs;
}
}
if (vi->has_rss || vi->has_rss_hash_report)
virtnet_init_default_rss(vi);
enable_rx_mode_work(vi);
/* serialize netdev register + virtio_device_ready() with ndo_open() */
rtnl_lock();
err = register_netdevice(dev);
if (err) {
pr_debug("virtio_net: registering device failed\n");
rtnl_unlock();
goto free_failover;
}
/* Disable config change notification until ndo_open. */
virtio_config_driver_disable(vi->vdev);
virtio_device_ready(vdev);
virtnet_set_queues(vi, vi->curr_queue_pairs);
/* A random MAC address has been assigned; notify the device.
* We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is not there
* because many devices work fine without the MAC being set explicitly.
*/
if (!virtio_has_feature(vdev, VIRTIO_NET_F_MAC) &&
virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
struct scatterlist sg;
sg_init_one(&sg, dev->dev_addr, dev->addr_len);
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) {
pr_debug("virtio_net: setting MAC address failed\n");
rtnl_unlock();
err = -EINVAL;
goto free_unregister_netdev;
}
}
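/* Ask the device which statistics types it can report and cache the
* capability mask for later stats queries.
*/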
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) {
struct virtio_net_stats_capabilities *stats_cap __free(kfree) = NULL;
struct scatterlist sg;
__le64 v;
stats_cap = kzalloc(sizeof(*stats_cap), GFP_KERNEL);
if (!stats_cap) {
rtnl_unlock();
err = -ENOMEM;
goto free_unregister_netdev;
}
sg_init_one(&sg, stats_cap, sizeof(*stats_cap));
if (!virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS,
VIRTIO_NET_CTRL_STATS_QUERY,
NULL, &sg)) {
pr_debug("virtio_net: fail to get stats capability\n");
rtnl_unlock();
err = -EINVAL;
goto free_unregister_netdev;
}
v = stats_cap->supported_stats_types[0];
vi->device_stats_cap = le64_to_cpu(v);
}
/* Assume link up if device can't report link status,
otherwise get link status from config. */
netif_carrier_off(dev);
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
virtnet_config_changed_work(&vi->config_work);
} else {
vi->status = VIRTIO_NET_S_LINK_UP;
virtnet_update_settings(vi);
netif_carrier_on(dev);
}
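/* Record the negotiated guest offloads so they can be restored after
* runtime changes (for example when an XDP program is removed).
*/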
for (i = 0; i < ARRAY_SIZE(guest_offloads); i++)
if (virtio_has_feature(vi->vdev, guest_offloads[i]))
set_bit(guest_offloads[i], &vi->guest_offloads);
vi->guest_offloads_capable = vi->guest_offloads;
rtnl_unlock();
err = virtnet_cpu_notif_add(vi);
if (err) {
pr_debug("virtio_net: registering cpu notifier failed\n");
goto free_unregister_netdev;
}
pr_debug("virtnet: registered device %s with %d RX and TX vq's\n",
dev->name, max_queue_pairs);
return 0;
free_unregister_netdev:
unregister_netdev(dev);
free_failover:
net_failover_destroy(vi->failover);
free_vqs:
virtio_reset_device(vdev);
cancel_delayed_work_sync(&vi->refill);
free_receive_page_frags(vi);
virtnet_del_vqs(vi);
free:
free_netdev(dev);
return err;
}
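/* Reset the device so no buffers remain in flight, then free whatever
* the driver still owns and delete the virtqueues.
*/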
static void remove_vq_common(struct virtnet_info *vi)
{
virtio_reset_device(vi->vdev);
/* Free unused buffers in both send and recv, if any. */
free_unused_bufs(vi);
free_receive_bufs(vi);
free_receive_page_frags(vi);
virtnet_del_vqs(vi);
}
static void virtnet_remove(struct virtio_device *vdev)
{
struct virtnet_info *vi = vdev->priv;
virtnet_cpu_notif_remove(vi);
/* Make sure no work handler is accessing the device. */
flush_work(&vi->config_work);
disable_rx_mode_work(vi);
flush_work(&vi->rx_mode_work);
virtnet_free_irq_moder(vi);
unregister_netdev(vi->dev);
net_failover_destroy(vi->failover);
remove_vq_common(vi);
free_netdev(vi->dev);
}
static __maybe_unused int virtnet_freeze(struct virtio_device *vdev)
{
struct virtnet_info *vi = vdev->priv;
virtnet_cpu_notif_remove(vi);
virtnet_freeze_down(vdev);
remove_vq_common(vi);
return 0;
}
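/* Bring the device back up after resume and re-register the CPU hotplug
* notifier; on failure the virtqueues are torn down again.
*/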
static __maybe_unused int virtnet_restore(struct virtio_device *vdev)
{
struct virtnet_info *vi = vdev->priv;
int err;
err = virtnet_restore_up(vdev);
if (err)
return err;
virtnet_set_queues(vi, vi->curr_queue_pairs);
err = virtnet_cpu_notif_add(vi);
if (err) {
virtnet_freeze_down(vdev);
remove_vq_common(vi);
return err;
}
return 0;
}
static struct virtio_device_id id_table[] = {
{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
{ 0 },
};
#define VIRTNET_FEATURES \
VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \
VIRTIO_NET_F_MAC, \
VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \
VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \
VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \
VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \
VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \
VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \
VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \
VIRTIO_NET_F_CTRL_MAC_ADDR, \
VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \
VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \
VIRTIO_NET_F_VQ_NOTF_COAL, \
VIRTIO_NET_F_GUEST_HDRLEN, VIRTIO_NET_F_DEVICE_STATS
static unsigned int features[] = {
VIRTNET_FEATURES,
};
static unsigned int features_legacy[] = {
VIRTNET_FEATURES,
VIRTIO_NET_F_GSO,
VIRTIO_F_ANY_LAYOUT,
};
static struct virtio_driver virtio_net_driver = {
.feature_table = features,
.feature_table_size = ARRAY_SIZE(features),
.feature_table_legacy = features_legacy,
.feature_table_size_legacy = ARRAY_SIZE(features_legacy),
.driver.name = KBUILD_MODNAME,
.id_table = id_table,
.validate = virtnet_validate,
.probe = virtnet_probe,
.remove = virtnet_remove,
.config_changed = virtnet_config_changed,
#ifdef CONFIG_PM_SLEEP
.freeze = virtnet_freeze,
.restore = virtnet_restore,
#endif
};
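/* Register the CPU hotplug states used to keep virtqueue CPU affinity
* up to date, then register the virtio driver itself.
*/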
static __init int virtio_net_driver_init(void)
{
int ret;
ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online",
virtnet_cpu_online,
virtnet_cpu_down_prep);
if (ret < 0)
goto out;
virtionet_online = ret;
ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead",
NULL, virtnet_cpu_dead);
if (ret)
goto err_dead;
ret = register_virtio_driver(&virtio_net_driver);
if (ret)
goto err_virtio;
return 0;
err_virtio:
cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
err_dead:
cpuhp_remove_multi_state(virtionet_online);
out:
return ret;
}
module_init(virtio_net_driver_init);
static __exit void virtio_net_driver_exit(void)
{
unregister_virtio_driver(&virtio_net_driver);
cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
cpuhp_remove_multi_state(virtionet_online);
}
module_exit(virtio_net_driver_exit);
MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio network driver");
MODULE_LICENSE("GPL");