linux/drivers/net/loopback.c
Eric Dumazet 0bef512012 net: add netdev_lockdep_set_classes() to virtual drivers
Based on a syzbot report, it appears many virtual
drivers do not yet use netdev_lockdep_set_classes(),
triggerring lockdep false positives.

WARNING: possible recursive locking detected
6.8.0-rc4-next-20240212-syzkaller #0 Not tainted

syz-executor.0/19016 is trying to acquire lock:
 ffff8880162cb298 (_xmit_ETHER#2){+.-.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline]
 ffff8880162cb298 (_xmit_ETHER#2){+.-.}-{2:2}, at: __netif_tx_lock include/linux/netdevice.h:4452 [inline]
 ffff8880162cb298 (_xmit_ETHER#2){+.-.}-{2:2}, at: sch_direct_xmit+0x1c4/0x5f0 net/sched/sch_generic.c:340

but task is already holding lock:
 ffff8880223db4d8 (_xmit_ETHER#2){+.-.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline]
 ffff8880223db4d8 (_xmit_ETHER#2){+.-.}-{2:2}, at: __netif_tx_lock include/linux/netdevice.h:4452 [inline]
 ffff8880223db4d8 (_xmit_ETHER#2){+.-.}-{2:2}, at: sch_direct_xmit+0x1c4/0x5f0 net/sched/sch_generic.c:340

other info that might help us debug this:
 Possible unsafe locking scenario:

       CPU0
  lock(_xmit_ETHER#2);
  lock(_xmit_ETHER#2);

 *** DEADLOCK ***

 May be due to missing lock nesting notation

9 locks held by syz-executor.0/19016:
  #0: ffffffff8f385208 (rtnl_mutex){+.+.}-{3:3}, at: rtnl_lock net/core/rtnetlink.c:79 [inline]
  #0: ffffffff8f385208 (rtnl_mutex){+.+.}-{3:3}, at: rtnetlink_rcv_msg+0x82c/0x1040 net/core/rtnetlink.c:6603
  #1: ffffc90000a08c00 ((&in_dev->mr_ifc_timer)){+.-.}-{0:0}, at: call_timer_fn+0xc0/0x600 kernel/time/timer.c:1697
  #2: ffffffff8e131520 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire include/linux/rcupdate.h:298 [inline]
  #2: ffffffff8e131520 (rcu_read_lock){....}-{1:2}, at: rcu_read_lock include/linux/rcupdate.h:750 [inline]
  #2: ffffffff8e131520 (rcu_read_lock){....}-{1:2}, at: ip_finish_output2+0x45f/0x1360 net/ipv4/ip_output.c:228
  #3: ffffffff8e131580 (rcu_read_lock_bh){....}-{1:2}, at: local_bh_disable include/linux/bottom_half.h:20 [inline]
  #3: ffffffff8e131580 (rcu_read_lock_bh){....}-{1:2}, at: rcu_read_lock_bh include/linux/rcupdate.h:802 [inline]
  #3: ffffffff8e131580 (rcu_read_lock_bh){....}-{1:2}, at: __dev_queue_xmit+0x2c4/0x3b10 net/core/dev.c:4284
  #4: ffff8880416e3258 (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock){+...}-{2:2}, at: spin_trylock include/linux/spinlock.h:361 [inline]
  #4: ffff8880416e3258 (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock){+...}-{2:2}, at: qdisc_run_begin include/net/sch_generic.h:195 [inline]
  #4: ffff8880416e3258 (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock){+...}-{2:2}, at: __dev_xmit_skb net/core/dev.c:3771 [inline]
  #4: ffff8880416e3258 (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock){+...}-{2:2}, at: __dev_queue_xmit+0x1262/0x3b10 net/core/dev.c:4325
  #5: ffff8880223db4d8 (_xmit_ETHER#2){+.-.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline]
  #5: ffff8880223db4d8 (_xmit_ETHER#2){+.-.}-{2:2}, at: __netif_tx_lock include/linux/netdevice.h:4452 [inline]
  #5: ffff8880223db4d8 (_xmit_ETHER#2){+.-.}-{2:2}, at: sch_direct_xmit+0x1c4/0x5f0 net/sched/sch_generic.c:340
  #6: ffffffff8e131520 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire include/linux/rcupdate.h:298 [inline]
  #6: ffffffff8e131520 (rcu_read_lock){....}-{1:2}, at: rcu_read_lock include/linux/rcupdate.h:750 [inline]
  #6: ffffffff8e131520 (rcu_read_lock){....}-{1:2}, at: ip_finish_output2+0x45f/0x1360 net/ipv4/ip_output.c:228
  #7: ffffffff8e131580 (rcu_read_lock_bh){....}-{1:2}, at: local_bh_disable include/linux/bottom_half.h:20 [inline]
  #7: ffffffff8e131580 (rcu_read_lock_bh){....}-{1:2}, at: rcu_read_lock_bh include/linux/rcupdate.h:802 [inline]
  #7: ffffffff8e131580 (rcu_read_lock_bh){....}-{1:2}, at: __dev_queue_xmit+0x2c4/0x3b10 net/core/dev.c:4284
  #8: ffff888014d9d258 (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock){+...}-{2:2}, at: spin_trylock include/linux/spinlock.h:361 [inline]
  #8: ffff888014d9d258 (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock){+...}-{2:2}, at: qdisc_run_begin include/net/sch_generic.h:195 [inline]
  #8: ffff888014d9d258 (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock){+...}-{2:2}, at: __dev_xmit_skb net/core/dev.c:3771 [inline]
  #8: ffff888014d9d258 (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock){+...}-{2:2}, at: __dev_queue_xmit+0x1262/0x3b10 net/core/dev.c:4325

stack backtrace:
CPU: 1 PID: 19016 Comm: syz-executor.0 Not tainted 6.8.0-rc4-next-20240212-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024
Call Trace:
 <IRQ>
  __dump_stack lib/dump_stack.c:88 [inline]
  dump_stack_lvl+0x241/0x360 lib/dump_stack.c:114
  check_deadlock kernel/locking/lockdep.c:3062 [inline]
  validate_chain+0x15c1/0x58e0 kernel/locking/lockdep.c:3856
  __lock_acquire+0x1346/0x1fd0 kernel/locking/lockdep.c:5137
  lock_acquire+0x1e4/0x530 kernel/locking/lockdep.c:5754
  __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline]
  _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154
  spin_lock include/linux/spinlock.h:351 [inline]
  __netif_tx_lock include/linux/netdevice.h:4452 [inline]
  sch_direct_xmit+0x1c4/0x5f0 net/sched/sch_generic.c:340
  __dev_xmit_skb net/core/dev.c:3784 [inline]
  __dev_queue_xmit+0x1912/0x3b10 net/core/dev.c:4325
  neigh_output include/net/neighbour.h:542 [inline]
  ip_finish_output2+0xe66/0x1360 net/ipv4/ip_output.c:235
  iptunnel_xmit+0x540/0x9b0 net/ipv4/ip_tunnel_core.c:82
  ip_tunnel_xmit+0x20ee/0x2960 net/ipv4/ip_tunnel.c:831
  erspan_xmit+0x9de/0x1460 net/ipv4/ip_gre.c:720
  __netdev_start_xmit include/linux/netdevice.h:4989 [inline]
  netdev_start_xmit include/linux/netdevice.h:5003 [inline]
  xmit_one net/core/dev.c:3555 [inline]
  dev_hard_start_xmit+0x242/0x770 net/core/dev.c:3571
  sch_direct_xmit+0x2b6/0x5f0 net/sched/sch_generic.c:342
  __dev_xmit_skb net/core/dev.c:3784 [inline]
  __dev_queue_xmit+0x1912/0x3b10 net/core/dev.c:4325
  neigh_output include/net/neighbour.h:542 [inline]
  ip_finish_output2+0xe66/0x1360 net/ipv4/ip_output.c:235
  igmpv3_send_cr net/ipv4/igmp.c:723 [inline]
  igmp_ifc_timer_expire+0xb71/0xd90 net/ipv4/igmp.c:813
  call_timer_fn+0x17e/0x600 kernel/time/timer.c:1700
  expire_timers kernel/time/timer.c:1751 [inline]
  __run_timers+0x621/0x830 kernel/time/timer.c:2038
  run_timer_softirq+0x67/0xf0 kernel/time/timer.c:2051
  __do_softirq+0x2bc/0x943 kernel/softirq.c:554
  invoke_softirq kernel/softirq.c:428 [inline]
  __irq_exit_rcu+0xf2/0x1c0 kernel/softirq.c:633
  irq_exit_rcu+0x9/0x30 kernel/softirq.c:645
  instr_sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1076 [inline]
  sysvec_apic_timer_interrupt+0xa6/0xc0 arch/x86/kernel/apic/apic.c:1076
 </IRQ>
 <TASK>
  asm_sysvec_apic_timer_interrupt+0x1a/0x20 arch/x86/include/asm/idtentry.h:702
 RIP: 0010:resched_offsets_ok kernel/sched/core.c:10127 [inline]
 RIP: 0010:__might_resched+0x16f/0x780 kernel/sched/core.c:10142
Code: 00 4c 89 e8 48 c1 e8 03 48 ba 00 00 00 00 00 fc ff df 48 89 44 24 38 0f b6 04 10 84 c0 0f 85 87 04 00 00 41 8b 45 00 c1 e0 08 <01> d8 44 39 e0 0f 85 d6 00 00 00 44 89 64 24 1c 48 8d bc 24 a0 00
RSP: 0018:ffffc9000ee069e0 EFLAGS: 00000246
RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff8880296a9e00
RDX: dffffc0000000000 RSI: ffff8880296a9e00 RDI: ffffffff8bfe8fa0
RBP: ffffc9000ee06b00 R08: ffffffff82326877 R09: 1ffff11002b5ad1b
R10: dffffc0000000000 R11: ffffed1002b5ad1c R12: 0000000000000000
R13: ffff8880296aa23c R14: 000000000000062a R15: 1ffff92001dc0d44
  down_write+0x19/0x50 kernel/locking/rwsem.c:1578
  kernfs_activate fs/kernfs/dir.c:1403 [inline]
  kernfs_add_one+0x4af/0x8b0 fs/kernfs/dir.c:819
  __kernfs_create_file+0x22e/0x2e0 fs/kernfs/file.c:1056
  sysfs_add_file_mode_ns+0x24a/0x310 fs/sysfs/file.c:307
  create_files fs/sysfs/group.c:64 [inline]
  internal_create_group+0x4f4/0xf20 fs/sysfs/group.c:152
  internal_create_groups fs/sysfs/group.c:192 [inline]
  sysfs_create_groups+0x56/0x120 fs/sysfs/group.c:218
  create_dir lib/kobject.c:78 [inline]
  kobject_add_internal+0x472/0x8d0 lib/kobject.c:240
  kobject_add_varg lib/kobject.c:374 [inline]
  kobject_init_and_add+0x124/0x190 lib/kobject.c:457
  netdev_queue_add_kobject net/core/net-sysfs.c:1706 [inline]
  netdev_queue_update_kobjects+0x1f3/0x480 net/core/net-sysfs.c:1758
  register_queue_kobjects net/core/net-sysfs.c:1819 [inline]
  netdev_register_kobject+0x265/0x310 net/core/net-sysfs.c:2059
  register_netdevice+0x1191/0x19c0 net/core/dev.c:10298
  bond_newlink+0x3b/0x90 drivers/net/bonding/bond_netlink.c:576
  rtnl_newlink_create net/core/rtnetlink.c:3506 [inline]
  __rtnl_newlink net/core/rtnetlink.c:3726 [inline]
  rtnl_newlink+0x158f/0x20a0 net/core/rtnetlink.c:3739
  rtnetlink_rcv_msg+0x885/0x1040 net/core/rtnetlink.c:6606
  netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2543
  netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline]
  netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1367
  netlink_sendmsg+0xa3c/0xd70 net/netlink/af_netlink.c:1908
  sock_sendmsg_nosec net/socket.c:730 [inline]
  __sock_sendmsg+0x221/0x270 net/socket.c:745
  __sys_sendto+0x3a4/0x4f0 net/socket.c:2191
  __do_sys_sendto net/socket.c:2203 [inline]
  __se_sys_sendto net/socket.c:2199 [inline]
  __x64_sys_sendto+0xde/0x100 net/socket.c:2199
 do_syscall_64+0xfb/0x240
 entry_SYSCALL_64_after_hwframe+0x6d/0x75
RIP: 0033:0x7fc3fa87fa9c

Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20240212140700.2795436-4-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2024-02-13 18:45:06 -08:00

282 lines
7.2 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* Pseudo-driver for the loopback interface.
*
* Version: @(#)loopback.c 1.0.4b 08/16/93
*
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Donald Becker, <becker@scyld.com>
*
* Alan Cox : Fixed oddments for NET3.014
* Alan Cox : Rejig for NET3.029 snap #3
* Alan Cox : Fixed NET3.029 bugs and sped up
* Larry McVoy : Tiny tweak to double performance
* Alan Cox : Backed out LMV's tweak - the linux mm
* can't take it...
* Michael Griffith: Don't bother computing the checksums
* on packets received on the loopback
* interface.
* Alexey Kuznetsov: Potential hang under some extreme
* cases removed.
*/
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/fs.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/in.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/ethtool.h>
#include <net/sch_generic.h>
#include <net/sock.h>
#include <net/checksum.h>
#include <linux/if_ether.h> /* For the statistics structure. */
#include <linux/if_arp.h> /* For ARPHRD_ETHER */
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/percpu.h>
#include <linux/net_tstamp.h>
#include <net/net_namespace.h>
#include <linux/u64_stats_sync.h>
/* blackhole_netdev - a device used for dsts that are marked expired!
* This is global device (instead of per-net-ns) since it's not needed
* to be per-ns and gets initialized at boot time.
*/
struct net_device *blackhole_netdev;
EXPORT_SYMBOL(blackhole_netdev);
/* The higher levels take care of making this non-reentrant (it's
* called with bh's disabled).
*/
static netdev_tx_t loopback_xmit(struct sk_buff *skb,
struct net_device *dev)
{
int len;
skb_tx_timestamp(skb);
/* do not fool net_timestamp_check() with various clock bases */
skb_clear_tstamp(skb);
skb_orphan(skb);
/* Before queueing this packet to __netif_rx(),
* make sure dst is refcounted.
*/
skb_dst_force(skb);
skb->protocol = eth_type_trans(skb, dev);
len = skb->len;
if (likely(__netif_rx(skb) == NET_RX_SUCCESS))
dev_lstats_add(dev, len);
return NETDEV_TX_OK;
}
void dev_lstats_read(struct net_device *dev, u64 *packets, u64 *bytes)
{
int i;
*packets = 0;
*bytes = 0;
for_each_possible_cpu(i) {
const struct pcpu_lstats *lb_stats;
u64 tbytes, tpackets;
unsigned int start;
lb_stats = per_cpu_ptr(dev->lstats, i);
do {
start = u64_stats_fetch_begin(&lb_stats->syncp);
tpackets = u64_stats_read(&lb_stats->packets);
tbytes = u64_stats_read(&lb_stats->bytes);
} while (u64_stats_fetch_retry(&lb_stats->syncp, start));
*bytes += tbytes;
*packets += tpackets;
}
}
EXPORT_SYMBOL(dev_lstats_read);
static void loopback_get_stats64(struct net_device *dev,
struct rtnl_link_stats64 *stats)
{
u64 packets, bytes;
dev_lstats_read(dev, &packets, &bytes);
stats->rx_packets = packets;
stats->tx_packets = packets;
stats->rx_bytes = bytes;
stats->tx_bytes = bytes;
}
static u32 always_on(struct net_device *dev)
{
return 1;
}
static const struct ethtool_ops loopback_ethtool_ops = {
.get_link = always_on,
.get_ts_info = ethtool_op_get_ts_info,
};
static int loopback_dev_init(struct net_device *dev)
{
dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);
if (!dev->lstats)
return -ENOMEM;
netdev_lockdep_set_classes(dev);
return 0;
}
static void loopback_dev_free(struct net_device *dev)
{
dev_net(dev)->loopback_dev = NULL;
free_percpu(dev->lstats);
}
static const struct net_device_ops loopback_ops = {
.ndo_init = loopback_dev_init,
.ndo_start_xmit = loopback_xmit,
.ndo_get_stats64 = loopback_get_stats64,
.ndo_set_mac_address = eth_mac_addr,
};
static void gen_lo_setup(struct net_device *dev,
unsigned int mtu,
const struct ethtool_ops *eth_ops,
const struct header_ops *hdr_ops,
const struct net_device_ops *dev_ops,
void (*dev_destructor)(struct net_device *dev))
{
dev->mtu = mtu;
dev->hard_header_len = ETH_HLEN; /* 14 */
dev->min_header_len = ETH_HLEN; /* 14 */
dev->addr_len = ETH_ALEN; /* 6 */
dev->type = ARPHRD_LOOPBACK; /* 0x0001*/
dev->flags = IFF_LOOPBACK;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
netif_keep_dst(dev);
dev->hw_features = NETIF_F_GSO_SOFTWARE;
dev->features = NETIF_F_SG | NETIF_F_FRAGLIST
| NETIF_F_GSO_SOFTWARE
| NETIF_F_HW_CSUM
| NETIF_F_RXCSUM
| NETIF_F_SCTP_CRC
| NETIF_F_HIGHDMA
| NETIF_F_LLTX
| NETIF_F_NETNS_LOCAL
| NETIF_F_VLAN_CHALLENGED
| NETIF_F_LOOPBACK;
dev->ethtool_ops = eth_ops;
dev->header_ops = hdr_ops;
dev->netdev_ops = dev_ops;
dev->needs_free_netdev = true;
dev->priv_destructor = dev_destructor;
netif_set_tso_max_size(dev, GSO_MAX_SIZE);
}
/* The loopback device is special. There is only one instance
* per network namespace.
*/
static void loopback_setup(struct net_device *dev)
{
gen_lo_setup(dev, (64 * 1024), &loopback_ethtool_ops, &eth_header_ops,
&loopback_ops, loopback_dev_free);
}
/* Setup and register the loopback device. */
static __net_init int loopback_net_init(struct net *net)
{
struct net_device *dev;
int err;
err = -ENOMEM;
dev = alloc_netdev(0, "lo", NET_NAME_PREDICTABLE, loopback_setup);
if (!dev)
goto out;
dev_net_set(dev, net);
err = register_netdev(dev);
if (err)
goto out_free_netdev;
BUG_ON(dev->ifindex != LOOPBACK_IFINDEX);
net->loopback_dev = dev;
return 0;
out_free_netdev:
free_netdev(dev);
out:
if (net_eq(net, &init_net))
panic("loopback: Failed to register netdevice: %d\n", err);
return err;
}
/* Registered in net/core/dev.c */
struct pernet_operations __net_initdata loopback_net_ops = {
.init = loopback_net_init,
};
/* blackhole netdevice */
static netdev_tx_t blackhole_netdev_xmit(struct sk_buff *skb,
struct net_device *dev)
{
kfree_skb(skb);
net_warn_ratelimited("%s(): Dropping skb.\n", __func__);
return NETDEV_TX_OK;
}
static const struct net_device_ops blackhole_netdev_ops = {
.ndo_start_xmit = blackhole_netdev_xmit,
};
/* This is a dst-dummy device used specifically for invalidated
* DSTs and unlike loopback, this is not per-ns.
*/
static void blackhole_netdev_setup(struct net_device *dev)
{
gen_lo_setup(dev, ETH_MIN_MTU, NULL, NULL, &blackhole_netdev_ops, NULL);
}
/* Setup and register the blackhole_netdev. */
static int __init blackhole_netdev_init(void)
{
blackhole_netdev = alloc_netdev(0, "blackhole_dev", NET_NAME_UNKNOWN,
blackhole_netdev_setup);
if (!blackhole_netdev)
return -ENOMEM;
rtnl_lock();
dev_init_scheduler(blackhole_netdev);
dev_activate(blackhole_netdev);
rtnl_unlock();
blackhole_netdev->flags |= IFF_UP | IFF_RUNNING;
dev_net_set(blackhole_netdev, &init_net);
return 0;
}
device_initcall(blackhole_netdev_init);