linux/drivers/net/ethernet/intel/e1000/e1000.h
Vladimir Davydov b2f963bfae e1000: fix lockdep warning in e1000_reset_task
The patch fixes the following lockdep warning, which is 100%
reproducible on network restart:

======================================================
[ INFO: possible circular locking dependency detected ]
3.12.0+ #47 Tainted: GF
-------------------------------------------------------
kworker/1:1/27 is trying to acquire lock:
 ((&(&adapter->watchdog_task)->work)){+.+...}, at: [<ffffffff8108a5b0>] flush_work+0x0/0x70

but task is already holding lock:
 (&adapter->mutex){+.+...}, at: [<ffffffffa0177c0a>] e1000_reset_task+0x4a/0xa0 [e1000]

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #1 (&adapter->mutex){+.+...}:
       [<ffffffff810bdb5d>] lock_acquire+0x9d/0x120
       [<ffffffff816b8cbc>] mutex_lock_nested+0x4c/0x390
       [<ffffffffa017233d>] e1000_watchdog+0x7d/0x5b0 [e1000]
       [<ffffffff8108b972>] process_one_work+0x1d2/0x510
       [<ffffffff8108ca80>] worker_thread+0x120/0x3a0
       [<ffffffff81092c1e>] kthread+0xee/0x110
       [<ffffffff816c3d7c>] ret_from_fork+0x7c/0xb0

-> #0 ((&(&adapter->watchdog_task)->work)){+.+...}:
       [<ffffffff810bd9c0>] __lock_acquire+0x1710/0x1810
       [<ffffffff810bdb5d>] lock_acquire+0x9d/0x120
       [<ffffffff8108a5eb>] flush_work+0x3b/0x70
       [<ffffffff8108b5d8>] __cancel_work_timer+0x98/0x140
       [<ffffffff8108b693>] cancel_delayed_work_sync+0x13/0x20
       [<ffffffffa0170cec>] e1000_down_and_stop+0x3c/0x60 [e1000]
       [<ffffffffa01775b1>] e1000_down+0x131/0x220 [e1000]
       [<ffffffffa0177c12>] e1000_reset_task+0x52/0xa0 [e1000]
       [<ffffffff8108b972>] process_one_work+0x1d2/0x510
       [<ffffffff8108ca80>] worker_thread+0x120/0x3a0
       [<ffffffff81092c1e>] kthread+0xee/0x110
       [<ffffffff816c3d7c>] ret_from_fork+0x7c/0xb0

other info that might help us debug this:

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&adapter->mutex);
                               lock((&(&adapter->watchdog_task)->work));
                               lock(&adapter->mutex);
  lock((&(&adapter->watchdog_task)->work));

 *** DEADLOCK ***

3 locks held by kworker/1:1/27:
 #0:  (events){.+.+.+}, at: [<ffffffff8108b906>] process_one_work+0x166/0x510
 #1:  ((&adapter->reset_task)){+.+...}, at: [<ffffffff8108b906>] process_one_work+0x166/0x510
 #2:  (&adapter->mutex){+.+...}, at: [<ffffffffa0177c0a>] e1000_reset_task+0x4a/0xa0 [e1000]

stack backtrace:
CPU: 1 PID: 27 Comm: kworker/1:1 Tainted: GF            3.12.0+ #47
Hardware name: System manufacturer System Product Name/P5B-VM SE, BIOS 0501    05/31/2007
Workqueue: events e1000_reset_task [e1000]
 ffffffff820f6000 ffff88007b9dba98 ffffffff816b54a2 0000000000000002
 ffffffff820f5e50 ffff88007b9dbae8 ffffffff810ba936 ffff88007b9dbac8
 ffff88007b9dbb48 ffff88007b9d8f00 ffff88007b9d8780 ffff88007b9d8f00
Call Trace:
 [<ffffffff816b54a2>] dump_stack+0x49/0x5f
 [<ffffffff810ba936>] print_circular_bug+0x216/0x310
 [<ffffffff810bd9c0>] __lock_acquire+0x1710/0x1810
 [<ffffffff8108a5b0>] ? __flush_work+0x250/0x250
 [<ffffffff810bdb5d>] lock_acquire+0x9d/0x120
 [<ffffffff8108a5b0>] ? __flush_work+0x250/0x250
 [<ffffffff8108a5eb>] flush_work+0x3b/0x70
 [<ffffffff8108a5b0>] ? __flush_work+0x250/0x250
 [<ffffffff8108b5d8>] __cancel_work_timer+0x98/0x140
 [<ffffffff8108b693>] cancel_delayed_work_sync+0x13/0x20
 [<ffffffffa0170cec>] e1000_down_and_stop+0x3c/0x60 [e1000]
 [<ffffffffa01775b1>] e1000_down+0x131/0x220 [e1000]
 [<ffffffffa0177c12>] e1000_reset_task+0x52/0xa0 [e1000]
 [<ffffffff8108b972>] process_one_work+0x1d2/0x510
 [<ffffffff8108b906>] ? process_one_work+0x166/0x510
 [<ffffffff8108ca80>] worker_thread+0x120/0x3a0
 [<ffffffff8108c960>] ? manage_workers+0x2c0/0x2c0
 [<ffffffff81092c1e>] kthread+0xee/0x110
 [<ffffffff81092b30>] ? __init_kthread_worker+0x70/0x70
 [<ffffffff816c3d7c>] ret_from_fork+0x7c/0xb0
 [<ffffffff81092b30>] ? __init_kthread_worker+0x70/0x70

== The issue background ==

The problem occurs, because e1000_down(), which is called under
adapter->mutex by e1000_reset_task(), tries to synchronously cancel
e1000 auxiliary works (reset_task, watchdog_task, phy_info_task,
fifo_stall_task), which take adapter->mutex in their handlers. So the
question is what does adapter->mutex protect there?

The adapter->mutex was introduced by commit 0ef4ee ("e1000: convert to
private mutex from rtnl") as a replacement for rtnl_lock() taken in the
asynchronous handlers. It targeted on fixing a similar lockdep warning
issued when e1000_down() was called under rtnl_lock(), and it fixed it,
but unfortunately it introduced the lockdep warning described above.
Anyway, that said the source of this bug is that the asynchronous works
were made to take rtnl_lock() some time ago, so let's look deeper and
find why it was added there.

The rtnl_lock() was added to asynchronous handlers by commit 338c15
("e1000: fix occasional panic on unload") in order to prevent
asynchronous handlers from execution after the module is unloaded
(e1000_down() is called) as it follows from the comment to the commit:

> Net drivers in general have an issue where timers fired
> by mod_timer or work threads with schedule_work are running
> outside of the rtnl_lock.
>
> With no other lock protection these routines are vulnerable
> to races with driver unload or reset paths.
>
> The longer term solution to this might be a redesign with
> safer locks being taken in the driver to guarantee no
> reentrance, but for now a safe and effective fix is
> to take the rtnl_lock in these routines.

I'm not sure if this locking scheme fixed the problem or just made it
unlikely, although I incline to the latter. Anyway, this was long time
ago when e1000 auxiliary works were implemented as timers scheduling
real work handlers in their routines. The e1000_down() function only
canceled the timers, but left the real handlers running if they were
running, which could result in work execution after module unload.
Today, the e1000 driver uses sane delayed works instead of the pair
timer+work to implement its delayed asynchronous handlers, and the
e1000_down() synchronously cancels all the works so that the problem
that commit 338c15 tried to cope with disappeared, and we don't need any
locks in the handlers any more. Moreover, any locking there can
potentially result in a deadlock.

So, this patch reverts commits 0ef4ee and 338c15.

Fixes: 0ef4eedc2e ("e1000: convert to private mutex from rtnl")
Fixes: 338c15e470 ("e1000: fix occasional panic on unload")
Cc: Tushar Dave <tushar.n.dave@intel.com>
Cc: Patrick McHardy <kaber@trash.net>
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2013-11-29 23:55:40 -08:00

369 lines
10 KiB
C

/*******************************************************************************
Intel PRO/1000 Linux driver
Copyright(c) 1999 - 2006 Intel Corporation.
This program is free software; you can redistribute it and/or modify it
under the terms and conditions of the GNU General Public License,
version 2, as published by the Free Software Foundation.
This program is distributed in the hope it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
the file called "COPYING".
Contact Information:
Linux NICS <linux.nics@intel.com>
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
*******************************************************************************/
/* Linux PRO/1000 Ethernet Driver main header file */
#ifndef _E1000_H_
#define _E1000_H_
#include <linux/stddef.h>
#include <linux/module.h>
#include <linux/types.h>
#include <asm/byteorder.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/errno.h>
#include <linux/ioport.h>
#include <linux/pci.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/delay.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/interrupt.h>
#include <linux/string.h>
#include <linux/pagemap.h>
#include <linux/dma-mapping.h>
#include <linux/bitops.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <linux/capability.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <net/pkt_sched.h>
#include <linux/list.h>
#include <linux/reboot.h>
#include <net/checksum.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
#define BAR_0 0
#define BAR_1 1
#define BAR_5 5
#define INTEL_E1000_ETHERNET_DEVICE(device_id) {\
PCI_DEVICE(PCI_VENDOR_ID_INTEL, device_id)}
struct e1000_adapter;
#include "e1000_hw.h"
#define E1000_MAX_INTR 10
/*
* Count for polling __E1000_RESET condition every 10-20msec.
*/
#define E1000_CHECK_RESET_COUNT 50
/* TX/RX descriptor defines */
#define E1000_DEFAULT_TXD 256
#define E1000_MAX_TXD 256
#define E1000_MIN_TXD 48
#define E1000_MAX_82544_TXD 4096
#define E1000_DEFAULT_RXD 256
#define E1000_MAX_RXD 256
#define E1000_MIN_RXD 48
#define E1000_MAX_82544_RXD 4096
#define E1000_MIN_ITR_USECS 10 /* 100000 irq/sec */
#define E1000_MAX_ITR_USECS 10000 /* 100 irq/sec */
/* this is the size past which hardware will drop packets when setting LPE=0 */
#define MAXIMUM_ETHERNET_VLAN_SIZE 1522
/* Supported Rx Buffer Sizes */
#define E1000_RXBUFFER_128 128 /* Used for packet split */
#define E1000_RXBUFFER_256 256 /* Used for packet split */
#define E1000_RXBUFFER_512 512
#define E1000_RXBUFFER_1024 1024
#define E1000_RXBUFFER_2048 2048
#define E1000_RXBUFFER_4096 4096
#define E1000_RXBUFFER_8192 8192
#define E1000_RXBUFFER_16384 16384
/* SmartSpeed delimiters */
#define E1000_SMARTSPEED_DOWNSHIFT 3
#define E1000_SMARTSPEED_MAX 15
/* Packet Buffer allocations */
#define E1000_PBA_BYTES_SHIFT 0xA
#define E1000_TX_HEAD_ADDR_SHIFT 7
#define E1000_PBA_TX_MASK 0xFFFF0000
/* Flow Control Watermarks */
#define E1000_FC_HIGH_DIFF 0x1638 /* High: 5688 bytes below Rx FIFO size */
#define E1000_FC_LOW_DIFF 0x1640 /* Low: 5696 bytes below Rx FIFO size */
#define E1000_FC_PAUSE_TIME 0xFFFF /* pause for the max or until send xon */
/* How many Tx Descriptors do we need to call netif_wake_queue ? */
#define E1000_TX_QUEUE_WAKE 16
/* How many Rx Buffers do we bundle into one write to the hardware ? */
#define E1000_RX_BUFFER_WRITE 16 /* Must be power of 2 */
#define AUTO_ALL_MODES 0
#define E1000_EEPROM_82544_APM 0x0004
#define E1000_EEPROM_APME 0x0400
#ifndef E1000_MASTER_SLAVE
/* Switch to override PHY master/slave setting */
#define E1000_MASTER_SLAVE e1000_ms_hw_default
#endif
#define E1000_MNG_VLAN_NONE (-1)
/* wrapper around a pointer to a socket buffer,
* so a DMA handle can be stored along with the buffer
*/
struct e1000_buffer {
struct sk_buff *skb;
dma_addr_t dma;
struct page *page;
unsigned long time_stamp;
u16 length;
u16 next_to_watch;
unsigned int segs;
unsigned int bytecount;
u16 mapped_as_page;
};
struct e1000_tx_ring {
/* pointer to the descriptor ring memory */
void *desc;
/* physical address of the descriptor ring */
dma_addr_t dma;
/* length of descriptor ring in bytes */
unsigned int size;
/* number of descriptors in the ring */
unsigned int count;
/* next descriptor to associate a buffer with */
unsigned int next_to_use;
/* next descriptor to check for DD status bit */
unsigned int next_to_clean;
/* array of buffer information structs */
struct e1000_buffer *buffer_info;
u16 tdh;
u16 tdt;
bool last_tx_tso;
};
struct e1000_rx_ring {
/* pointer to the descriptor ring memory */
void *desc;
/* physical address of the descriptor ring */
dma_addr_t dma;
/* length of descriptor ring in bytes */
unsigned int size;
/* number of descriptors in the ring */
unsigned int count;
/* next descriptor to associate a buffer with */
unsigned int next_to_use;
/* next descriptor to check for DD status bit */
unsigned int next_to_clean;
/* array of buffer information structs */
struct e1000_buffer *buffer_info;
struct sk_buff *rx_skb_top;
/* cpu for rx queue */
int cpu;
u16 rdh;
u16 rdt;
};
#define E1000_DESC_UNUSED(R) \
((((R)->next_to_clean > (R)->next_to_use) \
? 0 : (R)->count) + (R)->next_to_clean - (R)->next_to_use - 1)
#define E1000_RX_DESC_EXT(R, i) \
(&(((union e1000_rx_desc_extended *)((R).desc))[i]))
#define E1000_GET_DESC(R, i, type) (&(((struct type *)((R).desc))[i]))
#define E1000_RX_DESC(R, i) E1000_GET_DESC(R, i, e1000_rx_desc)
#define E1000_TX_DESC(R, i) E1000_GET_DESC(R, i, e1000_tx_desc)
#define E1000_CONTEXT_DESC(R, i) E1000_GET_DESC(R, i, e1000_context_desc)
/* board specific private data structure */
struct e1000_adapter {
unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
u16 mng_vlan_id;
u32 bd_number;
u32 rx_buffer_len;
u32 wol;
u32 smartspeed;
u32 en_mng_pt;
u16 link_speed;
u16 link_duplex;
spinlock_t stats_lock;
unsigned int total_tx_bytes;
unsigned int total_tx_packets;
unsigned int total_rx_bytes;
unsigned int total_rx_packets;
/* Interrupt Throttle Rate */
u32 itr;
u32 itr_setting;
u16 tx_itr;
u16 rx_itr;
u8 fc_autoneg;
/* TX */
struct e1000_tx_ring *tx_ring; /* One per active queue */
unsigned int restart_queue;
u32 txd_cmd;
u32 tx_int_delay;
u32 tx_abs_int_delay;
u32 gotcl;
u64 gotcl_old;
u64 tpt_old;
u64 colc_old;
u32 tx_timeout_count;
u32 tx_fifo_head;
u32 tx_head_addr;
u32 tx_fifo_size;
u8 tx_timeout_factor;
atomic_t tx_fifo_stall;
bool pcix_82544;
bool detect_tx_hung;
bool dump_buffers;
/* RX */
bool (*clean_rx)(struct e1000_adapter *adapter,
struct e1000_rx_ring *rx_ring,
int *work_done, int work_to_do);
void (*alloc_rx_buf)(struct e1000_adapter *adapter,
struct e1000_rx_ring *rx_ring,
int cleaned_count);
struct e1000_rx_ring *rx_ring; /* One per active queue */
struct napi_struct napi;
int num_tx_queues;
int num_rx_queues;
u64 hw_csum_err;
u64 hw_csum_good;
u32 alloc_rx_buff_failed;
u32 rx_int_delay;
u32 rx_abs_int_delay;
bool rx_csum;
u32 gorcl;
u64 gorcl_old;
/* OS defined structs */
struct net_device *netdev;
struct pci_dev *pdev;
/* structs defined in e1000_hw.h */
struct e1000_hw hw;
struct e1000_hw_stats stats;
struct e1000_phy_info phy_info;
struct e1000_phy_stats phy_stats;
u32 test_icr;
struct e1000_tx_ring test_tx_ring;
struct e1000_rx_ring test_rx_ring;
int msg_enable;
/* to not mess up cache alignment, always add to the bottom */
bool tso_force;
bool smart_power_down; /* phy smart power down */
bool quad_port_a;
unsigned long flags;
u32 eeprom_wol;
/* for ioport free */
int bars;
int need_ioport;
bool discarding;
struct work_struct reset_task;
struct delayed_work watchdog_task;
struct delayed_work fifo_stall_task;
struct delayed_work phy_info_task;
};
enum e1000_state_t {
__E1000_TESTING,
__E1000_RESETTING,
__E1000_DOWN
};
#undef pr_fmt
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
struct net_device *e1000_get_hw_dev(struct e1000_hw *hw);
#define e_dbg(format, arg...) \
netdev_dbg(e1000_get_hw_dev(hw), format, ## arg)
#define e_err(msglvl, format, arg...) \
netif_err(adapter, msglvl, adapter->netdev, format, ## arg)
#define e_info(msglvl, format, arg...) \
netif_info(adapter, msglvl, adapter->netdev, format, ## arg)
#define e_warn(msglvl, format, arg...) \
netif_warn(adapter, msglvl, adapter->netdev, format, ## arg)
#define e_notice(msglvl, format, arg...) \
netif_notice(adapter, msglvl, adapter->netdev, format, ## arg)
#define e_dev_info(format, arg...) \
dev_info(&adapter->pdev->dev, format, ## arg)
#define e_dev_warn(format, arg...) \
dev_warn(&adapter->pdev->dev, format, ## arg)
#define e_dev_err(format, arg...) \
dev_err(&adapter->pdev->dev, format, ## arg)
extern char e1000_driver_name[];
extern const char e1000_driver_version[];
int e1000_up(struct e1000_adapter *adapter);
void e1000_down(struct e1000_adapter *adapter);
void e1000_reinit_locked(struct e1000_adapter *adapter);
void e1000_reset(struct e1000_adapter *adapter);
int e1000_set_spd_dplx(struct e1000_adapter *adapter, u32 spd, u8 dplx);
int e1000_setup_all_rx_resources(struct e1000_adapter *adapter);
int e1000_setup_all_tx_resources(struct e1000_adapter *adapter);
void e1000_free_all_rx_resources(struct e1000_adapter *adapter);
void e1000_free_all_tx_resources(struct e1000_adapter *adapter);
void e1000_update_stats(struct e1000_adapter *adapter);
bool e1000_has_link(struct e1000_adapter *adapter);
void e1000_power_up_phy(struct e1000_adapter *);
void e1000_set_ethtool_ops(struct net_device *netdev);
void e1000_check_options(struct e1000_adapter *adapter);
char *e1000_get_hw_dev_name(struct e1000_hw *hw);
#endif /* _E1000_H_ */