e1000e: use hardware writeback batching

Most e1000e parts support batching descriptor writebacks.  The problem with
this is that when the TADV or TIDV timers are not set, completed Tx
descriptors can sit in the hardware forever without being written back.

This is solved in this patch with write flushes using the Flush Partial
Descriptors (FPD) bit in TIDV and RDTR.

This improves bus utilization and removes partial writes on e1000e,
particularly from 82571 parts in S5500 chipset based machines.

Only ES2LAN and 82571/2 parts are included in this optimization, to reduce
testing load.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Emil Tantilov <emil.s.tantilov@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Jesse Brandeburg 2010-09-29 21:38:49 +00:00 committed by David S. Miller
parent 6af3b9ebfe
commit 3a3b758605
6 changed files with 88 additions and 4 deletions

View File

@ -1801,7 +1801,8 @@ struct e1000_info e1000_82571_info = {
| FLAG_RESET_OVERWRITES_LAA /* errata */ | FLAG_RESET_OVERWRITES_LAA /* errata */
| FLAG_TARC_SPEED_MODE_BIT /* errata */ | FLAG_TARC_SPEED_MODE_BIT /* errata */
| FLAG_APME_CHECK_PORT_B, | FLAG_APME_CHECK_PORT_B,
.flags2 = FLAG2_DISABLE_ASPM_L1, /* errata 13 */ .flags2 = FLAG2_DISABLE_ASPM_L1 /* errata 13 */
| FLAG2_DMA_BURST,
.pba = 38, .pba = 38,
.max_hw_frame_size = DEFAULT_JUMBO, .max_hw_frame_size = DEFAULT_JUMBO,
.get_variants = e1000_get_variants_82571, .get_variants = e1000_get_variants_82571,
@ -1819,7 +1820,8 @@ struct e1000_info e1000_82572_info = {
| FLAG_RX_CSUM_ENABLED | FLAG_RX_CSUM_ENABLED
| FLAG_HAS_CTRLEXT_ON_LOAD | FLAG_HAS_CTRLEXT_ON_LOAD
| FLAG_TARC_SPEED_MODE_BIT, /* errata */ | FLAG_TARC_SPEED_MODE_BIT, /* errata */
.flags2 = FLAG2_DISABLE_ASPM_L1, /* errata 13 */ .flags2 = FLAG2_DISABLE_ASPM_L1 /* errata 13 */
| FLAG2_DMA_BURST,
.pba = 38, .pba = 38,
.max_hw_frame_size = DEFAULT_JUMBO, .max_hw_frame_size = DEFAULT_JUMBO,
.get_variants = e1000_get_variants_82571, .get_variants = e1000_get_variants_82571,

View File

@ -446,7 +446,9 @@
/* Transmit Descriptor Control */ /* Transmit Descriptor Control */
#define E1000_TXDCTL_PTHRESH 0x0000003F /* TXDCTL Prefetch Threshold */ #define E1000_TXDCTL_PTHRESH 0x0000003F /* TXDCTL Prefetch Threshold */
#define E1000_TXDCTL_HTHRESH 0x00003F00 /* TXDCTL Host Threshold */
#define E1000_TXDCTL_WTHRESH 0x003F0000 /* TXDCTL Writeback Threshold */ #define E1000_TXDCTL_WTHRESH 0x003F0000 /* TXDCTL Writeback Threshold */
#define E1000_TXDCTL_GRAN 0x01000000 /* TXDCTL Granularity */
#define E1000_TXDCTL_FULL_TX_DESC_WB 0x01010000 /* GRAN=1, WTHRESH=1 */ #define E1000_TXDCTL_FULL_TX_DESC_WB 0x01010000 /* GRAN=1, WTHRESH=1 */
#define E1000_TXDCTL_MAX_TX_DESC_PREFETCH 0x0100001F /* GRAN=1, PTHRESH=31 */ #define E1000_TXDCTL_MAX_TX_DESC_PREFETCH 0x0100001F /* GRAN=1, PTHRESH=31 */
/* Enable the counting of desc. still to be processed. */ /* Enable the counting of desc. still to be processed. */

View File

@ -153,6 +153,33 @@ struct e1000_info;
/* Time to wait before putting the device into D3 if there's no link (in ms). */ /* Time to wait before putting the device into D3 if there's no link (in ms). */
#define LINK_TIMEOUT 100 #define LINK_TIMEOUT 100
/*
 * Default values for the Rx interrupt delay (RDTR) and Rx absolute
 * interrupt delay (RADV) settings.
 */
#define DEFAULT_RDTR 0
#define DEFAULT_RADV 8
/*
 * Values substituted for rx_int_delay / rx_abs_int_delay when DMA burst
 * is enabled and the configured value still equals the default above.
 */
#define BURST_RDTR 0x20
#define BURST_RADV 0x20
/*
 * Tx descriptor control (TXDCTL) threshold settings applied when DMA
 * burst is enabled.
 *
 * In the case of WTHRESH, it appears at least the 82571/2 hardware
 * writes back 4 descriptors when WTHRESH=5, and 3 descriptors when
 * WTHRESH=4, and since we want 64 bytes at a time written back, set
 * it to 5.
 */
#define E1000_TXDCTL_DMA_BURST_ENABLE \
(E1000_TXDCTL_GRAN | /* set descriptor granularity */ \
E1000_TXDCTL_COUNT_DESC | \
(5 << 16) | /* wthresh must be +1 more than desired */\
(1 << 8) | /* hthresh */ \
0x1f) /* pthresh */
/*
 * Rx descriptor control (RXDCTL) value applied when DMA burst is enabled:
 * granularity = 1, wthresh = 4, hthresh = 4, pthresh = 0x20.
 */
#define E1000_RXDCTL_DMA_BURST_ENABLE \
(0x01000000 | /* set descriptor granularity */ \
(4 << 16) | /* set writeback threshold */ \
(4 << 8) | /* set hthresh */ \
0x20) /* set pthresh (prefetch threshold) */
/*
 * Flush Partial Descriptors (FPD) bit, OR'ed into the value written to
 * TIDV / RDTR to flush partial descriptors to memory.
 *
 * The constant is unsigned: shifting a signed 1 into bit 31 is undefined
 * behavior in C and would otherwise produce a negative int.
 */
#define E1000_TIDV_FPD (1u << 31)
#define E1000_RDTR_FPD (1u << 31)
enum e1000_boards { enum e1000_boards {
board_82571, board_82571,
board_82572, board_82572,
@ -425,6 +452,7 @@ struct e1000_info {
#define FLAG2_DISABLE_ASPM_L1 (1 << 3) #define FLAG2_DISABLE_ASPM_L1 (1 << 3)
#define FLAG2_HAS_PHY_STATS (1 << 4) #define FLAG2_HAS_PHY_STATS (1 << 4)
#define FLAG2_HAS_EEE (1 << 5) #define FLAG2_HAS_EEE (1 << 5)
#define FLAG2_DMA_BURST (1 << 6)
#define E1000_RX_DESC_PS(R, i) \ #define E1000_RX_DESC_PS(R, i) \
(&(((union e1000_rx_desc_packet_split *)((R).desc))[i])) (&(((union e1000_rx_desc_packet_split *)((R).desc))[i]))

View File

@ -1494,6 +1494,7 @@ struct e1000_info e1000_es2_info = {
| FLAG_APME_CHECK_PORT_B | FLAG_APME_CHECK_PORT_B
| FLAG_DISABLE_FC_PAUSE_TIME /* errata */ | FLAG_DISABLE_FC_PAUSE_TIME /* errata */
| FLAG_TIPG_MEDIUM_FOR_80003ESLAN, | FLAG_TIPG_MEDIUM_FOR_80003ESLAN,
.flags2 = FLAG2_DMA_BURST,
.pba = 38, .pba = 38,
.max_hw_frame_size = DEFAULT_JUMBO, .max_hw_frame_size = DEFAULT_JUMBO,
.get_variants = e1000_get_variants_80003es2lan, .get_variants = e1000_get_variants_80003es2lan,

View File

@ -2650,6 +2650,26 @@ static void e1000_configure_tx(struct e1000_adapter *adapter)
/* Tx irq moderation */ /* Tx irq moderation */
ew32(TADV, adapter->tx_abs_int_delay); ew32(TADV, adapter->tx_abs_int_delay);
if (adapter->flags2 & FLAG2_DMA_BURST) {
u32 txdctl = er32(TXDCTL(0));
txdctl &= ~(E1000_TXDCTL_PTHRESH | E1000_TXDCTL_HTHRESH |
E1000_TXDCTL_WTHRESH);
/*
* set up some performance related parameters to encourage the
* hardware to use the bus more efficiently in bursts, depends
* on the tx_int_delay to be enabled,
* wthresh = 5 ==> burst write a cacheline (64 bytes) at a time
* hthresh = 1 ==> prefetch when one or more available
* pthresh = 0x1f ==> prefetch if internal cache 31 or less
* BEWARE: this seems to work but should be considered first if
* there are tx hangs or other tx related bugs
*/
txdctl |= E1000_TXDCTL_DMA_BURST_ENABLE;
ew32(TXDCTL(0), txdctl);
/* erratum work around: set txdctl the same for both queues */
ew32(TXDCTL(1), txdctl);
}
/* Program the Transmit Control Register */ /* Program the Transmit Control Register */
tctl = er32(TCTL); tctl = er32(TCTL);
tctl &= ~E1000_TCTL_CT; tctl &= ~E1000_TCTL_CT;
@ -2872,6 +2892,29 @@ static void e1000_configure_rx(struct e1000_adapter *adapter)
e1e_flush(); e1e_flush();
msleep(10); msleep(10);
if (adapter->flags2 & FLAG2_DMA_BURST) {
/*
* set the writeback threshold (only takes effect if the RDTR
* is set). set GRAN=1 and write back up to 0x4 worth, and
* enable prefetching of 0x20 rx descriptors
* granularity = 01
* wthresh = 04,
* hthresh = 04,
* pthresh = 0x20
*/
ew32(RXDCTL(0), E1000_RXDCTL_DMA_BURST_ENABLE);
ew32(RXDCTL(1), E1000_RXDCTL_DMA_BURST_ENABLE);
/*
* override the delay timers for enabling bursting, only if
* the value was not set by the user via module options
*/
if (adapter->rx_int_delay == DEFAULT_RDTR)
adapter->rx_int_delay = BURST_RDTR;
if (adapter->rx_abs_int_delay == DEFAULT_RADV)
adapter->rx_abs_int_delay = BURST_RADV;
}
/* set the Receive Delay Timer Register */ /* set the Receive Delay Timer Register */
ew32(RDTR, adapter->rx_int_delay); ew32(RDTR, adapter->rx_int_delay);
@ -4235,6 +4278,16 @@ link_up:
/* Force detection of hung controller every watchdog period */ /* Force detection of hung controller every watchdog period */
adapter->detect_tx_hung = 1; adapter->detect_tx_hung = 1;
/* flush partial descriptors to memory before detecting tx hang */
if (adapter->flags2 & FLAG2_DMA_BURST) {
ew32(TIDV, adapter->tx_int_delay | E1000_TIDV_FPD);
ew32(RDTR, adapter->rx_int_delay | E1000_RDTR_FPD);
/*
* no need to flush the writes because the timeout code does
* an er32 first thing
*/
}
/* /*
* With 82571 controllers, LAA may be overwritten due to controller * With 82571 controllers, LAA may be overwritten due to controller
* reset from the other port. Set the appropriate LAA in RAR[0] * reset from the other port. Set the appropriate LAA in RAR[0]

View File

@ -91,7 +91,6 @@ E1000_PARAM(TxAbsIntDelay, "Transmit Absolute Interrupt Delay");
* Valid Range: 0-65535 * Valid Range: 0-65535
*/ */
E1000_PARAM(RxIntDelay, "Receive Interrupt Delay"); E1000_PARAM(RxIntDelay, "Receive Interrupt Delay");
#define DEFAULT_RDTR 0
#define MAX_RXDELAY 0xFFFF #define MAX_RXDELAY 0xFFFF
#define MIN_RXDELAY 0 #define MIN_RXDELAY 0
@ -101,7 +100,6 @@ E1000_PARAM(RxIntDelay, "Receive Interrupt Delay");
* Valid Range: 0-65535 * Valid Range: 0-65535
*/ */
E1000_PARAM(RxAbsIntDelay, "Receive Absolute Interrupt Delay"); E1000_PARAM(RxAbsIntDelay, "Receive Absolute Interrupt Delay");
#define DEFAULT_RADV 8
#define MAX_RXABSDELAY 0xFFFF #define MAX_RXABSDELAY 0xFFFF
#define MIN_RXABSDELAY 0 #define MIN_RXABSDELAY 0