From 8c83f80b2d335176f72d8729fe1dfe19c5812cb1 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 9 Dec 2008 00:10:11 -0800
Subject: [PATCH 01/11] sungem: Make PCS PHY support partially work again.

As reported by Hermann Lauer, PCS PHY support in the sungem
driver simply doesn't work.

When the chip is reset due to open, or some other similar operation,
the PCS is reset too but we don't program it back into a running
state.  The result is no link when the device is brought up.

This partially rectifies the situation for the moment, by kicking
the PCS after a sungem chip reset so that it will renegotiate and
be re-enabled again.

The behavior is still a little bit dodgy as the added renegotiation
makes the link take some time after bringing the interface up,
but this is a significant improvement in that things actually work
now :-)

Based almost entirely upon an initial patch by Hermann.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/sungem.c | 129 ++++++++++++++++++++++++-------------------
 1 file changed, 73 insertions(+), 56 deletions(-)

diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index 1349e419673c..bb7a47829f7f 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -1142,6 +1142,70 @@ static int gem_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 }
 
+static void gem_pcs_reset(struct gem *gp)
+{
+	int limit;
+	u32 val;
+
+	/* Reset PCS unit. */
+	val = readl(gp->regs + PCS_MIICTRL);
+	val |= PCS_MIICTRL_RST;
+	writel(val, gp->regs + PCS_MIICTRL);
+
+	limit = 32;
+	while (readl(gp->regs + PCS_MIICTRL) & PCS_MIICTRL_RST) {
+		udelay(100);
+		if (limit-- <= 0)
+			break;
+	}
+	if (limit <= 0)
+		printk(KERN_WARNING "%s: PCS reset bit would not clear.\n",
+		       gp->dev->name);
+}
+
+static void gem_pcs_reinit_adv(struct gem *gp)
+{
+	u32 val;
+
+	/* Make sure PCS is disabled while changing advertisement
+	 * configuration.
+	 */
+	val = readl(gp->regs + PCS_CFG);
+	val &= ~(PCS_CFG_ENABLE | PCS_CFG_TO);
+	writel(val, gp->regs + PCS_CFG);
+
+	/* Advertise all capabilities except asymmetric
+	 * pause.
+	 */
+	val = readl(gp->regs + PCS_MIIADV);
+	val |= (PCS_MIIADV_FD | PCS_MIIADV_HD |
+		PCS_MIIADV_SP | PCS_MIIADV_AP);
+	writel(val, gp->regs + PCS_MIIADV);
+
+	/* Enable and restart auto-negotiation, disable wrapback/loopback,
+	 * and re-enable PCS.
+	 */
+	val = readl(gp->regs + PCS_MIICTRL);
+	val |= (PCS_MIICTRL_RAN | PCS_MIICTRL_ANE);
+	val &= ~PCS_MIICTRL_WB;
+	writel(val, gp->regs + PCS_MIICTRL);
+
+	val = readl(gp->regs + PCS_CFG);
+	val |= PCS_CFG_ENABLE;
+	writel(val, gp->regs + PCS_CFG);
+
+	/* Make sure serialink loopback is off.  The meaning
+	 * of this bit is logically inverted based upon whether
+	 * you are in Serialink or SERDES mode.
+	 */
+	val = readl(gp->regs + PCS_SCTRL);
+	if (gp->phy_type == phy_serialink)
+		val &= ~PCS_SCTRL_LOOP;
+	else
+		val |= PCS_SCTRL_LOOP;
+	writel(val, gp->regs + PCS_SCTRL);
+}
+
 #define STOP_TRIES 32
 
 /* Must be invoked under gp->lock and gp->tx_lock. */
@@ -1168,6 +1232,9 @@ static void gem_reset(struct gem *gp)
 
 	if (limit <= 0)
 		printk(KERN_ERR "%s: SW reset is ghetto.\n", gp->dev->name);
+
+	if (gp->phy_type == phy_serialink || gp->phy_type == phy_serdes)
+		gem_pcs_reinit_adv(gp);
 }
 
 /* Must be invoked under gp->lock and gp->tx_lock. */
@@ -1324,7 +1391,7 @@ static int gem_set_link_modes(struct gem *gp)
 	    	   gp->phy_type == phy_serdes) {
 		u32 pcs_lpa = readl(gp->regs + PCS_MIILP);
 
-		if (pcs_lpa & PCS_MIIADV_FD)
+		if ((pcs_lpa & PCS_MIIADV_FD) || gp->phy_type == phy_serdes)
 			full_duplex = 1;
 		speed = SPEED_1000;
 	}
@@ -1488,6 +1555,9 @@ static void gem_link_timer(unsigned long data)
 			val = readl(gp->regs + PCS_MIISTAT);
 
 		if ((val & PCS_MIISTAT_LS) != 0) {
+			if (gp->lstate == link_up)
+				goto restart;
+
 			gp->lstate = link_up;
 			netif_carrier_on(gp->dev);
 			(void)gem_set_link_modes(gp);
@@ -1708,61 +1778,8 @@ static void gem_init_phy(struct gem *gp)
 		if (gp->phy_mii.def && gp->phy_mii.def->ops->init)
 			gp->phy_mii.def->ops->init(&gp->phy_mii);
 	} else {
-		u32 val;
-		int limit;
-
-		/* Reset PCS unit. */
-		val = readl(gp->regs + PCS_MIICTRL);
-		val |= PCS_MIICTRL_RST;
-		writel(val, gp->regs + PCS_MIICTRL);
-
-		limit = 32;
-		while (readl(gp->regs + PCS_MIICTRL) & PCS_MIICTRL_RST) {
-			udelay(100);
-			if (limit-- <= 0)
-				break;
-		}
-		if (limit <= 0)
-			printk(KERN_WARNING "%s: PCS reset bit would not clear.\n",
-			       gp->dev->name);
-
-		/* Make sure PCS is disabled while changing advertisement
-		 * configuration.
-		 */
-		val = readl(gp->regs + PCS_CFG);
-		val &= ~(PCS_CFG_ENABLE | PCS_CFG_TO);
-		writel(val, gp->regs + PCS_CFG);
-
-		/* Advertise all capabilities except assymetric
-		 * pause.
-		 */
-		val = readl(gp->regs + PCS_MIIADV);
-		val |= (PCS_MIIADV_FD | PCS_MIIADV_HD |
-			PCS_MIIADV_SP | PCS_MIIADV_AP);
-		writel(val, gp->regs + PCS_MIIADV);
-
-		/* Enable and restart auto-negotiation, disable wrapback/loopback,
-		 * and re-enable PCS.
-		 */
-		val = readl(gp->regs + PCS_MIICTRL);
-		val |= (PCS_MIICTRL_RAN | PCS_MIICTRL_ANE);
-		val &= ~PCS_MIICTRL_WB;
-		writel(val, gp->regs + PCS_MIICTRL);
-
-		val = readl(gp->regs + PCS_CFG);
-		val |= PCS_CFG_ENABLE;
-		writel(val, gp->regs + PCS_CFG);
-
-		/* Make sure serialink loopback is off.  The meaning
-		 * of this bit is logically inverted based upon whether
-		 * you are in Serialink or SERDES mode.
-		 */
-		val = readl(gp->regs + PCS_SCTRL);
-		if (gp->phy_type == phy_serialink)
-			val &= ~PCS_SCTRL_LOOP;
-		else
-			val |= PCS_SCTRL_LOOP;
-		writel(val, gp->regs + PCS_SCTRL);
+		gem_pcs_reset(gp);
+		gem_pcs_reinit_adv(gp);
 	}
 
 	/* Default aneg parameters */

From 8d3a564da34e5844aca4f991b73f8ca512246b23 Mon Sep 17 00:00:00 2001
From: Doug Leith <doug.leith@nuim.ie>
Date: Tue, 9 Dec 2008 00:13:04 -0800
Subject: [PATCH 02/11] tcp: tcp_vegas cong avoid fix

This patch addresses a book-keeping issue in tcp_vegas.c.  At present
tcp_vegas does separate book-keeping of cwnd based on packet sequence
numbers.  A mismatch can develop between this book-keeping and
tp->snd_cwnd due, for example, to delayed acks acking multiple
packets.  When vegas transitions to reno operation (e.g. following
loss), then this mismatch leads to incorrect behaviour (akin to a cwnd
backoff).  This seems mostly to affect operation at low cwnds where
delayed acking can lead to a significant fraction of cwnd being
covered by a single ack, leading to the book-keeping mismatch.  This
patch modifies the congestion avoidance update to avoid the need for
separate book-keeping while leaving vegas congestion avoidance
functionally unchanged.  A secondary advantage of this modification is
that the use of fixed-point (via V_PARAM_SHIFT) and 64 bit arithmetic
is no longer necessary, simplifying the code.

Some example test measurements with the patched code (confirming no functional
change in the congestion avoidance algorithm) can be seen at:

http://www.hamilton.ie/doug/vegaspatch/

Signed-off-by: Doug Leith <doug.leith@nuim.ie>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_vegas.c | 80 ++++++--------------------------------------
 1 file changed, 10 insertions(+), 70 deletions(-)

diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 7cd22262de3a..a453aac91bd3 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -40,18 +40,14 @@
 
 #include "tcp_vegas.h"
 
-/* Default values of the Vegas variables, in fixed-point representation
- * with V_PARAM_SHIFT bits to the right of the binary point.
- */
-#define V_PARAM_SHIFT 1
-static int alpha = 2<<V_PARAM_SHIFT;
-static int beta  = 4<<V_PARAM_SHIFT;
-static int gamma = 1<<V_PARAM_SHIFT;
+static int alpha = 2;
+static int beta  = 4;
+static int gamma = 1;
 
 module_param(alpha, int, 0644);
-MODULE_PARM_DESC(alpha, "lower bound of packets in network (scale by 2)");
+MODULE_PARM_DESC(alpha, "lower bound of packets in network");
 module_param(beta, int, 0644);
-MODULE_PARM_DESC(beta, "upper bound of packets in network (scale by 2)");
+MODULE_PARM_DESC(beta, "upper bound of packets in network");
 module_param(gamma, int, 0644);
 MODULE_PARM_DESC(gamma, "limit on increase (scale by 2)");
 
@@ -172,49 +168,13 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 		return;
 	}
 
-	/* The key players are v_beg_snd_una and v_beg_snd_nxt.
-	 *
-	 * These are so named because they represent the approximate values
-	 * of snd_una and snd_nxt at the beginning of the current RTT. More
-	 * precisely, they represent the amount of data sent during the RTT.
-	 * At the end of the RTT, when we receive an ACK for v_beg_snd_nxt,
-	 * we will calculate that (v_beg_snd_nxt - v_beg_snd_una) outstanding
-	 * bytes of data have been ACKed during the course of the RTT, giving
-	 * an "actual" rate of:
-	 *
-	 *     (v_beg_snd_nxt - v_beg_snd_una) / (rtt duration)
-	 *
-	 * Unfortunately, v_beg_snd_una is not exactly equal to snd_una,
-	 * because delayed ACKs can cover more than one segment, so they
-	 * don't line up nicely with the boundaries of RTTs.
-	 *
-	 * Another unfortunate fact of life is that delayed ACKs delay the
-	 * advance of the left edge of our send window, so that the number
-	 * of bytes we send in an RTT is often less than our cwnd will allow.
-	 * So we keep track of our cwnd separately, in v_beg_snd_cwnd.
-	 */
-
 	if (after(ack, vegas->beg_snd_nxt)) {
 		/* Do the Vegas once-per-RTT cwnd adjustment. */
-		u32 old_wnd, old_snd_cwnd;
-
-
-		/* Here old_wnd is essentially the window of data that was
-		 * sent during the previous RTT, and has all
-		 * been acknowledged in the course of the RTT that ended
-		 * with the ACK we just received. Likewise, old_snd_cwnd
-		 * is the cwnd during the previous RTT.
-		 */
-		old_wnd = (vegas->beg_snd_nxt - vegas->beg_snd_una) /
-			tp->mss_cache;
-		old_snd_cwnd = vegas->beg_snd_cwnd;
 
 		/* Save the extent of the current window so we can use this
 		 * at the end of the next RTT.
 		 */
-		vegas->beg_snd_una  = vegas->beg_snd_nxt;
 		vegas->beg_snd_nxt  = tp->snd_nxt;
-		vegas->beg_snd_cwnd = tp->snd_cwnd;
 
 		/* We do the Vegas calculations only if we got enough RTT
 		 * samples that we can be reasonably sure that we got
@@ -252,22 +212,14 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 			 *
 			 * This is:
 			 *     (actual rate in segments) * baseRTT
-			 * We keep it as a fixed point number with
-			 * V_PARAM_SHIFT bits to the right of the binary point.
 			 */
-			target_cwnd = ((u64)old_wnd * vegas->baseRTT);
-			target_cwnd <<= V_PARAM_SHIFT;
-			do_div(target_cwnd, rtt);
+			target_cwnd = tp->snd_cwnd * vegas->baseRTT / rtt;
 
 			/* Calculate the difference between the window we had,
 			 * and the window we would like to have. This quantity
 			 * is the "Diff" from the Arizona Vegas papers.
-			 *
-			 * Again, this is a fixed point number with
-			 * V_PARAM_SHIFT bits to the right of the binary
-			 * point.
 			 */
-			diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd;
+			diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT;
 
 			if (diff > gamma && tp->snd_ssthresh > 2 ) {
 				/* Going too fast. Time to slow down
@@ -282,16 +234,13 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 				 * truncation robs us of full link
 				 * utilization.
 				 */
-				tp->snd_cwnd = min(tp->snd_cwnd,
-						   ((u32)target_cwnd >>
-						    V_PARAM_SHIFT)+1);
+				tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1);
 
 			} else if (tp->snd_cwnd <= tp->snd_ssthresh) {
 				/* Slow start.  */
 				tcp_slow_start(tp);
 			} else {
 				/* Congestion avoidance. */
-				u32 next_snd_cwnd;
 
 				/* Figure out where we would like cwnd
 				 * to be.
@@ -300,26 +249,17 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 					/* The old window was too fast, so
 					 * we slow down.
 					 */
-					next_snd_cwnd = old_snd_cwnd - 1;
+					tp->snd_cwnd--;
 				} else if (diff < alpha) {
 					/* We don't have enough extra packets
 					 * in the network, so speed up.
 					 */
-					next_snd_cwnd = old_snd_cwnd + 1;
+					tp->snd_cwnd++;
 				} else {
 					/* Sending just as fast as we
 					 * should be.
 					 */
-					next_snd_cwnd = old_snd_cwnd;
 				}
-
-				/* Adjust cwnd upward or downward, toward the
-				 * desired value.
-				 */
-				if (next_snd_cwnd > tp->snd_cwnd)
-					tp->snd_cwnd++;
-				else if (next_snd_cwnd < tp->snd_cwnd)
-					tp->snd_cwnd--;
 			}
 
 			if (tp->snd_cwnd < 2)

From fbf0229e946deb9e6c711f9eaa5c8d670c3a28fd Mon Sep 17 00:00:00 2001
From: Hermann Lauer <Hermann.Lauer@iwr.uni-heidelberg.de>
Date: Tue, 9 Dec 2008 15:39:14 -0800
Subject: [PATCH 03/11] sungem: improve ethtool output with internal pcs and
 serdes

From: Hermann Lauer <Hermann.Lauer@iwr.uni-heidelberg.de>

Attached is a patch which improves the output of ethtool (see below)
to some sensible values with a sungem fibre card which uses the
sungem internal pcs connected to a serdes chip. The serialink case in
the driver is untouched.

Most values are hardcoded, because gigabit fibre autoneg is anyways
limited and the driver doesn't really support much at the moment with
that hardware.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/sungem.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index bb7a47829f7f..fed7eba65ead 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -2697,6 +2697,21 @@ static int gem_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 		cmd->speed = 0;
 		cmd->duplex = cmd->port = cmd->phy_address =
 			cmd->transceiver = cmd->autoneg = 0;
+
+		/* serdes means usually a Fibre connector, with most fixed */
+		if (gp->phy_type == phy_serdes) {
+			cmd->port = PORT_FIBRE;
+			cmd->supported = (SUPPORTED_1000baseT_Half |
+				SUPPORTED_1000baseT_Full |
+				SUPPORTED_FIBRE | SUPPORTED_Autoneg |
+				SUPPORTED_Pause | SUPPORTED_Asym_Pause);
+			cmd->advertising = cmd->supported;
+			cmd->transceiver = XCVR_INTERNAL;
+			if (gp->lstate == link_up)
+				cmd->speed = SPEED_1000;
+			cmd->duplex = DUPLEX_FULL;
+			cmd->autoneg = 1;
+		}
 	}
 	cmd->maxtxpkt = cmd->maxrxpkt = 0;
 

From 24fc7b86dc0470616803be2f921c8cd5c459175d Mon Sep 17 00:00:00 2001
From: Jan Sembera <jsembera@suse.cz>
Date: Tue, 9 Dec 2008 15:48:32 -0800
Subject: [PATCH 04/11] ipv6: silence log messages for locally generated
 multicast

This patch fixes a minor annoyance during transmission of unsolicited
neighbor advertisements from userspace to multicast addresses (as
far as I can see in the RFC, this is allowed and similar functionality
for IPv4 has been in arping for a long time).

Outgoing multicast packets get reinserted into local processing as if they
are received from the network. The machine thus sees its own NA and fills
the logs with error messages. This patch removes the message if NA has been
generated locally.

Signed-off-by: Jan Sembera <jsembera@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ndisc.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 172438320eec..d0f54d18e19b 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -912,8 +912,13 @@ static void ndisc_recv_na(struct sk_buff *skb)
 		   is invalid, but ndisc specs say nothing
 		   about it. It could be misconfiguration, or
 		   an smart proxy agent tries to help us :-)
+
+		   We should not print the error if NA has been
+		   received from loopback - it is just our own
+		   unsolicited advertisement.
 		 */
-		ND_PRINTK1(KERN_WARNING
+		if (skb->pkt_type != PACKET_LOOPBACK)
+			ND_PRINTK1(KERN_WARNING
 			   "ICMPv6 NA: someone advertises our address on %s!\n",
 			   ifp->idev->dev->name);
 		in6_ifa_put(ifp);

From 7b363e440021a1cf9ed76944b2685f48dacefb3e Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Tue, 9 Dec 2008 23:22:26 -0800
Subject: [PATCH 05/11] netpoll: fix race on poll_list resulting in garbage
 entry

A few months back a race was discussed between the netpoll napi service
path, and the fast path through net_rx_action:
http://kerneltrap.org/mailarchive/linux-netdev/2007/10/16/345470

A patch was submitted for that bug, but I think we missed a case.

Consider the following scenario:

INITIAL STATE
CPU0 has one napi_struct A on its poll_list
CPU1 is calling netpoll_send_skb and needs to call poll_napi on the same
napi_struct A that CPU0 has on its list


CPU0						CPU1
net_rx_action					poll_napi
!list_empty (returns true)			locks poll_lock for A
						 poll_one_napi
						  napi->poll
						   netif_rx_complete
						    __napi_complete
						    (removes A from poll_list)
list_entry(list->next)


In the above scenario, net_rx_action assumes that the per-cpu poll_list is
exclusive to that cpu.  netpoll of course violates that, and because the netpoll
path can dequeue from the poll list, it's possible for CPU0 to detect a non-empty
list at the top of the while loop in net_rx_action, but have it become empty by
the time it calls list_entry.  Since the poll_list isn't surrounded by any other
structure, the returned data from that list_entry call in this situation is
garbage, and any number of crashes can result based on what exactly that garbage
is.

Given that it's not feasible for performance reasons to place exclusive locks
around each cpu's poll list to provide that mutual exclusion, I think the best
solution is to modify the netpoll path in such a way that we continue to guarantee
that the poll_list for a cpu is in fact exclusive to that cpu.  To do this I've
implemented the patch below.  It adds an additional bit to the state field in
the napi_struct.  When executing napi->poll from the netpoll_path, this bit will
be set. When a driver calls netif_rx_complete, if that bit is set, it will not
remove the napi_struct from the poll_list.  That work will be saved for the next
iteration of net_rx_action.

I've tested this and it seems to work well.  About the biggest drawback I can
see to it is the fact that it might result in an extra loop through
net_rx_action in the event that the device is actually contended for (i.e. the
netpoll path actually performs all the needed work on the device, and the call
to net_rx_action winds up doing nothing, except removing the napi_struct from
the poll_list).  However I think this is probably a small price to pay, given
that the alternative is a crash.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 7 +++++++
 net/core/netpoll.c        | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9d77b1d7dca8..e26f54952892 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -319,6 +319,7 @@ enum
 {
 	NAPI_STATE_SCHED,	/* Poll is scheduled */
 	NAPI_STATE_DISABLE,	/* Disable pending */
+	NAPI_STATE_NPSVC,	/* Netpoll - don't dequeue from poll_list */
 };
 
 extern void __napi_schedule(struct napi_struct *n);
@@ -1497,6 +1498,12 @@ static inline void netif_rx_complete(struct net_device *dev,
 {
 	unsigned long flags;
 
+	/*
+	 * don't let napi dequeue from the cpu poll list
+	 * just in case it's running on a different cpu
+	 */
+	if (unlikely(test_bit(NAPI_STATE_NPSVC, &napi->state)))
+		return;
 	local_irq_save(flags);
 	__netif_rx_complete(dev, napi);
 	local_irq_restore(flags);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 6c7af390be0a..dadac6281f20 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -133,9 +133,11 @@ static int poll_one_napi(struct netpoll_info *npinfo,
 
 	npinfo->rx_flags |= NETPOLL_RX_DROP;
 	atomic_inc(&trapped);
+	set_bit(NAPI_STATE_NPSVC, &napi->state);
 
 	work = napi->poll(napi, budget);
 
+	clear_bit(NAPI_STATE_NPSVC, &napi->state);
 	atomic_dec(&trapped);
 	npinfo->rx_flags &= ~NETPOLL_RX_DROP;
 

From bb49eed4221092fde4e2337534ae51d8f28a2fbe Mon Sep 17 00:00:00 2001
From: Alexander Beregalov <a.beregalov@gmail.com>
Date: Wed, 10 Dec 2008 15:24:50 -0800
Subject: [PATCH 06/11] IA64: HP_SIMETH needs to depend upon NET

From: Alexander Beregalov <a.beregalov@gmail.com>

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/ia64/hp/sim/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/ia64/hp/sim/Kconfig b/arch/ia64/hp/sim/Kconfig
index f92306bbedb8..8d513a8c5266 100644
--- a/arch/ia64/hp/sim/Kconfig
+++ b/arch/ia64/hp/sim/Kconfig
@@ -4,6 +4,7 @@ menu "HP Simulator drivers"
 
 config HP_SIMETH
 	bool "Simulated Ethernet "
+	depends on NET
 
 config HP_SIMSERIAL
 	bool "Simulated serial driver support"

From 30bb0e0dce78427f3e5cb728d6b5ea73acbefffa Mon Sep 17 00:00:00 2001
From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Date: Thu, 11 Dec 2008 21:28:11 -0800
Subject: [PATCH 07/11] e1000e: fix double release of mutex

During a reset, releasing the swflag after it failed to be acquired would
cause a double unlock of the mutex.  Instead, test whether acquisition of
the swflag was successful and if not, do not release the swflag.  The reset
must still be done to bring the device to a quiescent state.

This resolves [BUG 12200] BUG: bad unlock balance detected! e1000e
http://bugzilla.kernel.org/show_bug.cgi?id=12200

Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/e1000e/ich8lan.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c
index 523b9716a543..d115a6d30f29 100644
--- a/drivers/net/e1000e/ich8lan.c
+++ b/drivers/net/e1000e/ich8lan.c
@@ -1893,12 +1893,17 @@ static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw)
 		ctrl |= E1000_CTRL_PHY_RST;
 	}
 	ret_val = e1000_acquire_swflag_ich8lan(hw);
+	/* Whether or not the swflag was acquired, we need to reset the part */
 	hw_dbg(hw, "Issuing a global reset to ich8lan");
 	ew32(CTRL, (ctrl | E1000_CTRL_RST));
 	msleep(20);
 
-	/* release the swflag because it is not reset by hardware reset */
-	e1000_release_swflag_ich8lan(hw);
+	if (!ret_val) {
+		/* release the swflag because it is not reset by
+		 * hardware reset
+		 */
+		e1000_release_swflag_ich8lan(hw);
+	}
 
 	ret_val = e1000e_get_auto_rd_done(hw);
 	if (ret_val) {

From ec8f2375d7584969501918651241f91eca2a6ad3 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Thu, 11 Dec 2008 21:31:50 -0800
Subject: [PATCH 08/11] netlabel: Compiler warning and NULL pointer dereference
 fix

Fix the two compiler warnings shown below.  Thanks to Geert Uytterhoeven for
finding and reporting the problem.

 net/netlabel/netlabel_unlabeled.c:567: warning: 'entry' may be used
   uninitialized in this function
 net/netlabel/netlabel_unlabeled.c:629: warning: 'entry' may be used
   uninitialized in this function

Signed-off-by: Paul Moore <paul.moore@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlabel/netlabel_unlabeled.c | 38 ++++++++++++++++---------------
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 90c8506a0aac..8c0308032178 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -562,7 +562,6 @@ static int netlbl_unlhsh_remove_addr4(struct net *net,
 				      const struct in_addr *mask,
 				      struct netlbl_audit *audit_info)
 {
-	int ret_val = 0;
 	struct netlbl_af4list *list_entry;
 	struct netlbl_unlhsh_addr4 *entry;
 	struct audit_buffer *audit_buf;
@@ -577,7 +576,7 @@ static int netlbl_unlhsh_remove_addr4(struct net *net,
 	if (list_entry != NULL)
 		entry = netlbl_unlhsh_addr4_entry(list_entry);
 	else
-		ret_val = -ENOENT;
+		entry = NULL;
 
 	audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL,
 					      audit_info);
@@ -588,19 +587,21 @@ static int netlbl_unlhsh_remove_addr4(struct net *net,
 					  addr->s_addr, mask->s_addr);
 		if (dev != NULL)
 			dev_put(dev);
-		if (entry && security_secid_to_secctx(entry->secid,
-						      &secctx,
-						      &secctx_len) == 0) {
+		if (entry != NULL &&
+		    security_secid_to_secctx(entry->secid,
+					     &secctx, &secctx_len) == 0) {
 			audit_log_format(audit_buf, " sec_obj=%s", secctx);
 			security_release_secctx(secctx, secctx_len);
 		}
-		audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0);
+		audit_log_format(audit_buf, " res=%u", entry != NULL ? 1 : 0);
 		audit_log_end(audit_buf);
 	}
 
-	if (ret_val == 0)
-		call_rcu(&entry->rcu, netlbl_unlhsh_free_addr4);
-	return ret_val;
+	if (entry == NULL)
+		return -ENOENT;
+
+	call_rcu(&entry->rcu, netlbl_unlhsh_free_addr4);
+	return 0;
 }
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -624,7 +625,6 @@ static int netlbl_unlhsh_remove_addr6(struct net *net,
 				      const struct in6_addr *mask,
 				      struct netlbl_audit *audit_info)
 {
-	int ret_val = 0;
 	struct netlbl_af6list *list_entry;
 	struct netlbl_unlhsh_addr6 *entry;
 	struct audit_buffer *audit_buf;
@@ -638,7 +638,7 @@ static int netlbl_unlhsh_remove_addr6(struct net *net,
 	if (list_entry != NULL)
 		entry = netlbl_unlhsh_addr6_entry(list_entry);
 	else
-		ret_val = -ENOENT;
+		entry = NULL;
 
 	audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL,
 					      audit_info);
@@ -649,19 +649,21 @@ static int netlbl_unlhsh_remove_addr6(struct net *net,
 					  addr, mask);
 		if (dev != NULL)
 			dev_put(dev);
-		if (entry && security_secid_to_secctx(entry->secid,
-						      &secctx,
-						      &secctx_len) == 0) {
+		if (entry != NULL &&
+		    security_secid_to_secctx(entry->secid,
+					     &secctx, &secctx_len) == 0) {
 			audit_log_format(audit_buf, " sec_obj=%s", secctx);
 			security_release_secctx(secctx, secctx_len);
 		}
-		audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0);
+		audit_log_format(audit_buf, " res=%u", entry != NULL ? 1 : 0);
 		audit_log_end(audit_buf);
 	}
 
-	if (ret_val == 0)
-		call_rcu(&entry->rcu, netlbl_unlhsh_free_addr6);
-	return ret_val;
+	if (entry == NULL)
+		return -ENOENT;
+
+	call_rcu(&entry->rcu, netlbl_unlhsh_free_addr6);
+	return 0;
 }
 #endif /* IPv6 */
 

From be70ed189bc0d16e1609a1c6c04ec9418b4dd11a Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 15 Dec 2008 00:19:14 -0800
Subject: [PATCH 09/11] netfilter: update rwlock initialization for nat_table

The commit e099a173573ce1ba171092aee7bb3c72ea686e59
(netfilter: netns nat: per-netns NAT table) renamed the
nat_table from __nat_table to nat_table without updating the
__RW_LOCK_UNLOCKED(__nat_table.lock).

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/nf_nat_rule.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index bea54a685109..8d489e746b21 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -61,7 +61,7 @@ static struct
 static struct xt_table nat_table = {
 	.name		= "nat",
 	.valid_hooks	= NAT_VALID_HOOKS,
-	.lock		= __RW_LOCK_UNLOCKED(__nat_table.lock),
+	.lock		= __RW_LOCK_UNLOCKED(nat_table.lock),
 	.me		= THIS_MODULE,
 	.af		= AF_INET,
 };

From eb9b851b980e20ba5f6bdfd6ec24f4bc77623ce6 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <hawk@comx.dk>
Date: Mon, 15 Dec 2008 00:39:17 -0800
Subject: [PATCH 10/11] SCHED: netem: Correct documentation comment in code.

The netem simulator is no longer limited by Linux timer resolution HZ.
Not since Patrick McHardy changed the QoS system to use hrtimer.

Signed-off-by: Jesper Dangaard Brouer <hawk@comx.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_netem.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index a11959908d9a..98402f0efa47 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -46,9 +46,6 @@
 	 layering other disciplines.  It does not need to do bandwidth
 	 control either since that can be handled by using token
 	 bucket or other rate control.
-
-	 The simulator is limited by the Linux timer resolution
-	 and will create packet bursts on the HZ boundary (1ms).
 */
 
 struct netem_sched_data {

From 4798a2b84ea5a98e4f36a815a646cb48ff521684 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= <remi.denis-courmont@nokia.com>
Date: Mon, 15 Dec 2008 00:53:57 -0800
Subject: [PATCH 11/11] Phonet: keep TX queue disabled when the device is off
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/phonet/pep-gprs.c | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c
index 9978afbd9f2a..803eeef0aa85 100644
--- a/net/phonet/pep-gprs.c
+++ b/net/phonet/pep-gprs.c
@@ -155,12 +155,13 @@ static void gprs_data_ready(struct sock *sk, int len)
 static void gprs_write_space(struct sock *sk)
 {
 	struct gprs_dev *dev = sk->sk_user_data;
+	struct net_device *net = dev->net;
 	unsigned credits = pep_writeable(sk);
 
 	spin_lock_bh(&dev->tx_lock);
 	dev->tx_max = credits;
-	if (credits > skb_queue_len(&dev->tx_queue))
-		netif_wake_queue(dev->net);
+	if (credits > skb_queue_len(&dev->tx_queue) && netif_running(net))
+		netif_wake_queue(net);
 	spin_unlock_bh(&dev->tx_lock);
 }
 
@@ -168,6 +169,23 @@ static void gprs_write_space(struct sock *sk)
  * Network device callbacks
  */
 
+static int gprs_open(struct net_device *dev)
+{
+	struct gprs_dev *gp = netdev_priv(dev);
+
+	gprs_write_space(gp->sk);
+	return 0;
+}
+
+static int gprs_close(struct net_device *dev)
+{
+	struct gprs_dev *gp = netdev_priv(dev);
+
+	netif_stop_queue(dev);
+	flush_work(&gp->tx_work);
+	return 0;
+}
+
 static int gprs_xmit(struct sk_buff *skb, struct net_device *net)
 {
 	struct gprs_dev *dev = netdev_priv(net);
@@ -254,6 +272,8 @@ static void gprs_setup(struct net_device *net)
 	net->tx_queue_len	= 10;
 
 	net->destructor		= free_netdev;
+	net->open		= gprs_open;
+	net->stop		= gprs_close;
 	net->hard_start_xmit	= gprs_xmit; /* mandatory */
 	net->change_mtu		= gprs_set_mtu;
 	net->get_stats		= gprs_get_stats;
@@ -318,7 +338,6 @@ int gprs_attach(struct sock *sk)
 	dev->sk = sk;
 
 	printk(KERN_DEBUG"%s: attached\n", net->name);
-	gprs_write_space(sk); /* kick off TX */
 	return net->ifindex;
 
 out_rel:
@@ -341,7 +360,5 @@ void gprs_detach(struct sock *sk)
 
 	printk(KERN_DEBUG"%s: detached\n", net->name);
 	unregister_netdev(net);
-	flush_scheduled_work();
 	sock_put(sk);
-	skb_queue_purge(&dev->tx_queue);
 }