From 122bdf67f15a22bcabf6c2cab3a545d17ccf68dc Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 14 Mar 2012 21:13:11 +0000
Subject: [PATCH 1/9] ipv6: fix icmp6_dst_alloc()

commit 87a115783 ( ipv6: Move xfrm_lookup() call down into
icmp6_dst_alloc().) forgot to convert one error path, leading
to crashes in mld_sendpack()

Many thanks to Dave Jones for providing a very complete bug report.

Reported-by: Dave Jones <davej@redhat.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 8c2e3ab58f2a..22b766407de1 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1077,7 +1077,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 	struct net *net = dev_net(dev);
 
 	if (unlikely(!idev))
-		return NULL;
+		return ERR_PTR(-ENODEV);
 
 	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
 	if (unlikely(!rt)) {

From cc34eb672eedb5ff248ac3bf9971a76f141fd141 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 13 Mar 2012 18:04:25 +0000
Subject: [PATCH 2/9] sch_sfq: revert dont put new flow at the end of flows

This reverts commit d47a0ac7b6 (sch_sfq: dont put new flow at the end of
flows)

As Jesper found out, patch sounded great but has bad side effects.

In stress situation, pushing new flows in front of the queue can prevent
old flows doing any progress. Packets can stay in SFQ queue for
unlimited amount of time.

It's possible to add heuristics to limit this problem, but this would
add complexity outside of SFQ scope.

A more sensible answer to Dave Taht concerns (who reported the issued I
tried to solve in original commit) is probably to use a qdisc hierarchy
so that high prio packets dont enter a potentially crowded SFQ qdisc.

Reported-by: Jesper Dangaard Brouer <jdb@comx.dk>
Cc: Dave Taht <dave.taht@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_sfq.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 60d47180f043..02a21abea65e 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -469,11 +469,15 @@ enqueue:
 	if (slot->qlen == 1) {		/* The flow is new */
 		if (q->tail == NULL) {	/* It is the first flow */
 			slot->next = x;
-			q->tail = slot;
 		} else {
 			slot->next = q->tail->next;
 			q->tail->next = x;
 		}
+		/* We put this flow at the end of our flow list.
+		 * This might sound unfair for a new flow to wait after old ones,
+		 * but we could endup servicing new flows only, and freeze old ones.
+		 */
+		q->tail = slot;
 		/* We could use a bigger initial quantum for new flows */
 		slot->allot = q->scaled_quantum;
 	}

From 127d0a198a310970b31866af8bbb6d4b1068e546 Mon Sep 17 00:00:00 2001
From: Michal Schmidt <mschmidt@redhat.com>
Date: Thu, 15 Mar 2012 14:08:28 +0000
Subject: [PATCH 3/9] bnx2x: fix a crash on corrupt firmware file

If the requested firmware is deemed corrupt and then released, reset the
pointer to NULL in order to avoid double-freeing it in
bnx2x_release_firmware() or dereferencing it in bnx2x_init_firmware().

Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
Acked-by: Eilon Greenstein <eilong@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 254521319150..00ff62f92858 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -10901,6 +10901,7 @@ init_ops_alloc_err:
 	kfree(bp->init_data);
 request_firmware_exit:
 	release_firmware(bp->firmware);
+	bp->firmware = NULL;
 
 	return rc;
 }

From c0ea452e422a1fc78ec8c639df64012d0b8dbb4a Mon Sep 17 00:00:00 2001
From: Michal Schmidt <mschmidt@redhat.com>
Date: Thu, 15 Mar 2012 14:08:29 +0000
Subject: [PATCH 4/9] bnx2x: fix memory leak in bnx2x_init_firmware()

When cycling the interface down and up, bnx2x_init_firmware() knows that
the firmware is already loaded, but nevertheless it allocates certain
arrays anew (init_data, init_ops, init_ops_offsets, iro_arr). The old
arrays are leaked.

Fix the leaks by returning early if the firmware was already loaded.
Because if the firmware is loaded, so are the arrays.

Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
Acked-by: Eilon Greenstein <eilong@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/broadcom/bnx2x/bnx2x_main.c  | 50 +++++++++----------
 1 file changed, 24 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 00ff62f92858..b69f8762b339 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -10824,38 +10824,36 @@ do {									\
 
 int bnx2x_init_firmware(struct bnx2x *bp)
 {
+	const char *fw_file_name;
 	struct bnx2x_fw_file_hdr *fw_hdr;
 	int rc;
 
+	if (bp->firmware)
+		return 0;
 
-	if (!bp->firmware) {
-		const char *fw_file_name;
+	if (CHIP_IS_E1(bp))
+		fw_file_name = FW_FILE_NAME_E1;
+	else if (CHIP_IS_E1H(bp))
+		fw_file_name = FW_FILE_NAME_E1H;
+	else if (!CHIP_IS_E1x(bp))
+		fw_file_name = FW_FILE_NAME_E2;
+	else {
+		BNX2X_ERR("Unsupported chip revision\n");
+		return -EINVAL;
+	}
+	BNX2X_DEV_INFO("Loading %s\n", fw_file_name);
 
-		if (CHIP_IS_E1(bp))
-			fw_file_name = FW_FILE_NAME_E1;
-		else if (CHIP_IS_E1H(bp))
-			fw_file_name = FW_FILE_NAME_E1H;
-		else if (!CHIP_IS_E1x(bp))
-			fw_file_name = FW_FILE_NAME_E2;
-		else {
-			BNX2X_ERR("Unsupported chip revision\n");
-			return -EINVAL;
-		}
-		BNX2X_DEV_INFO("Loading %s\n", fw_file_name);
+	rc = request_firmware(&bp->firmware, fw_file_name, &bp->pdev->dev);
+	if (rc) {
+		BNX2X_ERR("Can't load firmware file %s\n",
+			  fw_file_name);
+		goto request_firmware_exit;
+	}
 
-		rc = request_firmware(&bp->firmware, fw_file_name,
-				      &bp->pdev->dev);
-		if (rc) {
-			BNX2X_ERR("Can't load firmware file %s\n",
-				  fw_file_name);
-			goto request_firmware_exit;
-		}
-
-		rc = bnx2x_check_firmware(bp);
-		if (rc) {
-			BNX2X_ERR("Corrupt firmware file %s\n", fw_file_name);
-			goto request_firmware_exit;
-		}
+	rc = bnx2x_check_firmware(bp);
+	if (rc) {
+		BNX2X_ERR("Corrupt firmware file %s\n", fw_file_name);
+		goto request_firmware_exit;
 	}
 
 	fw_hdr = (struct bnx2x_fw_file_hdr *)bp->firmware->data;

From 7bdd402706cf26bfef9050dfee3f229b7f33ee4f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 14 Mar 2012 06:56:25 +0000
Subject: [PATCH 5/9] net/usbnet: reserve headroom on rx skbs

network drivers should reserve some headroom on incoming skbs so that we
dont need expensive reallocations, eg forwarding packets in tunnels.

This NET_SKB_PAD padding is done in various helpers, like
__netdev_alloc_skb_ip_align() in this patch, combining NET_SKB_PAD and
NET_IP_ALIGN magic.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Oliver Neukum <oneukum@suse.de>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Oliver Neukum <oneukum@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/usbnet.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 81b96e303757..59681f01a54e 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -328,13 +328,13 @@ static int rx_submit (struct usbnet *dev, struct urb *urb, gfp_t flags)
 	unsigned long		lockflags;
 	size_t			size = dev->rx_urb_size;
 
-	if ((skb = alloc_skb (size + NET_IP_ALIGN, flags)) == NULL) {
+	skb = __netdev_alloc_skb_ip_align(dev->net, size, flags);
+	if (!skb) {
 		netif_dbg(dev, rx_err, dev->net, "no rx skb\n");
 		usbnet_defer_kevent (dev, EVENT_RX_MEMORY);
 		usb_free_urb (urb);
 		return -ENOMEM;
 	}
-	skb_reserve (skb, NET_IP_ALIGN);
 
 	entry = (struct skb_data *) skb->cb;
 	entry->urb = urb;

From bb6d5e76fb4fba9aa36726db41404512f3286c0f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 14 Mar 2012 08:53:34 +0000
Subject: [PATCH 6/9] net/hyperv: fix erroneous NETDEV_TX_BUSY use

A driver start_xmit() method cannot free skb and return NETDEV_TX_BUSY,
since caller is going to reuse freed skb.

This is mostly a revert of commit bf769375c (staging: hv: fix the return
status of netvsc_start_xmit())

In fact netif_tx_stop_queue() / netif_stop_queue() is needed before
returning NETDEV_TX_BUSY or you can trigger a ksoftirqd fatal loop.

In case of memory allocation error, only safe way is to drop the packet
and return NETDEV_TX_OK

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/netvsc_drv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index bf01841bda5b..610860f28968 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -166,7 +166,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
 
 		dev_kfree_skb(skb);
 		net->stats.tx_dropped++;
-		return NETDEV_TX_BUSY;
+		return NETDEV_TX_OK;
 	}
 
 	packet->extension = (void *)(unsigned long)packet +
@@ -226,7 +226,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
 		dev_kfree_skb_any(skb);
 	}
 
-	return ret ? NETDEV_TX_BUSY : NETDEV_TX_OK;
+	return NETDEV_TX_OK;
 }
 
 /*

From b8fbaef586176f6abe0eb7887ddae66e99898b79 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 14 Mar 2012 09:21:44 +0000
Subject: [PATCH 7/9] wimax/i2400m: fix erroneous NETDEV_TX_BUSY use

A driver start_xmit() method cannot free skb and return NETDEV_TX_BUSY,
since caller is going to reuse freed skb.

In fact netif_tx_stop_queue() / netif_stop_queue() is needed before
returning NETDEV_TX_BUSY or you can trigger a ksoftirqd fatal loop.

In case of memory allocation error, only safe way is to drop the packet
and return NETDEV_TX_OK

Also increments tx_dropped counter

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wimax/i2400m/netdev.c | 30 ++++++++++--------------------
 1 file changed, 10 insertions(+), 20 deletions(-)

diff --git a/drivers/net/wimax/i2400m/netdev.c b/drivers/net/wimax/i2400m/netdev.c
index 64a110604ad3..63e4b709efa9 100644
--- a/drivers/net/wimax/i2400m/netdev.c
+++ b/drivers/net/wimax/i2400m/netdev.c
@@ -367,38 +367,28 @@ netdev_tx_t i2400m_hard_start_xmit(struct sk_buff *skb,
 {
 	struct i2400m *i2400m = net_dev_to_i2400m(net_dev);
 	struct device *dev = i2400m_dev(i2400m);
-	int result;
+	int result = -1;
 
 	d_fnstart(3, dev, "(skb %p net_dev %p)\n", skb, net_dev);
-	if (skb_header_cloned(skb)) {
-		/*
-		 * Make tcpdump/wireshark happy -- if they are
-		 * running, the skb is cloned and we will overwrite
-		 * the mac fields in i2400m_tx_prep_header. Expand
-		 * seems to fix this...
-		 */
-		result = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
-		if (result) {
-			result = NETDEV_TX_BUSY;
-			goto error_expand;
-		}
-	}
+
+	if (skb_header_cloned(skb) && 
+	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+		goto drop;
 
 	if (i2400m->state == I2400M_SS_IDLE)
 		result = i2400m_net_wake_tx(i2400m, net_dev, skb);
 	else
 		result = i2400m_net_tx(i2400m, net_dev, skb);
-	if (result <  0)
+	if (result <  0) {
+drop:
 		net_dev->stats.tx_dropped++;
-	else {
+	} else {
 		net_dev->stats.tx_packets++;
 		net_dev->stats.tx_bytes += skb->len;
 	}
-	result = NETDEV_TX_OK;
-error_expand:
-	kfree_skb(skb);
+	dev_kfree_skb(skb);
 	d_fnend(3, dev, "(skb %p net_dev %p) = %d\n", skb, net_dev, result);
-	return result;
+	return NETDEV_TX_OK;
 }
 
 

From c577923756b7fe9071f28a76b66b83b306d1d001 Mon Sep 17 00:00:00 2001
From: "RongQing.Li" <roy.qing.li@gmail.com>
Date: Thu, 15 Mar 2012 22:54:14 +0000
Subject: [PATCH 8/9] ipv6: Don't dev_hold(dev) in ip6_mc_find_dev_rcu.

ip6_mc_find_dev_rcu() is called with rcu_read_lock(), so don't
need to dev_hold().
With dev_hold(), not corresponding dev_put(), will lead to leak.

[ bug introduced in 96b52e61be1 (ipv6: mcast: RCU conversions) ]

Signed-off-by: RongQing.Li <roy.qing.li@gmail.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/mcast.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index b853f06cc148..16c33e308121 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -257,7 +257,6 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
 
 		if (rt) {
 			dev = rt->dst.dev;
-			dev_hold(dev);
 			dst_release(&rt->dst);
 		}
 	} else

From a16a1647fa6b6783c2e91623e72e86f0c2adac5e Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 16 Mar 2012 02:00:34 +0000
Subject: [PATCH 9/9] netfilter: ctnetlink: fix race between delete and timeout
 expiration

Kerin Millar reported hardlockups while running `conntrackd -c'
in a busy firewall. That system (with several processors) was
acting as backup in a primary-backup setup.

After several tries, I found a race condition between the deletion
operation of ctnetlink and timeout expiration. This patch fixes
this problem.

Tested-by: Kerin Millar <kerframil@gmail.com>
Reported-by: Kerin Millar <kerframil@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_netlink.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 10687692831e..b49da6c925b3 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -943,20 +943,21 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 		}
 	}
 
-	if (nf_conntrack_event_report(IPCT_DESTROY, ct,
-				      NETLINK_CB(skb).pid,
-				      nlmsg_report(nlh)) < 0) {
+	if (del_timer(&ct->timeout)) {
+		if (nf_conntrack_event_report(IPCT_DESTROY, ct,
+					      NETLINK_CB(skb).pid,
+					      nlmsg_report(nlh)) < 0) {
+			nf_ct_delete_from_lists(ct);
+			/* we failed to report the event, try later */
+			nf_ct_insert_dying_list(ct);
+			nf_ct_put(ct);
+			return 0;
+		}
+		/* death_by_timeout would report the event again */
+		set_bit(IPS_DYING_BIT, &ct->status);
 		nf_ct_delete_from_lists(ct);
-		/* we failed to report the event, try later */
-		nf_ct_insert_dying_list(ct);
 		nf_ct_put(ct);
-		return 0;
 	}
-
-	/* death_by_timeout would report the event again */
-	set_bit(IPS_DYING_BIT, &ct->status);
-
-	nf_ct_kill(ct);
 	nf_ct_put(ct);
 
 	return 0;