From bc284f94f84c3d76e49c6f3df9028c503f9589d9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 31 May 2010 05:47:32 -0700 Subject: [PATCH 01/44] greth: Fix build after OF device conversions. Signed-off-by: David S. Miller --- drivers/net/greth.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/greth.c b/drivers/net/greth.c index f37a4c143ddd..3a029d02c2b4 100644 --- a/drivers/net/greth.c +++ b/drivers/net/greth.c @@ -1607,14 +1607,13 @@ static struct of_device_id greth_of_match[] = { MODULE_DEVICE_TABLE(of, greth_of_match); static struct of_platform_driver greth_of_driver = { - .name = "grlib-greth", - .match_table = greth_of_match, + .driver = { + .name = "grlib-greth", + .owner = THIS_MODULE, + .of_match_table = greth_of_match, + }, .probe = greth_of_probe, .remove = __devexit_p(greth_of_remove), - .driver = { - .owner = THIS_MODULE, - .name = "grlib-greth", - }, }; static int __init greth_init(void) From c936e8bd1de2fa50c49e3df6fa5036bf07870b67 Mon Sep 17 00:00:00 2001 From: Xiaotian Feng Date: Mon, 31 May 2010 16:41:09 +0200 Subject: [PATCH 02/44] netfilter: don't xt_jumpstack_alloc twice in xt_register_table In xt_register_table, xt_jumpstack_alloc is called first, later xt_replace_table is used. But in xt_replace_table, xt_jumpstack_alloc will be used again. Then the memory allocated by previous xt_jumpstack_alloc will be leaked. We can simply remove the previous xt_jumpstack_alloc because there aren't any users of newinfo between xt_jumpstack_alloc and xt_replace_table. Signed-off-by: Xiaotian Feng Cc: Patrick McHardy Cc: "David S. 
Miller" Cc: Jan Engelhardt Cc: Andrew Morton Cc: Rusty Russell Cc: Alexey Dobriyan Acked-By: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/netfilter/x_tables.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 445de702b8b7..47b1e7917a9c 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -844,10 +844,6 @@ struct xt_table *xt_register_table(struct net *net, struct xt_table_info *private; struct xt_table *t, *table; - ret = xt_jumpstack_alloc(newinfo); - if (ret < 0) - return ERR_PTR(ret); - /* Don't add one object to multiple lists. */ table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL); if (!table) { From 7489aec8eed4f2f1eb3b4d35763bd3ea30b32ef5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 31 May 2010 16:41:35 +0200 Subject: [PATCH 03/44] netfilter: xtables: stackptr should be percpu commit f3c5c1bfd4 (netfilter: xtables: make ip_tables reentrant) introduced a performance regression, because stackptr array is shared by all cpus, adding cache line ping pongs. (16 cpus share a 64 bytes cache line) Fix this using alloc_percpu() Signed-off-by: Eric Dumazet Acked-By: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 2 +- net/ipv4/netfilter/ip_tables.c | 2 +- net/ipv6/netfilter/ip6_tables.c | 2 +- net/netfilter/x_tables.c | 13 +++---------- 4 files changed, 6 insertions(+), 13 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index c00cc0c4d0b7..24e5d01d27d0 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -397,7 +397,7 @@ struct xt_table_info { * @stacksize jumps (number of user chains) can possibly be made. 
*/ unsigned int stacksize; - unsigned int *stackptr; + unsigned int __percpu *stackptr; void ***jumpstack; /* ipt_entry tables: one per CPU */ /* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */ diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 63958f3394a5..4b6c5ca610fc 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -336,7 +336,7 @@ ipt_do_table(struct sk_buff *skb, cpu = smp_processor_id(); table_base = private->entries[cpu]; jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; - stackptr = &private->stackptr[cpu]; + stackptr = per_cpu_ptr(private->stackptr, cpu); origptr = *stackptr; e = get_entry(table_base, private->hook_entry[hook]); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 6f517bd83692..9d2d68f0e605 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -363,7 +363,7 @@ ip6t_do_table(struct sk_buff *skb, cpu = smp_processor_id(); table_base = private->entries[cpu]; jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; - stackptr = &private->stackptr[cpu]; + stackptr = per_cpu_ptr(private->stackptr, cpu); origptr = *stackptr; e = get_entry(table_base, private->hook_entry[hook]); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 47b1e7917a9c..e34622fa0003 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -699,10 +699,8 @@ void xt_free_table_info(struct xt_table_info *info) vfree(info->jumpstack); else kfree(info->jumpstack); - if (sizeof(unsigned int) * nr_cpu_ids > PAGE_SIZE) - vfree(info->stackptr); - else - kfree(info->stackptr); + + free_percpu(info->stackptr); kfree(info); } @@ -753,14 +751,9 @@ static int xt_jumpstack_alloc(struct xt_table_info *i) unsigned int size; int cpu; - size = sizeof(unsigned int) * nr_cpu_ids; - if (size > PAGE_SIZE) - i->stackptr = vmalloc(size); - else - i->stackptr = kmalloc(size, GFP_KERNEL); + i->stackptr = 
alloc_percpu(unsigned int); if (i->stackptr == NULL) return -ENOMEM; - memset(i->stackptr, 0, size); size = sizeof(void **) * nr_cpu_ids; if (size > PAGE_SIZE) From b1faf5666438090a4dc4fceac8502edc7788b7e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 31 May 2010 23:44:05 -0700 Subject: [PATCH 04/44] net: sock_queue_err_skb() dont mess with sk_forward_alloc Correct sk_forward_alloc handling for error_queue would need to use a backlog of frames that softirq handler could not deliver because socket is owned by user thread. Or extend backlog processing to be able to process normal and error packets. Another possibility is to not use mem charge for error queue, this is what I implemented in this patch. Note: this reverts commit 29030374 (net: fix sk_forward_alloc corruptions), since we dont need to lock socket anymore. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 15 +-------------- net/core/skbuff.c | 30 ++++++++++++++++++++++++++++-- net/ipv4/udp.c | 6 ++---- net/ipv6/udp.c | 6 ++---- 4 files changed, 33 insertions(+), 24 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index ca241ea14875..731150d52799 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1524,20 +1524,7 @@ extern void sk_stop_timer(struct sock *sk, struct timer_list* timer); extern int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); -static inline int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) -{ - /* Cast skb->rcvbuf to unsigned... 
It's pointless, but reduces - number of warnings when compiling with -W --ANK - */ - if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= - (unsigned)sk->sk_rcvbuf) - return -ENOMEM; - skb_set_owner_r(skb, sk); - skb_queue_tail(&sk->sk_error_queue, skb); - if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk, skb->len); - return 0; -} +extern int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb); /* * Recover an error report and clear atomically diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4e7ac09c281a..9f07e749d7b1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2965,6 +2965,34 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) } EXPORT_SYMBOL_GPL(skb_cow_data); +static void sock_rmem_free(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); +} + +/* + * Note: We dont mem charge error packets (no sk_forward_alloc changes) + */ +int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) +{ + if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= + (unsigned)sk->sk_rcvbuf) + return -ENOMEM; + + skb_orphan(skb); + skb->sk = sk; + skb->destructor = sock_rmem_free; + atomic_add(skb->truesize, &sk->sk_rmem_alloc); + + skb_queue_tail(&sk->sk_error_queue, skb); + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_data_ready(sk, skb->len); + return 0; +} +EXPORT_SYMBOL(sock_queue_err_skb); + void skb_tstamp_tx(struct sk_buff *orig_skb, struct skb_shared_hwtstamps *hwtstamps) { @@ -2997,9 +3025,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; - bh_lock_sock(sk); err = sock_queue_err_skb(sk, skb); - bh_unlock_sock(sk); if (err) kfree_skb(skb); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 50678f9a2763..eec4ff456e33 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -633,11 +633,9 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) if (!inet->recverr) { if 
(!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; - } else { - bh_lock_sock(sk); + } else ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1)); - bh_unlock_sock(sk); - } + sk->sk_err = err; sk->sk_error_report(sk); out: diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3048f906c042..87be58673b55 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -466,11 +466,9 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk->sk_state != TCP_ESTABLISHED && !np->recverr) goto out; - if (np->recverr) { - bh_lock_sock(sk); + if (np->recverr) ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1)); - bh_unlock_sock(sk); - } + sk->sk_err = err; sk->sk_error_report(sk); out: From 288fcee8b7aa98796d96cd5b1b2e8005639328bf Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 31 May 2010 23:48:19 -0700 Subject: [PATCH 05/44] net/ipv4/tcp_input.c: fix compilation breakage when FASTRETRANS_DEBUG > 1 Commit: c720c7e8383aff1cb219bddf474ed89d850336e3 missed these. Signed-off-by: Joe Perches Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3e6dafcb1071..548d575e6cc6 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2639,7 +2639,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) if (sk->sk_family == AF_INET) { printk(KERN_DEBUG "Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n", msg, - &inet->daddr, ntohs(inet->dport), + &inet->inet_daddr, ntohs(inet->inet_dport), tp->snd_cwnd, tcp_left_out(tp), tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); @@ -2649,7 +2649,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) struct ipv6_pinfo *np = inet6_sk(sk); printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", msg, - &np->daddr, ntohs(inet->dport), + &np->daddr, ntohs(inet->inet_dport), tp->snd_cwnd, tcp_left_out(tp), tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); From 6bd17eb96ffc9c3b52927913d59da9ced5109c6a Mon Sep 17 00:00:00 2001 From: Anatolij Gustschin Date: Mon, 31 May 2010 08:56:03 +0000 Subject: [PATCH 06/44] can: mpc5xxx_can.c: Fix build failure Fixes build error caused by the OF device_node pointer being moved into struct device. Signed-off-by: Anatolij Gustschin Cc: Wolfgang Grandegger Cc: Grant Likely Signed-off-by: David S. 
Miller --- drivers/net/can/mscan/mpc5xxx_can.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/mscan/mpc5xxx_can.c b/drivers/net/can/mscan/mpc5xxx_can.c index 8af8442c694a..af753936e835 100644 --- a/drivers/net/can/mscan/mpc5xxx_can.c +++ b/drivers/net/can/mscan/mpc5xxx_can.c @@ -73,7 +73,7 @@ static u32 __devinit mpc52xx_can_get_clock(struct of_device *ofdev, else *mscan_clksrc = MSCAN_CLKSRC_XTAL; - freq = mpc5xxx_get_bus_frequency(ofdev->node); + freq = mpc5xxx_get_bus_frequency(ofdev->dev.of_node); if (!freq) return 0; @@ -152,7 +152,7 @@ static u32 __devinit mpc512x_can_get_clock(struct of_device *ofdev, } /* Determine the MSCAN device index from the physical address */ - pval = of_get_property(ofdev->node, "reg", &plen); + pval = of_get_property(ofdev->dev.of_node, "reg", &plen); BUG_ON(!pval || plen < sizeof(*pval)); clockidx = (*pval & 0x80) ? 1 : 0; if (*pval & 0x2000) @@ -168,11 +168,11 @@ static u32 __devinit mpc512x_can_get_clock(struct of_device *ofdev, */ if (clock_name && !strcmp(clock_name, "ip")) { *mscan_clksrc = MSCAN_CLKSRC_IPS; - freq = mpc5xxx_get_bus_frequency(ofdev->node); + freq = mpc5xxx_get_bus_frequency(ofdev->dev.of_node); } else { *mscan_clksrc = MSCAN_CLKSRC_BUS; - pval = of_get_property(ofdev->node, + pval = of_get_property(ofdev->dev.of_node, "fsl,mscan-clock-divider", &plen); if (pval && plen == sizeof(*pval)) clockdiv = *pval; @@ -251,7 +251,7 @@ static int __devinit mpc5xxx_can_probe(struct of_device *ofdev, const struct of_device_id *id) { struct mpc5xxx_can_data *data = (struct mpc5xxx_can_data *)id->data; - struct device_node *np = ofdev->node; + struct device_node *np = ofdev->dev.of_node; struct net_device *dev; struct mscan_priv *priv; void __iomem *base; From 3ffd05159815d477f971a3259fc758f0c3c7e640 Mon Sep 17 00:00:00 2001 From: Sathya Perla Date: Tue, 1 Jun 2010 00:19:33 -0700 Subject: [PATCH 07/44] be2net: convert hdr.timeout in be_cmd_loopback_test() to le32 The current code 
fails on ppc as hdr.timeout is not being converted to le32. Signed-off-by: Sathya Perla Signed-off-by: David S. Miller --- drivers/net/benet/be_cmds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/benet/be_cmds.c b/drivers/net/benet/be_cmds.c index 9e305d7fb4bd..b9ad799c719f 100644 --- a/drivers/net/benet/be_cmds.c +++ b/drivers/net/benet/be_cmds.c @@ -1593,7 +1593,7 @@ int be_cmd_loopback_test(struct be_adapter *adapter, u32 port_num, be_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_LOWLEVEL, OPCODE_LOWLEVEL_LOOPBACK_TEST, sizeof(*req)); - req->hdr.timeout = 4; + req->hdr.timeout = cpu_to_le32(4); req->pattern = cpu_to_le64(pattern); req->src_port = cpu_to_le32(port_num); From aa989f5e46bb913e1a5966bb7d32eb2d00c1894e Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 31 May 2010 01:10:01 +0000 Subject: [PATCH 08/44] virtio-net: pass gfp to add_buf virtio-net bounces buffer allocations off to a thread if it can't allocate buffers from the atomic pool. However, if posting buffers still requires atomic buffers, this is unlikely to succeed. Fix by passing in the proper gfp_t parameter. Signed-off-by: Michael S. Tsirkin Signed-off-by: Rusty Russell Signed-off-by: David S. 
Miller --- drivers/net/virtio_net.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 78eb3190b9b1..1edb7a61983c 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -340,7 +340,7 @@ static int add_recvbuf_small(struct virtnet_info *vi, gfp_t gfp) skb_to_sgvec(skb, vi->rx_sg + 1, 0, skb->len); - err = virtqueue_add_buf(vi->rvq, vi->rx_sg, 0, 2, skb); + err = virtqueue_add_buf_gfp(vi->rvq, vi->rx_sg, 0, 2, skb, gfp); if (err < 0) dev_kfree_skb(skb); @@ -385,8 +385,8 @@ static int add_recvbuf_big(struct virtnet_info *vi, gfp_t gfp) /* chain first in list head */ first->private = (unsigned long)list; - err = virtqueue_add_buf(vi->rvq, vi->rx_sg, 0, MAX_SKB_FRAGS + 2, - first); + err = virtqueue_add_buf_gfp(vi->rvq, vi->rx_sg, 0, MAX_SKB_FRAGS + 2, + first, gfp); if (err < 0) give_pages(vi, first); @@ -404,7 +404,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, gfp_t gfp) sg_init_one(vi->rx_sg, page_address(page), PAGE_SIZE); - err = virtqueue_add_buf(vi->rvq, vi->rx_sg, 0, 1, page); + err = virtqueue_add_buf_gfp(vi->rvq, vi->rx_sg, 0, 1, page, gfp); if (err < 0) give_pages(vi, page); From 5ed83663f77ee7404022d046321f69545cd311b8 Mon Sep 17 00:00:00 2001 From: Denis Kirjanov Date: Mon, 31 May 2010 00:24:49 +0000 Subject: [PATCH 09/44] ksz884x: convert to netdev_tx_t Convert TX hook to netdev_tx_t type Signed-off-by: Denis Kirjanov Signed-off-by: David S. Miller --- drivers/net/ksz884x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ksz884x.c b/drivers/net/ksz884x.c index c80ca64277b2..4568b6f163eb 100644 --- a/drivers/net/ksz884x.c +++ b/drivers/net/ksz884x.c @@ -4854,7 +4854,7 @@ static inline void copy_old_skb(struct sk_buff *old, struct sk_buff *skb) * * Return 0 if successful; otherwise an error code indicating failure. 
*/ -static int netdev_tx(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t netdev_tx(struct sk_buff *skb, struct net_device *dev) { struct dev_priv *priv = netdev_priv(dev); struct dev_info *hw_priv = priv->adapter; From 96ed741e15896eea43f7203523db88bc8105c359 Mon Sep 17 00:00:00 2001 From: Denis Kirjanov Date: Mon, 31 May 2010 00:26:21 +0000 Subject: [PATCH 10/44] ksz884x: Add missing validate_addr hook Add missing validate_addr hook Signed-off-by: Denis Kirjanov Signed-off-by: David S. Miller --- drivers/net/ksz884x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ksz884x.c b/drivers/net/ksz884x.c index 4568b6f163eb..7805bbf1d53a 100644 --- a/drivers/net/ksz884x.c +++ b/drivers/net/ksz884x.c @@ -6863,6 +6863,7 @@ static const struct net_device_ops netdev_ops = { .ndo_tx_timeout = netdev_tx_timeout, .ndo_change_mtu = netdev_change_mtu, .ndo_set_mac_address = netdev_set_mac_address, + .ndo_validate_addr = eth_validate_addr, .ndo_do_ioctl = netdev_ioctl, .ndo_set_rx_mode = netdev_set_rx_mode, #ifdef CONFIG_NET_POLL_CONTROLLER From b42d9165e1e3d92e4e3318642463dbe592a12568 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 1 Jun 2010 00:26:06 -0700 Subject: [PATCH 11/44] drivers/isdn/hardware/mISDN: Use GFP_ATOMIC when a lock is held The function inittiger is only called from nj_init_card, where a lock is held. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @gfp exists@ identifier fn; position p; @@ fn(...) { ... when != spin_unlock_irqrestore when any GFP_KERNEL@p ... when any } @locked@ identifier gfp.fn; @@ spin_lock_irqsave(...) ... when != spin_unlock_irqrestore fn(...) @depends on locked@ position gfp.p; @@ - GFP_KERNEL@p + GFP_ATOMIC // Signed-off-by: Julia Lawall Signed-off-by: David S. 
Miller --- drivers/isdn/hardware/mISDN/netjet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/isdn/hardware/mISDN/netjet.c b/drivers/isdn/hardware/mISDN/netjet.c index 0a3553df065f..54ae71a907f9 100644 --- a/drivers/isdn/hardware/mISDN/netjet.c +++ b/drivers/isdn/hardware/mISDN/netjet.c @@ -320,12 +320,12 @@ inittiger(struct tiger_hw *card) return -ENOMEM; } for (i = 0; i < 2; i++) { - card->bc[i].hsbuf = kmalloc(NJ_DMA_TXSIZE, GFP_KERNEL); + card->bc[i].hsbuf = kmalloc(NJ_DMA_TXSIZE, GFP_ATOMIC); if (!card->bc[i].hsbuf) { pr_info("%s: no B%d send buffer\n", card->name, i + 1); return -ENOMEM; } - card->bc[i].hrbuf = kmalloc(NJ_DMA_RXSIZE, GFP_KERNEL); + card->bc[i].hrbuf = kmalloc(NJ_DMA_RXSIZE, GFP_ATOMIC); if (!card->bc[i].hrbuf) { pr_info("%s: no B%d recv buffer\n", card->name, i + 1); return -ENOMEM; From 397f385bdba6cdf7752467a7ae81810340929e44 Mon Sep 17 00:00:00 2001 From: Bruno Randolf Date: Wed, 19 May 2010 10:30:49 +0900 Subject: [PATCH 12/44] ath5k: wake queues on reset We can wake all queues after a chip reset since everything should be set up and we are ready to transmit. If we don't do that we might end up starting up with stopped queues, not being able to transmit. (This started to happen after "ath5k: clean up queue manipulation" but since periodic calibration also stopped and started the queues this effect was hidden most of the time). This way we can also get rid of the superfluous ath5k_reset_wake() function. Signed-off-by: Bruno Randolf Acked-by: Nick Kossifidis Signed-off-by: John W. 
Linville --- drivers/net/wireless/ath/ath5k/base.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c index cc6d41dec332..2978359c4366 100644 --- a/drivers/net/wireless/ath/ath5k/base.c +++ b/drivers/net/wireless/ath/ath5k/base.c @@ -222,7 +222,6 @@ static int ath5k_tx(struct ieee80211_hw *hw, struct sk_buff *skb); static int ath5k_tx_queue(struct ieee80211_hw *hw, struct sk_buff *skb, struct ath5k_txq *txq); static int ath5k_reset(struct ath5k_softc *sc, struct ieee80211_channel *chan); -static int ath5k_reset_wake(struct ath5k_softc *sc); static int ath5k_start(struct ieee80211_hw *hw); static void ath5k_stop(struct ieee80211_hw *hw); static int ath5k_add_interface(struct ieee80211_hw *hw, @@ -2770,7 +2769,7 @@ ath5k_tasklet_reset(unsigned long data) { struct ath5k_softc *sc = (void *)data; - ath5k_reset_wake(sc); + ath5k_reset(sc, sc->curchan); } /* @@ -2941,23 +2940,13 @@ ath5k_reset(struct ath5k_softc *sc, struct ieee80211_channel *chan) ath5k_beacon_config(sc); /* intrs are enabled by ath5k_beacon_config */ + ieee80211_wake_queues(sc->hw); + return 0; err: return ret; } -static int -ath5k_reset_wake(struct ath5k_softc *sc) -{ - int ret; - - ret = ath5k_reset(sc, sc->curchan); - if (!ret) - ieee80211_wake_queues(sc->hw); - - return ret; -} - static int ath5k_start(struct ieee80211_hw *hw) { return ath5k_init(hw->priv); From 8ae5977ff95c03fe6c36a5721c57dcb4bfe4f290 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 30 May 2010 14:52:58 +0200 Subject: [PATCH 13/44] mac80211: fix blockack-req processing Daniel reported that the paged RX changes had broken blockack request frame processing due to using data that wasn't really part of the skb data. Fix this using skb_copy_bits() for the needed data. As a side effect, this adds a check on processing too short frames, which previously this code could do. 
Reported-by: Daniel Halperin Signed-off-by: Johannes Berg Acked-by: Daniel Halperin Signed-off-by: John W. Linville --- net/mac80211/rx.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 6e2a7bcd8cb8..5e0b65406c44 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1818,17 +1818,26 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) return RX_CONTINUE; if (ieee80211_is_back_req(bar->frame_control)) { + struct { + __le16 control, start_seq_num; + } __packed bar_data; + if (!rx->sta) return RX_DROP_MONITOR; + + if (skb_copy_bits(skb, offsetof(struct ieee80211_bar, control), + &bar_data, sizeof(bar_data))) + return RX_DROP_MONITOR; + spin_lock(&rx->sta->lock); - tid = le16_to_cpu(bar->control) >> 12; + tid = le16_to_cpu(bar_data.control) >> 12; if (!rx->sta->ampdu_mlme.tid_active_rx[tid]) { spin_unlock(&rx->sta->lock); return RX_DROP_MONITOR; } tid_agg_rx = rx->sta->ampdu_mlme.tid_rx[tid]; - start_seq_num = le16_to_cpu(bar->start_seq_num) >> 4; + start_seq_num = le16_to_cpu(bar_data.start_seq_num) >> 4; /* reset session timer */ if (tid_agg_rx->timeout) From 51a0d38de26226f2779912d92f155b93d539da9a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 31 May 2010 12:00:12 +0200 Subject: [PATCH 14/44] mac80211: fix dialog token allocator The dialog token allocator has apparently been broken since b83f4e15 ("mac80211: fix deadlock in sta->lock") because it got moved out under the spinlock. Fix it. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/agg-tx.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index c163d0a149f4..98258b7341e3 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -332,14 +332,16 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) IEEE80211_QUEUE_STOP_REASON_AGGREGATION); spin_unlock(&local->ampdu_lock); - spin_unlock_bh(&sta->lock); - /* send an addBA request */ + /* prepare tid data */ sta->ampdu_mlme.dialog_token_allocator++; sta->ampdu_mlme.tid_tx[tid]->dialog_token = sta->ampdu_mlme.dialog_token_allocator; sta->ampdu_mlme.tid_tx[tid]->ssn = start_seq_num; + spin_unlock_bh(&sta->lock); + + /* send AddBA request */ ieee80211_send_addba_request(sdata, pubsta->addr, tid, sta->ampdu_mlme.tid_tx[tid]->dialog_token, sta->ampdu_mlme.tid_tx[tid]->ssn, From fafeeb6c80e3842c6dc19d05de09a23f23eef0d8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 1 Jun 2010 10:04:49 +0000 Subject: [PATCH 15/44] xfrm: force a dst reference in __xfrm_route_forward() Packets going through __xfrm_route_forward() have a non-refcounted dst entry, since we enabled a noref forwarding path. xfrm_lookup() might incorrectly release this dst entry. It's a bit late to make invasive changes in xfrm_lookup(), so let's force a refcount in this path. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_policy.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d965a2bad8d3..4bf27d901333 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2153,6 +2153,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) return 0; } + skb_dst_force(skb); dst = skb_dst(skb); res = xfrm_lookup(net, &dst, &fl, NULL, 0) == 0; From f048fa9c8686119c3858a463cab6121dced7c0bf Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 1 Jun 2010 15:05:36 +0000 Subject: [PATCH 16/44] bnx2: Fix hang during rmmod bnx2. The regression is caused by: commit 4327ba435a56ada13eedf3eb332e583c7a0586a9 bnx2: Fix netpoll crash. If ->open() and ->close() are called multiple times, the same napi structs will be added to dev->napi_list multiple times, corrupting the dev->napi_list. This causes free_netdev() to hang during rmmod. We fix this by calling netif_napi_del() during ->close(). Also, bnx2_init_napi() must not be in the __devinit section since it is called by ->open(). Signed-off-by: Michael Chan Signed-off-by: Benjamin Li Signed-off-by: David S. 
Miller --- drivers/net/bnx2.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 188e356c30a3..949d7a9dcf92 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -247,6 +247,7 @@ static const struct flash_spec flash_5709 = { MODULE_DEVICE_TABLE(pci, bnx2_pci_tbl); static void bnx2_init_napi(struct bnx2 *bp); +static void bnx2_del_napi(struct bnx2 *bp); static inline u32 bnx2_tx_avail(struct bnx2 *bp, struct bnx2_tx_ring_info *txr) { @@ -6270,6 +6271,7 @@ open_err: bnx2_free_skbs(bp); bnx2_free_irq(bp); bnx2_free_mem(bp); + bnx2_del_napi(bp); return rc; } @@ -6537,6 +6539,7 @@ bnx2_close(struct net_device *dev) bnx2_free_irq(bp); bnx2_free_skbs(bp); bnx2_free_mem(bp); + bnx2_del_napi(bp); bp->link_up = 0; netif_carrier_off(bp->dev); bnx2_set_power_state(bp, PCI_D3hot); @@ -8227,7 +8230,16 @@ bnx2_bus_string(struct bnx2 *bp, char *str) return str; } -static void __devinit +static void +bnx2_del_napi(struct bnx2 *bp) +{ + int i; + + for (i = 0; i < bp->irq_nvecs; i++) + netif_napi_del(&bp->bnx2_napi[i].napi); +} + +static void bnx2_init_napi(struct bnx2 *bp) { int i; From 08f382ebb8a9efb898840aa74cf55148c7a98af6 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Tue, 1 Jun 2010 08:59:33 +0000 Subject: [PATCH 17/44] enic: bug fix: make the set/get netlink VF_PORT support symmetrical To make get/set netlink VF_PORT truly symmetrical, we need to keep track of what items are set and only return those items on get. Previously, the driver wasn't differentiating between a set of attr with a NULL string, for example, and not setting the attr at all. We only want to return the NULL string if the attr was actually set with a NULL string. Otherwise, don't return the attr. Signed-off-by: Scott Feldman Signed-off-by: David S. 
Miller --- drivers/net/enic/enic.h | 7 ++ drivers/net/enic/enic_main.c | 206 +++++++++++++++++------------------ 2 files changed, 107 insertions(+), 106 deletions(-) diff --git a/drivers/net/enic/enic.h b/drivers/net/enic/enic.h index 85f2a2e7030a..45e86d1e5b1b 100644 --- a/drivers/net/enic/enic.h +++ b/drivers/net/enic/enic.h @@ -74,7 +74,14 @@ struct enic_msix_entry { void *devid; }; +#define ENIC_SET_APPLIED (1 << 0) +#define ENIC_SET_REQUEST (1 << 1) +#define ENIC_SET_NAME (1 << 2) +#define ENIC_SET_INSTANCE (1 << 3) +#define ENIC_SET_HOST (1 << 4) + struct enic_port_profile { + u32 set; u8 request; char name[PORT_PROFILE_MAX]; u8 instance_uuid[PORT_UUID_MAX]; diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c index 6586b5c7e4b6..bc7d6b96de3d 100644 --- a/drivers/net/enic/enic_main.c +++ b/drivers/net/enic/enic_main.c @@ -1029,8 +1029,7 @@ static int enic_dev_init_done(struct enic *enic, int *done, int *error) return err; } -static int enic_set_port_profile(struct enic *enic, u8 request, u8 *mac, - char *name, u8 *instance_uuid, u8 *host_uuid) +static int enic_set_port_profile(struct enic *enic, u8 *mac) { struct vic_provinfo *vp; u8 oui[3] = VIC_PROVINFO_CISCO_OUI; @@ -1040,97 +1039,112 @@ static int enic_set_port_profile(struct enic *enic, u8 request, u8 *mac, "%02X%02X-%02X%02X%02X%02X%0X%02X"; int err; - if (!name) - return -EINVAL; - - if (!is_valid_ether_addr(mac)) - return -EADDRNOTAVAIL; - - vp = vic_provinfo_alloc(GFP_KERNEL, oui, VIC_PROVINFO_LINUX_TYPE); - if (!vp) - return -ENOMEM; - - vic_provinfo_add_tlv(vp, - VIC_LINUX_PROV_TLV_PORT_PROFILE_NAME_STR, - strlen(name) + 1, name); - - vic_provinfo_add_tlv(vp, - VIC_LINUX_PROV_TLV_CLIENT_MAC_ADDR, - ETH_ALEN, mac); - - if (instance_uuid) { - uuid = instance_uuid; - sprintf(uuid_str, uuid_fmt, - uuid[0], uuid[1], uuid[2], uuid[3], - uuid[4], uuid[5], uuid[6], uuid[7], - uuid[8], uuid[9], uuid[10], uuid[11], - uuid[12], uuid[13], uuid[14], uuid[15]); - vic_provinfo_add_tlv(vp, - 
VIC_LINUX_PROV_TLV_CLIENT_UUID_STR, - sizeof(uuid_str), uuid_str); - } - - if (host_uuid) { - uuid = host_uuid; - sprintf(uuid_str, uuid_fmt, - uuid[0], uuid[1], uuid[2], uuid[3], - uuid[4], uuid[5], uuid[6], uuid[7], - uuid[8], uuid[9], uuid[10], uuid[11], - uuid[12], uuid[13], uuid[14], uuid[15]); - vic_provinfo_add_tlv(vp, - VIC_LINUX_PROV_TLV_HOST_UUID_STR, - sizeof(uuid_str), uuid_str); - } - err = enic_vnic_dev_deinit(enic); if (err) - goto err_out; + return err; - memset(&enic->pp, 0, sizeof(enic->pp)); + switch (enic->pp.request) { - err = enic_dev_init_prov(enic, vp); - if (err) - goto err_out; + case PORT_REQUEST_ASSOCIATE: - enic->pp.request = request; - memcpy(enic->pp.name, name, PORT_PROFILE_MAX); - if (instance_uuid) - memcpy(enic->pp.instance_uuid, - instance_uuid, PORT_UUID_MAX); - if (host_uuid) - memcpy(enic->pp.host_uuid, - host_uuid, PORT_UUID_MAX); + if (!(enic->pp.set & ENIC_SET_NAME) || !strlen(enic->pp.name)) + return -EINVAL; -err_out: - vic_provinfo_free(vp); + if (!is_valid_ether_addr(mac)) + return -EADDRNOTAVAIL; - return err; -} + vp = vic_provinfo_alloc(GFP_KERNEL, oui, + VIC_PROVINFO_LINUX_TYPE); + if (!vp) + return -ENOMEM; -static int enic_unset_port_profile(struct enic *enic) -{ - memset(&enic->pp, 0, sizeof(enic->pp)); - return enic_vnic_dev_deinit(enic); + vic_provinfo_add_tlv(vp, + VIC_LINUX_PROV_TLV_PORT_PROFILE_NAME_STR, + strlen(enic->pp.name) + 1, enic->pp.name); + + vic_provinfo_add_tlv(vp, + VIC_LINUX_PROV_TLV_CLIENT_MAC_ADDR, + ETH_ALEN, mac); + + if (enic->pp.set & ENIC_SET_INSTANCE) { + uuid = enic->pp.instance_uuid; + sprintf(uuid_str, uuid_fmt, + uuid[0], uuid[1], uuid[2], uuid[3], + uuid[4], uuid[5], uuid[6], uuid[7], + uuid[8], uuid[9], uuid[10], uuid[11], + uuid[12], uuid[13], uuid[14], uuid[15]); + vic_provinfo_add_tlv(vp, + VIC_LINUX_PROV_TLV_CLIENT_UUID_STR, + sizeof(uuid_str), uuid_str); + } + + if (enic->pp.set & ENIC_SET_HOST) { + uuid = enic->pp.host_uuid; + sprintf(uuid_str, uuid_fmt, + uuid[0], uuid[1], 
uuid[2], uuid[3], + uuid[4], uuid[5], uuid[6], uuid[7], + uuid[8], uuid[9], uuid[10], uuid[11], + uuid[12], uuid[13], uuid[14], uuid[15]); + vic_provinfo_add_tlv(vp, + VIC_LINUX_PROV_TLV_HOST_UUID_STR, + sizeof(uuid_str), uuid_str); + } + + err = enic_dev_init_prov(enic, vp); + vic_provinfo_free(vp); + if (err) + return err; + break; + + case PORT_REQUEST_DISASSOCIATE: + break; + + default: + return -EINVAL; + } + + enic->pp.set |= ENIC_SET_APPLIED; + return 0; } static int enic_set_vf_port(struct net_device *netdev, int vf, struct nlattr *port[]) { struct enic *enic = netdev_priv(netdev); - char *name = NULL; - u8 *instance_uuid = NULL; - u8 *host_uuid = NULL; - u8 request = PORT_REQUEST_DISASSOCIATE; + + memset(&enic->pp, 0, sizeof(enic->pp)); + + if (port[IFLA_PORT_REQUEST]) { + enic->pp.set |= ENIC_SET_REQUEST; + enic->pp.request = nla_get_u8(port[IFLA_PORT_REQUEST]); + } + + if (port[IFLA_PORT_PROFILE]) { + enic->pp.set |= ENIC_SET_NAME; + memcpy(enic->pp.name, nla_data(port[IFLA_PORT_PROFILE]), + PORT_PROFILE_MAX); + } + + if (port[IFLA_PORT_INSTANCE_UUID]) { + enic->pp.set |= ENIC_SET_INSTANCE; + memcpy(enic->pp.instance_uuid, + nla_data(port[IFLA_PORT_INSTANCE_UUID]), PORT_UUID_MAX); + } + + if (port[IFLA_PORT_HOST_UUID]) { + enic->pp.set |= ENIC_SET_HOST; + memcpy(enic->pp.host_uuid, + nla_data(port[IFLA_PORT_HOST_UUID]), PORT_UUID_MAX); + } /* don't support VFs, yet */ if (vf != PORT_SELF_VF) return -EOPNOTSUPP; - if (port[IFLA_PORT_REQUEST]) - request = nla_get_u8(port[IFLA_PORT_REQUEST]); + if (!(enic->pp.set & ENIC_SET_REQUEST)) + return -EOPNOTSUPP; - switch (request) { - case PORT_REQUEST_ASSOCIATE: + if (enic->pp.request == PORT_REQUEST_ASSOCIATE) { /* If the interface mac addr hasn't been assigned, * assign a random mac addr before setting port- @@ -1139,30 +1153,9 @@ static int enic_set_vf_port(struct net_device *netdev, int vf, if (is_zero_ether_addr(netdev->dev_addr)) random_ether_addr(netdev->dev_addr); - - if (port[IFLA_PORT_PROFILE]) - name = 
nla_data(port[IFLA_PORT_PROFILE]); - - if (port[IFLA_PORT_INSTANCE_UUID]) - instance_uuid = - nla_data(port[IFLA_PORT_INSTANCE_UUID]); - - if (port[IFLA_PORT_HOST_UUID]) - host_uuid = nla_data(port[IFLA_PORT_HOST_UUID]); - - return enic_set_port_profile(enic, request, - netdev->dev_addr, name, - instance_uuid, host_uuid); - - case PORT_REQUEST_DISASSOCIATE: - - return enic_unset_port_profile(enic); - - default: - break; } - return -EOPNOTSUPP; + return enic_set_port_profile(enic, netdev->dev_addr); } static int enic_get_vf_port(struct net_device *netdev, int vf, @@ -1172,14 +1165,12 @@ static int enic_get_vf_port(struct net_device *netdev, int vf, int err, error, done; u16 response = PORT_PROFILE_RESPONSE_SUCCESS; - /* don't support VFs, yet */ - if (vf != PORT_SELF_VF) - return -EOPNOTSUPP; + if (!(enic->pp.set & ENIC_SET_APPLIED)) + return -ENODATA; err = enic_dev_init_done(enic, &done, &error); - if (err) - return err; + error = err; switch (error) { case ERR_SUCCESS: @@ -1202,12 +1193,15 @@ static int enic_get_vf_port(struct net_device *netdev, int vf, NLA_PUT_U16(skb, IFLA_PORT_REQUEST, enic->pp.request); NLA_PUT_U16(skb, IFLA_PORT_RESPONSE, response); - NLA_PUT(skb, IFLA_PORT_PROFILE, PORT_PROFILE_MAX, - enic->pp.name); - NLA_PUT(skb, IFLA_PORT_INSTANCE_UUID, PORT_UUID_MAX, - enic->pp.instance_uuid); - NLA_PUT(skb, IFLA_PORT_HOST_UUID, PORT_UUID_MAX, - enic->pp.host_uuid); + if (enic->pp.set & ENIC_SET_NAME) + NLA_PUT(skb, IFLA_PORT_PROFILE, PORT_PROFILE_MAX, + enic->pp.name); + if (enic->pp.set & ENIC_SET_INSTANCE) + NLA_PUT(skb, IFLA_PORT_INSTANCE_UUID, PORT_UUID_MAX, + enic->pp.instance_uuid); + if (enic->pp.set & ENIC_SET_HOST) + NLA_PUT(skb, IFLA_PORT_HOST_UUID, PORT_UUID_MAX, + enic->pp.host_uuid); return 0; From 194dbcc8a1a97cbac9a619a563e5f6b7f7d5a485 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 12 May 2010 21:31:06 +0000 Subject: [PATCH 18/44] net: init_vlan should not copy slave or master flags The vlan device should not copy the slave 
or master flags from the real device. It is not in the bond until added nor is it a master. Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 55be90826f5f..529842677817 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -708,7 +708,8 @@ static int vlan_dev_init(struct net_device *dev) netif_carrier_off(dev); /* IFF_BROADCAST|IFF_MULTICAST; ??? */ - dev->flags = real_dev->flags & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI); + dev->flags = real_dev->flags & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | + IFF_MASTER | IFF_SLAVE); dev->iflink = real_dev->ifindex; dev->state = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT))) | From 2df4a0fa1540c460ec69788ab2a901cc72a75644 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 12 May 2010 21:31:11 +0000 Subject: [PATCH 19/44] net: fix conflict between null_or_orig and null_or_bond If a skb is received on an inactive bond that does not meet the special cases checked for by skb_bond_should_drop it should only be delivered to exact matches as the comment in netif_receive_skb() says. However because null_or_bond could also be null this is not always true. This patch renames null_or_bond to orig_or_bond and initializes it to orig_dev. This keeps the intent of null_or_bond to pass frames received on VLAN interfaces stacked on bonding interfaces without invalidating the statement for null_or_orig. Signed-off-by: John Fastabend Signed-off-by: David S. 
Miller --- net/core/dev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 1845b08c624e..d03470f5260a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2795,7 +2795,7 @@ static int __netif_receive_skb(struct sk_buff *skb) struct net_device *orig_dev; struct net_device *master; struct net_device *null_or_orig; - struct net_device *null_or_bond; + struct net_device *orig_or_bond; int ret = NET_RX_DROP; __be16 type; @@ -2868,10 +2868,10 @@ ncls: * device that may have registered for a specific ptype. The * handler may have to adjust skb->dev and orig_dev. */ - null_or_bond = NULL; + orig_or_bond = orig_dev; if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) && (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) { - null_or_bond = vlan_dev_real_dev(skb->dev); + orig_or_bond = vlan_dev_real_dev(skb->dev); } type = skb->protocol; @@ -2879,7 +2879,7 @@ ncls: &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { if (ptype->type == type && (ptype->dev == null_or_orig || ptype->dev == skb->dev || ptype->dev == orig_dev || - ptype->dev == null_or_bond)) { + ptype->dev == orig_or_bond)) { if (pt_prev) ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; From ceb3d2394532540a52ce34f71e67c8d008913f79 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Sat, 29 May 2010 13:23:34 +0000 Subject: [PATCH 20/44] korina: fix deadlock on RX FIFO overrun By calling korina_restart(), the IRQ handler tries to disable the interrupt it's currently serving. This leads to a deadlock since disable_irq() waits for any running IRQ handlers to finish before returning. This patch addresses the issue by turning korina_restart() into a workqueue task, which is then scheduled when needed. Reproducing the deadlock is easily done using e.g. GNU netcat to send large amounts of UDP data to the host running this driver. Note that the same problem (and fix) applies to TX FIFO underruns, but apparently these are less easy to trigger. 
Signed-off-by: Phil Sutter Signed-off-by: David S. Miller --- drivers/net/korina.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/net/korina.c b/drivers/net/korina.c index 26bf1b76b997..13533f937e05 100644 --- a/drivers/net/korina.c +++ b/drivers/net/korina.c @@ -135,6 +135,7 @@ struct korina_private { struct napi_struct napi; struct timer_list media_check_timer; struct mii_if_info mii_if; + struct work_struct restart_task; struct net_device *dev; int phy_addr; }; @@ -890,12 +891,12 @@ static int korina_init(struct net_device *dev) /* * Restart the RC32434 ethernet controller. - * FIXME: check the return status where we call it */ -static int korina_restart(struct net_device *dev) +static void korina_restart_task(struct work_struct *work) { - struct korina_private *lp = netdev_priv(dev); - int ret; + struct korina_private *lp = container_of(work, + struct korina_private, restart_task); + struct net_device *dev = lp->dev; /* * Disable interrupts @@ -916,10 +917,9 @@ static int korina_restart(struct net_device *dev) napi_disable(&lp->napi); - ret = korina_init(dev); - if (ret < 0) { + if (korina_init(dev) < 0) { printk(KERN_ERR "%s: cannot restart device\n", dev->name); - return ret; + return; } korina_multicast_list(dev); @@ -927,8 +927,6 @@ static int korina_restart(struct net_device *dev) enable_irq(lp->ovr_irq); enable_irq(lp->tx_irq); enable_irq(lp->rx_irq); - - return ret; } static void korina_clear_and_restart(struct net_device *dev, u32 value) @@ -937,7 +935,7 @@ static void korina_clear_and_restart(struct net_device *dev, u32 value) netif_stop_queue(dev); writel(value, &lp->eth_regs->ethintfc); - korina_restart(dev); + schedule_work(&lp->restart_task); } /* Ethernet Tx Underflow interrupt */ @@ -962,11 +960,8 @@ static irqreturn_t korina_und_interrupt(int irq, void *dev_id) static void korina_tx_timeout(struct net_device *dev) { struct korina_private *lp = netdev_priv(dev); - unsigned long flags; - 
spin_lock_irqsave(&lp->lock, flags); - korina_restart(dev); - spin_unlock_irqrestore(&lp->lock, flags); + schedule_work(&lp->restart_task); } /* Ethernet Rx Overflow interrupt */ @@ -1086,6 +1081,8 @@ static int korina_close(struct net_device *dev) napi_disable(&lp->napi); + cancel_work_sync(&lp->restart_task); + free_irq(lp->rx_irq, dev); free_irq(lp->tx_irq, dev); free_irq(lp->ovr_irq, dev); @@ -1198,6 +1195,8 @@ static int korina_probe(struct platform_device *pdev) } setup_timer(&lp->media_check_timer, korina_poll_media, (unsigned long) dev); + INIT_WORK(&lp->restart_task, korina_restart_task); + printk(KERN_INFO "%s: " DRV_NAME "-" DRV_VERSION " " DRV_RELDATE "\n", dev->name); out: From 53ee490ac5836d506ea5830f821045aafa3c196f Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Sat, 29 May 2010 13:23:35 +0000 Subject: [PATCH 21/44] korina: use netdev_alloc_skb_ip_align() here, too This patch completes commit 89d71a66c40d629e3b1285def543ab1425558cd5 which missed this spot, as it seems. Signed-off-by: Phil Sutter Signed-off-by: David S. Miller --- drivers/net/korina.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/korina.c b/drivers/net/korina.c index 13533f937e05..3e9b6b7be42e 100644 --- a/drivers/net/korina.c +++ b/drivers/net/korina.c @@ -765,10 +765,9 @@ static int korina_alloc_ring(struct net_device *dev) /* Initialize the receive descriptors */ for (i = 0; i < KORINA_NUM_RDS; i++) { - skb = dev_alloc_skb(KORINA_RBSIZE + 2); + skb = netdev_alloc_skb_ip_align(dev, KORINA_RBSIZE); if (!skb) return -ENOMEM; - skb_reserve(skb, 2); lp->rx_skb[i] = skb; lp->rd_ring[i].control = DMA_DESC_IOD | DMA_COUNT(KORINA_RBSIZE); From b1011b375be106e0a312baafc981a26165283efe Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Sat, 29 May 2010 13:23:36 +0000 Subject: [PATCH 22/44] korina: count RX DMA OVR as rx_fifo_error This way, RX DMA overruns (actually being caused by overrun of the 512byte input FIFO) show up in ifconfig output. 
The rx_fifo_errors counter is unused otherwise. Signed-off-by: Phil Sutter Signed-off-by: David S. Miller --- drivers/net/korina.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/korina.c b/drivers/net/korina.c index 3e9b6b7be42e..c7a9bef4dfb0 100644 --- a/drivers/net/korina.c +++ b/drivers/net/korina.c @@ -376,7 +376,7 @@ static int korina_rx(struct net_device *dev, int limit) if (devcs & ETH_RX_LE) dev->stats.rx_length_errors++; if (devcs & ETH_RX_OVR) - dev->stats.rx_over_errors++; + dev->stats.rx_fifo_errors++; if (devcs & ETH_RX_CV) dev->stats.rx_frame_errors++; if (devcs & ETH_RX_CES) From e3fe8558c7fc182972c3d947d88744482111f304 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eric=20B=C3=A9nard?= Date: Wed, 2 Jun 2010 06:13:34 -0700 Subject: [PATCH 23/44] net/fec: fix pm to survive to suspend/resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * in the actual driver, calling fec_stop and fec_enet_init doesn't allow to have a working network interface at resume (where a ifconfig down and up is required to recover the interface) * by using fec_enet_close and fec_enet_open, this patch solves this problem and handle the case where the link changed between suspend and resume * this patch also disable clock at suspend and reenable it at resume Signed-off-by: Eric Bénard Signed-off-by: David S. 
Miller --- drivers/net/fec.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/fec.c b/drivers/net/fec.c index ddf7a86cd466..edfff92a6d8e 100644 --- a/drivers/net/fec.c +++ b/drivers/net/fec.c @@ -1373,10 +1373,9 @@ fec_suspend(struct platform_device *dev, pm_message_t state) if (ndev) { fep = netdev_priv(ndev); - if (netif_running(ndev)) { - netif_device_detach(ndev); - fec_stop(ndev); - } + if (netif_running(ndev)) + fec_enet_close(ndev); + clk_disable(fep->clk); } return 0; } @@ -1385,12 +1384,13 @@ static int fec_resume(struct platform_device *dev) { struct net_device *ndev = platform_get_drvdata(dev); + struct fec_enet_private *fep; if (ndev) { - if (netif_running(ndev)) { - fec_enet_init(ndev, 0); - netif_device_attach(ndev); - } + fep = netdev_priv(ndev); + clk_enable(fep->clk); + if (netif_running(ndev)) + fec_enet_open(ndev); } return 0; } From 33c29dde7d04dc0ec0edb649d20ccf1351c13a06 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sat, 29 May 2010 14:26:59 +0000 Subject: [PATCH 24/44] act_nat: fix the wrong checksum when addr isn't in old_addr/mask fix the wrong checksum when addr isn't in old_addr/mask For TCP and UDP packets, when addr isn't in old_addr/mask we don't do SNAT or DNAT, and we should not update layer 4 checksum. Signed-off-by: Changli Gao ---- net/sched/act_nat.c | 4 ++++ 1 file changed, 4 insertions(+) Signed-off-by: David S. 
Miller --- net/sched/act_nat.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index d885ba311564..570949417f38 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -159,6 +159,9 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a, iph->daddr = new_addr; csum_replace4(&iph->check, addr, new_addr); + } else if ((iph->frag_off & htons(IP_OFFSET)) || + iph->protocol != IPPROTO_ICMP) { + goto out; } ihl = iph->ihl * 4; @@ -247,6 +250,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a, break; } +out: return action; drop: From edafe502404f3669d364b6e96d79b54067b634b4 Mon Sep 17 00:00:00 2001 From: Daniele Lacamera Date: Wed, 2 Jun 2010 02:02:04 +0000 Subject: [PATCH 25/44] TCP: tcp_hybla: Fix integer overflow in slow start increment For large values of rtt, 2^rho operation may overflow u32. Clamp down the increment to 2^16. Signed-off-by: Daniele Lacamera Signed-off-by: David S. Miller --- net/ipv4/tcp_hybla.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index c209e054a634..377bc9349371 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -126,8 +126,8 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) * calculate 2^fract in a <<7 value. */ is_slowstart = 1; - increment = ((1 << ca->rho) * hybla_fraction(rho_fractions)) - - 128; + increment = ((1 << min(ca->rho, 16U)) * + hybla_fraction(rho_fractions)) - 128; } else { /* * congestion avoidance From fbc2e7d9cf49e0bf89b9e91fd60a06851a855c5d Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Wed, 2 Jun 2010 07:32:42 -0700 Subject: [PATCH 26/44] cls_u32: use skb_header_pointer() to dereference data safely use skb_header_pointer() to dereference data safely the original skb->data dereference isn't safe, as there isn't any skb->len or skb_is_nonlinear() check. skb_header_pointer() is used instead in this patch. 
And when the skb isn't long enough, we terminate the function u32_classify() immediately with -1. Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 45 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 96275422c619..4f522143811e 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -98,11 +98,11 @@ static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_re { struct { struct tc_u_knode *knode; - u8 *ptr; + unsigned int off; } stack[TC_U32_MAXDEPTH]; struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root; - u8 *ptr = skb_network_header(skb); + unsigned int off = skb_network_offset(skb); struct tc_u_knode *n; int sdepth = 0; int off2 = 0; @@ -134,8 +134,14 @@ next_knode: #endif for (i = n->sel.nkeys; i>0; i--, key++) { + unsigned int toff; + __be32 *data, _data; - if ((*(__be32*)(ptr+key->off+(off2&key->offmask))^key->val)&key->mask) { + toff = off + key->off + (off2 & key->offmask); + data = skb_header_pointer(skb, toff, 4, &_data); + if (!data) + goto out; + if ((*data ^ key->val) & key->mask) { n = n->next; goto next_knode; } @@ -174,29 +180,45 @@ check_terminal: if (sdepth >= TC_U32_MAXDEPTH) goto deadloop; stack[sdepth].knode = n; - stack[sdepth].ptr = ptr; + stack[sdepth].off = off; sdepth++; ht = n->ht_down; sel = 0; - if (ht->divisor) - sel = ht->divisor&u32_hash_fold(*(__be32*)(ptr+n->sel.hoff), &n->sel,n->fshift); + if (ht->divisor) { + __be32 *data, _data; + data = skb_header_pointer(skb, off + n->sel.hoff, 4, + &_data); + if (!data) + goto out; + sel = ht->divisor & u32_hash_fold(*data, &n->sel, + n->fshift); + } if (!(n->sel.flags&(TC_U32_VAROFFSET|TC_U32_OFFSET|TC_U32_EAT))) goto next_ht; if (n->sel.flags&(TC_U32_OFFSET|TC_U32_VAROFFSET)) { off2 = n->sel.off + 3; - if (n->sel.flags&TC_U32_VAROFFSET) - off2 += ntohs(n->sel.offmask & *(__be16*)(ptr+n->sel.offoff)) >>n->sel.offshift; + if 
(n->sel.flags & TC_U32_VAROFFSET) { + __be16 *data, _data; + + data = skb_header_pointer(skb, + off + n->sel.offoff, + 2, &_data); + if (!data) + goto out; + off2 += ntohs(n->sel.offmask & *data) >> + n->sel.offshift; + } off2 &= ~3; } if (n->sel.flags&TC_U32_EAT) { - ptr += off2; + off += off2; off2 = 0; } - if (ptr < skb_tail_pointer(skb)) + if (off < skb->len) goto next_ht; } @@ -204,9 +226,10 @@ check_terminal: if (sdepth--) { n = stack[sdepth].knode; ht = n->ht_up; - ptr = stack[sdepth].ptr; + off = stack[sdepth].off; goto check_terminal; } +out: return -1; deadloop: From d42a8f464ba14467e5d45dc0eb8f789c82bd0679 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 1 Jun 2010 11:32:43 +0000 Subject: [PATCH 27/44] sfc: Get port number from CS_PORT_NUM, not PCI function number A single shared memory region used to communicate with firmware is mapped into both PCI PFs of the SFC9020 and SFL9021. Drivers must be able to identify which port they are addressing in order to use the correct sub-region. Currently we use the PCI function number, but the PCI address may be virtualised. Use the CS_PORT_NUM register field defined for just this purpose. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/sfc/net_driver.h | 4 +++- drivers/net/sfc/siena.c | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/sfc/net_driver.h b/drivers/net/sfc/net_driver.h index 2e6fd89f2a72..5fffd9abffde 100644 --- a/drivers/net/sfc/net_driver.h +++ b/drivers/net/sfc/net_driver.h @@ -645,6 +645,7 @@ union efx_multicast_hash { * struct efx_nic - an Efx NIC * @name: Device name (net device name or bus id before net device registered) * @pci_dev: The PCI device + * @port_num: Index of this host port within the controller * @type: Controller type attributes * @legacy_irq: IRQ number * @workqueue: Workqueue for port reconfigures and the HW monitor. 
@@ -728,6 +729,7 @@ union efx_multicast_hash { struct efx_nic { char name[IFNAMSIZ]; struct pci_dev *pci_dev; + unsigned port_num; const struct efx_nic_type *type; int legacy_irq; struct workqueue_struct *workqueue; @@ -830,7 +832,7 @@ static inline const char *efx_dev_name(struct efx_nic *efx) static inline unsigned int efx_port_num(struct efx_nic *efx) { - return PCI_FUNC(efx->pci_dev->devfn); + return efx->port_num; } /** diff --git a/drivers/net/sfc/siena.c b/drivers/net/sfc/siena.c index 727b4228e081..7ecd255a7cc0 100644 --- a/drivers/net/sfc/siena.c +++ b/drivers/net/sfc/siena.c @@ -206,6 +206,7 @@ static int siena_probe_nic(struct efx_nic *efx) { struct siena_nic_data *nic_data; bool already_attached = 0; + efx_oword_t reg; int rc; /* Allocate storage for hardware specific data */ @@ -220,6 +221,9 @@ static int siena_probe_nic(struct efx_nic *efx) goto fail1; } + efx_reado(efx, &reg, FR_AZ_CS_DEBUG); + efx->port_num = EFX_OWORD_FIELD(reg, FRF_CZ_CS_PORT_NUM) - 1; + efx_mcdi_init(efx); /* Recover from a failed assertion before probing */ From 3b21b508ecc9e043839a5337563cfc77f9fcedb9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 2 Jun 2010 13:43:15 +0000 Subject: [PATCH 28/44] e1000e: change logical negate to bitwise The bitwise negate is intended here. With the logical negate the condition is always false. Signed-off-by: Dan Carpenter Acked-by: Bruce Allan Signed-off-by: Jeff Kirsher Signed-off-by: David S. 
Miller --- drivers/net/e1000e/netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index 24507f3b8b17..57a7e41da69e 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -2554,7 +2554,7 @@ static void e1000_init_manageability_pt(struct e1000_adapter *adapter) mdef = er32(MDEF(i)); /* Ignore filters with anything other than IPMI ports */ - if (mdef & !(E1000_MDEF_PORT_623 | E1000_MDEF_PORT_664)) + if (mdef & ~(E1000_MDEF_PORT_623 | E1000_MDEF_PORT_664)) continue; /* Enable this decision filter in MANC2H */ From 60a5711db646b87b9530b16cbaf3bd53ac5594a5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 2 Jun 2010 23:56:13 +0000 Subject: [PATCH 29/44] isdn/kcapi: return -EFAULT on copy_from_user errors copy_from_user() returns the number of bytes remaining but we should return -EFAULT here. The error code gets returned to the user. Both old_capi_manufacturer() and capi20_manufacturer() had other places that already returned -EFAULT so this won't break anything. Signed-off-by: Dan Carpenter Signed-off-by: David S. 
Miller --- drivers/isdn/capi/kcapi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c index bde3c88b8b27..b054494df846 100644 --- a/drivers/isdn/capi/kcapi.c +++ b/drivers/isdn/capi/kcapi.c @@ -1020,12 +1020,12 @@ static int old_capi_manufacturer(unsigned int cmd, void __user *data) if (cmd == AVMB1_ADDCARD) { if ((retval = copy_from_user(&cdef, data, sizeof(avmb1_carddef)))) - return retval; + return -EFAULT; cdef.cardtype = AVM_CARDTYPE_B1; } else { if ((retval = copy_from_user(&cdef, data, sizeof(avmb1_extcarddef)))) - return retval; + return -EFAULT; } cparams.port = cdef.port; cparams.irq = cdef.irq; @@ -1218,7 +1218,7 @@ int capi20_manufacturer(unsigned int cmd, void __user *data) kcapi_carddef cdef; if ((retval = copy_from_user(&cdef, data, sizeof(cdef)))) - return retval; + return -EFAULT; cparams.port = cdef.port; cparams.irq = cdef.irq; From d23380701876dd93d310b2548c51d0f78f25d7aa Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 3 Jun 2010 00:05:35 +0000 Subject: [PATCH 30/44] tehuti: return -EFAULT on copy_to_user errors copy_to_user() returns the number of bytes remaining but we want to return a negative error code here. Signed-off-by: Dan Carpenter Signed-off-by: David S. 
Miller --- drivers/net/tehuti.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/tehuti.c b/drivers/net/tehuti.c index 20ab16192325..737df6032bbc 100644 --- a/drivers/net/tehuti.c +++ b/drivers/net/tehuti.c @@ -646,7 +646,7 @@ static int bdx_ioctl_priv(struct net_device *ndev, struct ifreq *ifr, int cmd) error = copy_from_user(data, ifr->ifr_data, sizeof(data)); if (error) { pr_err("cant copy from user\n"); - RET(error); + RET(-EFAULT); } DBG("%d 0x%x 0x%x\n", data[0], data[1], data[2]); } @@ -665,7 +665,7 @@ static int bdx_ioctl_priv(struct net_device *ndev, struct ifreq *ifr, int cmd) data[2]); error = copy_to_user(ifr->ifr_data, data, sizeof(data)); if (error) - RET(error); + RET(-EFAULT); break; case BDX_OP_WRITE: From 9e2d11b926765681f72db0373d2ecbbac28359b3 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 2 Jun 2010 10:36:53 +0000 Subject: [PATCH 31/44] epic100: Test __BIG_ENDIAN instead of (non-existent) CONFIG_BIG_ENDIAN Probably no one has used this driver on big-endian systems, since it was setting up descriptor swapping if CONFIG_BIG_ENDIAN is set, which it never is, since that symbol is not mentioned anywhere else in the kernel source. Switch this test to a check for __BIG_ENDIAN so it has a chance at working. Signed-off-by: Roland Dreier Acked-by: Jeff Garzik Signed-off-by: David S. Miller --- drivers/net/epic100.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/epic100.c b/drivers/net/epic100.c index 6838dfc9ef23..4c274657283c 100644 --- a/drivers/net/epic100.c +++ b/drivers/net/epic100.c @@ -87,6 +87,7 @@ static int rx_copybreak; #include #include #include +#include /* These identify the driver base version and may not be removed. */ static char version[] __devinitdata = @@ -230,7 +231,7 @@ static const u16 media2miictl[16] = { * The EPIC100 Rx and Tx buffer descriptors. Note that these * really ARE host-endian; it's not a misannotation. 
We tell * the card to byteswap them internally on big-endian hosts - - * look for #ifdef CONFIG_BIG_ENDIAN in epic_open(). + * look for #ifdef __BIG_ENDIAN in epic_open(). */ struct epic_tx_desc { @@ -690,7 +691,7 @@ static int epic_open(struct net_device *dev) outl((inl(ioaddr + NVCTL) & ~0x003C) | 0x4800, ioaddr + NVCTL); /* Tell the chip to byteswap descriptors on big-endian hosts */ -#ifdef CONFIG_BIG_ENDIAN +#ifdef __BIG_ENDIAN outl(0x4432 | (RX_FIFO_THRESH<<8), ioaddr + GENCTL); inl(ioaddr + GENCTL); outl(0x0432 | (RX_FIFO_THRESH<<8), ioaddr + GENCTL); @@ -806,7 +807,7 @@ static void epic_restart(struct net_device *dev) for (i = 16; i > 0; i--) outl(0x0008, ioaddr + TEST1); -#ifdef CONFIG_BIG_ENDIAN +#ifdef __BIG_ENDIAN outl(0x0432 | (RX_FIFO_THRESH<<8), ioaddr + GENCTL); #else outl(0x0412 | (RX_FIFO_THRESH<<8), ioaddr + GENCTL); From 3df95ce948dc8ceef07b49003ab944aa047f2a79 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 2 Jun 2010 10:39:56 +0000 Subject: [PATCH 32/44] sfc: Store port number in net_device::dev_id This exposes the port number to userland through sysfs. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/sfc/net_driver.h | 4 +--- drivers/net/sfc/siena.c | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/sfc/net_driver.h b/drivers/net/sfc/net_driver.h index 5fffd9abffde..4762c91cb587 100644 --- a/drivers/net/sfc/net_driver.h +++ b/drivers/net/sfc/net_driver.h @@ -645,7 +645,6 @@ union efx_multicast_hash { * struct efx_nic - an Efx NIC * @name: Device name (net device name or bus id before net device registered) * @pci_dev: The PCI device - * @port_num: Index of this host port within the controller * @type: Controller type attributes * @legacy_irq: IRQ number * @workqueue: Workqueue for port reconfigures and the HW monitor. 
@@ -729,7 +728,6 @@ union efx_multicast_hash { struct efx_nic { char name[IFNAMSIZ]; struct pci_dev *pci_dev; - unsigned port_num; const struct efx_nic_type *type; int legacy_irq; struct workqueue_struct *workqueue; @@ -832,7 +830,7 @@ static inline const char *efx_dev_name(struct efx_nic *efx) static inline unsigned int efx_port_num(struct efx_nic *efx) { - return efx->port_num; + return efx->net_dev->dev_id; } /** diff --git a/drivers/net/sfc/siena.c b/drivers/net/sfc/siena.c index 7ecd255a7cc0..f2b1e6180753 100644 --- a/drivers/net/sfc/siena.c +++ b/drivers/net/sfc/siena.c @@ -222,7 +222,7 @@ static int siena_probe_nic(struct efx_nic *efx) } efx_reado(efx, &reg, FR_AZ_CS_DEBUG); - efx->port_num = EFX_OWORD_FIELD(reg, FRF_CZ_CS_PORT_NUM) - 1; + efx->net_dev->dev_id = EFX_OWORD_FIELD(reg, FRF_CZ_CS_PORT_NUM) - 1; efx_mcdi_init(efx); From db2c24175d149b55784f7cb2c303622ce962c1ae Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Wed, 2 Jun 2010 04:55:02 +0000 Subject: [PATCH 33/44] act_pedit: access skb->data safely access skb->data safely we should use skb_header_pointer() and skb_store_bits() to access skb->data to handle small or non-linear skbs. Signed-off-by: Changli Gao ---- net/sched/act_pedit.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) Signed-off-by: David S. Miller --- net/sched/act_pedit.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index fdbd0b7bd840..50e3d945e1f4 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -125,7 +125,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, { struct tcf_pedit *p = a->priv; int i, munged = 0; - u8 *pptr; + unsigned int off; if (!(skb->tc_verd & TC_OK2MUNGE)) { /* should we set skb->cloned? 
*/ @@ -134,7 +134,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, } } - pptr = skb_network_header(skb); + off = skb_network_offset(skb); spin_lock(&p->tcf_lock); @@ -144,17 +144,17 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tc_pedit_key *tkey = p->tcfp_keys; for (i = p->tcfp_nkeys; i > 0; i--, tkey++) { - u32 *ptr; + u32 *ptr, _data; int offset = tkey->off; if (tkey->offmask) { - if (skb->len > tkey->at) { - char *j = pptr + tkey->at; - offset += ((*j & tkey->offmask) >> - tkey->shift); - } else { + char *d, _d; + + d = skb_header_pointer(skb, off + tkey->at, 1, + &_d); + if (!d) goto bad; - } + offset += (*d & tkey->offmask) >> tkey->shift; } if (offset % 4) { @@ -169,9 +169,13 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, goto bad; } - ptr = (u32 *)(pptr+offset); + ptr = skb_header_pointer(skb, off + offset, 4, &_data); + if (!ptr) + goto bad; /* just do it, baby */ *ptr = ((*ptr & tkey->mask) ^ tkey->val); + if (ptr == &_data) + skb_store_bits(skb, off + offset, ptr, 4); munged++; } From a1868dc2878e61778b9d6d8c61d5368e51d68a29 Mon Sep 17 00:00:00 2001 From: Jeff Kirsher Date: Wed, 2 Jun 2010 12:44:05 +0000 Subject: [PATCH 34/44] ixgbe: return IXGBE_ERR_RAR_INDEX when out of range Based on original patch from Shirley Ma Return IXGBE_ERR_RAR_INDEX when RAR index is out of range, instead of returning IXGBE_SUCCESS. CC: Shirley Ma Signed-off-by: Jeff Kirsher Acked-by: Don Skidmore Signed-off-by: David S. 
Miller --- drivers/net/ixgbe/ixgbe_common.c | 2 ++ drivers/net/ixgbe/ixgbe_type.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/net/ixgbe/ixgbe_common.c b/drivers/net/ixgbe/ixgbe_common.c index 1159d9138f05..9595b1bfb8dd 100644 --- a/drivers/net/ixgbe/ixgbe_common.c +++ b/drivers/net/ixgbe/ixgbe_common.c @@ -1188,6 +1188,7 @@ s32 ixgbe_set_rar_generic(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq, IXGBE_WRITE_REG(hw, IXGBE_RAH(index), rar_high); } else { hw_dbg(hw, "RAR index %d is out of range.\n", index); + return IXGBE_ERR_RAR_INDEX; } return 0; @@ -1219,6 +1220,7 @@ s32 ixgbe_clear_rar_generic(struct ixgbe_hw *hw, u32 index) IXGBE_WRITE_REG(hw, IXGBE_RAH(index), rar_high); } else { hw_dbg(hw, "RAR index %d is out of range.\n", index); + return IXGBE_ERR_RAR_INDEX; } /* clear VMDq pool/queue selection for this RAR */ diff --git a/drivers/net/ixgbe/ixgbe_type.h b/drivers/net/ixgbe/ixgbe_type.h index 2eb6e151016c..cdd1998f18c7 100644 --- a/drivers/net/ixgbe/ixgbe_type.h +++ b/drivers/net/ixgbe/ixgbe_type.h @@ -2609,6 +2609,7 @@ struct ixgbe_info { #define IXGBE_ERR_EEPROM_VERSION -24 #define IXGBE_ERR_NO_SPACE -25 #define IXGBE_ERR_OVERTEMP -26 +#define IXGBE_ERR_RAR_INDEX -27 #define IXGBE_NOT_IMPLEMENTED 0x7FFFFFFF #endif /* _IXGBE_TYPE_H_ */ From 8764ab2ca7ab5055e1ca80f9cfa4970c34acb804 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 4 Jun 2010 01:57:38 +0000 Subject: [PATCH 35/44] net: check for refcount if pop a stacked dst_entry xfrm triggers a warning if dst_pop() drops a refcount on a noref dst. This patch changes dst_pop() to skb_dst_pop(). skb_dst_pop() drops the refcnt only on a refcounted dst. Also we don't clone the child dst_entry, so it is not refcounted and we can use skb_dst_set_noref() in xfrm_output_one(). Signed-off-by: Steffen Klassert Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/dst.h | 6 +++--- net/xfrm/xfrm_output.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/net/dst.h b/include/net/dst.h index 612069beda73..81d1413a8701 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -250,11 +250,11 @@ static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev) * Linux networking. Thus, destinations are stackable. */ -static inline struct dst_entry *dst_pop(struct dst_entry *dst) +static inline struct dst_entry *skb_dst_pop(struct sk_buff *skb) { - struct dst_entry *child = dst_clone(dst->child); + struct dst_entry *child = skb_dst(skb)->child; - dst_release(dst); + skb_dst_drop(skb); return child; } diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 6a329158bdfa..a3cca0a94346 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -95,13 +95,13 @@ resume: goto error_nolock; } - dst = dst_pop(dst); + dst = skb_dst_pop(skb); if (!dst) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); err = -EHOSTUNREACH; goto error_nolock; } - skb_dst_set(skb, dst); + skb_dst_set_noref(skb, dst); x = dst->xfrm; } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)); From ca739481662137b8f717bc21f16719cda3c33d6b Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 3 Jun 2010 17:03:45 +0000 Subject: [PATCH 36/44] ixgbe: only check pfc bits in hang logic if pfc is enabled Only check pfc bits in hang logic if PFC is enabled. Previously, if DCB was enabled but PFC was disabled the incorrect pause bits would be checked. Signed-off-by: John Fastabend Acked-by: Don Skidmore Signed-off-by: Jeff Kirsher Signed-off-by: David S. 
Miller --- drivers/net/ixgbe/ixgbe_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index d571d101de08..b2af2f67f604 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -642,7 +642,7 @@ static inline bool ixgbe_tx_xon_state(struct ixgbe_adapter *adapter, u32 txoff = IXGBE_TFCS_TXOFF; #ifdef CONFIG_IXGBE_DCB - if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { + if (adapter->dcb_cfg.pfc_mode_enable) { int tc; int reg_idx = tx_ring->reg_idx; int dcb_i = adapter->ring_feature[RING_F_DCB].indices; From 57f1553ee5d9f093660cc49098f494e17ed11668 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 3 Jun 2010 00:42:30 +0000 Subject: [PATCH 37/44] syncookies: remove Kconfig text line about disabled-by-default syncookies default to on since e994b7c901ded7200b525a707c6da71f2cf6d4bb (tcp: Don't make syn cookies initial setting depend on CONFIG_SYSCTL). Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/Kconfig | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 8e3a1fd938ab..7c3a7d191249 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -303,7 +303,7 @@ config ARPD If unsure, say N. config SYN_COOKIES - bool "IP: TCP syncookie support (disabled per default)" + bool "IP: TCP syncookie support" ---help--- Normal TCP/IP networking is open to an attack known as "SYN flooding". This denial-of-service attack prevents legitimate remote @@ -328,13 +328,13 @@ config SYN_COOKIES server is really overloaded. If this happens frequently better turn them off. 
- If you say Y here, note that SYN cookies aren't enabled by default; - you can enable them by saying Y to "/proc file system support" and + If you say Y here, you can disable SYN cookies at run time by + saying Y to "/proc file system support" and "Sysctl support" below and executing the command - echo 1 >/proc/sys/net/ipv4/tcp_syncookies + echo 0 > /proc/sys/net/ipv4/tcp_syncookies - at boot time after the /proc file system has been mounted. + after the /proc file system has been mounted. If unsure, say N. From 536e00e570c87f258554e919c444b81a7002e46d Mon Sep 17 00:00:00 2001 From: Ben McKeegan Date: Wed, 2 Jun 2010 23:14:33 +0000 Subject: [PATCH 38/44] ppp_generic: fix multilink fragment sizes Fix bug in multilink fragment size calculation introduced by commit 9c705260feea6ae329bc6b6d5f6d2ef0227eda0a "ppp: ppp_mp_explode() redesign" Signed-off-by: Ben McKeegan Signed-off-by: David S. Miller --- drivers/net/ppp_generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c index c5f8eb102bf7..1b2c29150202 100644 --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c @@ -1422,7 +1422,7 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb) flen = len; if (nfree > 0) { if (pch->speed == 0) { - flen = totlen/nfree; + flen = len/nfree; if (nbigger > 0) { flen++; nbigger--; From ca55158c6ecb7832a6ad80ac44a14d23bab8cdfc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Jun 2010 09:03:58 +0000 Subject: [PATCH 39/44] rps: tcp: fix rps_sock_flow_table table updates I believe a moderate SYN flood attack can corrupt RFS flow table (rps_sock_flow_table), making RPS/RFS much less effective. Even in a normal situation, server handling short lived sessions suffer from bad steering for the first data packet of a session, if another SYN packet is received for another session. 
We do the following action in tcp_v4_rcv() : sock_rps_save_rxhash(sk, skb->rxhash); We should _not_ do this if sk is a LISTEN socket, as nearly every packet received on a LISTEN socket has a different rxhash than the previous one. -> RPS_NO_CPU markers are spread all over rps_sock_flow_table. Also, it makes sense to protect sk->rxhash field changes with the socket lock (we currently can change it even if a user thread owns the lock and might use rxhash). This patch moves sock_rps_save_rxhash() to a sock locked section, and only for non LISTEN sockets. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 202cf09c4cd4..fe193e53af44 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1555,6 +1555,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) #endif if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ + sock_rps_save_rxhash(sk, skb->rxhash); TCP_CHECK_TIMER(sk); if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { rsk = sk; @@ -1579,7 +1580,9 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) } return 0; } - } + } else + sock_rps_save_rxhash(sk, skb->rxhash); + TCP_CHECK_TIMER(sk); if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { @@ -1672,8 +1675,6 @@ process: skb->dev = NULL; - sock_rps_save_rxhash(sk, skb->rxhash); - bh_lock_sock_nested(sk); ret = 0; if (!sock_owned_by_user(sk)) { From c44649216522cd607a4027d2ebf4a8147d3fa94c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Jun 2010 05:45:47 +0000 Subject: [PATCH 40/44] tcp: use correct net ns in cookie_v4_check() It's better to make a route lookup in the appropriate namespace. Signed-off-by: Eric Dumazet Signed-off-by: David S.
Miller --- net/ipv4/syncookies.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 5c24db4a3c91..9f6b22206c52 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -347,7 +347,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, { .sport = th->dest, .dport = th->source } } }; security_req_classify_flow(req, &fl); - if (ip_route_output_key(&init_net, &rt, &fl)) { + if (ip_route_output_key(sock_net(sk), &rt, &fl)) { reqsk_free(req); goto out; } From ca7335948e294faf8adf65f2c95ca18ea78540db Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Fri, 4 Jun 2010 16:14:15 -0700 Subject: [PATCH 41/44] X25: remove duplicated #include Remove duplicated #include('s) in drivers/net/wan/x25_asy.c Signed-off-by: Huang Weiyi Signed-off-by: David S. Miller --- drivers/net/wan/x25_asy.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 166e77dfffda..e47f5a986b1c 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -37,8 +37,6 @@ #include #include "x25_asy.h" -#include - static struct net_device **x25_asy_devs; static int x25_asy_maxdev = SL_NRUNIT; From 72e09ad107e78d69ff4d3b97a69f0aad2b77280f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 5 Jun 2010 03:03:30 -0700 Subject: [PATCH 42/44] ipv6: avoid high order allocations With mtu=9000, mld_newpack() use order-2 GFP_ATOMIC allocations, that are very unreliable, on machines where PAGE_SIZE=4K Limit allocated skbs to be at most one page. (order-0 allocations) Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv6/mcast.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 59f1881968c7..ab1622d7d409 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1356,7 +1356,10 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size) IPV6_TLV_PADN, 0 }; /* we assume size > sizeof(ra) here */ - skb = sock_alloc_send_skb(sk, size + LL_ALLOCATED_SPACE(dev), 1, &err); + size += LL_ALLOCATED_SPACE(dev); + /* limit our allocations to order-0 page */ + size = min_t(int, size, SKB_MAX_ORDER(0, 0)); + skb = sock_alloc_send_skb(sk, size, 1, &err); if (!skb) return NULL; From 8ffb335e8d696affc04f963bf73ce2196f80edb9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 6 Jun 2010 15:34:40 -0700 Subject: [PATCH 43/44] ip6mr: fix a typo in ip6mr_for_each_table() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 073071f2b75b..89c0b077c7aa 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -120,7 +120,7 @@ static void mroute_clean_tables(struct mr6_table *mrt); static void ipmr_expire_process(unsigned long arg); #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES -#define ip6mr_for_each_table(mrt, met) \ +#define ip6mr_for_each_table(mrt, net) \ list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list) static struct mr6_table *ip6mr_get_table(struct net *net, u32 id) From 024a07bacf8287a6ddfa83e9d5b951c5e8b4070e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Sun, 6 Jun 2010 15:38:47 -0700 Subject: [PATCH 44/44] r8169: fix random mdio_write failures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some configurations need delay between the "write completed" indication and new write to work reliably. 
Realtek driver seems to use longer delay when polling the "write complete" bit, so it waits long enough between writes with high probability (but could probably break too). This patch adds a new udelay to make sure we wait unconditionally some time after the write complete indication. This caused a regression with XID 18000000 boards when the board specific phy configuration writing many mdio registers was added in commit 2e955856ff (r8169: phy init for the 8169scd). Some of the configuration mdio writes would almost always fail, and depending on failure might leave the PHY in non-working state. Signed-off-by: Timo Teräs Acked-by: Francois Romieu Signed-off-by: David S. Miller --- drivers/net/r8169.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index 217e709bda3e..03a8318d90a2 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -559,6 +559,11 @@ static void mdio_write(void __iomem *ioaddr, int reg_addr, int value) break; udelay(25); } + /* + * Some configurations require a small delay even after the write + * completed indication or the next write might fail. + */ + udelay(25); } static int mdio_read(void __iomem *ioaddr, int reg_addr)