From 8c83f80b2d335176f72d8729fe1dfe19c5812cb1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 9 Dec 2008 00:10:11 -0800 Subject: [PATCH 01/11] sungem: Make PCS PHY support partially work again. As reported by Hermann Lauer, PCS PHY support in the sungem driver simply doesn't work. When the chip is reset due to open, or some other similar operation, the PCS is reset too but we don't program it back into a running state. The result is no link when the device is brought up. This partially rectifies the situation for the moment, by kicking the PCS after a sungem chip reset so that it will renegotiate and be re-enabled again. The behavior is still a little bit dodgy as the added renegotiate make the link take some time after bringing the interface up, but this is a significant improvement in that things actually work now :-) Based almost entirely upon an initial patch by Hermann. Signed-off-by: David S. Miller --- drivers/net/sungem.c | 129 ++++++++++++++++++++++++------------------- 1 file changed, 73 insertions(+), 56 deletions(-) diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c index 1349e419673c..bb7a47829f7f 100644 --- a/drivers/net/sungem.c +++ b/drivers/net/sungem.c @@ -1142,6 +1142,70 @@ static int gem_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } +static void gem_pcs_reset(struct gem *gp) +{ + int limit; + u32 val; + + /* Reset PCS unit. */ + val = readl(gp->regs + PCS_MIICTRL); + val |= PCS_MIICTRL_RST; + writel(val, gp->regs + PCS_MIICTRL); + + limit = 32; + while (readl(gp->regs + PCS_MIICTRL) & PCS_MIICTRL_RST) { + udelay(100); + if (limit-- <= 0) + break; + } + if (limit <= 0) + printk(KERN_WARNING "%s: PCS reset bit would not clear.\n", + gp->dev->name); +} + +static void gem_pcs_reinit_adv(struct gem *gp) +{ + u32 val; + + /* Make sure PCS is disabled while changing advertisement + * configuration. + */ + val = readl(gp->regs + PCS_CFG); + val &= ~(PCS_CFG_ENABLE | PCS_CFG_TO); + writel(val, gp->regs + PCS_CFG); + + /* Advertise all capabilities except assymetric + * pause. + */ + val = readl(gp->regs + PCS_MIIADV); + val |= (PCS_MIIADV_FD | PCS_MIIADV_HD | + PCS_MIIADV_SP | PCS_MIIADV_AP); + writel(val, gp->regs + PCS_MIIADV); + + /* Enable and restart auto-negotiation, disable wrapback/loopback, + * and re-enable PCS. + */ + val = readl(gp->regs + PCS_MIICTRL); + val |= (PCS_MIICTRL_RAN | PCS_MIICTRL_ANE); + val &= ~PCS_MIICTRL_WB; + writel(val, gp->regs + PCS_MIICTRL); + + val = readl(gp->regs + PCS_CFG); + val |= PCS_CFG_ENABLE; + writel(val, gp->regs + PCS_CFG); + + /* Make sure serialink loopback is off. The meaning + * of this bit is logically inverted based upon whether + * you are in Serialink or SERDES mode. + */ + val = readl(gp->regs + PCS_SCTRL); + if (gp->phy_type == phy_serialink) + val &= ~PCS_SCTRL_LOOP; + else + val |= PCS_SCTRL_LOOP; + writel(val, gp->regs + PCS_SCTRL); +} + #define STOP_TRIES 32 /* Must be invoked under gp->lock and gp->tx_lock. */ @@ -1168,6 +1232,9 @@ static void gem_reset(struct gem *gp) if (limit <= 0) printk(KERN_ERR "%s: SW reset is ghetto.\n", gp->dev->name); + + if (gp->phy_type == phy_serialink || gp->phy_type == phy_serdes) + gem_pcs_reinit_adv(gp); } /* Must be invoked under gp->lock and gp->tx_lock. */ @@ -1324,7 +1391,7 @@ static int gem_set_link_modes(struct gem *gp) gp->phy_type == phy_serdes) { u32 pcs_lpa = readl(gp->regs + PCS_MIILP); - if (pcs_lpa & PCS_MIIADV_FD) + if ((pcs_lpa & PCS_MIIADV_FD) || gp->phy_type == phy_serdes) full_duplex = 1; speed = SPEED_1000; } @@ -1488,6 +1555,9 @@ static void gem_link_timer(unsigned long data) val = readl(gp->regs + PCS_MIISTAT); if ((val & PCS_MIISTAT_LS) != 0) { + if (gp->lstate == link_up) + goto restart; + gp->lstate = link_up; netif_carrier_on(gp->dev); (void)gem_set_link_modes(gp); @@ -1708,61 +1778,8 @@ static void gem_init_phy(struct gem *gp) if (gp->phy_mii.def && gp->phy_mii.def->ops->init) gp->phy_mii.def->ops->init(&gp->phy_mii); } else { - u32 val; - int limit; - - /* Reset PCS unit. */ - val = readl(gp->regs + PCS_MIICTRL); - val |= PCS_MIICTRL_RST; - writel(val, gp->regs + PCS_MIICTRL); - - limit = 32; - while (readl(gp->regs + PCS_MIICTRL) & PCS_MIICTRL_RST) { - udelay(100); - if (limit-- <= 0) - break; - } - if (limit <= 0) - printk(KERN_WARNING "%s: PCS reset bit would not clear.\n", - gp->dev->name); - - /* Make sure PCS is disabled while changing advertisement - * configuration. - */ - val = readl(gp->regs + PCS_CFG); - val &= ~(PCS_CFG_ENABLE | PCS_CFG_TO); - writel(val, gp->regs + PCS_CFG); - - /* Advertise all capabilities except assymetric - * pause. - */ - val = readl(gp->regs + PCS_MIIADV); - val |= (PCS_MIIADV_FD | PCS_MIIADV_HD | - PCS_MIIADV_SP | PCS_MIIADV_AP); - writel(val, gp->regs + PCS_MIIADV); - - /* Enable and restart auto-negotiation, disable wrapback/loopback, - * and re-enable PCS. - */ - val = readl(gp->regs + PCS_MIICTRL); - val |= (PCS_MIICTRL_RAN | PCS_MIICTRL_ANE); - val &= ~PCS_MIICTRL_WB; - writel(val, gp->regs + PCS_MIICTRL); - - val = readl(gp->regs + PCS_CFG); - val |= PCS_CFG_ENABLE; - writel(val, gp->regs + PCS_CFG); - - /* Make sure serialink loopback is off. The meaning - * of this bit is logically inverted based upon whether - * you are in Serialink or SERDES mode. - */ - val = readl(gp->regs + PCS_SCTRL); - if (gp->phy_type == phy_serialink) - val &= ~PCS_SCTRL_LOOP; - else - val |= PCS_SCTRL_LOOP; - writel(val, gp->regs + PCS_SCTRL); + gem_pcs_reset(gp); + gem_pcs_reinit_adv(gp); } /* Default aneg parameters */ From 8d3a564da34e5844aca4f991b73f8ca512246b23 Mon Sep 17 00:00:00 2001 From: Doug Leith Date: Tue, 9 Dec 2008 00:13:04 -0800 Subject: [PATCH 02/11] tcp: tcp_vegas cong avoid fix This patch addresses a book-keeping issue in tcp_vegas.c. At present tcp_vegas does separate book-keeping of cwnd based on packet sequence numbers. A mismatch can develop between this book-keeping and tp->snd_cwnd due, for example, to delayed acks acking multiple packets. When vegas transitions to reno operation (e.g. following loss), then this mismatch leads to incorrect behaviour (akin to a cwnd backoff). This seems mostly to affect operation at low cwnds where delayed acking can lead to a significant fraction of cwnd being covered by a single ack, leading to the book-keeping mismatch. This patch modifies the congestion avoidance update to avoid the need for separate book-keeping while leaving vegas congestion avoidance functionally unchanged. A secondary advantage of this modification is that the use of fixed-point (via V_PARAM_SHIFT) and 64 bit arithmetic is no longer necessary, simplifying the code. Some example test measurements with the patched code (confirming no functional change in the congestion avoidance algorithm) can be seen at: http://www.hamilton.ie/doug/vegaspatch/ Signed-off-by: Doug Leith Signed-off-by: David S. Miller --- net/ipv4/tcp_vegas.c | 80 ++++++-------------------------------------- 1 file changed, 10 insertions(+), 70 deletions(-) diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 7cd22262de3a..a453aac91bd3 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -40,18 +40,14 @@ #include "tcp_vegas.h" -/* Default values of the Vegas variables, in fixed-point representation - * with V_PARAM_SHIFT bits to the right of the binary point. - */ -#define V_PARAM_SHIFT 1 -static int alpha = 2<beg_snd_nxt)) { /* Do the Vegas once-per-RTT cwnd adjustment. */ - u32 old_wnd, old_snd_cwnd; - - - /* Here old_wnd is essentially the window of data that was - * sent during the previous RTT, and has all - * been acknowledged in the course of the RTT that ended - * with the ACK we just received. Likewise, old_snd_cwnd - * is the cwnd during the previous RTT. - */ - old_wnd = (vegas->beg_snd_nxt - vegas->beg_snd_una) / - tp->mss_cache; - old_snd_cwnd = vegas->beg_snd_cwnd; /* Save the extent of the current window so we can use this * at the end of the next RTT. */ - vegas->beg_snd_una = vegas->beg_snd_nxt; vegas->beg_snd_nxt = tp->snd_nxt; - vegas->beg_snd_cwnd = tp->snd_cwnd; /* We do the Vegas calculations only if we got enough RTT * samples that we can be reasonably sure that we got @@ -252,22 +212,14 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) * * This is: * (actual rate in segments) * baseRTT - * We keep it as a fixed point number with - * V_PARAM_SHIFT bits to the right of the binary point. */ - target_cwnd = ((u64)old_wnd * vegas->baseRTT); - target_cwnd <<= V_PARAM_SHIFT; - do_div(target_cwnd, rtt); + target_cwnd = tp->snd_cwnd * vegas->baseRTT / rtt; /* Calculate the difference between the window we had, * and the window we would like to have. This quantity * is the "Diff" from the Arizona Vegas papers. - * - * Again, this is a fixed point number with - * V_PARAM_SHIFT bits to the right of the binary - * point. */ - diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd; + diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT; if (diff > gamma && tp->snd_ssthresh > 2 ) { /* Going too fast. Time to slow down @@ -282,16 +234,13 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) * truncation robs us of full link * utilization. */ - tp->snd_cwnd = min(tp->snd_cwnd, - ((u32)target_cwnd >> - V_PARAM_SHIFT)+1); + tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1); } else if (tp->snd_cwnd <= tp->snd_ssthresh) { /* Slow start. */ tcp_slow_start(tp); } else { /* Congestion avoidance. */ - u32 next_snd_cwnd; /* Figure out where we would like cwnd * to be. @@ -300,26 +249,17 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) /* The old window was too fast, so * we slow down. */ - next_snd_cwnd = old_snd_cwnd - 1; + tp->snd_cwnd--; } else if (diff < alpha) { /* We don't have enough extra packets * in the network, so speed up. */ - next_snd_cwnd = old_snd_cwnd + 1; + tp->snd_cwnd++; } else { /* Sending just as fast as we * should be. */ - next_snd_cwnd = old_snd_cwnd; } - - /* Adjust cwnd upward or downward, toward the - * desired value. - */ - if (next_snd_cwnd > tp->snd_cwnd) - tp->snd_cwnd++; - else if (next_snd_cwnd < tp->snd_cwnd) - tp->snd_cwnd--; } if (tp->snd_cwnd < 2) From fbf0229e946deb9e6c711f9eaa5c8d670c3a28fd Mon Sep 17 00:00:00 2001 From: Hermann Lauer Date: Tue, 9 Dec 2008 15:39:14 -0800 Subject: [PATCH 03/11] sungem: improve ethtool output with internal pcs and serdes From: Hermann Lauer Attached is a patch which improves the output of ethtool (see below) to some sensefull values with a sungem fibre card which uses the sungem interal pcs connected to a serdes chip. The seriallink case in the driver is untouched. Most values are hardcoded, because gigabit fibre autoneg is anyways limited and the driver don't really support much at the moment with that hardware. Signed-off-by: David S. Miller --- drivers/net/sungem.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c index bb7a47829f7f..fed7eba65ead 100644 --- a/drivers/net/sungem.c +++ b/drivers/net/sungem.c @@ -2697,6 +2697,21 @@ static int gem_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) cmd->speed = 0; cmd->duplex = cmd->port = cmd->phy_address = cmd->transceiver = cmd->autoneg = 0; + + /* serdes means usually a Fibre connector, with most fixed */ + if (gp->phy_type == phy_serdes) { + cmd->port = PORT_FIBRE; + cmd->supported = (SUPPORTED_1000baseT_Half | + SUPPORTED_1000baseT_Full | + SUPPORTED_FIBRE | SUPPORTED_Autoneg | + SUPPORTED_Pause | SUPPORTED_Asym_Pause); + cmd->advertising = cmd->supported; + cmd->transceiver = XCVR_INTERNAL; + if (gp->lstate == link_up) + cmd->speed = SPEED_1000; + cmd->duplex = DUPLEX_FULL; + cmd->autoneg = 1; + } } cmd->maxtxpkt = cmd->maxrxpkt = 0; From 24fc7b86dc0470616803be2f921c8cd5c459175d Mon Sep 17 00:00:00 2001 From: Jan Sembera Date: Tue, 9 Dec 2008 15:48:32 -0800 Subject: [PATCH 04/11] ipv6: silence log messages for locally generated multicast This patch fixes minor annoyance during transmission of unsolicited neighbor advertisements from userspace to multicast addresses (as far as I can see in RFC, this is allowed and the similar functionality for IPv4 has been in arping for a long time). Outgoing multicast packets get reinserted into local processing as if they are received from the network. The machine thus sees its own NA and fills the logs with error messages. This patch removes the message if NA has been generated locally. Signed-off-by: Jan Sembera Signed-off-by: David S. Miller --- net/ipv6/ndisc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 172438320eec..d0f54d18e19b 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -912,8 +912,13 @@ static void ndisc_recv_na(struct sk_buff *skb) is invalid, but ndisc specs say nothing about it. It could be misconfiguration, or an smart proxy agent tries to help us :-) + + We should not print the error if NA has been + received from loopback - it is just our own + unsolicited advertisement. */ - ND_PRINTK1(KERN_WARNING + if (skb->pkt_type != PACKET_LOOPBACK) + ND_PRINTK1(KERN_WARNING "ICMPv6 NA: someone advertises our address on %s!\n", ifp->idev->dev->name); in6_ifa_put(ifp); From 7b363e440021a1cf9ed76944b2685f48dacefb3e Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 9 Dec 2008 23:22:26 -0800 Subject: [PATCH 05/11] netpoll: fix race on poll_list resulting in garbage entry A few months back a race was discused between the netpoll napi service path, and the fast path through net_rx_action: http://kerneltrap.org/mailarchive/linux-netdev/2007/10/16/345470 A patch was submitted for that bug, but I think we missed a case. Consider the following scenario: INITIAL STATE CPU0 has one napi_struct A on its poll_list CPU1 is calling netpoll_send_skb and needs to call poll_napi on the same napi_struct A that CPU0 has on its list CPU0 CPU1 net_rx_action poll_napi !list_empty (returns true) locks poll_lock for A poll_one_napi napi->poll netif_rx_complete __napi_complete (removes A from poll_list) list_entry(list->next) In the above scenario, net_rx_action assumes that the per-cpu poll_list is exclusive to that cpu. netpoll of course violates that, and because the netpoll path can dequeue from the poll list, its possible for CPU0 to detect a non-empty list at the top of the while loop in net_rx_action, but have it become empty by the time it calls list_entry. Since the poll_list isn't surrounded by any other structure, the returned data from that list_entry call in this situation is garbage, and any number of crashes can result based on what exactly that garbage is. Given that its not fasible for performance reasons to place exclusive locks arround each cpus poll list to provide that mutal exclusion, I think the best solution is modify the netpoll path in such a way that we continue to guarantee that the poll_list for a cpu is in fact exclusive to that cpu. To do this I've implemented the patch below. It adds an additional bit to the state field in the napi_struct. When executing napi->poll from the netpoll_path, this bit will be set. When a driver calls netif_rx_complete, if that bit is set, it will not remove the napi_struct from the poll_list. That work will be saved for the next iteration of net_rx_action. I've tested this and it seems to work well. About the biggest drawback I can see to it is the fact that it might result in an extra loop through net_rx_action in the event that the device is actually contended for (i.e. the netpoll path actually preforms all the needed work no the device, and the call to net_rx_action winds up doing nothing, except removing the napi_struct from the poll_list. However I think this is probably a small price to pay, given that the alternative is a crash. Signed-off-by: Neil Horman Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 +++++++ net/core/netpoll.c | 2 ++ 2 files changed, 9 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9d77b1d7dca8..e26f54952892 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -319,6 +319,7 @@ enum { NAPI_STATE_SCHED, /* Poll is scheduled */ NAPI_STATE_DISABLE, /* Disable pending */ + NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ }; extern void __napi_schedule(struct napi_struct *n); @@ -1497,6 +1498,12 @@ static inline void netif_rx_complete(struct net_device *dev, { unsigned long flags; + /* + * don't let napi dequeue from the cpu poll list + * just in case its running on a different cpu + */ + if (unlikely(test_bit(NAPI_STATE_NPSVC, &napi->state))) + return; local_irq_save(flags); __netif_rx_complete(dev, napi); local_irq_restore(flags); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 6c7af390be0a..dadac6281f20 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -133,9 +133,11 @@ static int poll_one_napi(struct netpoll_info *npinfo, npinfo->rx_flags |= NETPOLL_RX_DROP; atomic_inc(&trapped); + set_bit(NAPI_STATE_NPSVC, &napi->state); work = napi->poll(napi, budget); + clear_bit(NAPI_STATE_NPSVC, &napi->state); atomic_dec(&trapped); npinfo->rx_flags &= ~NETPOLL_RX_DROP; From bb49eed4221092fde4e2337534ae51d8f28a2fbe Mon Sep 17 00:00:00 2001 From: Alexander Beregalov Date: Wed, 10 Dec 2008 15:24:50 -0800 Subject: [PATCH 06/11] IA64: HP_SIMETH needs to depend upon NET From: Alexander Beregalov Signed-off-by: David S. Miller --- arch/ia64/hp/sim/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/ia64/hp/sim/Kconfig b/arch/ia64/hp/sim/Kconfig index f92306bbedb8..8d513a8c5266 100644 --- a/arch/ia64/hp/sim/Kconfig +++ b/arch/ia64/hp/sim/Kconfig @@ -4,6 +4,7 @@ menu "HP Simulator drivers" config HP_SIMETH bool "Simulated Ethernet " + depends on NET config HP_SIMSERIAL bool "Simulated serial driver support" From 30bb0e0dce78427f3e5cb728d6b5ea73acbefffa Mon Sep 17 00:00:00 2001 From: Jeff Kirsher Date: Thu, 11 Dec 2008 21:28:11 -0800 Subject: [PATCH 07/11] e1000e: fix double release of mutex During a reset, releasing the swflag after it failed to be acquired would cause a double unlock of the mutex. Instead, test whether acquisition of the swflag was successful and if not, do not release the swflag. The reset must still be done to bring the device to a quiescent state. This resolves [BUG 12200] BUG: bad unlock balance detected! e1000e http://bugzilla.kernel.org/show_bug.cgi?id=12200 Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/e1000e/ich8lan.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c index 523b9716a543..d115a6d30f29 100644 --- a/drivers/net/e1000e/ich8lan.c +++ b/drivers/net/e1000e/ich8lan.c @@ -1893,12 +1893,17 @@ static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw) ctrl |= E1000_CTRL_PHY_RST; } ret_val = e1000_acquire_swflag_ich8lan(hw); + /* Whether or not the swflag was acquired, we need to reset the part */ hw_dbg(hw, "Issuing a global reset to ich8lan"); ew32(CTRL, (ctrl | E1000_CTRL_RST)); msleep(20); - /* release the swflag because it is not reset by hardware reset */ - e1000_release_swflag_ich8lan(hw); + if (!ret_val) { + /* release the swflag because it is not reset by + * hardware reset + */ + e1000_release_swflag_ich8lan(hw); + } ret_val = e1000e_get_auto_rd_done(hw); if (ret_val) { From ec8f2375d7584969501918651241f91eca2a6ad3 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 11 Dec 2008 21:31:50 -0800 Subject: [PATCH 08/11] netlabel: Compiler warning and NULL pointer dereference fix Fix the two compiler warnings show below. Thanks to Geert Uytterhoeven for finding and reporting the problem. net/netlabel/netlabel_unlabeled.c:567: warning: 'entry' may be used uninitialized in this function net/netlabel/netlabel_unlabeled.c:629: warning: 'entry' may be used uninitialized in this function Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- net/netlabel/netlabel_unlabeled.c | 38 ++++++++++++++++--------------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 90c8506a0aac..8c0308032178 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -562,7 +562,6 @@ static int netlbl_unlhsh_remove_addr4(struct net *net, const struct in_addr *mask, struct netlbl_audit *audit_info) { - int ret_val = 0; struct netlbl_af4list *list_entry; struct netlbl_unlhsh_addr4 *entry; struct audit_buffer *audit_buf; @@ -577,7 +576,7 @@ static int netlbl_unlhsh_remove_addr4(struct net *net, if (list_entry != NULL) entry = netlbl_unlhsh_addr4_entry(list_entry); else - ret_val = -ENOENT; + entry = NULL; audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL, audit_info); @@ -588,19 +587,21 @@ static int netlbl_unlhsh_remove_addr4(struct net *net, addr->s_addr, mask->s_addr); if (dev != NULL) dev_put(dev); - if (entry && security_secid_to_secctx(entry->secid, - &secctx, - &secctx_len) == 0) { + if (entry != NULL && + security_secid_to_secctx(entry->secid, + &secctx, &secctx_len) == 0) { audit_log_format(audit_buf, " sec_obj=%s", secctx); security_release_secctx(secctx, secctx_len); } - audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0); + audit_log_format(audit_buf, " res=%u", entry != NULL ? 1 : 0); audit_log_end(audit_buf); } - if (ret_val == 0) - call_rcu(&entry->rcu, netlbl_unlhsh_free_addr4); - return ret_val; + if (entry == NULL) + return -ENOENT; + + call_rcu(&entry->rcu, netlbl_unlhsh_free_addr4); + return 0; } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) @@ -624,7 +625,6 @@ static int netlbl_unlhsh_remove_addr6(struct net *net, const struct in6_addr *mask, struct netlbl_audit *audit_info) { - int ret_val = 0; struct netlbl_af6list *list_entry; struct netlbl_unlhsh_addr6 *entry; struct audit_buffer *audit_buf; @@ -638,7 +638,7 @@ static int netlbl_unlhsh_remove_addr6(struct net *net, if (list_entry != NULL) entry = netlbl_unlhsh_addr6_entry(list_entry); else - ret_val = -ENOENT; + entry = NULL; audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL, audit_info); @@ -649,19 +649,21 @@ static int netlbl_unlhsh_remove_addr6(struct net *net, addr, mask); if (dev != NULL) dev_put(dev); - if (entry && security_secid_to_secctx(entry->secid, - &secctx, - &secctx_len) == 0) { + if (entry != NULL && + security_secid_to_secctx(entry->secid, + &secctx, &secctx_len) == 0) { audit_log_format(audit_buf, " sec_obj=%s", secctx); security_release_secctx(secctx, secctx_len); } - audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0); + audit_log_format(audit_buf, " res=%u", entry != NULL ? 1 : 0); audit_log_end(audit_buf); } - if (ret_val == 0) - call_rcu(&entry->rcu, netlbl_unlhsh_free_addr6); - return ret_val; + if (entry == NULL) + return -ENOENT; + + call_rcu(&entry->rcu, netlbl_unlhsh_free_addr6); + return 0; } #endif /* IPv6 */ From be70ed189bc0d16e1609a1c6c04ec9418b4dd11a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 15 Dec 2008 00:19:14 -0800 Subject: [PATCH 09/11] netfilter: update rwlock initialization for nat_table The commit e099a173573ce1ba171092aee7bb3c72ea686e59 (netfilter: netns nat: per-netns NAT table) renamed the nat_table from __nat_table to nat_table without updating the __RW_LOCK_UNLOCKED(__nat_table.lock). Signed-off-by: Steven Rostedt Signed-off-by: David S. Miller --- net/ipv4/netfilter/nf_nat_rule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index bea54a685109..8d489e746b21 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -61,7 +61,7 @@ static struct static struct xt_table nat_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(__nat_table.lock), + .lock = __RW_LOCK_UNLOCKED(nat_table.lock), .me = THIS_MODULE, .af = AF_INET, }; From eb9b851b980e20ba5f6bdfd6ec24f4bc77623ce6 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 15 Dec 2008 00:39:17 -0800 Subject: [PATCH 10/11] SCHED: netem: Correct documentation comment in code. The netem simulator is no longer limited by Linux timer resolution HZ. Not since Patrick McHardy changed the QoS system to use hrtimer. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index a11959908d9a..98402f0efa47 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -46,9 +46,6 @@ layering other disciplines. It does not need to do bandwidth control either since that can be handled by using token bucket or other rate control. - - The simulator is limited by the Linux timer resolution - and will create packet bursts on the HZ boundary (1ms). */ struct netem_sched_data { From 4798a2b84ea5a98e4f36a815a646cb48ff521684 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Mon, 15 Dec 2008 00:53:57 -0800 Subject: [PATCH 11/11] Phonet: keep TX queue disabled when the device is off MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: RĂ©mi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/pep-gprs.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c index 9978afbd9f2a..803eeef0aa85 100644 --- a/net/phonet/pep-gprs.c +++ b/net/phonet/pep-gprs.c @@ -155,12 +155,13 @@ static void gprs_data_ready(struct sock *sk, int len) static void gprs_write_space(struct sock *sk) { struct gprs_dev *dev = sk->sk_user_data; + struct net_device *net = dev->net; unsigned credits = pep_writeable(sk); spin_lock_bh(&dev->tx_lock); dev->tx_max = credits; - if (credits > skb_queue_len(&dev->tx_queue)) - netif_wake_queue(dev->net); + if (credits > skb_queue_len(&dev->tx_queue) && netif_running(net)) + netif_wake_queue(net); spin_unlock_bh(&dev->tx_lock); } @@ -168,6 +169,23 @@ static void gprs_write_space(struct sock *sk) * Network device callbacks */ +static int gprs_open(struct net_device *dev) +{ + struct gprs_dev *gp = netdev_priv(dev); + + gprs_write_space(gp->sk); + return 0; +} + +static int gprs_close(struct net_device *dev) +{ + struct gprs_dev *gp = netdev_priv(dev); + + netif_stop_queue(dev); + flush_work(&gp->tx_work); + return 0; +} + static int gprs_xmit(struct sk_buff *skb, struct net_device *net) { struct gprs_dev *dev = netdev_priv(net); @@ -254,6 +272,8 @@ static void gprs_setup(struct net_device *net) net->tx_queue_len = 10; net->destructor = free_netdev; + net->open = gprs_open; + net->stop = gprs_close; net->hard_start_xmit = gprs_xmit; /* mandatory */ net->change_mtu = gprs_set_mtu; net->get_stats = gprs_get_stats; @@ -318,7 +338,6 @@ int gprs_attach(struct sock *sk) dev->sk = sk; printk(KERN_DEBUG"%s: attached\n", net->name); - gprs_write_space(sk); /* kick off TX */ return net->ifindex; out_rel: @@ -341,7 +360,5 @@ void gprs_detach(struct sock *sk) printk(KERN_DEBUG"%s: detached\n", net->name); unregister_netdev(net); - flush_scheduled_work(); sock_put(sk); - skb_queue_purge(&dev->tx_queue); }