From ee6ae1a1d58c70fc864bc777a36be56b0880ebff Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 20 Jul 2012 02:28:46 +0000 Subject: [PATCH 1/6] net: honour netif_set_real_num_tx_queues() retval In netif_copy_real_num_queues() the return value of netif_set_real_num_tx_queues() should be checked. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ab0251d541ab..eb06e58bed0b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2110,7 +2110,12 @@ static inline int netif_set_real_num_rx_queues(struct net_device *dev, static inline int netif_copy_real_num_queues(struct net_device *to_dev, const struct net_device *from_dev) { - netif_set_real_num_tx_queues(to_dev, from_dev->real_num_tx_queues); + int err; + + err = netif_set_real_num_tx_queues(to_dev, + from_dev->real_num_tx_queues); + if (err) + return err; #ifdef CONFIG_RPS return netif_set_real_num_rx_queues(to_dev, from_dev->real_num_rx_queues); From d40156aa5ecbd51fed932ed4813df82b56e5ff4d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 20 Jul 2012 02:28:47 +0000 Subject: [PATCH 2/6] rtnl: allow to specify different num for rx and tx queue count Also cut out unused function parameters and possible err in return value. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 14 ++++++++------ include/net/rtnetlink.h | 10 ++++++---- net/core/rtnetlink.c | 16 ++++++++-------- 3 files changed, 22 insertions(+), 18 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 3960b1b26178..f41ddc2d48be 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4845,17 +4845,19 @@ static int bond_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } -static int bond_get_tx_queues(struct net *net, struct nlattr *tb[]) +static unsigned int bond_get_num_tx_queues(void) { return tx_queues; } static struct rtnl_link_ops bond_link_ops __read_mostly = { - .kind = "bond", - .priv_size = sizeof(struct bonding), - .setup = bond_setup, - .validate = bond_validate, - .get_tx_queues = bond_get_tx_queues, + .kind = "bond", + .priv_size = sizeof(struct bonding), + .setup = bond_setup, + .validate = bond_validate, + .get_num_tx_queues = bond_get_num_tx_queues, + .get_num_rx_queues = bond_get_num_tx_queues, /* Use the same number + as for TX queues */ }; /* Create a new bond based on the specified name and bonding parameters. diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index bbcfd0993432..6b00c4fc4291 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -44,8 +44,10 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh) * @get_xstats_size: Function to calculate required room for dumping device * specific statistics * @fill_xstats: Function to dump device specific statistics - * @get_tx_queues: Function to determine number of transmit queues to create when - * creating a new device. + * @get_num_tx_queues: Function to determine number of transmit queues + * to create when creating a new device. + * @get_num_rx_queues: Function to determine number of receive queues + * to create when creating a new device. */ struct rtnl_link_ops { struct list_head list; @@ -77,8 +79,8 @@ struct rtnl_link_ops { size_t (*get_xstats_size)(const struct net_device *dev); int (*fill_xstats)(struct sk_buff *skb, const struct net_device *dev); - int (*get_tx_queues)(struct net *net, - struct nlattr *tb[]); + unsigned int (*get_num_tx_queues)(void); + unsigned int (*get_num_rx_queues)(void); }; extern int __rtnl_link_register(struct rtnl_link_ops *ops); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 045db8ad87c8..db5a8ad8a79b 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1624,17 +1624,17 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net, { int err; struct net_device *dev; - unsigned int num_queues = 1; + unsigned int num_tx_queues = 1; + unsigned int num_rx_queues = 1; - if (ops->get_tx_queues) { - err = ops->get_tx_queues(src_net, tb); - if (err < 0) - goto err; - num_queues = err; - } + if (ops->get_num_tx_queues) + num_tx_queues = ops->get_num_tx_queues(); + if (ops->get_num_rx_queues) + num_rx_queues = ops->get_num_rx_queues(); err = -ENOMEM; - dev = alloc_netdev_mq(ops->priv_size, ifname, ops->setup, num_queues); + dev = alloc_netdev_mqs(ops->priv_size, ifname, ops->setup, + num_tx_queues, num_rx_queues); if (!dev) goto err; From 76ff5cc91935c51fcf1a6a99ffa28b97a6e7a884 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 20 Jul 2012 02:28:48 +0000 Subject: [PATCH 3/6] rtnl: allow to specify number of rx and tx queues on device creation This patch introduces IFLA_NUM_TX_QUEUES and IFLA_NUM_RX_QUEUES by which userspace can set number of rx and/or tx queues to be allocated for newly created netdevice. This overrides ops->get_num_[tr]x_queues() Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/if_link.h | 2 ++ net/core/rtnetlink.c | 15 +++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/include/linux/if_link.h b/include/linux/if_link.h index f715750d0b87..ac173bd2ab65 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -140,6 +140,8 @@ enum { IFLA_EXT_MASK, /* Extended info mask, VFs, etc */ IFLA_PROMISCUITY, /* Promiscuity count: > 0 means acts PROMISC */ #define IFLA_PROMISCUITY IFLA_PROMISCUITY + IFLA_NUM_TX_QUEUES, + IFLA_NUM_RX_QUEUES, __IFLA_MAX }; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index db5a8ad8a79b..5bb1ebca2eb0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -771,6 +771,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_LINK */ + nla_total_size(4) /* IFLA_MASTER */ + nla_total_size(4) /* IFLA_PROMISCUITY */ + + nla_total_size(4) /* IFLA_NUM_TX_QUEUES */ + + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */ + nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(1) /* IFLA_LINKMODE */ + nla_total_size(ext_filter_mask @@ -889,6 +891,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_put_u32(skb, IFLA_MTU, dev->mtu) || nla_put_u32(skb, IFLA_GROUP, dev->group) || nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) || + nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) || + nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) || (dev->ifindex != dev->iflink && nla_put_u32(skb, IFLA_LINK, dev->iflink)) || (dev->master && @@ -1106,6 +1110,8 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_AF_SPEC] = { .type = NLA_NESTED }, [IFLA_EXT_MASK] = { .type = NLA_U32 }, [IFLA_PROMISCUITY] = { .type = NLA_U32 }, + [IFLA_NUM_TX_QUEUES] = { .type = NLA_U32 }, + [IFLA_NUM_RX_QUEUES] = { .type = NLA_U32 }, }; EXPORT_SYMBOL(ifla_policy); @@ -1627,9 +1633,14 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net, unsigned int num_tx_queues = 1; unsigned int num_rx_queues = 1; - if (ops->get_num_tx_queues) + if (tb[IFLA_NUM_TX_QUEUES]) + num_tx_queues = nla_get_u32(tb[IFLA_NUM_TX_QUEUES]); + else if (ops->get_num_tx_queues) num_tx_queues = ops->get_num_tx_queues(); - if (ops->get_num_rx_queues) + + if (tb[IFLA_NUM_RX_QUEUES]) + num_rx_queues = nla_get_u32(tb[IFLA_NUM_RX_QUEUES]); + else if (ops->get_num_rx_queues) num_rx_queues = ops->get_num_rx_queues(); err = -ENOMEM; From df4ab5b3c295050da09153fa9760042e4de3ffff Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 20 Jul 2012 02:28:49 +0000 Subject: [PATCH 4/6] net: rename bond_queue_mapping to slave_dev_queue_mapping As this is going to be used not only by bonding. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 6 +++--- include/net/sch_generic.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index f41ddc2d48be..6fae5f3ec7f6 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -395,8 +395,8 @@ int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, skb->dev = slave_dev; BUILD_BUG_ON(sizeof(skb->queue_mapping) != - sizeof(qdisc_skb_cb(skb)->bond_queue_mapping)); - skb->queue_mapping = qdisc_skb_cb(skb)->bond_queue_mapping; + sizeof(qdisc_skb_cb(skb)->slave_dev_queue_mapping)); + skb->queue_mapping = qdisc_skb_cb(skb)->slave_dev_queue_mapping; if (unlikely(netpoll_tx_running(slave_dev))) bond_netpoll_send_skb(bond_get_slave_by_dev(bond, slave_dev), skb); @@ -4184,7 +4184,7 @@ static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb) /* * Save the original txq to restore before passing to the driver */ - qdisc_skb_cb(skb)->bond_queue_mapping = skb->queue_mapping; + qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping; if (unlikely(txq >= dev->real_num_tx_queues)) { do { diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 9d7d54a00e63..d9611e032418 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -220,7 +220,7 @@ struct tcf_proto { struct qdisc_skb_cb { unsigned int pkt_len; - u16 bond_queue_mapping; + u16 slave_dev_queue_mapping; u16 _pad; unsigned char data[20]; }; From 8a540ff9e1d685b5feb0a9df5d2b74db1e0b4d39 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 20 Jul 2012 02:28:50 +0000 Subject: [PATCH 5/6] bond_sysfs: use real_num_tx_queues rather than params.tx_queue Since now number of tx queues can be specified during bond instance creation and therefore it may differ from params.tx_queues, use rather real_num_tx_queues for boundary check. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/bonding/bond_sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 485bedb8278c..dc15d248443f 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -1495,7 +1495,7 @@ static ssize_t bonding_store_queue_id(struct device *d, /* Check buffer length, valid ifname and queue id */ if (strlen(buffer) > IFNAMSIZ || !dev_valid_name(buffer) || - qid > bond->params.tx_queues) + qid > bond->dev->real_num_tx_queues) goto err_no_cmd; /* Get the pointer to that interface if it exists */ From 6c85f2bdda2086d804e198a3f31b685bc2f86b04 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 20 Jul 2012 02:28:51 +0000 Subject: [PATCH 6/6] team: add multiqueue support Largely copied from bonding code. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/team/team.c | 65 +++++++++++++++++++++++++++++++++++++---- include/linux/if_team.h | 8 +++++ 2 files changed, 68 insertions(+), 5 deletions(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 813e1319095f..b104c05225f7 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #define DRV_NAME "team" @@ -1121,6 +1122,22 @@ static const struct team_option team_options[] = { }, }; +static struct lock_class_key team_netdev_xmit_lock_key; +static struct lock_class_key team_netdev_addr_lock_key; + +static void team_set_lockdep_class_one(struct net_device *dev, + struct netdev_queue *txq, + void *unused) +{ + lockdep_set_class(&txq->_xmit_lock, &team_netdev_xmit_lock_key); +} + +static void team_set_lockdep_class(struct net_device *dev) +{ + lockdep_set_class(&dev->addr_list_lock, &team_netdev_addr_lock_key); + netdev_for_each_tx_queue(dev, team_set_lockdep_class_one, NULL); +} + static int team_init(struct net_device *dev) { struct team *team = netdev_priv(dev); @@ -1148,6 +1165,8 @@ static int team_init(struct net_device *dev) goto err_options_register; netif_carrier_off(dev); + team_set_lockdep_class(dev); + return 0; err_options_register: @@ -1216,6 +1235,29 @@ static netdev_tx_t team_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } +static u16 team_select_queue(struct net_device *dev, struct sk_buff *skb) +{ + /* + * This helper function exists to help dev_pick_tx get the correct + * destination queue. Using a helper function skips a call to + * skb_tx_hash and will put the skbs in the queue we expect on their + * way down to the team driver. + */ + u16 txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0; + + /* + * Save the original txq to restore before passing to the driver + */ + qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping; + + if (unlikely(txq >= dev->real_num_tx_queues)) { + do { + txq -= dev->real_num_tx_queues; + } while (txq >= dev->real_num_tx_queues); + } + return txq; +} + static void team_change_rx_flags(struct net_device *dev, int change) { struct team *team = netdev_priv(dev); @@ -1469,6 +1511,7 @@ static const struct net_device_ops team_netdev_ops = { .ndo_open = team_open, .ndo_stop = team_close, .ndo_start_xmit = team_xmit, + .ndo_select_queue = team_select_queue, .ndo_change_rx_flags = team_change_rx_flags, .ndo_set_rx_mode = team_set_rx_mode, .ndo_set_mac_address = team_set_mac_address, @@ -1543,12 +1586,24 @@ static int team_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } +static unsigned int team_get_num_tx_queues(void) +{ + return TEAM_DEFAULT_NUM_TX_QUEUES; +} + +static unsigned int team_get_num_rx_queues(void) +{ + return TEAM_DEFAULT_NUM_RX_QUEUES; +} + static struct rtnl_link_ops team_link_ops __read_mostly = { - .kind = DRV_NAME, - .priv_size = sizeof(struct team), - .setup = team_setup, - .newlink = team_newlink, - .validate = team_validate, + .kind = DRV_NAME, + .priv_size = sizeof(struct team), + .setup = team_setup, + .newlink = team_newlink, + .validate = team_validate, + .get_num_tx_queues = team_get_num_tx_queues, + .get_num_rx_queues = team_get_num_rx_queues, }; diff --git a/include/linux/if_team.h b/include/linux/if_team.h index 7fd0cdeb9444..6960fc1841a7 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -14,6 +14,7 @@ #ifdef __KERNEL__ #include +#include struct team_pcpu_stats { u64 rx_packets; @@ -98,6 +99,10 @@ static inline void team_netpoll_send_skb(struct team_port *port, static inline int team_dev_queue_xmit(struct team *team, struct team_port *port, struct sk_buff *skb) { + BUILD_BUG_ON(sizeof(skb->queue_mapping) != + sizeof(qdisc_skb_cb(skb)->slave_dev_queue_mapping)); + skb_set_queue_mapping(skb, qdisc_skb_cb(skb)->slave_dev_queue_mapping); + skb->dev = port->dev; if (unlikely(netpoll_tx_running(port->dev))) { team_netpoll_send_skb(port, skb); @@ -236,6 +241,9 @@ extern void team_options_unregister(struct team *team, extern int team_mode_register(const struct team_mode *mode); extern void team_mode_unregister(const struct team_mode *mode); +#define TEAM_DEFAULT_NUM_TX_QUEUES 16 +#define TEAM_DEFAULT_NUM_RX_QUEUES 16 + #endif /* __KERNEL__ */ #define TEAM_STRING_MAX_LEN 32