diff --git a/Documentation/networking/devlink/devlink-port.rst b/Documentation/networking/devlink/devlink-port.rst index ab790e7980b8..7627b1da01f2 100644 --- a/Documentation/networking/devlink/devlink-port.rst +++ b/Documentation/networking/devlink/devlink-port.rst @@ -164,6 +164,41 @@ device to instantiate the subfunction device on particular PCI function. A subfunction device is created on the :ref:`Documentation/driver-api/auxiliary_bus.rst `. At this point a matching subfunction driver binds to the subfunction's auxiliary device. +Rate object management +====================== + +Devlink provides API to manage tx rates of single devlink port or a group. +This is done through rate objects, which can be one of the two types: + +``leaf`` + Represents a single devlink port; created/destroyed by the driver. Since leaf + have 1to1 mapping to its devlink port, in user space it is referred as + ``pci//``; + +``node`` + Represents a group of rate objects (leafs and/or nodes); created/deleted by + request from the userspace; initially empty (no rate objects added). In + userspace it is referred as ``pci//``, where + ``node_name`` can be any identifier, except decimal number, to avoid + collisions with leafs. + +API allows to configure following rate object's parameters: + +``tx_share`` + Minimum TX rate value shared among all other rate objects, or rate objects + that parts of the parent group, if it is a part of the same group. + +``tx_max`` + Maximum TX rate value. + +``parent`` + Parent node name. Parent node rate limits are considered as additional limits + to all node children limits. ``tx_max`` is an upper limit for children. + ``tx_share`` is a total bandwidth distributed among children. + +Driver implementations are allowed to support both or either rate object types +and setting methods of their parameters. + Terms and Definitions ===================== diff --git a/Documentation/networking/devlink/netdevsim.rst b/Documentation/networking/devlink/netdevsim.rst index 02c2d20dc673..8a292fb5aaea 100644 --- a/Documentation/networking/devlink/netdevsim.rst +++ b/Documentation/networking/devlink/netdevsim.rst @@ -57,6 +57,32 @@ entries, FIB rule entries and nexthops that the driver will allow. $ devlink resource set netdevsim/netdevsim0 path /nexthops size 16 $ devlink dev reload netdevsim/netdevsim0 +Rate objects +============ + +The ``netdevsim`` driver supports rate objects management, which includes: + +- registerging/unregistering leaf rate objects per VF devlink port; +- creation/deletion node rate objects; +- setting tx_share and tx_max rate values for any rate object type; +- setting parent node for any rate object type. + +Rate nodes and it's parameters are exposed in ``netdevsim`` debugfs in RO mode. +For example created rate node with name ``some_group``: + +.. code:: shell + + $ ls /sys/kernel/debug/netdevsim/netdevsim0/rate_groups/some_group + rate_parent tx_max tx_share + +Same parameters are exposed for leaf objects in corresponding ports directories. +For ex.: + +.. code:: shell + + $ ls /sys/kernel/debug/netdevsim/netdevsim0/ports/1 + dev ethtool rate_parent tx_max tx_share + Driver-specific Traps ===================== diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c index 0e9511661601..b56003dfe3cc 100644 --- a/drivers/net/netdevsim/bus.c +++ b/drivers/net/netdevsim/bus.c @@ -27,21 +27,34 @@ static struct nsim_bus_dev *to_nsim_bus_dev(struct device *dev) static int nsim_bus_dev_vfs_enable(struct nsim_bus_dev *nsim_bus_dev, unsigned int num_vfs) { - nsim_bus_dev->vfconfigs = kcalloc(num_vfs, - sizeof(struct nsim_vf_config), - GFP_KERNEL | __GFP_NOWARN); + struct nsim_dev *nsim_dev; + int err = 0; + + if (nsim_bus_dev->max_vfs < num_vfs) + return -ENOMEM; + if (!nsim_bus_dev->vfconfigs) return -ENOMEM; nsim_bus_dev->num_vfs = num_vfs; - return 0; + nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev); + if (nsim_esw_mode_is_switchdev(nsim_dev)) { + err = nsim_esw_switchdev_enable(nsim_dev, NULL); + if (err) + nsim_bus_dev->num_vfs = 0; + } + + return err; } -static void nsim_bus_dev_vfs_disable(struct nsim_bus_dev *nsim_bus_dev) +void nsim_bus_dev_vfs_disable(struct nsim_bus_dev *nsim_bus_dev) { - kfree(nsim_bus_dev->vfconfigs); - nsim_bus_dev->vfconfigs = NULL; + struct nsim_dev *nsim_dev; + nsim_bus_dev->num_vfs = 0; + nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev); + if (nsim_esw_mode_is_switchdev(nsim_dev)) + nsim_esw_legacy_enable(nsim_dev, NULL); } static ssize_t @@ -56,7 +69,7 @@ nsim_bus_dev_numvfs_store(struct device *dev, struct device_attribute *attr, if (ret) return ret; - rtnl_lock(); + mutex_lock(&nsim_bus_dev->vfs_lock); if (nsim_bus_dev->num_vfs == num_vfs) goto exit_good; if (nsim_bus_dev->num_vfs && num_vfs) { @@ -74,7 +87,7 @@ nsim_bus_dev_numvfs_store(struct device *dev, struct device_attribute *attr, exit_good: ret = count; exit_unlock: - rtnl_unlock(); + mutex_unlock(&nsim_bus_dev->vfs_lock); return ret; } @@ -92,6 +105,79 @@ static struct device_attribute nsim_bus_dev_numvfs_attr = __ATTR(sriov_numvfs, 0664, nsim_bus_dev_numvfs_show, nsim_bus_dev_numvfs_store); +ssize_t nsim_bus_dev_max_vfs_read(struct file *file, + char __user *data, + size_t count, loff_t *ppos) +{ + struct nsim_bus_dev *nsim_bus_dev = file->private_data; + char buf[11]; + size_t len; + + len = snprintf(buf, sizeof(buf), "%u\n", nsim_bus_dev->max_vfs); + if (len < 0) + return len; + + return simple_read_from_buffer(data, count, ppos, buf, len); +} + +ssize_t nsim_bus_dev_max_vfs_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct nsim_bus_dev *nsim_bus_dev = file->private_data; + struct nsim_vf_config *vfconfigs; + ssize_t ret; + char buf[10]; + u32 val; + + if (*ppos != 0) + return 0; + + if (count >= sizeof(buf)) + return -ENOSPC; + + mutex_lock(&nsim_bus_dev->vfs_lock); + /* Reject if VFs are configured */ + if (nsim_bus_dev->num_vfs) { + ret = -EBUSY; + goto unlock; + } + + ret = copy_from_user(buf, data, count); + if (ret) { + ret = -EFAULT; + goto unlock; + } + + buf[count] = '\0'; + ret = kstrtouint(buf, 10, &val); + if (ret) { + ret = -EIO; + goto unlock; + } + + /* max_vfs limited by the maximum number of provided port indexes */ + if (val > NSIM_DEV_VF_PORT_INDEX_MAX - NSIM_DEV_VF_PORT_INDEX_BASE) { + ret = -ERANGE; + goto unlock; + } + + vfconfigs = kcalloc(val, sizeof(struct nsim_vf_config), GFP_KERNEL | __GFP_NOWARN); + if (!vfconfigs) { + ret = -ENOMEM; + goto unlock; + } + + kfree(nsim_bus_dev->vfconfigs); + nsim_bus_dev->vfconfigs = vfconfigs; + nsim_bus_dev->max_vfs = val; + *ppos += count; + ret = count; +unlock: + mutex_unlock(&nsim_bus_dev->vfs_lock); + return ret; +} + static ssize_t new_port_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -113,7 +199,7 @@ new_port_store(struct device *dev, struct device_attribute *attr, mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock); devlink_reload_disable(devlink); - ret = nsim_dev_port_add(nsim_bus_dev, port_index); + ret = nsim_dev_port_add(nsim_bus_dev, NSIM_DEV_PORT_TYPE_PF, port_index); devlink_reload_enable(devlink); mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); return ret ? ret : count; @@ -142,7 +228,7 @@ del_port_store(struct device *dev, struct device_attribute *attr, mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock); devlink_reload_disable(devlink); - ret = nsim_dev_port_del(nsim_bus_dev, port_index); + ret = nsim_dev_port_del(nsim_bus_dev, NSIM_DEV_PORT_TYPE_PF, port_index); devlink_reload_enable(devlink); mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); return ret ? ret : count; @@ -168,9 +254,6 @@ static const struct attribute_group *nsim_bus_dev_attr_groups[] = { static void nsim_bus_dev_release(struct device *dev) { - struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev); - - nsim_bus_dev_vfs_disable(nsim_bus_dev); } static struct device_type nsim_bus_dev_type = { @@ -311,6 +394,8 @@ static struct bus_type nsim_bus = { .num_vf = nsim_num_vf, }; +#define NSIM_BUS_DEV_MAX_VFS 4 + static struct nsim_bus_dev * nsim_bus_dev_new(unsigned int id, unsigned int port_count) { @@ -329,15 +414,28 @@ nsim_bus_dev_new(unsigned int id, unsigned int port_count) nsim_bus_dev->dev.type = &nsim_bus_dev_type; nsim_bus_dev->port_count = port_count; nsim_bus_dev->initial_net = current->nsproxy->net_ns; + nsim_bus_dev->max_vfs = NSIM_BUS_DEV_MAX_VFS; mutex_init(&nsim_bus_dev->nsim_bus_reload_lock); + mutex_init(&nsim_bus_dev->vfs_lock); /* Disallow using nsim_bus_dev */ smp_store_release(&nsim_bus_dev->init, false); + nsim_bus_dev->vfconfigs = kcalloc(nsim_bus_dev->max_vfs, + sizeof(struct nsim_vf_config), + GFP_KERNEL | __GFP_NOWARN); + if (!nsim_bus_dev->vfconfigs) { + err = -ENOMEM; + goto err_nsim_bus_dev_id_free; + } + err = device_register(&nsim_bus_dev->dev); if (err) - goto err_nsim_bus_dev_id_free; + goto err_nsim_vfs_free; + return nsim_bus_dev; +err_nsim_vfs_free: + kfree(nsim_bus_dev->vfconfigs); err_nsim_bus_dev_id_free: ida_free(&nsim_bus_dev_ids, nsim_bus_dev->dev.id); err_nsim_bus_dev_free: @@ -351,6 +449,7 @@ static void nsim_bus_dev_del(struct nsim_bus_dev *nsim_bus_dev) smp_store_release(&nsim_bus_dev->init, false); device_unregister(&nsim_bus_dev->dev); ida_free(&nsim_bus_dev_ids, nsim_bus_dev->dev.id); + kfree(nsim_bus_dev->vfconfigs); kfree(nsim_bus_dev); } diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 6189a4c0d39e..527b019ae0b2 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -35,6 +35,25 @@ #include "netdevsim.h" +static unsigned int +nsim_dev_port_index(enum nsim_dev_port_type type, unsigned int port_index) +{ + switch (type) { + case NSIM_DEV_PORT_TYPE_VF: + port_index = NSIM_DEV_VF_PORT_INDEX_BASE + port_index; + break; + case NSIM_DEV_PORT_TYPE_PF: + break; + } + + return port_index; +} + +static inline unsigned int nsim_dev_port_index_to_vf_index(unsigned int port_index) +{ + return port_index - NSIM_DEV_VF_PORT_INDEX_BASE; +} + static struct dentry *nsim_dev_ddir; #define NSIM_DEV_DUMMY_REGION_SIZE (1024 * 32) @@ -192,9 +211,18 @@ static const struct file_operations nsim_dev_trap_fa_cookie_fops = { .owner = THIS_MODULE, }; +static const struct file_operations nsim_dev_max_vfs_fops = { + .open = simple_open, + .read = nsim_bus_dev_max_vfs_read, + .write = nsim_bus_dev_max_vfs_write, + .llseek = generic_file_llseek, + .owner = THIS_MODULE, +}; + static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev) { char dev_ddir_name[sizeof(DRV_NAME) + 10]; + int err; sprintf(dev_ddir_name, DRV_NAME "%u", nsim_dev->nsim_bus_dev->dev.id); nsim_dev->ddir = debugfs_create_dir(dev_ddir_name, nsim_dev_ddir); @@ -231,30 +259,81 @@ static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev) debugfs_create_bool("fail_trap_policer_counter_get", 0600, nsim_dev->ddir, &nsim_dev->fail_trap_policer_counter_get); + nsim_dev->max_vfs = debugfs_create_file("max_vfs", + 0600, + nsim_dev->ddir, + nsim_dev->nsim_bus_dev, + &nsim_dev_max_vfs_fops); + nsim_dev->nodes_ddir = debugfs_create_dir("rate_nodes", nsim_dev->ddir); + if (IS_ERR(nsim_dev->nodes_ddir)) { + err = PTR_ERR(nsim_dev->nodes_ddir); + goto err_out; + } nsim_udp_tunnels_debugfs_create(nsim_dev); return 0; + +err_out: + debugfs_remove_recursive(nsim_dev->ports_ddir); + debugfs_remove_recursive(nsim_dev->ddir); + return err; } static void nsim_dev_debugfs_exit(struct nsim_dev *nsim_dev) { + debugfs_remove_recursive(nsim_dev->nodes_ddir); debugfs_remove_recursive(nsim_dev->ports_ddir); debugfs_remove_recursive(nsim_dev->ddir); } +static ssize_t nsim_dev_rate_parent_read(struct file *file, + char __user *data, + size_t count, loff_t *ppos) +{ + char **name_ptr = file->private_data; + size_t len; + + if (!*name_ptr) + return 0; + + len = strlen(*name_ptr); + return simple_read_from_buffer(data, count, ppos, *name_ptr, len); +} + +static const struct file_operations nsim_dev_rate_parent_fops = { + .open = simple_open, + .read = nsim_dev_rate_parent_read, + .llseek = generic_file_llseek, + .owner = THIS_MODULE, +}; + static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) { + struct nsim_bus_dev *nsim_bus_dev = nsim_dev->nsim_bus_dev; + unsigned int port_index = nsim_dev_port->port_index; char port_ddir_name[16]; char dev_link_name[32]; - sprintf(port_ddir_name, "%u", nsim_dev_port->port_index); + sprintf(port_ddir_name, "%u", port_index); nsim_dev_port->ddir = debugfs_create_dir(port_ddir_name, nsim_dev->ports_ddir); if (IS_ERR(nsim_dev_port->ddir)) return PTR_ERR(nsim_dev_port->ddir); - sprintf(dev_link_name, "../../../" DRV_NAME "%u", - nsim_dev->nsim_bus_dev->dev.id); + sprintf(dev_link_name, "../../../" DRV_NAME "%u", nsim_bus_dev->dev.id); + if (nsim_dev_port_is_vf(nsim_dev_port)) { + unsigned int vf_id = nsim_dev_port_index_to_vf_index(port_index); + + debugfs_create_u16("tx_share", 0400, nsim_dev_port->ddir, + &nsim_bus_dev->vfconfigs[vf_id].min_tx_rate); + debugfs_create_u16("tx_max", 0400, nsim_dev_port->ddir, + &nsim_bus_dev->vfconfigs[vf_id].max_tx_rate); + nsim_dev_port->rate_parent = debugfs_create_file("rate_parent", + 0400, + nsim_dev_port->ddir, + &nsim_dev_port->parent_name, + &nsim_dev_rate_parent_fops); + } debugfs_create_symlink("dev", nsim_dev_port->ddir, dev_link_name); return 0; @@ -407,6 +486,74 @@ static void nsim_dev_dummy_region_exit(struct nsim_dev *nsim_dev) devlink_region_destroy(nsim_dev->dummy_region); } +static void __nsim_dev_port_del(struct nsim_dev_port *nsim_dev_port); +int nsim_esw_legacy_enable(struct nsim_dev *nsim_dev, struct netlink_ext_ack *extack) +{ + struct devlink *devlink = priv_to_devlink(nsim_dev); + struct nsim_dev_port *nsim_dev_port, *tmp; + + devlink_rate_nodes_destroy(devlink); + mutex_lock(&nsim_dev->port_list_lock); + list_for_each_entry_safe(nsim_dev_port, tmp, &nsim_dev->port_list, list) + if (nsim_dev_port_is_vf(nsim_dev_port)) + __nsim_dev_port_del(nsim_dev_port); + mutex_unlock(&nsim_dev->port_list_lock); + nsim_dev->esw_mode = DEVLINK_ESWITCH_MODE_LEGACY; + return 0; +} + +int nsim_esw_switchdev_enable(struct nsim_dev *nsim_dev, struct netlink_ext_ack *extack) +{ + struct nsim_bus_dev *nsim_bus_dev = nsim_dev->nsim_bus_dev; + int i, err; + + for (i = 0; i < nsim_bus_dev->num_vfs; i++) { + err = nsim_dev_port_add(nsim_bus_dev, NSIM_DEV_PORT_TYPE_VF, i); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to initialize VFs' netdevsim ports"); + pr_err("Failed to initialize VF id=%d. %d.\n", i, err); + goto err_port_add_vfs; + } + } + nsim_dev->esw_mode = DEVLINK_ESWITCH_MODE_SWITCHDEV; + return 0; + +err_port_add_vfs: + for (i--; i >= 0; i--) + nsim_dev_port_del(nsim_bus_dev, NSIM_DEV_PORT_TYPE_VF, i); + return err; +} + +static int nsim_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, + struct netlink_ext_ack *extack) +{ + struct nsim_dev *nsim_dev = devlink_priv(devlink); + int err = 0; + + mutex_lock(&nsim_dev->nsim_bus_dev->vfs_lock); + if (mode == nsim_dev->esw_mode) + goto unlock; + + if (mode == DEVLINK_ESWITCH_MODE_LEGACY) + err = nsim_esw_legacy_enable(nsim_dev, extack); + else if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) + err = nsim_esw_switchdev_enable(nsim_dev, extack); + else + err = -EINVAL; + +unlock: + mutex_unlock(&nsim_dev->nsim_bus_dev->vfs_lock); + return err; +} + +static int nsim_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) +{ + struct nsim_dev *nsim_dev = devlink_priv(devlink); + + *mode = nsim_dev->esw_mode; + return 0; +} + struct nsim_trap_item { void *trap_ctx; enum devlink_trap_action action; @@ -892,7 +1039,187 @@ nsim_dev_devlink_trap_policer_counter_get(struct devlink *devlink, return 0; } +#define NSIM_LINK_SPEED_MAX 5000 /* Mbps */ +#define NSIM_LINK_SPEED_UNIT 125000 /* 1 Mbps given in bytes/sec to avoid + * u64 overflow during conversion from + * bytes to bits. + */ + +static int nsim_rate_bytes_to_units(char *name, u64 *rate, struct netlink_ext_ack *extack) +{ + u64 val; + u32 rem; + + val = div_u64_rem(*rate, NSIM_LINK_SPEED_UNIT, &rem); + if (rem) { + pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n", + name, *rate); + NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps."); + return -EINVAL; + } + + if (val > NSIM_LINK_SPEED_MAX) { + pr_err("%s rate value %lluMbps exceed link maximum speed 5000Mbps.\n", + name, val); + NL_SET_ERR_MSG_MOD(extack, "TX rate value exceed link maximum speed 5000Mbps."); + return -EINVAL; + } + *rate = val; + return 0; +} + +static int nsim_leaf_tx_share_set(struct devlink_rate *devlink_rate, void *priv, + u64 tx_share, struct netlink_ext_ack *extack) +{ + struct nsim_dev_port *nsim_dev_port = priv; + struct nsim_bus_dev *nsim_bus_dev = nsim_dev_port->ns->nsim_bus_dev; + int vf_id = nsim_dev_port_index_to_vf_index(nsim_dev_port->port_index); + int err; + + err = nsim_rate_bytes_to_units("tx_share", &tx_share, extack); + if (err) + return err; + + nsim_bus_dev->vfconfigs[vf_id].min_tx_rate = tx_share; + return 0; +} + +static int nsim_leaf_tx_max_set(struct devlink_rate *devlink_rate, void *priv, + u64 tx_max, struct netlink_ext_ack *extack) +{ + struct nsim_dev_port *nsim_dev_port = priv; + struct nsim_bus_dev *nsim_bus_dev = nsim_dev_port->ns->nsim_bus_dev; + int vf_id = nsim_dev_port_index_to_vf_index(nsim_dev_port->port_index); + int err; + + err = nsim_rate_bytes_to_units("tx_max", &tx_max, extack); + if (err) + return err; + + nsim_bus_dev->vfconfigs[vf_id].max_tx_rate = tx_max; + return 0; +} + +struct nsim_rate_node { + struct dentry *ddir; + struct dentry *rate_parent; + char *parent_name; + u16 tx_share; + u16 tx_max; +}; + +static int nsim_node_tx_share_set(struct devlink_rate *devlink_rate, void *priv, + u64 tx_share, struct netlink_ext_ack *extack) +{ + struct nsim_rate_node *nsim_node = priv; + int err; + + err = nsim_rate_bytes_to_units("tx_share", &tx_share, extack); + if (err) + return err; + + nsim_node->tx_share = tx_share; + return 0; +} + +static int nsim_node_tx_max_set(struct devlink_rate *devlink_rate, void *priv, + u64 tx_max, struct netlink_ext_ack *extack) +{ + struct nsim_rate_node *nsim_node = priv; + int err; + + err = nsim_rate_bytes_to_units("tx_max", &tx_max, extack); + if (err) + return err; + + nsim_node->tx_max = tx_max; + return 0; +} + +static int nsim_rate_node_new(struct devlink_rate *node, void **priv, + struct netlink_ext_ack *extack) +{ + struct nsim_dev *nsim_dev = devlink_priv(node->devlink); + struct nsim_rate_node *nsim_node; + int err; + + if (!nsim_esw_mode_is_switchdev(nsim_dev)) { + NL_SET_ERR_MSG_MOD(extack, "Node creation allowed only in switchdev mode."); + return -EOPNOTSUPP; + } + + nsim_node = kzalloc(sizeof(*nsim_node), GFP_KERNEL); + if (!nsim_node) + return -ENOMEM; + + nsim_node->ddir = debugfs_create_dir(node->name, nsim_dev->nodes_ddir); + if (!nsim_node->ddir) { + err = -ENOMEM; + goto err_node; + } + debugfs_create_u16("tx_share", 0400, nsim_node->ddir, &nsim_node->tx_share); + debugfs_create_u16("tx_max", 0400, nsim_node->ddir, &nsim_node->tx_max); + nsim_node->rate_parent = debugfs_create_file("rate_parent", 0400, + nsim_node->ddir, + &nsim_node->parent_name, + &nsim_dev_rate_parent_fops); + if (IS_ERR(nsim_node->rate_parent)) { + err = PTR_ERR(nsim_node->rate_parent); + goto err_ddir; + } + + *priv = nsim_node; + return 0; + +err_ddir: + debugfs_remove_recursive(nsim_node->ddir); +err_node: + kfree(nsim_node); + return err; +} + +static int nsim_rate_node_del(struct devlink_rate *node, void *priv, + struct netlink_ext_ack *extack) +{ + struct nsim_rate_node *nsim_node = priv; + + debugfs_remove(nsim_node->rate_parent); + debugfs_remove_recursive(nsim_node->ddir); + kfree(nsim_node); + return 0; +} + +static int nsim_rate_leaf_parent_set(struct devlink_rate *child, + struct devlink_rate *parent, + void *priv_child, void *priv_parent, + struct netlink_ext_ack *extack) +{ + struct nsim_dev_port *nsim_dev_port = priv_child; + + if (parent) + nsim_dev_port->parent_name = parent->name; + else + nsim_dev_port->parent_name = NULL; + return 0; +} + +static int nsim_rate_node_parent_set(struct devlink_rate *child, + struct devlink_rate *parent, + void *priv_child, void *priv_parent, + struct netlink_ext_ack *extack) +{ + struct nsim_rate_node *nsim_node = priv_child; + + if (parent) + nsim_node->parent_name = parent->name; + else + nsim_node->parent_name = NULL; + return 0; +} + static const struct devlink_ops nsim_dev_devlink_ops = { + .eswitch_mode_set = nsim_devlink_eswitch_mode_set, + .eswitch_mode_get = nsim_devlink_eswitch_mode_get, .supported_flash_update_params = DEVLINK_SUPPORT_FLASH_UPDATE_COMPONENT | DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK, .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT), @@ -905,32 +1232,51 @@ static const struct devlink_ops nsim_dev_devlink_ops = { .trap_group_set = nsim_dev_devlink_trap_group_set, .trap_policer_set = nsim_dev_devlink_trap_policer_set, .trap_policer_counter_get = nsim_dev_devlink_trap_policer_counter_get, + .rate_leaf_tx_share_set = nsim_leaf_tx_share_set, + .rate_leaf_tx_max_set = nsim_leaf_tx_max_set, + .rate_node_tx_share_set = nsim_node_tx_share_set, + .rate_node_tx_max_set = nsim_node_tx_max_set, + .rate_node_new = nsim_rate_node_new, + .rate_node_del = nsim_rate_node_del, + .rate_leaf_parent_set = nsim_rate_leaf_parent_set, + .rate_node_parent_set = nsim_rate_node_parent_set, }; #define NSIM_DEV_MAX_MACS_DEFAULT 32 #define NSIM_DEV_TEST1_DEFAULT true -static int __nsim_dev_port_add(struct nsim_dev *nsim_dev, +static int __nsim_dev_port_add(struct nsim_dev *nsim_dev, enum nsim_dev_port_type type, unsigned int port_index) { + struct nsim_bus_dev *nsim_bus_dev = nsim_dev->nsim_bus_dev; struct devlink_port_attrs attrs = {}; struct nsim_dev_port *nsim_dev_port; struct devlink_port *devlink_port; int err; + if (type == NSIM_DEV_PORT_TYPE_VF && !nsim_bus_dev->num_vfs) + return -EINVAL; + nsim_dev_port = kzalloc(sizeof(*nsim_dev_port), GFP_KERNEL); if (!nsim_dev_port) return -ENOMEM; - nsim_dev_port->port_index = port_index; + nsim_dev_port->port_index = nsim_dev_port_index(type, port_index); + nsim_dev_port->port_type = type; devlink_port = &nsim_dev_port->devlink_port; - attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; - attrs.phys.port_number = port_index + 1; + if (nsim_dev_port_is_pf(nsim_dev_port)) { + attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; + attrs.phys.port_number = port_index + 1; + } else { + attrs.flavour = DEVLINK_PORT_FLAVOUR_PCI_VF; + attrs.pci_vf.pf = 0; + attrs.pci_vf.vf = port_index; + } memcpy(attrs.switch_id.id, nsim_dev->switch_id.id, nsim_dev->switch_id.id_len); attrs.switch_id.id_len = nsim_dev->switch_id.id_len; devlink_port_attrs_set(devlink_port, &attrs); err = devlink_port_register(priv_to_devlink(nsim_dev), devlink_port, - port_index); + nsim_dev_port->port_index); if (err) goto err_port_free; @@ -944,11 +1290,20 @@ static int __nsim_dev_port_add(struct nsim_dev *nsim_dev, goto err_port_debugfs_exit; } + if (nsim_dev_port_is_vf(nsim_dev_port)) { + err = devlink_rate_leaf_create(&nsim_dev_port->devlink_port, + nsim_dev_port); + if (err) + goto err_nsim_destroy; + } + devlink_port_type_eth_set(devlink_port, nsim_dev_port->ns->netdev); list_add(&nsim_dev_port->list, &nsim_dev->port_list); return 0; +err_nsim_destroy: + nsim_destroy(nsim_dev_port->ns); err_port_debugfs_exit: nsim_dev_port_debugfs_exit(nsim_dev_port); err_dl_port_unregister: @@ -963,6 +1318,8 @@ static void __nsim_dev_port_del(struct nsim_dev_port *nsim_dev_port) struct devlink_port *devlink_port = &nsim_dev_port->devlink_port; list_del(&nsim_dev_port->list); + if (nsim_dev_port_is_vf(nsim_dev_port)) + devlink_rate_leaf_destroy(&nsim_dev_port->devlink_port); devlink_port_type_clear(devlink_port); nsim_destroy(nsim_dev_port->ns); nsim_dev_port_debugfs_exit(nsim_dev_port); @@ -987,7 +1344,7 @@ static int nsim_dev_port_add_all(struct nsim_dev *nsim_dev, int i, err; for (i = 0; i < port_count; i++) { - err = __nsim_dev_port_add(nsim_dev, i); + err = __nsim_dev_port_add(nsim_dev, NSIM_DEV_PORT_TYPE_PF, i); if (err) goto err_port_del_all; } @@ -1134,6 +1491,7 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev) devlink_params_publish(devlink); devlink_reload_enable(devlink); + nsim_dev->esw_mode = DEVLINK_ESWITCH_MODE_LEGACY; return 0; err_psample_exit: @@ -1169,6 +1527,12 @@ static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev) if (devlink_is_reload_failed(devlink)) return; debugfs_remove(nsim_dev->take_snapshot); + + mutex_lock(&nsim_dev->nsim_bus_dev->vfs_lock); + if (nsim_dev->nsim_bus_dev->num_vfs) + nsim_bus_dev_vfs_disable(nsim_dev->nsim_bus_dev); + mutex_unlock(&nsim_dev->nsim_bus_dev->vfs_lock); + nsim_dev_port_del_all(nsim_dev); nsim_dev_psample_exit(nsim_dev); nsim_dev_health_exit(nsim_dev); @@ -1197,32 +1561,34 @@ void nsim_dev_remove(struct nsim_bus_dev *nsim_bus_dev) } static struct nsim_dev_port * -__nsim_dev_port_lookup(struct nsim_dev *nsim_dev, unsigned int port_index) +__nsim_dev_port_lookup(struct nsim_dev *nsim_dev, enum nsim_dev_port_type type, + unsigned int port_index) { struct nsim_dev_port *nsim_dev_port; + port_index = nsim_dev_port_index(type, port_index); list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list) if (nsim_dev_port->port_index == port_index) return nsim_dev_port; return NULL; } -int nsim_dev_port_add(struct nsim_bus_dev *nsim_bus_dev, +int nsim_dev_port_add(struct nsim_bus_dev *nsim_bus_dev, enum nsim_dev_port_type type, unsigned int port_index) { struct nsim_dev *nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev); int err; mutex_lock(&nsim_dev->port_list_lock); - if (__nsim_dev_port_lookup(nsim_dev, port_index)) + if (__nsim_dev_port_lookup(nsim_dev, type, port_index)) err = -EEXIST; else - err = __nsim_dev_port_add(nsim_dev, port_index); + err = __nsim_dev_port_add(nsim_dev, type, port_index); mutex_unlock(&nsim_dev->port_list_lock); return err; } -int nsim_dev_port_del(struct nsim_bus_dev *nsim_bus_dev, +int nsim_dev_port_del(struct nsim_bus_dev *nsim_bus_dev, enum nsim_dev_port_type type, unsigned int port_index) { struct nsim_dev *nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev); @@ -1230,7 +1596,7 @@ int nsim_dev_port_del(struct nsim_bus_dev *nsim_bus_dev, int err = 0; mutex_lock(&nsim_dev->port_list_lock); - nsim_dev_port = __nsim_dev_port_lookup(nsim_dev, port_index); + nsim_dev_port = __nsim_dev_port_lookup(nsim_dev, type, port_index); if (!nsim_dev_port) err = -ENOENT; else diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 659d3dceb687..c3aeb15843e2 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -113,6 +113,11 @@ static int nsim_set_vf_rate(struct net_device *dev, int vf, int min, int max) struct netdevsim *ns = netdev_priv(dev); struct nsim_bus_dev *nsim_bus_dev = ns->nsim_bus_dev; + if (nsim_esw_mode_is_switchdev(ns->nsim_dev)) { + pr_err("Not supported in switchdev mode. Please use devlink API.\n"); + return -EOPNOTSUPP; + } + if (vf >= nsim_bus_dev->num_vfs) return -EINVAL; @@ -261,6 +266,18 @@ static const struct net_device_ops nsim_netdev_ops = { .ndo_get_devlink_port = nsim_get_devlink_port, }; +static const struct net_device_ops nsim_vf_netdev_ops = { + .ndo_start_xmit = nsim_start_xmit, + .ndo_set_rx_mode = nsim_set_rx_mode, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, + .ndo_change_mtu = nsim_change_mtu, + .ndo_get_stats64 = nsim_get_stats64, + .ndo_setup_tc = nsim_setup_tc, + .ndo_set_features = nsim_set_features, + .ndo_get_devlink_port = nsim_get_devlink_port, +}; + static void nsim_setup(struct net_device *dev) { ether_setup(dev); @@ -280,6 +297,49 @@ static void nsim_setup(struct net_device *dev) dev->max_mtu = ETH_MAX_MTU; } +static int nsim_init_netdevsim(struct netdevsim *ns) +{ + int err; + + ns->netdev->netdev_ops = &nsim_netdev_ops; + + err = nsim_udp_tunnels_info_create(ns->nsim_dev, ns->netdev); + if (err) + return err; + + rtnl_lock(); + err = nsim_bpf_init(ns); + if (err) + goto err_utn_destroy; + + nsim_ipsec_init(ns); + + err = register_netdevice(ns->netdev); + if (err) + goto err_ipsec_teardown; + rtnl_unlock(); + return 0; + +err_ipsec_teardown: + nsim_ipsec_teardown(ns); + nsim_bpf_uninit(ns); +err_utn_destroy: + rtnl_unlock(); + nsim_udp_tunnels_info_destroy(ns->netdev); + return err; +} + +static int nsim_init_netdevsim_vf(struct netdevsim *ns) +{ + int err; + + ns->netdev->netdev_ops = &nsim_vf_netdev_ops; + rtnl_lock(); + err = register_netdevice(ns->netdev); + rtnl_unlock(); + return err; +} + struct netdevsim * nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) { @@ -299,33 +359,15 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) ns->nsim_dev_port = nsim_dev_port; ns->nsim_bus_dev = nsim_dev->nsim_bus_dev; SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev); - dev->netdev_ops = &nsim_netdev_ops; nsim_ethtool_init(ns); - - err = nsim_udp_tunnels_info_create(nsim_dev, dev); + if (nsim_dev_port_is_pf(nsim_dev_port)) + err = nsim_init_netdevsim(ns); + else + err = nsim_init_netdevsim_vf(ns); if (err) goto err_free_netdev; - - rtnl_lock(); - err = nsim_bpf_init(ns); - if (err) - goto err_utn_destroy; - - nsim_ipsec_init(ns); - - err = register_netdevice(dev); - if (err) - goto err_ipsec_teardown; - rtnl_unlock(); - return ns; -err_ipsec_teardown: - nsim_ipsec_teardown(ns); - nsim_bpf_uninit(ns); -err_utn_destroy: - rtnl_unlock(); - nsim_udp_tunnels_info_destroy(dev); err_free_netdev: free_netdev(dev); return ERR_PTR(err); @@ -337,10 +379,13 @@ void nsim_destroy(struct netdevsim *ns) rtnl_lock(); unregister_netdevice(dev); - nsim_ipsec_teardown(ns); - nsim_bpf_uninit(ns); + if (nsim_dev_port_is_pf(ns->nsim_dev_port)) { + nsim_ipsec_teardown(ns); + nsim_bpf_uninit(ns); + } rtnl_unlock(); - nsim_udp_tunnels_info_destroy(dev); + if (nsim_dev_port_is_pf(ns->nsim_dev_port)) + nsim_udp_tunnels_info_destroy(dev); free_netdev(dev); } diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 7ff24e03577b..cdfdf2a99578 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -197,11 +197,22 @@ static inline void nsim_dev_psample_exit(struct nsim_dev *nsim_dev) } #endif +enum nsim_dev_port_type { + NSIM_DEV_PORT_TYPE_PF, + NSIM_DEV_PORT_TYPE_VF, +}; + +#define NSIM_DEV_VF_PORT_INDEX_BASE 128 +#define NSIM_DEV_VF_PORT_INDEX_MAX UINT_MAX + struct nsim_dev_port { struct list_head list; struct devlink_port devlink_port; unsigned int port_index; + enum nsim_dev_port_type port_type; struct dentry *ddir; + struct dentry *rate_parent; + char *parent_name; struct netdevsim *ns; }; @@ -212,6 +223,8 @@ struct nsim_dev { struct dentry *ddir; struct dentry *ports_ddir; struct dentry *take_snapshot; + struct dentry *max_vfs; + struct dentry *nodes_ddir; struct bpf_offload_dev *bpf_dev; bool bpf_bind_accept; bool bpf_bind_verifier_accept; @@ -247,8 +260,22 @@ struct nsim_dev { u32 sleep; } udp_ports; struct nsim_dev_psample *psample; + u16 esw_mode; }; +int nsim_esw_legacy_enable(struct nsim_dev *nsim_dev, struct netlink_ext_ack *extack); +int nsim_esw_switchdev_enable(struct nsim_dev *nsim_dev, struct netlink_ext_ack *extack); + +static inline bool nsim_esw_mode_is_legacy(struct nsim_dev *nsim_dev) +{ + return nsim_dev->esw_mode == DEVLINK_ESWITCH_MODE_LEGACY; +} + +static inline bool nsim_esw_mode_is_switchdev(struct nsim_dev *nsim_dev) +{ + return nsim_dev->esw_mode == DEVLINK_ESWITCH_MODE_SWITCHDEV; +} + static inline struct net *nsim_dev_net(struct nsim_dev *nsim_dev) { return devlink_net(priv_to_devlink(nsim_dev)); @@ -259,8 +286,10 @@ void nsim_dev_exit(void); int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev); void nsim_dev_remove(struct nsim_bus_dev *nsim_bus_dev); int nsim_dev_port_add(struct nsim_bus_dev *nsim_bus_dev, + enum nsim_dev_port_type type, unsigned int port_index); int nsim_dev_port_del(struct nsim_bus_dev *nsim_bus_dev, + enum nsim_dev_port_type type, unsigned int port_index); struct nsim_fib_data *nsim_fib_create(struct devlink *devlink, @@ -269,6 +298,23 @@ void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *fib_data); u64 nsim_fib_get_val(struct nsim_fib_data *fib_data, enum nsim_resource_id res_id, bool max); +ssize_t nsim_bus_dev_max_vfs_read(struct file *file, + char __user *data, + size_t count, loff_t *ppos); +ssize_t nsim_bus_dev_max_vfs_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos); +void nsim_bus_dev_vfs_disable(struct nsim_bus_dev *nsim_bus_dev); + +static inline bool nsim_dev_port_is_pf(struct nsim_dev_port *nsim_dev_port) +{ + return nsim_dev_port->port_type == NSIM_DEV_PORT_TYPE_PF; +} + +static inline bool nsim_dev_port_is_vf(struct nsim_dev_port *nsim_dev_port) +{ + return nsim_dev_port->port_type == NSIM_DEV_PORT_TYPE_VF; +} #if IS_ENABLED(CONFIG_XFRM_OFFLOAD) void nsim_ipsec_init(struct netdevsim *ns); void nsim_ipsec_teardown(struct netdevsim *ns); @@ -308,7 +354,9 @@ struct nsim_bus_dev { struct net *initial_net; /* Purpose of this is to carry net pointer * during the probe time only. */ + unsigned int max_vfs; unsigned int num_vfs; + struct mutex vfs_lock; /* Protects vfconfigs */ struct nsim_vf_config *vfconfigs; /* Lock for devlink->reload_enabled in netdevsim module */ struct mutex nsim_bus_reload_lock; diff --git a/include/net/devlink.h b/include/net/devlink.h index 7c984cadfec4..eb045f1b5d1d 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -34,6 +34,7 @@ struct devlink_ops; struct devlink { struct list_head list; struct list_head port_list; + struct list_head rate_list; struct list_head sb_list; struct list_head dpipe_table_list; struct list_head resource_list; @@ -133,6 +134,24 @@ struct devlink_port_attrs { }; }; +struct devlink_rate { + struct list_head list; + enum devlink_rate_type type; + struct devlink *devlink; + void *priv; + u64 tx_share; + u64 tx_max; + + struct devlink_rate *parent; + union { + struct devlink_port *devlink_port; + struct { + char *name; + refcount_t refcnt; + }; + }; +}; + struct devlink_port { struct list_head list; struct list_head param_list; @@ -152,6 +171,8 @@ struct devlink_port { struct delayed_work type_warn_dw; struct list_head reporter_list; struct mutex reporters_lock; /* Protects reporter_list */ + + struct devlink_rate *devlink_rate; }; struct devlink_port_new_attrs { @@ -1453,6 +1474,30 @@ struct devlink_ops { struct devlink_port *port, enum devlink_port_fn_state state, struct netlink_ext_ack *extack); + + /** + * Rate control callbacks. + */ + int (*rate_leaf_tx_share_set)(struct devlink_rate *devlink_rate, void *priv, + u64 tx_share, struct netlink_ext_ack *extack); + int (*rate_leaf_tx_max_set)(struct devlink_rate *devlink_rate, void *priv, + u64 tx_max, struct netlink_ext_ack *extack); + int (*rate_node_tx_share_set)(struct devlink_rate *devlink_rate, void *priv, + u64 tx_share, struct netlink_ext_ack *extack); + int (*rate_node_tx_max_set)(struct devlink_rate *devlink_rate, void *priv, + u64 tx_max, struct netlink_ext_ack *extack); + int (*rate_node_new)(struct devlink_rate *rate_node, void **priv, + struct netlink_ext_ack *extack); + int (*rate_node_del)(struct devlink_rate *rate_node, void *priv, + struct netlink_ext_ack *extack); + int (*rate_leaf_parent_set)(struct devlink_rate *child, + struct devlink_rate *parent, + void *priv_child, void *priv_parent, + struct netlink_ext_ack *extack); + int (*rate_node_parent_set)(struct devlink_rate *child, + struct devlink_rate *parent, + void *priv_child, void *priv_parent, + struct netlink_ext_ack *extack); }; static inline void *devlink_priv(struct devlink *devlink) @@ -1512,6 +1557,9 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 controller, u16 pf, u32 sf, bool external); +int devlink_rate_leaf_create(struct devlink_port *port, void *priv); +void devlink_rate_leaf_destroy(struct devlink_port *devlink_port); +void devlink_rate_nodes_destroy(struct devlink *devlink); int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index f6008b2fa60f..32f53a0069d6 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -126,6 +126,11 @@ enum devlink_command { DEVLINK_CMD_HEALTH_REPORTER_TEST, + DEVLINK_CMD_RATE_GET, /* can dump */ + DEVLINK_CMD_RATE_SET, + DEVLINK_CMD_RATE_NEW, + DEVLINK_CMD_RATE_DEL, + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 @@ -206,6 +211,11 @@ enum devlink_port_flavour { */ }; +enum devlink_rate_type { + DEVLINK_RATE_TYPE_LEAF, + DEVLINK_RATE_TYPE_NODE, +}; + enum devlink_param_cmode { DEVLINK_PARAM_CMODE_RUNTIME, DEVLINK_PARAM_CMODE_DRIVERINIT, @@ -534,6 +544,13 @@ enum devlink_attr { DEVLINK_ATTR_RELOAD_ACTION_STATS, /* nested */ DEVLINK_ATTR_PORT_PCI_SF_NUMBER, /* u32 */ + + DEVLINK_ATTR_RATE_TYPE, /* u16 */ + DEVLINK_ATTR_RATE_TX_SHARE, /* u64 */ + DEVLINK_ATTR_RATE_TX_MAX, /* u64 */ + DEVLINK_ATTR_RATE_NODE_NAME, /* string */ + DEVLINK_ATTR_RATE_PARENT_NODE_NAME, /* string */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/net/core/devlink.c b/net/core/devlink.c index 69681f19388e..464f56408247 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -190,6 +190,80 @@ static struct devlink_port *devlink_port_get_from_info(struct devlink *devlink, return devlink_port_get_from_attrs(devlink, info->attrs); } +static inline bool +devlink_rate_is_leaf(struct devlink_rate *devlink_rate) +{ + return devlink_rate->type == DEVLINK_RATE_TYPE_LEAF; +} + +static inline bool +devlink_rate_is_node(struct devlink_rate *devlink_rate) +{ + return devlink_rate->type == DEVLINK_RATE_TYPE_NODE; +} + +static struct devlink_rate * +devlink_rate_leaf_get_from_info(struct devlink *devlink, struct genl_info *info) +{ + struct devlink_rate *devlink_rate; + struct devlink_port *devlink_port; + + devlink_port = devlink_port_get_from_attrs(devlink, info->attrs); + if (IS_ERR(devlink_port)) + return ERR_CAST(devlink_port); + devlink_rate = devlink_port->devlink_rate; + return devlink_rate ?: ERR_PTR(-ENODEV); +} + +static struct devlink_rate * +devlink_rate_node_get_by_name(struct devlink *devlink, const char *node_name) +{ + static struct devlink_rate *devlink_rate; + + list_for_each_entry(devlink_rate, &devlink->rate_list, list) { + if (devlink_rate_is_node(devlink_rate) && + !strcmp(node_name, devlink_rate->name)) + return devlink_rate; + } + return ERR_PTR(-ENODEV); +} + +static struct devlink_rate * +devlink_rate_node_get_from_attrs(struct devlink *devlink, struct nlattr **attrs) +{ + const char *rate_node_name; + size_t len; + + if (!attrs[DEVLINK_ATTR_RATE_NODE_NAME]) + return ERR_PTR(-EINVAL); + rate_node_name = nla_data(attrs[DEVLINK_ATTR_RATE_NODE_NAME]); + len = strlen(rate_node_name); + /* Name cannot be empty or decimal number */ + if (!len || strspn(rate_node_name, "0123456789") == len) + return ERR_PTR(-EINVAL); + + return devlink_rate_node_get_by_name(devlink, rate_node_name); +} + +static struct devlink_rate * +devlink_rate_node_get_from_info(struct devlink *devlink, struct genl_info *info) +{ + return devlink_rate_node_get_from_attrs(devlink, info->attrs); +} + +static struct devlink_rate * +devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info) +{ + struct nlattr **attrs = info->attrs; + + if (attrs[DEVLINK_ATTR_PORT_INDEX]) + return devlink_rate_leaf_get_from_info(devlink, info); + else if (attrs[DEVLINK_ATTR_RATE_NODE_NAME]) + return devlink_rate_node_get_from_info(devlink, info); + else + return ERR_PTR(-EINVAL); +} + struct devlink_sb { struct list_head list; unsigned int index; @@ -408,12 +482,14 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id) #define DEVLINK_NL_FLAG_NEED_PORT BIT(0) #define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1) +#define DEVLINK_NL_FLAG_NEED_RATE BIT(2) +#define DEVLINK_NL_FLAG_NEED_RATE_NODE BIT(3) /* The per devlink instance lock is taken by default in the pre-doit * operation, yet several commands do not require this. The global * devlink lock is taken and protects from disruption by user-calls. */ -#define DEVLINK_NL_FLAG_NO_LOCK BIT(2) +#define DEVLINK_NL_FLAG_NO_LOCK BIT(4) static int devlink_nl_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) @@ -442,6 +518,24 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, devlink_port = devlink_port_get_from_info(devlink, info); if (!IS_ERR(devlink_port)) info->user_ptr[1] = devlink_port; + } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE) { + struct devlink_rate *devlink_rate; + + devlink_rate = devlink_rate_get_from_info(devlink, info); + if (IS_ERR(devlink_rate)) { + err = PTR_ERR(devlink_rate); + goto unlock; + } + info->user_ptr[1] = devlink_rate; + } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE_NODE) { + struct devlink_rate *rate_node; + + rate_node = devlink_rate_node_get_from_info(devlink, info); + if (IS_ERR(rate_node)) { + err = PTR_ERR(rate_node); + goto unlock; + } + info->user_ptr[1] = rate_node; } return 0; @@ -749,6 +843,56 @@ devlink_port_fn_hw_addr_fill(struct devlink *devlink, const struct devlink_ops * return 0; } +static int devlink_nl_rate_fill(struct sk_buff *msg, + struct devlink *devlink, + struct devlink_rate *devlink_rate, + enum devlink_command cmd, u32 portid, + u32 seq, int flags, + struct netlink_ext_ack *extack) +{ + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + + if (nla_put_u16(msg, DEVLINK_ATTR_RATE_TYPE, devlink_rate->type)) + goto nla_put_failure; + + if (devlink_rate_is_leaf(devlink_rate)) { + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, + devlink_rate->devlink_port->index)) + goto nla_put_failure; + } else if (devlink_rate_is_node(devlink_rate)) { + if (nla_put_string(msg, DEVLINK_ATTR_RATE_NODE_NAME, + devlink_rate->name)) + goto nla_put_failure; + } + + if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_SHARE, + devlink_rate->tx_share, DEVLINK_ATTR_PAD)) + goto nla_put_failure; + + if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_MAX, + devlink_rate->tx_max, DEVLINK_ATTR_PAD)) + goto nla_put_failure; + + if (devlink_rate->parent) + if (nla_put_string(msg, DEVLINK_ATTR_RATE_PARENT_NODE_NAME, + devlink_rate->parent->name)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + static bool devlink_port_fn_state_valid(enum devlink_port_fn_state state) { @@ -920,6 +1064,111 @@ static void devlink_port_notify(struct devlink_port *devlink_port, msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); } +static void devlink_rate_notify(struct devlink_rate *devlink_rate, + enum devlink_command cmd) +{ + struct devlink *devlink = devlink_rate->devlink; + struct sk_buff *msg; + int err; + + WARN_ON(cmd != DEVLINK_CMD_RATE_NEW && + cmd != DEVLINK_CMD_RATE_DEL); + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return; + + err = devlink_nl_rate_fill(msg, devlink, devlink_rate, + cmd, 0, 0, 0, NULL); + if (err) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), + msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); +} + +static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct devlink_rate *devlink_rate; + struct devlink *devlink; + int start = cb->args[0]; + int idx = 0; + int err = 0; + + mutex_lock(&devlink_mutex); + list_for_each_entry(devlink, &devlink_list, list) { + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + continue; + mutex_lock(&devlink->lock); + list_for_each_entry(devlink_rate, &devlink->rate_list, list) { + enum devlink_command cmd = DEVLINK_CMD_RATE_NEW; + u32 id = NETLINK_CB(cb->skb).portid; + + if (idx < start) { + idx++; + continue; + } + err = devlink_nl_rate_fill(msg, devlink, + devlink_rate, + cmd, id, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, NULL); + if (err) { + mutex_unlock(&devlink->lock); + goto out; + } + idx++; + } + mutex_unlock(&devlink->lock); + } +out: + mutex_unlock(&devlink_mutex); + if (err != -EMSGSIZE) + return err; + + cb->args[0] = idx; + return msg->len; +} + +static int devlink_nl_cmd_rate_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_rate *devlink_rate = info->user_ptr[1]; + struct devlink *devlink = devlink_rate->devlink; + struct sk_buff *msg; + int err; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_nl_rate_fill(msg, devlink, devlink_rate, + DEVLINK_CMD_RATE_NEW, + info->snd_portid, info->snd_seq, 0, + info->extack); + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static bool +devlink_rate_is_parent_node(struct devlink_rate *devlink_rate, + struct devlink_rate *parent) +{ + while (parent) { + if (parent == devlink_rate) + return true; + parent = parent->parent; + } + return false; +} + static int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; @@ -1340,6 +1589,255 @@ static int devlink_nl_cmd_port_del_doit(struct sk_buff *skb, return devlink->ops->port_del(devlink, port_index, extack); } +static int +devlink_nl_rate_parent_node_set(struct devlink_rate *devlink_rate, + struct genl_info *info, + struct nlattr *nla_parent) +{ + struct devlink *devlink = devlink_rate->devlink; + const char *parent_name = nla_data(nla_parent); + const struct devlink_ops *ops = devlink->ops; + size_t len = strlen(parent_name); + struct devlink_rate *parent; + int err = -EOPNOTSUPP; + + parent = devlink_rate->parent; + if (parent && len) { + NL_SET_ERR_MSG_MOD(info->extack, "Rate object already has parent."); + return -EBUSY; + } else if (parent && !len) { + if (devlink_rate_is_leaf(devlink_rate)) + err = ops->rate_leaf_parent_set(devlink_rate, NULL, + devlink_rate->priv, NULL, + info->extack); + else if (devlink_rate_is_node(devlink_rate)) + err = ops->rate_node_parent_set(devlink_rate, NULL, + devlink_rate->priv, NULL, + info->extack); + if (err) + return err; + + refcount_dec(&parent->refcnt); + devlink_rate->parent = NULL; + } else if (!parent && len) { + parent = devlink_rate_node_get_by_name(devlink, parent_name); + if (IS_ERR(parent)) + return -ENODEV; + + if (parent == devlink_rate) { + NL_SET_ERR_MSG_MOD(info->extack, "Parent to self is not allowed"); + return -EINVAL; + } + + if (devlink_rate_is_node(devlink_rate) && + devlink_rate_is_parent_node(devlink_rate, parent->parent)) { + NL_SET_ERR_MSG_MOD(info->extack, "Node is already a parent of parent node."); + return -EEXIST; + } + + if (devlink_rate_is_leaf(devlink_rate)) + err = ops->rate_leaf_parent_set(devlink_rate, parent, + devlink_rate->priv, parent->priv, + info->extack); + else if (devlink_rate_is_node(devlink_rate)) + err = ops->rate_node_parent_set(devlink_rate, parent, + devlink_rate->priv, parent->priv, + info->extack); + if (err) + return err; + + refcount_inc(&parent->refcnt); + devlink_rate->parent = parent; + } + + return 0; +} + +static int devlink_nl_rate_set(struct devlink_rate *devlink_rate, + const struct devlink_ops *ops, + struct genl_info *info) +{ + struct nlattr *nla_parent, **attrs = info->attrs; + int err = -EOPNOTSUPP; + u64 rate; + + if (attrs[DEVLINK_ATTR_RATE_TX_SHARE]) { + rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_SHARE]); + if (devlink_rate_is_leaf(devlink_rate)) + err = ops->rate_leaf_tx_share_set(devlink_rate, devlink_rate->priv, + rate, info->extack); + else if (devlink_rate_is_node(devlink_rate)) + err = ops->rate_node_tx_share_set(devlink_rate, devlink_rate->priv, + rate, info->extack); + if (err) + return err; + devlink_rate->tx_share = rate; + } + + if (attrs[DEVLINK_ATTR_RATE_TX_MAX]) { + rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_MAX]); + if (devlink_rate_is_leaf(devlink_rate)) + err = ops->rate_leaf_tx_max_set(devlink_rate, devlink_rate->priv, + rate, info->extack); + else if (devlink_rate_is_node(devlink_rate)) + err = ops->rate_node_tx_max_set(devlink_rate, devlink_rate->priv, + rate, info->extack); + if (err) + return err; + devlink_rate->tx_max = rate; + } + + nla_parent = attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME]; + if (nla_parent) { + err = devlink_nl_rate_parent_node_set(devlink_rate, info, + nla_parent); + if (err) + return err; + } + + return 0; +} + +static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops, + struct genl_info *info, + enum devlink_rate_type type) +{ + struct nlattr **attrs = info->attrs; + + if (type == DEVLINK_RATE_TYPE_LEAF) { + if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_leaf_tx_share_set) { + NL_SET_ERR_MSG_MOD(info->extack, "TX share set isn't supported for the leafs"); + return false; + } + if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_leaf_tx_max_set) { + NL_SET_ERR_MSG_MOD(info->extack, "TX max set isn't supported for the leafs"); + return false; + } + if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] && + !ops->rate_leaf_parent_set) { + NL_SET_ERR_MSG_MOD(info->extack, "Parent set isn't supported for the leafs"); + return false; + } + } else if (type == DEVLINK_RATE_TYPE_NODE) { + if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_node_tx_share_set) { + NL_SET_ERR_MSG_MOD(info->extack, "TX share set isn't supported for the nodes"); + return false; + } + if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_node_tx_max_set) { + NL_SET_ERR_MSG_MOD(info->extack, "TX max set isn't supported for the nodes"); + return false; + } + if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] && + !ops->rate_node_parent_set) { + NL_SET_ERR_MSG_MOD(info->extack, "Parent set isn't supported for the nodes"); + return false; + } + } else { + WARN_ON("Unknown type of rate object"); + return false; + } + + return true; +} + +static int devlink_nl_cmd_rate_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_rate *devlink_rate = info->user_ptr[1]; + struct devlink *devlink = devlink_rate->devlink; + const struct devlink_ops *ops = devlink->ops; + int err; + + if (!ops || !devlink_rate_set_ops_supported(ops, info, devlink_rate->type)) + return -EOPNOTSUPP; + + err = devlink_nl_rate_set(devlink_rate, ops, info); + + if (!err) + devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW); + return err; +} + +static int devlink_nl_cmd_rate_new_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_rate *rate_node; + const struct devlink_ops *ops; + int err; + + ops = devlink->ops; + if (!ops || !ops->rate_node_new || !ops->rate_node_del) { + NL_SET_ERR_MSG_MOD(info->extack, "Rate nodes aren't supported"); + return -EOPNOTSUPP; + } + + if (!devlink_rate_set_ops_supported(ops, info, DEVLINK_RATE_TYPE_NODE)) + return -EOPNOTSUPP; + + rate_node = devlink_rate_node_get_from_attrs(devlink, info->attrs); + if (!IS_ERR(rate_node)) + return -EEXIST; + else if (rate_node == ERR_PTR(-EINVAL)) + return -EINVAL; + + rate_node = kzalloc(sizeof(*rate_node), GFP_KERNEL); + if (!rate_node) + return -ENOMEM; + + rate_node->devlink = devlink; + rate_node->type = DEVLINK_RATE_TYPE_NODE; + rate_node->name = nla_strdup(info->attrs[DEVLINK_ATTR_RATE_NODE_NAME], GFP_KERNEL); + if (!rate_node->name) { + err = -ENOMEM; + goto err_strdup; + } + + err = ops->rate_node_new(rate_node, &rate_node->priv, info->extack); + if (err) + goto err_node_new; + + err = devlink_nl_rate_set(rate_node, ops, info); + if (err) + goto err_rate_set; + + refcount_set(&rate_node->refcnt, 1); + list_add(&rate_node->list, &devlink->rate_list); + devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW); + return 0; + +err_rate_set: + ops->rate_node_del(rate_node, rate_node->priv, info->extack); +err_node_new: + kfree(rate_node->name); +err_strdup: + kfree(rate_node); + return err; +} + +static int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_rate *rate_node = info->user_ptr[1]; + struct devlink *devlink = rate_node->devlink; + const struct devlink_ops *ops = devlink->ops; + int err; + + if (refcount_read(&rate_node->refcnt) > 1) { + NL_SET_ERR_MSG_MOD(info->extack, "Node has children. Cannot delete node."); + return -EBUSY; + } + + devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL); + err = ops->rate_node_del(rate_node, rate_node->priv, info->extack); + if (rate_node->parent) + refcount_dec(&rate_node->parent->refcnt); + list_del(&rate_node->list); + kfree(rate_node->name); + kfree(rate_node); + return err; +} + static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink, struct devlink_sb *devlink_sb, enum devlink_command cmd, u32 portid, @@ -2208,6 +2706,30 @@ static int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb, return genlmsg_reply(msg, info); } +static int devlink_rate_nodes_check(struct devlink *devlink, u16 mode, + struct netlink_ext_ack *extack) +{ + struct devlink_rate *devlink_rate; + u16 old_mode; + int err; + + if (!devlink->ops->eswitch_mode_get) + return -EOPNOTSUPP; + err = devlink->ops->eswitch_mode_get(devlink, &old_mode); + if (err) + return err; + + if (old_mode == mode) + return 0; + + list_for_each_entry(devlink_rate, &devlink->rate_list, list) + if (devlink_rate_is_node(devlink_rate)) { + NL_SET_ERR_MSG_MOD(extack, "Rate node(s) exists."); + return -EBUSY; + } + return 0; +} + static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info) { @@ -2222,6 +2744,9 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, if (!ops->eswitch_mode_set) return -EOPNOTSUPP; mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]); + err = devlink_rate_nodes_check(devlink, mode, info->extack); + if (err) + return err; err = ops->eswitch_mode_set(devlink, mode, info->extack); if (err) return err; @@ -7802,6 +8327,11 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_PORT_PCI_PF_NUMBER] = { .type = NLA_U16 }, [DEVLINK_ATTR_PORT_PCI_SF_NUMBER] = { .type = NLA_U32 }, [DEVLINK_ATTR_PORT_CONTROLLER_NUMBER] = { .type = NLA_U32 }, + [DEVLINK_ATTR_RATE_TYPE] = { .type = NLA_U16 }, + [DEVLINK_ATTR_RATE_TX_SHARE] = { .type = NLA_U64 }, + [DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64 }, + [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING }, }; static const struct genl_small_ops devlink_nl_ops[] = { @@ -7827,6 +8357,30 @@ static const struct genl_small_ops devlink_nl_ops[] = { .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, }, + { + .cmd = DEVLINK_CMD_RATE_GET, + .doit = devlink_nl_cmd_rate_get_doit, + .dumpit = devlink_nl_cmd_rate_get_dumpit, + .internal_flags = DEVLINK_NL_FLAG_NEED_RATE, + /* can be retrieved by unprivileged users */ + }, + { + .cmd = DEVLINK_CMD_RATE_SET, + .doit = devlink_nl_cmd_rate_set_doit, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_RATE, + }, + { + .cmd = DEVLINK_CMD_RATE_NEW, + .doit = devlink_nl_cmd_rate_new_doit, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = DEVLINK_CMD_RATE_DEL, + .doit = devlink_nl_cmd_rate_del_doit, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_RATE_NODE, + }, { .cmd = DEVLINK_CMD_PORT_SPLIT, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, @@ -8202,6 +8756,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size) xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC); __devlink_net_set(devlink, &init_net); INIT_LIST_HEAD(&devlink->port_list); + INIT_LIST_HEAD(&devlink->rate_list); INIT_LIST_HEAD(&devlink->sb_list); INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list); INIT_LIST_HEAD(&devlink->resource_list); @@ -8304,6 +8859,7 @@ void devlink_free(struct devlink *devlink) WARN_ON(!list_empty(&devlink->resource_list)); WARN_ON(!list_empty(&devlink->dpipe_table_list)); WARN_ON(!list_empty(&devlink->sb_list)); + WARN_ON(!list_empty(&devlink->rate_list)); WARN_ON(!list_empty(&devlink->port_list)); xa_destroy(&devlink->snapshot_ids); @@ -8620,6 +9176,108 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 contro } EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_sf_set); +/** + * devlink_rate_leaf_create - create devlink rate leaf + * + * @devlink_port: devlink port object to create rate object on + * @priv: driver private data + * + * Create devlink rate object of type leaf on provided @devlink_port. + * Throws call trace if @devlink_port already has a devlink rate object. + * + * Context: Takes and release devlink->lock . + * + * Return: -ENOMEM if failed to allocate rate object, 0 otherwise. + */ +int +devlink_rate_leaf_create(struct devlink_port *devlink_port, void *priv) +{ + struct devlink *devlink = devlink_port->devlink; + struct devlink_rate *devlink_rate; + + devlink_rate = kzalloc(sizeof(*devlink_rate), GFP_KERNEL); + if (!devlink_rate) + return -ENOMEM; + + mutex_lock(&devlink->lock); + WARN_ON(devlink_port->devlink_rate); + devlink_rate->type = DEVLINK_RATE_TYPE_LEAF; + devlink_rate->devlink = devlink; + devlink_rate->devlink_port = devlink_port; + devlink_rate->priv = priv; + list_add_tail(&devlink_rate->list, &devlink->rate_list); + devlink_port->devlink_rate = devlink_rate; + devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW); + mutex_unlock(&devlink->lock); + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_rate_leaf_create); + +/** + * devlink_rate_leaf_destroy - destroy devlink rate leaf + * + * @devlink_port: devlink port linked to the rate object + * + * Context: Takes and release devlink->lock . + */ +void devlink_rate_leaf_destroy(struct devlink_port *devlink_port) +{ + struct devlink_rate *devlink_rate = devlink_port->devlink_rate; + struct devlink *devlink = devlink_port->devlink; + + if (!devlink_rate) + return; + + mutex_lock(&devlink->lock); + devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_DEL); + list_del(&devlink_rate->list); + devlink_port->devlink_rate = NULL; + mutex_unlock(&devlink->lock); + kfree(devlink_rate); +} +EXPORT_SYMBOL_GPL(devlink_rate_leaf_destroy); + +/** + * devlink_rate_nodes_destroy - destroy all devlink rate nodes on device + * + * @devlink: devlink instance + * + * Unset parent for all rate objects and destroy all rate nodes + * on specified device. + * + * Context: Takes and release devlink->lock . + */ +void devlink_rate_nodes_destroy(struct devlink *devlink) +{ + static struct devlink_rate *devlink_rate, *tmp; + const struct devlink_ops *ops = devlink->ops; + + mutex_lock(&devlink->lock); + list_for_each_entry(devlink_rate, &devlink->rate_list, list) { + if (!devlink_rate->parent) + continue; + + refcount_dec(&devlink_rate->parent->refcnt); + if (devlink_rate_is_leaf(devlink_rate)) + ops->rate_leaf_parent_set(devlink_rate, NULL, devlink_rate->priv, + NULL, NULL); + else if (devlink_rate_is_node(devlink_rate)) + ops->rate_node_parent_set(devlink_rate, NULL, devlink_rate->priv, + NULL, NULL); + } + list_for_each_entry_safe(devlink_rate, tmp, &devlink->rate_list, list) { + if (devlink_rate_is_node(devlink_rate)) { + ops->rate_node_del(devlink_rate, devlink_rate->priv, NULL); + list_del(&devlink_rate->list); + kfree(devlink_rate->name); + kfree(devlink_rate); + } + } + mutex_unlock(&devlink->lock); +} +EXPORT_SYMBOL_GPL(devlink_rate_nodes_destroy); + static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, char *name, size_t len) { diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh index 40909c254365..9de1d123f4f5 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh @@ -5,12 +5,13 @@ lib_dir=$(dirname $0)/../../../net/forwarding ALL_TESTS="fw_flash_test params_test regions_test reload_test \ netns_reload_test resource_test dev_info_test \ - empty_reporter_test dummy_reporter_test" + empty_reporter_test dummy_reporter_test rate_test" NUM_NETIFS=0 source $lib_dir/lib.sh BUS_ADDR=10 PORT_COUNT=4 +VF_COUNT=4 DEV_NAME=netdevsim$BUS_ADDR SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV_NAME/net/ DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV_NAME/ @@ -507,6 +508,170 @@ dummy_reporter_test() log_test "dummy reporter test" } +rate_leafs_get() +{ + local handle=$1 + + cmd_jq "devlink port function rate show -j" \ + '.[] | to_entries | .[] | select(.value.type == "leaf") | .key | select(contains("'$handle'"))' +} + +rate_nodes_get() +{ + local handle=$1 + + cmd_jq "devlink port function rate show -j" \ + '.[] | to_entries | .[] | select(.value.type == "node") | .key | select(contains("'$handle'"))' +} + +rate_attr_set() +{ + local handle=$1 + local name=$2 + local value=$3 + local units=$4 + + devlink port function rate set $handle $name $value$units +} + +rate_attr_get() +{ + local handle=$1 + local name=$2 + + cmd_jq "devlink port function rate show $handle -j" '.[][].'$name +} + +rate_attr_tx_rate_check() +{ + local handle=$1 + local name=$2 + local rate=$3 + local debug_file=$4 + + rate_attr_set $handle $name $rate mbit + check_err $? "Failed to set $name value" + + local debug_value=$(cat $debug_file) + check_err $? "Failed to read $name value from debugfs" + [ "$debug_value" == "$rate" ] + check_err $? "Unexpected $name debug value $debug_value != $rate" + + local api_value=$(( $(rate_attr_get $handle $name) * 8 / 1000000 )) + check_err $? "Failed to get $name attr value" + [ "$api_value" == "$rate" ] + check_err $? "Unexpected $name attr value $api_value != $rate" +} + +rate_attr_parent_check() +{ + local handle=$1 + local parent=$2 + local debug_file=$3 + + rate_attr_set $handle parent $parent + check_err $? "Failed to set parent" + + debug_value=$(cat $debug_file) + check_err $? "Failed to get parent debugfs value" + [ "$debug_value" == "$parent" ] + check_err $? "Unexpected parent debug value $debug_value != $parent" + + api_value=$(rate_attr_get $r_obj parent) + check_err $? "Failed to get parent attr value" + [ "$api_value" == "$parent" ] + check_err $? "Unexpected parent attr value $api_value != $parent" +} + +rate_node_add() +{ + local handle=$1 + + devlink port function rate add $handle +} + +rate_node_del() +{ + local handle=$1 + + devlink port function rate del $handle +} + +rate_test() +{ + RET=0 + + echo $VF_COUNT > /sys/bus/netdevsim/devices/$DEV_NAME/sriov_numvfs + devlink dev eswitch set $DL_HANDLE mode switchdev + local leafs=`rate_leafs_get $DL_HANDLE` + local num_leafs=`echo $leafs | wc -w` + [ "$num_leafs" == "$VF_COUNT" ] + check_err $? "Expected $VF_COUNT rate leafs but got $num_leafs" + + rate=10 + for r_obj in $leafs + do + rate_attr_tx_rate_check $r_obj tx_share $rate \ + $DEBUGFS_DIR/ports/${r_obj##*/}/tx_share + rate=$(($rate+10)) + done + + rate=100 + for r_obj in $leafs + do + rate_attr_tx_rate_check $r_obj tx_max $rate \ + $DEBUGFS_DIR/ports/${r_obj##*/}/tx_max + rate=$(($rate+100)) + done + + local node1_name='group1' + local node1="$DL_HANDLE/$node1_name" + rate_node_add "$node1" + check_err $? "Failed to add node $node1" + + local num_nodes=`rate_nodes_get $DL_HANDLE | wc -w` + [ $num_nodes == 1 ] + check_err $? "Expected 1 rate node in output but got $num_nodes" + + local node_tx_share=10 + rate_attr_tx_rate_check $node1 tx_share $node_tx_share \ + $DEBUGFS_DIR/rate_nodes/${node1##*/}/tx_share + + local node_tx_max=100 + rate_attr_tx_rate_check $node1 tx_max $node_tx_max \ + $DEBUGFS_DIR/rate_nodes/${node1##*/}/tx_max + + rate_node_del "$node1" + check_err $? "Failed to delete node $node1" + local num_nodes=`rate_nodes_get $DL_HANDLE | wc -w` + [ $num_nodes == 0 ] + check_err $? "Expected 0 rate node but got $num_nodes" + + local node1_name='group1' + local node1="$DL_HANDLE/$node1_name" + rate_node_add "$node1" + check_err $? "Failed to add node $node1" + + rate_attr_parent_check $r_obj $node1_name \ + $DEBUGFS_DIR/ports/${r_obj##*/}/rate_parent + + local node2_name='group2' + local node2="$DL_HANDLE/$node2_name" + rate_node_add "$node2" + check_err $? "Failed to add node $node2" + + rate_attr_parent_check $node2 $node1_name \ + $DEBUGFS_DIR/rate_nodes/$node2_name/rate_parent + rate_node_del "$node2" + check_err $? "Failed to delete node $node2" + rate_attr_set "$r_obj" noparent + check_err $? "Failed to unset $r_obj parent node" + rate_node_del "$node1" + check_err $? "Failed to delete node $node1" + + log_test "rate test" +} + setup_prepare() { modprobe netdevsim