From 7a1b0b1a555e62734fd6c6fd9953d0add1d9c9b1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 18 Mar 2022 12:23:40 -0700 Subject: [PATCH 1/5] bnxt: use the devlink instance lock to protect sriov In prep for .eswitch_mode_set being called with the devlink instance lock held use that lock explicitly instead of creating a local mutex just for the sriov reconfig. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 - drivers/net/ethernet/broadcom/bnxt/bnxt.h | 6 ------ drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c | 4 ++-- drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c | 4 ++-- 4 files changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 92a1a43b3bee..1c28495875cf 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -13470,7 +13470,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) #ifdef CONFIG_BNXT_SRIOV init_waitqueue_head(&bp->sriov_cfg_wait); - mutex_init(&bp->sriov_lock); #endif if (BNXT_SUPPORTS_TPA(bp)) { bp->gro_func = bnxt_gro_func_5730x; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 447a9406b8a2..61aa3e8c5952 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2072,12 +2072,6 @@ struct bnxt { wait_queue_head_t sriov_cfg_wait; bool sriov_cfg; #define BNXT_SRIOV_CFG_WAIT_TMO msecs_to_jiffies(10000) - - /* lock to protect VF-rep creation/cleanup via - * multiple paths such as ->sriov_configure() and - * devlink ->eswitch_mode_set() - */ - struct mutex sriov_lock; #endif #if BITS_PER_LONG == 32 diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index 1d177fed44a6..ddf2f3963abe 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -846,7 +846,7 @@ void bnxt_sriov_disable(struct bnxt *bp) return; /* synchronize VF and VF-rep create and destroy */ - mutex_lock(&bp->sriov_lock); + devl_lock(bp->dl); bnxt_vf_reps_destroy(bp); if (pci_vfs_assigned(bp->pdev)) { @@ -859,7 +859,7 @@ void bnxt_sriov_disable(struct bnxt *bp) /* Free the HW resources reserved for various VF's */ bnxt_hwrm_func_vf_resource_free(bp, num_vfs); } - mutex_unlock(&bp->sriov_lock); + devl_unlock(bp->dl); bnxt_free_vf_resources(bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c index 8eb28e088582..b2a9528b456b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c @@ -561,7 +561,7 @@ int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode, struct bnxt *bp = bnxt_get_bp_from_dl(devlink); int rc = 0; - mutex_lock(&bp->sriov_lock); + devl_lock(devlink); if (bp->eswitch_mode == mode) { netdev_info(bp->dev, "already in %s eswitch mode\n", mode == DEVLINK_ESWITCH_MODE_LEGACY ? @@ -595,7 +595,7 @@ int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode, goto done; } done: - mutex_unlock(&bp->sriov_lock); + devl_unlock(devlink); return rc; } From 8879b32a3a809a183e6e2998725a0f33c4e42d41 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 18 Mar 2022 12:23:41 -0700 Subject: [PATCH 2/5] devlink: add explicitly locked flavor of the rate node APIs We'll need an explicitly locked rate node API for netdevsim to switch eswitch mode setting to locked. Reviewed-by: Jiri Pirko Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/net/devlink.h | 4 ++ net/core/devlink.c | 86 ++++++++++++++++++++++++++++++------------- 2 files changed, 65 insertions(+), 25 deletions(-) diff --git a/include/net/devlink.h b/include/net/devlink.h index fd89a17adea1..a30180c0988a 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1490,6 +1490,10 @@ int devl_port_register(struct devlink *devlink, unsigned int port_index); void devl_port_unregister(struct devlink_port *devlink_port); +int devl_rate_leaf_create(struct devlink_port *port, void *priv); +void devl_rate_leaf_destroy(struct devlink_port *devlink_port); +void devl_rate_nodes_destroy(struct devlink *devlink); + struct ib_device; struct net *devlink_net(const struct devlink *devlink); diff --git a/net/core/devlink.c b/net/core/devlink.c index f2a277053ec6..5aac5370c136 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -2868,7 +2868,7 @@ static int devlink_rate_nodes_check(struct devlink *devlink, u16 mode, { struct devlink_rate *devlink_rate; - /* Take the lock to sync with devlink_rate_nodes_destroy() */ + /* Take the lock to sync with destroy */ mutex_lock(&devlink->lock); list_for_each_entry(devlink_rate, &devlink->rate_list, list) if (devlink_rate_is_node(devlink_rate)) { @@ -9548,30 +9548,26 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 contro EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_sf_set); /** - * devlink_rate_leaf_create - create devlink rate leaf - * + * devl_rate_leaf_create - create devlink rate leaf * @devlink_port: devlink port object to create rate object on * @priv: driver private data * * Create devlink rate object of type leaf on provided @devlink_port. - * Throws call trace if @devlink_port already has a devlink rate object. - * - * Context: Takes and release devlink->lock . - * - * Return: -ENOMEM if failed to allocate rate object, 0 otherwise. */ -int -devlink_rate_leaf_create(struct devlink_port *devlink_port, void *priv) +int devl_rate_leaf_create(struct devlink_port *devlink_port, void *priv) { struct devlink *devlink = devlink_port->devlink; struct devlink_rate *devlink_rate; + devl_assert_locked(devlink_port->devlink); + + if (WARN_ON(devlink_port->devlink_rate)) + return -EBUSY; + devlink_rate = kzalloc(sizeof(*devlink_rate), GFP_KERNEL); if (!devlink_rate) return -ENOMEM; - mutex_lock(&devlink->lock); - WARN_ON(devlink_port->devlink_rate); devlink_rate->type = DEVLINK_RATE_TYPE_LEAF; devlink_rate->devlink = devlink; devlink_rate->devlink_port = devlink_port; @@ -9579,12 +9575,42 @@ devlink_rate_leaf_create(struct devlink_port *devlink_port, void *priv) list_add_tail(&devlink_rate->list, &devlink->rate_list); devlink_port->devlink_rate = devlink_rate; devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW); - mutex_unlock(&devlink->lock); return 0; } +EXPORT_SYMBOL_GPL(devl_rate_leaf_create); + +int +devlink_rate_leaf_create(struct devlink_port *devlink_port, void *priv) +{ + struct devlink *devlink = devlink_port->devlink; + int ret; + + mutex_lock(&devlink->lock); + ret = devl_rate_leaf_create(devlink_port, priv); + mutex_unlock(&devlink->lock); + + return ret; +} EXPORT_SYMBOL_GPL(devlink_rate_leaf_create); +void devl_rate_leaf_destroy(struct devlink_port *devlink_port) +{ + struct devlink_rate *devlink_rate = devlink_port->devlink_rate; + + devl_assert_locked(devlink_port->devlink); + if (!devlink_rate) + return; + + devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_DEL); + if (devlink_rate->parent) + refcount_dec(&devlink_rate->parent->refcnt); + list_del(&devlink_rate->list); + devlink_port->devlink_rate = NULL; + kfree(devlink_rate); +} +EXPORT_SYMBOL_GPL(devl_rate_leaf_destroy); + /** * devlink_rate_leaf_destroy - destroy devlink rate leaf * @@ -9601,32 +9627,25 @@ void devlink_rate_leaf_destroy(struct devlink_port *devlink_port) return; mutex_lock(&devlink->lock); - devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_DEL); - if (devlink_rate->parent) - refcount_dec(&devlink_rate->parent->refcnt); - list_del(&devlink_rate->list); - devlink_port->devlink_rate = NULL; + devl_rate_leaf_destroy(devlink_port); mutex_unlock(&devlink->lock); - kfree(devlink_rate); } EXPORT_SYMBOL_GPL(devlink_rate_leaf_destroy); /** - * devlink_rate_nodes_destroy - destroy all devlink rate nodes on device - * + * devl_rate_nodes_destroy - destroy all devlink rate nodes on device * @devlink: devlink instance * * Unset parent for all rate objects and destroy all rate nodes * on specified device. - * - * Context: Takes and release devlink->lock . */ -void devlink_rate_nodes_destroy(struct devlink *devlink) +void devl_rate_nodes_destroy(struct devlink *devlink) { static struct devlink_rate *devlink_rate, *tmp; const struct devlink_ops *ops = devlink->ops; - mutex_lock(&devlink->lock); + devl_assert_locked(devlink); + list_for_each_entry(devlink_rate, &devlink->rate_list, list) { if (!devlink_rate->parent) continue; @@ -9647,6 +9666,23 @@ void devlink_rate_nodes_destroy(struct devlink *devlink) kfree(devlink_rate); } } +} +EXPORT_SYMBOL_GPL(devl_rate_nodes_destroy); + +/** + * devlink_rate_nodes_destroy - destroy all devlink rate nodes on device + * + * @devlink: devlink instance + * + * Unset parent for all rate objects and destroy all rate nodes + * on specified device. + * + * Context: Takes and release devlink->lock . + */ +void devlink_rate_nodes_destroy(struct devlink *devlink) +{ + mutex_lock(&devlink->lock); + devl_rate_nodes_destroy(devlink); mutex_unlock(&devlink->lock); } EXPORT_SYMBOL_GPL(devlink_rate_nodes_destroy); From 76eea6c2e663f7e9fbc258c82811ad9e657e085f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 18 Mar 2022 12:23:42 -0700 Subject: [PATCH 3/5] netdevsim: replace port_list_lock with devlink instance lock Take advantage of the devlink instance lock for protecting the port list. This will simplify locking even more once all devlink callbacks hold the instance lock. We need to add locking in nsim_dev_port_add_all() which used to assume higher layer protection when accessing the list. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/netdevsim/dev.c | 40 +++++++++++++++---------------- drivers/net/netdevsim/netdevsim.h | 1 - 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index dbc8e88d2841..dd650d4301e5 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -576,11 +576,11 @@ static int nsim_esw_legacy_enable(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port, *tmp; devlink_rate_nodes_destroy(devlink); - mutex_lock(&nsim_dev->port_list_lock); + devl_lock(devlink); list_for_each_entry_safe(nsim_dev_port, tmp, &nsim_dev->port_list, list) if (nsim_dev_port_is_vf(nsim_dev_port)) __nsim_dev_port_del(nsim_dev_port); - mutex_unlock(&nsim_dev->port_list_lock); + devl_unlock(devlink); nsim_dev->esw_mode = DEVLINK_ESWITCH_MODE_LEGACY; return 0; } @@ -835,14 +835,14 @@ static void nsim_dev_trap_report_work(struct work_struct *work) /* For each running port and enabled packet trap, generate a UDP * packet with a random 5-tuple and report it. */ - mutex_lock(&nsim_dev->port_list_lock); + devl_lock(priv_to_devlink(nsim_dev)); list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list) { if (!netif_running(nsim_dev_port->ns->netdev)) continue; nsim_dev_trap_report(nsim_dev_port); } - mutex_unlock(&nsim_dev->port_list_lock); + devl_unlock(priv_to_devlink(nsim_dev)); schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw, msecs_to_jiffies(NSIM_TRAP_REPORT_INTERVAL_MS)); @@ -924,6 +924,7 @@ static void nsim_dev_traps_exit(struct devlink *devlink) { struct nsim_dev *nsim_dev = devlink_priv(devlink); + /* caution, trap work takes devlink lock */ cancel_delayed_work_sync(&nsim_dev->trap_data->trap_report_dw); devlink_traps_unregister(devlink, nsim_traps_arr, ARRAY_SIZE(nsim_traps_arr)); @@ -1380,8 +1381,8 @@ static int __nsim_dev_port_add(struct nsim_dev *nsim_dev, enum nsim_dev_port_typ memcpy(attrs.switch_id.id, nsim_dev->switch_id.id, nsim_dev->switch_id.id_len); attrs.switch_id.id_len = nsim_dev->switch_id.id_len; devlink_port_attrs_set(devlink_port, &attrs); - err = devlink_port_register(priv_to_devlink(nsim_dev), devlink_port, - nsim_dev_port->port_index); + err = devl_port_register(priv_to_devlink(nsim_dev), devlink_port, + nsim_dev_port->port_index); if (err) goto err_port_free; @@ -1396,8 +1397,8 @@ static int __nsim_dev_port_add(struct nsim_dev *nsim_dev, enum nsim_dev_port_typ } if (nsim_dev_port_is_vf(nsim_dev_port)) { - err = devlink_rate_leaf_create(&nsim_dev_port->devlink_port, - nsim_dev_port); + err = devl_rate_leaf_create(&nsim_dev_port->devlink_port, + nsim_dev_port); if (err) goto err_nsim_destroy; } @@ -1412,7 +1413,7 @@ err_nsim_destroy: err_port_debugfs_exit: nsim_dev_port_debugfs_exit(nsim_dev_port); err_dl_port_unregister: - devlink_port_unregister(devlink_port); + devl_port_unregister(devlink_port); err_port_free: kfree(nsim_dev_port); return err; @@ -1424,11 +1425,11 @@ static void __nsim_dev_port_del(struct nsim_dev_port *nsim_dev_port) list_del(&nsim_dev_port->list); if (nsim_dev_port_is_vf(nsim_dev_port)) - devlink_rate_leaf_destroy(&nsim_dev_port->devlink_port); + devl_rate_leaf_destroy(&nsim_dev_port->devlink_port); devlink_port_type_clear(devlink_port); nsim_destroy(nsim_dev_port->ns); nsim_dev_port_debugfs_exit(nsim_dev_port); - devlink_port_unregister(devlink_port); + devl_port_unregister(devlink_port); kfree(nsim_dev_port); } @@ -1436,11 +1437,11 @@ static void nsim_dev_port_del_all(struct nsim_dev *nsim_dev) { struct nsim_dev_port *nsim_dev_port, *tmp; - mutex_lock(&nsim_dev->port_list_lock); + devl_lock(priv_to_devlink(nsim_dev)); list_for_each_entry_safe(nsim_dev_port, tmp, &nsim_dev->port_list, list) __nsim_dev_port_del(nsim_dev_port); - mutex_unlock(&nsim_dev->port_list_lock); + devl_unlock(priv_to_devlink(nsim_dev)); } static int nsim_dev_port_add_all(struct nsim_dev *nsim_dev, @@ -1449,7 +1450,9 @@ static int nsim_dev_port_add_all(struct nsim_dev *nsim_dev, int i, err; for (i = 0; i < port_count; i++) { + devl_lock(priv_to_devlink(nsim_dev)); err = __nsim_dev_port_add(nsim_dev, NSIM_DEV_PORT_TYPE_PF, i); + devl_unlock(priv_to_devlink(nsim_dev)); if (err) goto err_port_del_all; } @@ -1470,7 +1473,6 @@ static int nsim_dev_reload_create(struct nsim_dev *nsim_dev, devlink = priv_to_devlink(nsim_dev); nsim_dev = devlink_priv(devlink); INIT_LIST_HEAD(&nsim_dev->port_list); - mutex_init(&nsim_dev->port_list_lock); nsim_dev->fw_update_status = true; nsim_dev->fw_update_overwrite_mask = 0; @@ -1544,7 +1546,6 @@ int nsim_drv_probe(struct nsim_bus_dev *nsim_bus_dev) get_random_bytes(nsim_dev->switch_id.id, nsim_dev->switch_id.id_len); INIT_LIST_HEAD(&nsim_dev->port_list); mutex_init(&nsim_dev->vfs_lock); - mutex_init(&nsim_dev->port_list_lock); nsim_dev->fw_update_status = true; nsim_dev->fw_update_overwrite_mask = 0; nsim_dev->max_macs = NSIM_DEV_MAX_MACS_DEFAULT; @@ -1666,7 +1667,6 @@ static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev) nsim_fib_destroy(devlink, nsim_dev->fib_data); nsim_dev_traps_exit(devlink); nsim_dev_dummy_region_exit(nsim_dev); - mutex_destroy(&nsim_dev->port_list_lock); } void nsim_drv_remove(struct nsim_bus_dev *nsim_bus_dev) @@ -1706,12 +1706,12 @@ int nsim_drv_port_add(struct nsim_bus_dev *nsim_bus_dev, enum nsim_dev_port_type struct nsim_dev *nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev); int err; - mutex_lock(&nsim_dev->port_list_lock); + devl_lock(priv_to_devlink(nsim_dev)); if (__nsim_dev_port_lookup(nsim_dev, type, port_index)) err = -EEXIST; else err = __nsim_dev_port_add(nsim_dev, type, port_index); - mutex_unlock(&nsim_dev->port_list_lock); + devl_unlock(priv_to_devlink(nsim_dev)); return err; } @@ -1722,13 +1722,13 @@ int nsim_drv_port_del(struct nsim_bus_dev *nsim_bus_dev, enum nsim_dev_port_type struct nsim_dev_port *nsim_dev_port; int err = 0; - mutex_lock(&nsim_dev->port_list_lock); + devl_lock(priv_to_devlink(nsim_dev)); nsim_dev_port = __nsim_dev_port_lookup(nsim_dev, type, port_index); if (!nsim_dev_port) err = -ENOENT; else __nsim_dev_port_del(nsim_dev_port); - mutex_unlock(&nsim_dev->port_list_lock); + devl_unlock(priv_to_devlink(nsim_dev)); return err; } diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 128f229d9b4d..8dd6f975f32d 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -274,7 +274,6 @@ struct nsim_dev { struct list_head bpf_bound_maps; struct netdev_phys_item_id switch_id; struct list_head port_list; - struct mutex port_list_lock; /* protects port list */ bool fw_update_status; u32 fw_update_overwrite_mask; u32 max_macs; From aff3a925094633a5b77058b9a715efbb12fc2698 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 18 Mar 2022 12:23:43 -0700 Subject: [PATCH 4/5] netdevsim: replace vfs_lock with devlink instance lock Similarly to the previous commit, use the devlink instance lock and let it replace the vfs_lock. nsim_esw_legacy_enable() was locked by both port lock and vfs lock so one set of lock/unlocks goes away. netdevsim's .eswitch_mode_set callback is now ready for the callback to take the instance lock. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/netdevsim/dev.c | 37 +++++++++++++++++-------------- drivers/net/netdevsim/netdevsim.h | 1 - 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index dd650d4301e5..68cd1defe990 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -59,7 +59,7 @@ static struct dentry *nsim_dev_ddir; unsigned int nsim_dev_get_vfs(struct nsim_dev *nsim_dev) { WARN_ON(!lockdep_rtnl_is_held() && - !lockdep_is_held(&nsim_dev->vfs_lock)); + !devl_lock_is_held(priv_to_devlink(nsim_dev))); return nsim_dev->nsim_bus_dev->num_vfs; } @@ -275,7 +275,7 @@ static ssize_t nsim_bus_dev_max_vfs_write(struct file *file, return -ENOMEM; nsim_dev = file->private_data; - mutex_lock(&nsim_dev->vfs_lock); + devl_lock(priv_to_devlink(nsim_dev)); /* Reject if VFs are configured */ if (nsim_dev_get_vfs(nsim_dev)) { ret = -EBUSY; @@ -285,7 +285,7 @@ static ssize_t nsim_bus_dev_max_vfs_write(struct file *file, *ppos += count; ret = count; } - mutex_unlock(&nsim_dev->vfs_lock); + devl_unlock(priv_to_devlink(nsim_dev)); kfree(vfconfigs); return ret; @@ -339,6 +339,7 @@ static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev) debugfs_create_bool("fail_trap_policer_counter_get", 0600, nsim_dev->ddir, &nsim_dev->fail_trap_policer_counter_get); + /* caution, dev_max_vfs write takes devlink lock */ debugfs_create_file("max_vfs", 0600, nsim_dev->ddir, nsim_dev, &nsim_dev_max_vfs_fops); @@ -567,6 +568,9 @@ static void nsim_dev_dummy_region_exit(struct nsim_dev *nsim_dev) devlink_region_destroy(nsim_dev->dummy_region); } +static int +__nsim_dev_port_add(struct nsim_dev *nsim_dev, enum nsim_dev_port_type type, + unsigned int port_index); static void __nsim_dev_port_del(struct nsim_dev_port *nsim_dev_port); static int nsim_esw_legacy_enable(struct nsim_dev *nsim_dev, @@ -575,12 +579,10 @@ static int nsim_esw_legacy_enable(struct nsim_dev *nsim_dev, struct devlink *devlink = priv_to_devlink(nsim_dev); struct nsim_dev_port *nsim_dev_port, *tmp; - devlink_rate_nodes_destroy(devlink); - devl_lock(devlink); + devl_rate_nodes_destroy(devlink); list_for_each_entry_safe(nsim_dev_port, tmp, &nsim_dev->port_list, list) if (nsim_dev_port_is_vf(nsim_dev_port)) __nsim_dev_port_del(nsim_dev_port); - devl_unlock(devlink); nsim_dev->esw_mode = DEVLINK_ESWITCH_MODE_LEGACY; return 0; } @@ -588,11 +590,11 @@ static int nsim_esw_legacy_enable(struct nsim_dev *nsim_dev, static int nsim_esw_switchdev_enable(struct nsim_dev *nsim_dev, struct netlink_ext_ack *extack) { - struct nsim_bus_dev *nsim_bus_dev = nsim_dev->nsim_bus_dev; + struct nsim_dev_port *nsim_dev_port, *tmp; int i, err; for (i = 0; i < nsim_dev_get_vfs(nsim_dev); i++) { - err = nsim_drv_port_add(nsim_bus_dev, NSIM_DEV_PORT_TYPE_VF, i); + err = __nsim_dev_port_add(nsim_dev, NSIM_DEV_PORT_TYPE_VF, i); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed to initialize VFs' netdevsim ports"); pr_err("Failed to initialize VF id=%d. %d.\n", i, err); @@ -603,8 +605,9 @@ static int nsim_esw_switchdev_enable(struct nsim_dev *nsim_dev, return 0; err_port_add_vfs: - for (i--; i >= 0; i--) - nsim_drv_port_del(nsim_bus_dev, NSIM_DEV_PORT_TYPE_VF, i); + list_for_each_entry_safe(nsim_dev_port, tmp, &nsim_dev->port_list, list) + if (nsim_dev_port_is_vf(nsim_dev_port)) + __nsim_dev_port_del(nsim_dev_port); return err; } @@ -614,7 +617,7 @@ static int nsim_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, struct nsim_dev *nsim_dev = devlink_priv(devlink); int err = 0; - mutex_lock(&nsim_dev->vfs_lock); + devl_lock(devlink); if (mode == nsim_dev->esw_mode) goto unlock; @@ -626,7 +629,7 @@ static int nsim_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, err = -EINVAL; unlock: - mutex_unlock(&nsim_dev->vfs_lock); + devl_unlock(devlink); return err; } @@ -1545,7 +1548,6 @@ int nsim_drv_probe(struct nsim_bus_dev *nsim_bus_dev) nsim_dev->switch_id.id_len = sizeof(nsim_dev->switch_id.id); get_random_bytes(nsim_dev->switch_id.id, nsim_dev->switch_id.id_len); INIT_LIST_HEAD(&nsim_dev->port_list); - mutex_init(&nsim_dev->vfs_lock); nsim_dev->fw_update_status = true; nsim_dev->fw_update_overwrite_mask = 0; nsim_dev->max_macs = NSIM_DEV_MAX_MACS_DEFAULT; @@ -1652,13 +1654,13 @@ static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev) return; debugfs_remove(nsim_dev->take_snapshot); - mutex_lock(&nsim_dev->vfs_lock); + devl_lock(devlink); if (nsim_dev_get_vfs(nsim_dev)) { nsim_bus_dev_set_vfs(nsim_dev->nsim_bus_dev, 0); if (nsim_esw_mode_is_switchdev(nsim_dev)) nsim_esw_legacy_enable(nsim_dev, NULL); } - mutex_unlock(&nsim_dev->vfs_lock); + devl_unlock(devlink); nsim_dev_port_del_all(nsim_dev); nsim_dev_hwstats_exit(nsim_dev); @@ -1736,9 +1738,10 @@ int nsim_drv_configure_vfs(struct nsim_bus_dev *nsim_bus_dev, unsigned int num_vfs) { struct nsim_dev *nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev); + struct devlink *devlink = priv_to_devlink(nsim_dev); int ret = 0; - mutex_lock(&nsim_dev->vfs_lock); + devl_lock(devlink); if (nsim_bus_dev->num_vfs == num_vfs) goto exit_unlock; if (nsim_bus_dev->num_vfs && num_vfs) { @@ -1764,7 +1767,7 @@ int nsim_drv_configure_vfs(struct nsim_bus_dev *nsim_bus_dev, } exit_unlock: - mutex_unlock(&nsim_dev->vfs_lock); + devl_unlock(devlink); return ret; } diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 8dd6f975f32d..0b122872b2c9 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -261,7 +261,6 @@ struct nsim_dev { struct dentry *take_snapshot; struct dentry *nodes_ddir; - struct mutex vfs_lock; /* Protects vfconfigs */ struct nsim_vf_config *vfconfigs; struct bpf_offload_dev *bpf_dev; From 14e426bf1a4d77ac87d0fa2a964092a23f863e44 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 18 Mar 2022 12:23:44 -0700 Subject: [PATCH 5/5] devlink: hold the instance lock during eswitch_mode callbacks Make the devlink core hold the instance lock during eswitch_mode callbacks. Cheat in case of mlx5 (see the cover letter). Reviewed-by: Leon Romanovsky Reviewed-by: Jiri Pirko Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c | 22 +++----- .../mellanox/mlx5/core/eswitch_offloads.c | 54 ++++++++++++++----- .../net/ethernet/netronome/nfp/nfp_devlink.c | 7 +-- drivers/net/netdevsim/dev.c | 16 ++---- net/core/devlink.c | 6 --- 5 files changed, 54 insertions(+), 51 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c index b2a9528b456b..eb4803b11c0e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c @@ -559,44 +559,34 @@ int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode, struct netlink_ext_ack *extack) { struct bnxt *bp = bnxt_get_bp_from_dl(devlink); - int rc = 0; - devl_lock(devlink); if (bp->eswitch_mode == mode) { netdev_info(bp->dev, "already in %s eswitch mode\n", mode == DEVLINK_ESWITCH_MODE_LEGACY ? "legacy" : "switchdev"); - rc = -EINVAL; - goto done; + return -EINVAL; } switch (mode) { case DEVLINK_ESWITCH_MODE_LEGACY: bnxt_vf_reps_destroy(bp); - break; + return 0; case DEVLINK_ESWITCH_MODE_SWITCHDEV: if (bp->hwrm_spec_code < 0x10803) { netdev_warn(bp->dev, "FW does not support SRIOV E-Switch SWITCHDEV mode\n"); - rc = -ENOTSUPP; - goto done; + return -ENOTSUPP; } if (pci_num_vf(bp->pdev) == 0) { netdev_info(bp->dev, "Enable VFs before setting switchdev mode\n"); - rc = -EPERM; - goto done; + return -EPERM; } - rc = bnxt_vf_reps_create(bp); - break; + return bnxt_vf_reps_create(bp); default: - rc = -EINVAL; - goto done; + return -EINVAL; } -done: - devl_unlock(devlink); - return rc; } #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 35cf4cb3098e..3f63df127091 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3337,6 +3337,27 @@ static int eswitch_devlink_esw_mode_check(const struct mlx5_eswitch *esw) !mlx5_core_is_ecpf_esw_manager(esw->dev)) ? -EOPNOTSUPP : 0; } +/* FIXME: devl_unlock() followed by devl_lock() inside driver callback + * is never correct and prone to races. It's a transitional workaround, + * never repeat this pattern. + * + * This code MUST be fixed before removing devlink_mutex as it is safe + * to do only because of that mutex. + */ +static void mlx5_eswtich_mode_callback_enter(struct devlink *devlink, + struct mlx5_eswitch *esw) +{ + devl_unlock(devlink); + down_write(&esw->mode_lock); +} + +static void mlx5_eswtich_mode_callback_exit(struct devlink *devlink, + struct mlx5_eswitch *esw) +{ + up_write(&esw->mode_lock); + devl_lock(devlink); +} + int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, struct netlink_ext_ack *extack) { @@ -3351,6 +3372,15 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, if (esw_mode_from_devlink(mode, &mlx5_mode)) return -EINVAL; + /* FIXME: devl_unlock() followed by devl_lock() inside driver callback + * is never correct and prone to races. It's a transitional workaround, + * never repeat this pattern. + * + * This code MUST be fixed before removing devlink_mutex as it is safe + * to do only because of that mutex. + */ + devl_unlock(devlink); + mlx5_lag_disable_change(esw->dev); err = mlx5_esw_try_lock(esw); if (err < 0) { @@ -3381,6 +3411,7 @@ unlock: mlx5_esw_unlock(esw); enable_lag: mlx5_lag_enable_change(esw->dev); + devl_lock(devlink); return err; } @@ -3393,14 +3424,14 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) if (IS_ERR(esw)) return PTR_ERR(esw); - down_write(&esw->mode_lock); + mlx5_eswtich_mode_callback_enter(devlink, esw); err = eswitch_devlink_esw_mode_check(esw); if (err) goto unlock; err = esw_mode_to_devlink(esw->mode, mode); unlock: - up_write(&esw->mode_lock); + mlx5_eswtich_mode_callback_exit(devlink, esw); return err; } @@ -3447,7 +3478,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, if (IS_ERR(esw)) return PTR_ERR(esw); - down_write(&esw->mode_lock); + mlx5_eswtich_mode_callback_enter(devlink, esw); err = eswitch_devlink_esw_mode_check(esw); if (err) goto out; @@ -3484,11 +3515,11 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, goto out; esw->offloads.inline_mode = mlx5_mode; - up_write(&esw->mode_lock); + mlx5_eswtich_mode_callback_exit(devlink, esw); return 0; out: - up_write(&esw->mode_lock); + mlx5_eswtich_mode_callback_exit(devlink, esw); return err; } @@ -3501,14 +3532,14 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode) if (IS_ERR(esw)) return PTR_ERR(esw); - down_write(&esw->mode_lock); + mlx5_eswtich_mode_callback_enter(devlink, esw); err = eswitch_devlink_esw_mode_check(esw); if (err) goto unlock; err = esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); unlock: - up_write(&esw->mode_lock); + mlx5_eswtich_mode_callback_exit(devlink, esw); return err; } @@ -3524,7 +3555,7 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, if (IS_ERR(esw)) return PTR_ERR(esw); - down_write(&esw->mode_lock); + mlx5_eswtich_mode_callback_enter(devlink, esw); err = eswitch_devlink_esw_mode_check(esw); if (err) goto unlock; @@ -3570,7 +3601,7 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, } unlock: - up_write(&esw->mode_lock); + mlx5_eswtich_mode_callback_exit(devlink, esw); return err; } @@ -3584,15 +3615,14 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, if (IS_ERR(esw)) return PTR_ERR(esw); - - down_write(&esw->mode_lock); + mlx5_eswtich_mode_callback_enter(devlink, esw); err = eswitch_devlink_esw_mode_check(esw); if (err) goto unlock; *encap = esw->offloads.encap; unlock: - up_write(&esw->mode_lock); + mlx5_eswtich_mode_callback_exit(devlink, esw); return err; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c index 48b95566b52b..405786c00334 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c @@ -144,13 +144,8 @@ static int nfp_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, struct netlink_ext_ack *extack) { struct nfp_pf *pf = devlink_priv(devlink); - int ret; - devl_lock(devlink); - ret = nfp_app_eswitch_mode_set(pf->app, mode); - devl_unlock(devlink); - - return ret; + return nfp_app_eswitch_mode_set(pf->app, mode); } static const struct nfp_devlink_versions_simple { diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 68cd1defe990..57a3ac893792 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -615,22 +615,16 @@ static int nsim_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, struct netlink_ext_ack *extack) { struct nsim_dev *nsim_dev = devlink_priv(devlink); - int err = 0; - devl_lock(devlink); if (mode == nsim_dev->esw_mode) - goto unlock; + return 0; if (mode == DEVLINK_ESWITCH_MODE_LEGACY) - err = nsim_esw_legacy_enable(nsim_dev, extack); - else if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) - err = nsim_esw_switchdev_enable(nsim_dev, extack); - else - err = -EINVAL; + return nsim_esw_legacy_enable(nsim_dev, extack); + if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) + return nsim_esw_switchdev_enable(nsim_dev, extack); -unlock: - devl_unlock(devlink); - return err; + return -EINVAL; } static int nsim_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) diff --git a/net/core/devlink.c b/net/core/devlink.c index 5aac5370c136..aeca13b6e57b 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -2868,15 +2868,11 @@ static int devlink_rate_nodes_check(struct devlink *devlink, u16 mode, { struct devlink_rate *devlink_rate; - /* Take the lock to sync with destroy */ - mutex_lock(&devlink->lock); list_for_each_entry(devlink_rate, &devlink->rate_list, list) if (devlink_rate_is_node(devlink_rate)) { - mutex_unlock(&devlink->lock); NL_SET_ERR_MSG_MOD(extack, "Rate node(s) exists."); return -EBUSY; } - mutex_unlock(&devlink->lock); return 0; } @@ -8735,14 +8731,12 @@ static const struct genl_small_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_eswitch_get_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_ESWITCH_SET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_eswitch_set_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_DPIPE_TABLE_GET,