diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index b1377503cb9d..9e765c79a892 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -2236,6 +2236,9 @@ struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
 	if (!rdma_is_port_valid(ib_dev, port))
 		return NULL;
 
+	if (!ib_dev->port_data)
+		return NULL;
+
 	pdata = &ib_dev->port_data[port];
 
 	/*
@@ -2254,6 +2257,7 @@ struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
 
 	return res;
 }
+EXPORT_SYMBOL(ib_device_get_netdev);
 
 /**
  * ib_device_get_by_netdev - Find an IB device associated with a netdev
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index 1ad934685d80..49af1cfbe6d1 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -13,6 +13,7 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
 		      int vport_index)
 {
 	struct mlx5_ib_dev *ibdev;
+	struct net_device *ndev;
 
 	ibdev = mlx5_eswitch_uplink_get_proto_dev(dev->priv.eswitch, REP_IB);
 	if (!ibdev)
@@ -20,12 +21,9 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
 
 	ibdev->port[vport_index].rep = rep;
 	rep->rep_data[REP_IB].priv = ibdev;
-	write_lock(&ibdev->port[vport_index].roce.netdev_lock);
-	ibdev->port[vport_index].roce.netdev =
-		mlx5_ib_get_rep_netdev(rep->esw, rep->vport);
-	write_unlock(&ibdev->port[vport_index].roce.netdev_lock);
+	ndev = mlx5_ib_get_rep_netdev(rep->esw, rep->vport);
 
-	return 0;
+	return ib_device_set_netdev(&ibdev->ib_dev, ndev, vport_index + 1);
 }
 
 static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev);
@@ -104,11 +102,15 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 	ibdev->is_rep = true;
 	vport_index = rep->vport_index;
 	ibdev->port[vport_index].rep = rep;
-	ibdev->ib_dev.phys_port_cnt = num_ports;
-	ibdev->port[vport_index].roce.netdev =
-		mlx5_ib_get_rep_netdev(lag_master->priv.eswitch, rep->vport);
 	ibdev->mdev = lag_master;
 	ibdev->num_ports = num_ports;
+	ibdev->ib_dev.phys_port_cnt = num_ports;
+	ret = ib_device_set_netdev(&ibdev->ib_dev,
+			mlx5_ib_get_rep_netdev(lag_master->priv.eswitch,
+					       rep->vport),
+			vport_index + 1);
+	if (ret)
+		goto fail_add;
 
 	ret = __mlx5_ib_add(ibdev, profile);
 	if (ret)
@@ -161,9 +163,8 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
 	}
 
 	port = &dev->port[vport_index];
-	write_lock(&port->roce.netdev_lock);
-	port->roce.netdev = NULL;
-	write_unlock(&port->roce.netdev_lock);
+
+	ib_device_set_netdev(&dev->ib_dev, NULL, vport_index + 1);
 	rep->rep_data[REP_IB].priv = NULL;
 	port->rep = NULL;
 
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index ad8a2b5517bf..4999239c8f41 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -147,16 +147,54 @@ static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev,
 
 		if (upper && port->rep->vport == MLX5_VPORT_UPLINK)
 			continue;
-
-		read_lock(&port->roce.netdev_lock);
-		rep_ndev = mlx5_ib_get_rep_netdev(port->rep->esw,
-						  port->rep->vport);
-		if (rep_ndev == ndev) {
-			read_unlock(&port->roce.netdev_lock);
+		rep_ndev = ib_device_get_netdev(&dev->ib_dev, i + 1);
+		if (rep_ndev && rep_ndev == ndev) {
+			dev_put(rep_ndev);
 			*port_num = i + 1;
 			return &port->roce;
 		}
-		read_unlock(&port->roce.netdev_lock);
+
+		dev_put(rep_ndev);
+	}
+
+	return NULL;
+}
+
+/* Tell whether a netdev event on @ndev should be turned into an IB event. */
+static bool mlx5_netdev_send_event(struct mlx5_ib_dev *dev,
+				   struct net_device *ndev,
+				   struct net_device *upper,
+				   struct net_device *ib_ndev)
+{
+	if (!dev->ib_active)
+		return false;
+
+	/* Event is about our upper device */
+	if (upper == ndev)
+		return true;
+
+	/* RDMA device is not in lag and not in switchdev */
+	if (!dev->is_rep && !upper && ndev == ib_ndev)
+		return true;
+
+	/* RDMA device is in switchdev */
+	if (dev->is_rep && ndev == ib_ndev)
+		return true;
+
+	return false;
+}
+
+/* Netdev bound to the uplink rep's IB port; caller must dev_put() it. */
+static struct net_device *mlx5_ib_get_rep_uplink_netdev(struct mlx5_ib_dev *ibdev)
+{
+	struct mlx5_ib_port *port;
+	int i;
+
+	for (i = 0; i < ibdev->num_ports; i++) {
+		port = &ibdev->port[i];
+		if (port->rep && port->rep->vport == MLX5_VPORT_UPLINK) {
+			return ib_device_get_netdev(&ibdev->ib_dev, i + 1);
+		}
 	}
 
 	return NULL;
@@ -168,6 +206,7 @@ static int mlx5_netdev_event(struct notifier_block *this,
 	struct mlx5_roce *roce = container_of(this, struct mlx5_roce, nb);
 	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
 	u32 port_num = roce->native_port_num;
+	struct net_device *ib_ndev = NULL;
 	struct mlx5_core_dev *mdev;
 	struct mlx5_ib_dev *ibdev;
 
@@ -181,29 +220,38 @@ static int mlx5_netdev_event(struct notifier_block *this,
 		/* Should already be registered during the load */
 		if (ibdev->is_rep)
 			break;
-		write_lock(&roce->netdev_lock);
+
+		ib_ndev = ib_device_get_netdev(&ibdev->ib_dev, port_num);
+		/* Exit if already registered */
+		if (ib_ndev)
+			goto put_ndev;
+
 		if (ndev->dev.parent == mdev->device)
-			roce->netdev = ndev;
-		write_unlock(&roce->netdev_lock);
+			ib_device_set_netdev(&ibdev->ib_dev, ndev, port_num);
 		break;
 
 	case NETDEV_UNREGISTER:
 		/* In case of reps, ib device goes away before the netdevs */
-		write_lock(&roce->netdev_lock);
-		if (roce->netdev == ndev)
-			roce->netdev = NULL;
-		write_unlock(&roce->netdev_lock);
-		break;
+		if (ibdev->is_rep)
+			break;
+		ib_ndev = ib_device_get_netdev(&ibdev->ib_dev, port_num);
+		if (ib_ndev == ndev)
+			ib_device_set_netdev(&ibdev->ib_dev, NULL, port_num);
+		goto put_ndev;
 
 	case NETDEV_CHANGE:
 	case NETDEV_UP:
 	case NETDEV_DOWN: {
 		struct net_device *upper = NULL;
 
-		if (mlx5_lag_is_roce(mdev)) {
+		if (mlx5_lag_is_roce(mdev) || mlx5_lag_is_sriov(mdev)) {
 			struct net_device *lag_ndev;
 
-			lag_ndev = mlx5_lag_get_roce_netdev(mdev);
+			if (mlx5_lag_is_roce(mdev))
+				lag_ndev = ib_device_get_netdev(&ibdev->ib_dev, 1);
+			else /* sriov lag */
+				lag_ndev = mlx5_ib_get_rep_uplink_netdev(ibdev);
+
 			if (lag_ndev) {
 				upper = netdev_master_upper_dev_get(lag_ndev);
 				dev_put(lag_ndev);
@@ -216,18 +264,19 @@ static int mlx5_netdev_event(struct notifier_block *this,
 			roce = mlx5_get_rep_roce(ibdev, ndev, upper, &port_num);
 		if (!roce)
 			return NOTIFY_DONE;
-		if ((upper == ndev ||
-		     ((!upper || ibdev->is_rep) && ndev == roce->netdev)) &&
-		    ibdev->ib_active) {
+
+		ib_ndev = ib_device_get_netdev(&ibdev->ib_dev, port_num);
+
+		if (mlx5_netdev_send_event(ibdev, ndev, upper, ib_ndev)) {
 			struct ib_event ibev = { };
 			enum ib_port_state port_state;
 
 			if (get_port_state(&ibdev->ib_dev, port_num,
 					   &port_state))
-				goto done;
+				goto put_ndev;
 
 			if (roce->last_port_state == port_state)
-				goto done;
+				goto put_ndev;
 
 			roce->last_port_state = port_state;
 			ibev.device = &ibdev->ib_dev;
@@ -236,7 +285,7 @@ static int mlx5_netdev_event(struct notifier_block *this,
 			else if (port_state == IB_PORT_ACTIVE)
 				ibev.event = IB_EVENT_PORT_ACTIVE;
 			else
-				goto done;
+				goto put_ndev;
 
 			ibev.element.port_num = port_num;
 			ib_dispatch_event(&ibev);
@@ -247,39 +296,13 @@ static int mlx5_netdev_event(struct notifier_block *this,
 	default:
 		break;
 	}
+put_ndev:
+	dev_put(ib_ndev);
 done:
 	mlx5_ib_put_native_port_mdev(ibdev, port_num);
 	return NOTIFY_DONE;
 }
 
-static struct net_device *mlx5_ib_get_netdev(struct ib_device *device,
-					     u32 port_num)
-{
-	struct mlx5_ib_dev *ibdev = to_mdev(device);
-	struct net_device *ndev;
-	struct mlx5_core_dev *mdev;
-
-	mdev = mlx5_ib_get_native_port_mdev(ibdev, port_num, NULL);
-	if (!mdev)
-		return NULL;
-
-	if (mlx5_lag_is_roce(mdev)) {
-		ndev = mlx5_lag_get_roce_netdev(mdev);
-		goto out;
-	}
-
-	/* Ensure ndev does not disappear before we invoke dev_hold()
-	 */
-	read_lock(&ibdev->port[port_num - 1].roce.netdev_lock);
-	ndev = ibdev->port[port_num - 1].roce.netdev;
-	dev_hold(ndev);
-	read_unlock(&ibdev->port[port_num - 1].roce.netdev_lock);
-
-out:
-	mlx5_ib_put_native_port_mdev(ibdev, port_num);
-	return ndev;
-}
-
 struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev,
 						   u32 ib_port_num,
 						   u32 *native_port_num)
@@ -554,7 +577,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u32 port_num,
 	if (!put_mdev)
 		goto out;
 
-	ndev = mlx5_ib_get_netdev(device, port_num);
+	ndev = ib_device_get_netdev(device, port_num);
 	if (!ndev)
 		goto out;
 
@@ -3185,6 +3208,64 @@ static void get_dev_fw_str(struct ib_device *ibdev, char *str)
 		 fw_rev_sub(dev->mdev));
 }
 
+/*
+ * Active-backup LAG notifier: bind the newly active lower netdev to the
+ * matching IB port and rescan its GIDs. The reference on the netdev passed in
+ * @data belongs to the notifier chain caller and must not be dropped here.
+ */
+static int lag_event(struct notifier_block *nb, unsigned long event, void *data)
+{
+	struct mlx5_ib_dev *dev = container_of(nb, struct mlx5_ib_dev,
+					       lag_events);
+	struct mlx5_core_dev *mdev = dev->mdev;
+	struct mlx5_ib_port *port;
+	struct net_device *ndev;
+	int i, err;
+	int portnum;
+
+	portnum = 0;
+	switch (event) {
+	case MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE:
+		ndev = data;
+		if (ndev) {
+			if (!mlx5_lag_is_roce(mdev)) {
+				/* sriov lag */
+				for (i = 0; i < dev->num_ports; i++) {
+					port = &dev->port[i];
+					if (port->rep && port->rep->vport ==
+					    MLX5_VPORT_UPLINK) {
+						portnum = i;
+						break;
+					}
+				}
+			}
+			err = ib_device_set_netdev(&dev->ib_dev, ndev,
+						   portnum + 1);
+			if (err)
+				return notifier_from_errno(err);
+			/* Rescan gids after new netdev assignment */
+			rdma_roce_rescan_device(&dev->ib_dev);
+		}
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+	return NOTIFY_OK;
+}
+
+static void mlx5e_lag_event_register(struct mlx5_ib_dev *dev)
+{
+	dev->lag_events.notifier_call = lag_event;
+	blocking_notifier_chain_register(&dev->mdev->priv.lag_nh,
+					 &dev->lag_events);
+}
+
+static void mlx5e_lag_event_unregister(struct mlx5_ib_dev *dev)
+{
+	blocking_notifier_chain_unregister(&dev->mdev->priv.lag_nh,
+					   &dev->lag_events);
+}
+
 static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
 {
 	struct mlx5_core_dev *mdev = dev->mdev;
@@ -3206,6 +3287,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
 		goto err_destroy_vport_lag;
 	}
 
+	mlx5e_lag_event_register(dev);
 	dev->flow_db->lag_demux_ft = ft;
 	dev->lag_ports = mlx5_lag_get_num_ports(mdev);
 	dev->lag_active = true;
@@ -3223,6 +3305,7 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
 	if (dev->lag_active) {
 		dev->lag_active = false;
 
+		mlx5e_lag_event_unregister(dev);
 		mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft);
 		dev->flow_db->lag_demux_ft = NULL;
 
@@ -3939,7 +4022,6 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
 
 	for (i = 0; i < dev->num_ports; i++) {
 		spin_lock_init(&dev->port[i].mp.mpi_lock);
-		rwlock_init(&dev->port[i].roce.netdev_lock);
 		dev->port[i].roce.dev = dev;
 		dev->port[i].roce.native_port_num = i + 1;
 		dev->port[i].roce.last_port_state = IB_PORT_DOWN;
@@ -4204,7 +4286,6 @@ static const struct ib_device_ops mlx5_ib_dev_common_roce_ops = {
 	.create_wq = mlx5_ib_create_wq,
 	.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table,
 	.destroy_wq = mlx5_ib_destroy_wq,
-	.get_netdev = mlx5_ib_get_netdev,
 	.modify_wq = mlx5_ib_modify_wq,
 
 	INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mlx5_ib_rwq_ind_table,
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 59ce407ce505..23fd72f7f63d 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -888,8 +888,3 @@ struct mlx5_roce {
-	/* Protect mlx5_ib_get_netdev from invoking dev_hold() with a NULL
-	 * netdev pointer
-	 */
-	rwlock_t		netdev_lock;
-	struct net_device	*netdev;
 	struct notifier_block	nb;
 	struct netdev_net_notifier nn;
 	struct notifier_block	mdev_nb;
@@ -1138,6 +1133,7 @@ struct mlx5_ib_dev {
 	/* protect accessing data_direct_dev */
 	struct mutex			data_direct_lock;
 	struct notifier_block   mdev_events;
+	struct notifier_block   lag_events;
 	int			num_ports;
 	/* serialize update of capability mask
 	 */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index cf8045b92689..8577db3308cc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -445,6 +445,38 @@ static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
 	return mlx5_cmd_modify_lag(dev0, ldev->ports, ports);
 }
 
+/*
+ * Pick the lower netdev of an active-backup LAG (the tx-enabled one, else the
+ * last port's) and return it with a reference held; the caller must dev_put().
+ */
+static struct net_device *mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev *dev)
+{
+	struct net_device *ndev = NULL;
+	struct mlx5_lag *ldev;
+	unsigned long flags;
+	int i;
+
+	spin_lock_irqsave(&lag_lock, flags);
+	ldev = mlx5_lag_dev(dev);
+
+	if (!ldev)
+		goto unlock;
+
+	for (i = 0; i < ldev->ports; i++)
+		if (ldev->tracker.netdev_state[i].tx_enabled)
+			ndev = ldev->pf[i].netdev;
+	if (!ndev)
+		ndev = ldev->pf[ldev->ports - 1].netdev;
+
+	if (ndev)
+		dev_hold(ndev);
+
+unlock:
+	spin_unlock_irqrestore(&lag_lock, flags);
+
+	return ndev;
+}
+
 void mlx5_modify_lag(struct mlx5_lag *ldev,
 		     struct lag_tracker *tracker)
 {
@@ -477,9 +509,22 @@ void mlx5_modify_lag(struct mlx5_lag *ldev,
 		}
 	}
 
-	if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
-	    !(ldev->mode == MLX5_LAG_MODE_ROCE))
-		mlx5_lag_drop_rule_setup(ldev, tracker);
+	if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
+		struct net_device *ndev = mlx5_lag_active_backup_get_netdev(dev0);
+
+		if (!(ldev->mode == MLX5_LAG_MODE_ROCE))
+			mlx5_lag_drop_rule_setup(ldev, tracker);
+		/* Only sriov and roce lag should have tracker->tx_type set so
+		 * no need to check the mode
+		 */
+		blocking_notifier_call_chain(&dev0->priv.lag_nh,
+					     MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
+					     ndev);
+		/* Release the lookup's reference here so it is dropped even
+		 * when no notifier is registered on the chain.
+		 */
+		dev_put(ndev);
+	}
 }
 
 static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev,
@@ -613,6 +658,7 @@ static int mlx5_create_lag(struct mlx5_lag *ldev,
 			mlx5_core_err(dev0,
 				      "Failed to deactivate RoCE LAG; driver restart required\n");
 	}
+	BLOCKING_INIT_NOTIFIER_HEAD(&dev0->priv.lag_nh);
 
 	return err;
 }
@@ -1492,38 +1538,6 @@ void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
 	mlx5_queue_bond_work(ldev, 0);
 }
 
-struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
-{
-	struct net_device *ndev = NULL;
-	struct mlx5_lag *ldev;
-	unsigned long flags;
-	int i;
-
-	spin_lock_irqsave(&lag_lock, flags);
-	ldev = mlx5_lag_dev(dev);
-
-	if (!(ldev && __mlx5_lag_is_roce(ldev)))
-		goto unlock;
-
-	if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
-		for (i = 0; i < ldev->ports; i++)
-			if (ldev->tracker.netdev_state[i].tx_enabled)
-				ndev = ldev->pf[i].netdev;
-		if (!ndev)
-			ndev = ldev->pf[ldev->ports - 1].netdev;
-	} else {
-		ndev = ldev->pf[MLX5_LAG_P1].netdev;
-	}
-	if (ndev)
-		dev_hold(ndev);
-
-unlock:
-	spin_unlock_irqrestore(&lag_lock, flags);
-
-	return ndev;
-}
-EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);
-
 u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
 			   struct net_device *slave)
 {
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 57c9b18c3adb..dac33cfe9c0c 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -371,6 +371,7 @@ enum mlx5_driver_event {
 	MLX5_DRIVER_EVENT_SF_PEER_DEVLINK,
 	MLX5_DRIVER_EVENT_AFFILIATION_DONE,
 	MLX5_DRIVER_EVENT_AFFILIATION_REMOVED,
+	MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
 };
 
 enum {
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index a96438ded15f..46a7a3d11048 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -643,6 +643,7 @@ struct mlx5_priv {
 	struct mlx5_sf_hw_table *sf_hw_table;
 	struct mlx5_sf_table *sf_table;
 #endif
+	struct blocking_notifier_head lag_nh;
 };
 
 enum mlx5_device_state {
@@ -1181,7 +1182,6 @@ bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_master(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_mpesw(struct mlx5_core_dev *dev);
-struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev);
 u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
 			   struct net_device *slave);
 int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index a1dcf812d787..aa8ede439905 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -4453,6 +4453,8 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u32 port,
 					    const struct sockaddr *addr);
 int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
 			 unsigned int port);
+struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
+					u32 port);
 struct ib_wq *ib_create_wq(struct ib_pd *pd,
 			   struct ib_wq_init_attr *init_attr);
 int ib_destroy_wq_user(struct ib_wq *wq, struct ib_udata *udata);