net/mlx5: Make RoCE and SR-IOV LAG modes explicit
With the introduction of SR-IOV LAG, checking whether LAG is active is no longer good enough, since RoCE and SR-IOV LAG each entail different behavior by both the core and infiniband drivers. This patch introduces facilities to discern LAG type, in addition to mlx5_lag_is_active(). These are implemented in such a way as to allow more complex mode combinations in the future.

Signed-off-by: Aviv Heller <avivh@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>

commit 7c34ec19e1
parent 292612d68c
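For orientation before the diff, here is a minimal sketch of how a consumer of the new API might branch on LAG mode. The function is hypothetical; only the mlx5_lag_is_*() predicates come from this patch:

	#include <linux/mlx5/driver.h>

	/* Hypothetical consumer: each LAG mode implies different behavior. */
	static void example_react_to_lag(struct mlx5_core_dev *mdev)
	{
		if (mlx5_lag_is_roce(mdev)) {
			/* RoCE LAG: a single IB device represents both ports. */
		} else if (mlx5_lag_is_sriov(mdev)) {
			/* SR-IOV LAG: e-switch offloads span the bond. */
		}

		/* Mode-agnostic check: true if any mode flag is set. */
		if (mlx5_lag_is_active(mdev)) {
			/* common handling for any LAG mode */
		}
	}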
@@ -445,7 +445,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
 	if (!ndev)
 		goto out;
 
-	if (mlx5_lag_is_active(dev->mdev)) {
+	if (dev->lag_active) {
 		rcu_read_lock();
 		upper = netdev_master_upper_dev_get_rcu(ndev);
 		if (upper) {
@@ -1848,7 +1848,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 	context->lib_caps = req.lib_caps;
 	print_lib_caps(dev, context->lib_caps);
 
-	if (mlx5_lag_is_active(dev->mdev)) {
+	if (dev->lag_active) {
 		u8 port = mlx5_core_native_port_num(dev->mdev);
 
 		atomic_set(&context->tx_port_affinity,
@@ -4841,7 +4841,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
 	struct mlx5_flow_table *ft;
 	int err;
 
-	if (!ns || !mlx5_lag_is_active(mdev))
+	if (!ns || !mlx5_lag_is_roce(mdev))
 		return 0;
 
 	err = mlx5_cmd_create_vport_lag(mdev);
@@ -4855,6 +4855,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
 	}
 
 	dev->flow_db->lag_demux_ft = ft;
+	dev->lag_active = true;
 	return 0;
 
 err_destroy_vport_lag:
@@ -4866,7 +4867,9 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
 {
 	struct mlx5_core_dev *mdev = dev->mdev;
 
-	if (dev->flow_db->lag_demux_ft) {
+	if (dev->lag_active) {
+		dev->lag_active = false;
+
 		mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft);
 		dev->flow_db->lag_demux_ft = NULL;
 
@@ -6173,7 +6176,7 @@ int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
 	const char *name;
 
 	rdma_set_device_sysfs_group(&dev->ib_dev, &mlx5_attr_group);
-	if (!mlx5_lag_is_active(dev->mdev))
+	if (!mlx5_lag_is_roce(dev->mdev))
 		name = "mlx5_%d";
 	else
 		name = "mlx5_bond_%d";

@@ -936,6 +936,7 @@ struct mlx5_ib_dev {
 	struct mlx5_ib_delay_drop	delay_drop;
 	const struct mlx5_ib_profile	*profile;
 	struct mlx5_eswitch_rep		*rep;
+	int				lag_active;
 
 	struct mlx5_ib_lb_state		lb;
 	u8			umr_fence;

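One likely motivation for the new field, inferred from the hunks above: the core's mlx5_lag_is_roce() takes the global lag_mutex on every call, so the IB driver queries it once on the slow path and caches the answer in lag_active, which hot paths (ucontext allocation, QP modify) then test directly. A rough sketch of the pattern, condensed from this patch:

	/* Slow path (mlx5_eth_lag_init): query the core once. */
	if (mlx5_lag_is_roce(dev->mdev))	/* takes lag_mutex internally */
		dev->lag_active = true;

	/* Hot path (__mlx5_ib_modify_qp): a plain field test, no lock. */
	if (dev->lag_active)
		tx_affinity = get_tx_affinity(dev, pd, base, p);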
@@ -3258,7 +3258,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 		    (ibqp->qp_type == IB_QPT_RAW_PACKET) ||
 		    (ibqp->qp_type == IB_QPT_XRC_INI) ||
 		    (ibqp->qp_type == IB_QPT_XRC_TGT)) {
-			if (mlx5_lag_is_active(dev->mdev)) {
+			if (dev->lag_active) {
 				u8 p = mlx5_core_native_port_num(dev->mdev);
 				tx_affinity = get_tx_affinity(dev, pd, base, p);
 				context->flags |= cpu_to_be32(tx_affinity << 24);

@@ -35,7 +35,7 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
 	dst_is_lag_dev = (uplink_upper &&
 			  netif_is_lag_master(uplink_upper) &&
 			  rt->dst.dev == uplink_upper &&
-			  mlx5_lag_is_active(priv->mdev));
+			  mlx5_lag_is_sriov(priv->mdev));
 
 	/* if the egress device isn't on the same HW e-switch or
 	 * it's a LAG device, use the uplink
@@ -94,7 +94,7 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
 	dst_is_lag_dev = (uplink_upper &&
 			  netif_is_lag_master(uplink_upper) &&
 			  dst->dev == uplink_upper &&
-			  mlx5_lag_is_active(priv->mdev));
+			  mlx5_lag_is_sriov(priv->mdev));
 
 	/* if the egress device isn't on the same HW e-switch or
 	 * it's a LAG device, use the uplink

@@ -314,7 +314,7 @@ int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr)
 	switch (attr->id) {
 	case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
 		attr->u.ppid.id_len = ETH_ALEN;
-		if (uplink_upper && mlx5_lag_is_active(uplink_priv->mdev)) {
+		if (uplink_upper && mlx5_lag_is_sriov(uplink_priv->mdev)) {
 			ether_addr_copy(attr->u.ppid.id, uplink_upper->dev_addr);
 		} else {
 			struct mlx5e_rep_priv *rpriv = priv->ppriv;

@@ -2718,7 +2718,7 @@ static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
 	bool esw_paired = mlx5_devcom_is_paired(attr->in_mdev->priv.devcom,
 						MLX5_DEVCOM_ESW_OFFLOADS);
 
-	return esw_paired && mlx5_lag_is_active(attr->in_mdev) &&
+	return esw_paired && mlx5_lag_is_sriov(attr->in_mdev) &&
 	       (is_rep_ingress || act_is_encap);
 }
 

@@ -37,9 +37,12 @@
 #include "eswitch.h"
 
 enum {
-	MLX5_LAG_FLAG_BONDED = 1 << 0,
+	MLX5_LAG_FLAG_ROCE   = 1 << 0,
+	MLX5_LAG_FLAG_SRIOV  = 1 << 1,
 };
 
+#define MLX5_LAG_MODE_FLAGS (MLX5_LAG_FLAG_ROCE | MLX5_LAG_FLAG_SRIOV)
+
 struct lag_func {
 	struct mlx5_core_dev *dev;
 	struct net_device    *netdev;
@@ -161,9 +164,19 @@ static int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
 	return -1;
 }
 
+static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
+{
+	return !!(ldev->flags & MLX5_LAG_FLAG_ROCE);
+}
+
+static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
+{
+	return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV);
+}
+
 static bool __mlx5_lag_is_active(struct mlx5_lag *ldev)
 {
-	return !!(ldev->flags & MLX5_LAG_FLAG_BONDED);
+	return !!(ldev->flags & MLX5_LAG_MODE_FLAGS);
 }
 
 static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
@@ -229,9 +242,10 @@ static int mlx5_create_lag(struct mlx5_lag *ldev,
 }
 
 static void mlx5_activate_lag(struct mlx5_lag *ldev,
-			      struct lag_tracker *tracker)
+			      struct lag_tracker *tracker,
+			      u8 flags)
 {
-	ldev->flags |= MLX5_LAG_FLAG_BONDED;
+	ldev->flags |= flags;
 	mlx5_create_lag(ldev, tracker);
 }
 
@@ -240,7 +254,7 @@ static void mlx5_deactivate_lag(struct mlx5_lag *ldev)
 	struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
 	int err;
 
-	ldev->flags &= ~MLX5_LAG_FLAG_BONDED;
+	ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
 
 	err = mlx5_cmd_destroy_lag(dev0);
 	if (err)
@@ -263,15 +277,13 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
 {
 	struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
 	struct mlx5_core_dev *dev1 = ldev->pf[1].dev;
-	bool do_bond, sriov_enabled;
 	struct lag_tracker tracker;
+	bool do_bond, roce_lag;
 	int i;
 
 	if (!dev0 || !dev1)
 		return;
 
-	sriov_enabled = mlx5_sriov_is_enabled(dev0) || mlx5_sriov_is_enabled(dev1);
-
 	mutex_lock(&lag_mutex);
 	tracker = ldev->tracker;
 	mutex_unlock(&lag_mutex);
@@ -279,28 +291,35 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
 	do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
 
 	if (do_bond && !__mlx5_lag_is_active(ldev)) {
-		if (!sriov_enabled)
+		roce_lag = !mlx5_sriov_is_enabled(dev0) &&
+			   !mlx5_sriov_is_enabled(dev1);
+
+		if (roce_lag)
 			for (i = 0; i < MLX5_MAX_PORTS; i++)
 				mlx5_remove_dev_by_protocol(ldev->pf[i].dev,
 							    MLX5_INTERFACE_PROTOCOL_IB);
 
-		mlx5_activate_lag(ldev, &tracker);
+		mlx5_activate_lag(ldev, &tracker,
+				  roce_lag ? MLX5_LAG_FLAG_ROCE :
+				  MLX5_LAG_FLAG_SRIOV);
 
-		if (!sriov_enabled) {
+		if (roce_lag) {
 			mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
 			mlx5_nic_vport_enable_roce(dev1);
 		}
 	} else if (do_bond && __mlx5_lag_is_active(ldev)) {
 		mlx5_modify_lag(ldev, &tracker);
 	} else if (!do_bond && __mlx5_lag_is_active(ldev)) {
-		if (!sriov_enabled) {
+		roce_lag = __mlx5_lag_is_roce(ldev);
+
+		if (roce_lag) {
 			mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
 			mlx5_nic_vport_disable_roce(dev1);
 		}
 
 		mlx5_deactivate_lag(ldev);
 
-		if (!sriov_enabled)
+		if (roce_lag)
 			for (i = 0; i < MLX5_MAX_PORTS; i++)
 				if (ldev->pf[i].dev)
 					mlx5_add_dev_by_protocol(ldev->pf[i].dev,
@@ -572,6 +591,20 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev)
 	}
 }
 
+bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
+{
+	struct mlx5_lag *ldev;
+	bool res;
+
+	mutex_lock(&lag_mutex);
+	ldev = mlx5_lag_dev_get(dev);
+	res  = ldev && __mlx5_lag_is_roce(ldev);
+	mutex_unlock(&lag_mutex);
+
+	return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_roce);
+
 bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
 {
 	struct mlx5_lag *ldev;
@@ -586,6 +619,20 @@ bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
 }
 EXPORT_SYMBOL(mlx5_lag_is_active);
 
+bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
+{
+	struct mlx5_lag *ldev;
+	bool res;
+
+	mutex_lock(&lag_mutex);
+	ldev = mlx5_lag_dev_get(dev);
+	res  = ldev && __mlx5_lag_is_sriov(ldev);
+	mutex_unlock(&lag_mutex);
+
+	return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_sriov);
+
 void mlx5_lag_update(struct mlx5_core_dev *dev)
 {
 	struct mlx5_lag *ldev;
@@ -609,7 +656,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
 	mutex_lock(&lag_mutex);
 	ldev = mlx5_lag_dev_get(dev);
 
-	if (!(ldev && __mlx5_lag_is_active(ldev)))
+	if (!(ldev && __mlx5_lag_is_roce(ldev)))
 		goto unlock;
 
 	if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
@@ -638,7 +685,7 @@ bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv)
 		return true;
 
 	ldev = mlx5_lag_dev_get(dev);
-	if (!ldev || !__mlx5_lag_is_active(ldev) || ldev->pf[0].dev == dev)
+	if (!ldev || !__mlx5_lag_is_roce(ldev) || ldev->pf[0].dev == dev)
 		return true;
 
 	/* If bonded, we do not add an IB device for PF1. */
@@ -665,7 +712,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
 
 	mutex_lock(&lag_mutex);
 	ldev = mlx5_lag_dev_get(dev);
-	if (ldev && __mlx5_lag_is_active(ldev)) {
+	if (ldev && __mlx5_lag_is_roce(ldev)) {
 		num_ports = MLX5_MAX_PORTS;
 		mdev[0] = ldev->pf[0].dev;
 		mdev[1] = ldev->pf[1].dev;

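Because the mode is now a bit in a mask rather than the single BONDED flag, the commit message's claim about future mode combinations is easy to see: a new mode only needs another flag and an extended mask, while __mlx5_lag_is_active() and mlx5_deactivate_lag() stay unchanged. A sketch of such an extension (MLX5_LAG_FLAG_NEWMODE is a made-up placeholder, not part of this patch):

	enum {
		MLX5_LAG_FLAG_ROCE    = 1 << 0,
		MLX5_LAG_FLAG_SRIOV   = 1 << 1,
		MLX5_LAG_FLAG_NEWMODE = 1 << 2,	/* hypothetical future mode */
	};

	#define MLX5_LAG_MODE_FLAGS \
		(MLX5_LAG_FLAG_ROCE | MLX5_LAG_FLAG_SRIOV | MLX5_LAG_FLAG_NEWMODE)

	/* Unchanged: "active" means any mode bit is set. */
	static bool __mlx5_lag_is_active(struct mlx5_lag *ldev)
	{
		return !!(ldev->flags & MLX5_LAG_MODE_FLAGS);
	}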
@@ -1019,6 +1019,8 @@ int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id);
 
 int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev);
 int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev);
+bool mlx5_lag_is_roce(struct mlx5_core_dev *dev);
+bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_active(struct mlx5_core_dev *dev);
 struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev);
 int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,